zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

avx512vlintrin.h (330339B) - Raw


      1 /*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------===
      2  *
      3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4  * See https://llvm.org/LICENSE.txt for license information.
      5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6  *
      7  *===-----------------------------------------------------------------------===
      8  */
      9 
     10 #ifndef __IMMINTRIN_H
     11 #error "Never use <avx512vlintrin.h> directly; include <immintrin.h> instead."
     12 #endif
     13 
     14 #ifndef __AVX512VLINTRIN_H
     15 #define __AVX512VLINTRIN_H
     16 
     17 #define __DEFAULT_FN_ATTRS128                                                  \
     18   __attribute__((__always_inline__, __nodebug__,                               \
     19                  __target__("avx512vl,no-evex512"),                            \
     20                  __min_vector_width__(128)))
     21 #define __DEFAULT_FN_ATTRS256                                                  \
     22   __attribute__((__always_inline__, __nodebug__,                               \
     23                  __target__("avx512vl,no-evex512"),                            \
     24                  __min_vector_width__(256)))
     25 
     26 typedef short __v2hi __attribute__((__vector_size__(4)));
     27 typedef char __v4qi __attribute__((__vector_size__(4)));
     28 typedef char __v2qi __attribute__((__vector_size__(2)));
     29 
     30 /* Integer compare */
     31 
     32 #define _mm_cmpeq_epi32_mask(A, B) \
     33     _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
     34 #define _mm_mask_cmpeq_epi32_mask(k, A, B) \
     35     _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
     36 #define _mm_cmpge_epi32_mask(A, B) \
     37     _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
     38 #define _mm_mask_cmpge_epi32_mask(k, A, B) \
     39     _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
     40 #define _mm_cmpgt_epi32_mask(A, B) \
     41     _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
     42 #define _mm_mask_cmpgt_epi32_mask(k, A, B) \
     43     _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
     44 #define _mm_cmple_epi32_mask(A, B) \
     45     _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
     46 #define _mm_mask_cmple_epi32_mask(k, A, B) \
     47     _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
     48 #define _mm_cmplt_epi32_mask(A, B) \
     49     _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
     50 #define _mm_mask_cmplt_epi32_mask(k, A, B) \
     51     _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
     52 #define _mm_cmpneq_epi32_mask(A, B) \
     53     _mm_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
     54 #define _mm_mask_cmpneq_epi32_mask(k, A, B) \
     55     _mm_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
     56 
     57 #define _mm256_cmpeq_epi32_mask(A, B) \
     58     _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_EQ)
     59 #define _mm256_mask_cmpeq_epi32_mask(k, A, B) \
     60     _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_EQ)
     61 #define _mm256_cmpge_epi32_mask(A, B) \
     62     _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GE)
     63 #define _mm256_mask_cmpge_epi32_mask(k, A, B) \
     64     _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GE)
     65 #define _mm256_cmpgt_epi32_mask(A, B) \
     66     _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_GT)
     67 #define _mm256_mask_cmpgt_epi32_mask(k, A, B) \
     68     _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_GT)
     69 #define _mm256_cmple_epi32_mask(A, B) \
     70     _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LE)
     71 #define _mm256_mask_cmple_epi32_mask(k, A, B) \
     72     _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LE)
     73 #define _mm256_cmplt_epi32_mask(A, B) \
     74     _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_LT)
     75 #define _mm256_mask_cmplt_epi32_mask(k, A, B) \
     76     _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_LT)
     77 #define _mm256_cmpneq_epi32_mask(A, B) \
     78     _mm256_cmp_epi32_mask((A), (B), _MM_CMPINT_NE)
     79 #define _mm256_mask_cmpneq_epi32_mask(k, A, B) \
     80     _mm256_mask_cmp_epi32_mask((k), (A), (B), _MM_CMPINT_NE)
     81 
     82 #define _mm_cmpeq_epu32_mask(A, B) \
     83     _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
     84 #define _mm_mask_cmpeq_epu32_mask(k, A, B) \
     85     _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
     86 #define _mm_cmpge_epu32_mask(A, B) \
     87     _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
     88 #define _mm_mask_cmpge_epu32_mask(k, A, B) \
     89     _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
     90 #define _mm_cmpgt_epu32_mask(A, B) \
     91     _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
     92 #define _mm_mask_cmpgt_epu32_mask(k, A, B) \
     93     _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
     94 #define _mm_cmple_epu32_mask(A, B) \
     95     _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
     96 #define _mm_mask_cmple_epu32_mask(k, A, B) \
     97     _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
     98 #define _mm_cmplt_epu32_mask(A, B) \
     99     _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
    100 #define _mm_mask_cmplt_epu32_mask(k, A, B) \
    101     _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
    102 #define _mm_cmpneq_epu32_mask(A, B) \
    103     _mm_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
    104 #define _mm_mask_cmpneq_epu32_mask(k, A, B) \
    105     _mm_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
    106 
    107 #define _mm256_cmpeq_epu32_mask(A, B) \
    108     _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_EQ)
    109 #define _mm256_mask_cmpeq_epu32_mask(k, A, B) \
    110     _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_EQ)
    111 #define _mm256_cmpge_epu32_mask(A, B) \
    112     _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GE)
    113 #define _mm256_mask_cmpge_epu32_mask(k, A, B) \
    114     _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GE)
    115 #define _mm256_cmpgt_epu32_mask(A, B) \
    116     _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_GT)
    117 #define _mm256_mask_cmpgt_epu32_mask(k, A, B) \
    118     _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_GT)
    119 #define _mm256_cmple_epu32_mask(A, B) \
    120     _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LE)
    121 #define _mm256_mask_cmple_epu32_mask(k, A, B) \
    122     _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LE)
    123 #define _mm256_cmplt_epu32_mask(A, B) \
    124     _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_LT)
    125 #define _mm256_mask_cmplt_epu32_mask(k, A, B) \
    126     _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_LT)
    127 #define _mm256_cmpneq_epu32_mask(A, B) \
    128     _mm256_cmp_epu32_mask((A), (B), _MM_CMPINT_NE)
    129 #define _mm256_mask_cmpneq_epu32_mask(k, A, B) \
    130     _mm256_mask_cmp_epu32_mask((k), (A), (B), _MM_CMPINT_NE)
    131 
    132 #define _mm_cmpeq_epi64_mask(A, B) \
    133     _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
    134 #define _mm_mask_cmpeq_epi64_mask(k, A, B) \
    135     _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
    136 #define _mm_cmpge_epi64_mask(A, B) \
    137     _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
    138 #define _mm_mask_cmpge_epi64_mask(k, A, B) \
    139     _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
    140 #define _mm_cmpgt_epi64_mask(A, B) \
    141     _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
    142 #define _mm_mask_cmpgt_epi64_mask(k, A, B) \
    143     _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
    144 #define _mm_cmple_epi64_mask(A, B) \
    145     _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
    146 #define _mm_mask_cmple_epi64_mask(k, A, B) \
    147     _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
    148 #define _mm_cmplt_epi64_mask(A, B) \
    149     _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
    150 #define _mm_mask_cmplt_epi64_mask(k, A, B) \
    151     _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
    152 #define _mm_cmpneq_epi64_mask(A, B) \
    153     _mm_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
    154 #define _mm_mask_cmpneq_epi64_mask(k, A, B) \
    155     _mm_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
    156 
    157 #define _mm256_cmpeq_epi64_mask(A, B) \
    158     _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_EQ)
    159 #define _mm256_mask_cmpeq_epi64_mask(k, A, B) \
    160     _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_EQ)
    161 #define _mm256_cmpge_epi64_mask(A, B) \
    162     _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GE)
    163 #define _mm256_mask_cmpge_epi64_mask(k, A, B) \
    164     _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GE)
    165 #define _mm256_cmpgt_epi64_mask(A, B) \
    166     _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_GT)
    167 #define _mm256_mask_cmpgt_epi64_mask(k, A, B) \
    168     _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_GT)
    169 #define _mm256_cmple_epi64_mask(A, B) \
    170     _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LE)
    171 #define _mm256_mask_cmple_epi64_mask(k, A, B) \
    172     _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LE)
    173 #define _mm256_cmplt_epi64_mask(A, B) \
    174     _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_LT)
    175 #define _mm256_mask_cmplt_epi64_mask(k, A, B) \
    176     _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_LT)
    177 #define _mm256_cmpneq_epi64_mask(A, B) \
    178     _mm256_cmp_epi64_mask((A), (B), _MM_CMPINT_NE)
    179 #define _mm256_mask_cmpneq_epi64_mask(k, A, B) \
    180     _mm256_mask_cmp_epi64_mask((k), (A), (B), _MM_CMPINT_NE)
    181 
    182 #define _mm_cmpeq_epu64_mask(A, B) \
    183     _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
    184 #define _mm_mask_cmpeq_epu64_mask(k, A, B) \
    185     _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
    186 #define _mm_cmpge_epu64_mask(A, B) \
    187     _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
    188 #define _mm_mask_cmpge_epu64_mask(k, A, B) \
    189     _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
    190 #define _mm_cmpgt_epu64_mask(A, B) \
    191     _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
    192 #define _mm_mask_cmpgt_epu64_mask(k, A, B) \
    193     _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
    194 #define _mm_cmple_epu64_mask(A, B) \
    195     _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
    196 #define _mm_mask_cmple_epu64_mask(k, A, B) \
    197     _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
    198 #define _mm_cmplt_epu64_mask(A, B) \
    199     _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
    200 #define _mm_mask_cmplt_epu64_mask(k, A, B) \
    201     _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
    202 #define _mm_cmpneq_epu64_mask(A, B) \
    203     _mm_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
    204 #define _mm_mask_cmpneq_epu64_mask(k, A, B) \
    205     _mm_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
    206 
    207 #define _mm256_cmpeq_epu64_mask(A, B) \
    208     _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_EQ)
    209 #define _mm256_mask_cmpeq_epu64_mask(k, A, B) \
    210     _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_EQ)
    211 #define _mm256_cmpge_epu64_mask(A, B) \
    212     _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GE)
    213 #define _mm256_mask_cmpge_epu64_mask(k, A, B) \
    214     _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GE)
    215 #define _mm256_cmpgt_epu64_mask(A, B) \
    216     _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_GT)
    217 #define _mm256_mask_cmpgt_epu64_mask(k, A, B) \
    218     _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_GT)
    219 #define _mm256_cmple_epu64_mask(A, B) \
    220     _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LE)
    221 #define _mm256_mask_cmple_epu64_mask(k, A, B) \
    222     _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LE)
    223 #define _mm256_cmplt_epu64_mask(A, B) \
    224     _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_LT)
    225 #define _mm256_mask_cmplt_epu64_mask(k, A, B) \
    226     _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_LT)
    227 #define _mm256_cmpneq_epu64_mask(A, B) \
    228     _mm256_cmp_epu64_mask((A), (B), _MM_CMPINT_NE)
    229 #define _mm256_mask_cmpneq_epu64_mask(k, A, B) \
    230     _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
    231 
    232 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    233 _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
    234 {
    235   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
    236                                              (__v8si)_mm256_add_epi32(__A, __B),
    237                                              (__v8si)__W);
    238 }
    239 
    240 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    241 _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
    242 {
    243   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
    244                                              (__v8si)_mm256_add_epi32(__A, __B),
    245                                              (__v8si)_mm256_setzero_si256());
    246 }
    247 
    248 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    249 _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
    250 {
    251   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
    252                                              (__v4di)_mm256_add_epi64(__A, __B),
    253                                              (__v4di)__W);
    254 }
    255 
    256 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    257 _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
    258 {
    259   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
    260                                              (__v4di)_mm256_add_epi64(__A, __B),
    261                                              (__v4di)_mm256_setzero_si256());
    262 }
    263 
    264 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    265 _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
    266 {
    267   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
    268                                              (__v8si)_mm256_sub_epi32(__A, __B),
    269                                              (__v8si)__W);
    270 }
    271 
    272 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    273 _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
    274 {
    275   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
    276                                              (__v8si)_mm256_sub_epi32(__A, __B),
    277                                              (__v8si)_mm256_setzero_si256());
    278 }
    279 
    280 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    281 _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
    282 {
    283   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
    284                                              (__v4di)_mm256_sub_epi64(__A, __B),
    285                                              (__v4di)__W);
    286 }
    287 
    288 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    289 _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
    290 {
    291   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
    292                                              (__v4di)_mm256_sub_epi64(__A, __B),
    293                                              (__v4di)_mm256_setzero_si256());
    294 }
    295 
    296 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    297 _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
    298 {
    299   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
    300                                              (__v4si)_mm_add_epi32(__A, __B),
    301                                              (__v4si)__W);
    302 }
    303 
    304 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    305 _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
    306 {
    307   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
    308                                              (__v4si)_mm_add_epi32(__A, __B),
    309                                              (__v4si)_mm_setzero_si128());
    310 }
    311 
    312 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    313 _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
    314 {
    315   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
    316                                              (__v2di)_mm_add_epi64(__A, __B),
    317                                              (__v2di)__W);
    318 }
    319 
    320 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    321 _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
    322 {
    323   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
    324                                              (__v2di)_mm_add_epi64(__A, __B),
    325                                              (__v2di)_mm_setzero_si128());
    326 }
    327 
    328 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    329 _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
    330 {
    331   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
    332                                              (__v4si)_mm_sub_epi32(__A, __B),
    333                                              (__v4si)__W);
    334 }
    335 
    336 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    337 _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
    338 {
    339   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
    340                                              (__v4si)_mm_sub_epi32(__A, __B),
    341                                              (__v4si)_mm_setzero_si128());
    342 }
    343 
    344 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    345 _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
    346 {
    347   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
    348                                              (__v2di)_mm_sub_epi64(__A, __B),
    349                                              (__v2di)__W);
    350 }
    351 
    352 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    353 _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
    354 {
    355   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
    356                                              (__v2di)_mm_sub_epi64(__A, __B),
    357                                              (__v2di)_mm_setzero_si128());
    358 }
    359 
    360 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    361 _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
    362 {
    363   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
    364                                              (__v4di)_mm256_mul_epi32(__X, __Y),
    365                                              (__v4di)__W);
    366 }
    367 
    368 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    369 _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
    370 {
    371   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
    372                                              (__v4di)_mm256_mul_epi32(__X, __Y),
    373                                              (__v4di)_mm256_setzero_si256());
    374 }
    375 
    376 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    377 _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
    378 {
    379   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
    380                                              (__v2di)_mm_mul_epi32(__X, __Y),
    381                                              (__v2di)__W);
    382 }
    383 
    384 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    385 _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
    386 {
    387   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
    388                                              (__v2di)_mm_mul_epi32(__X, __Y),
    389                                              (__v2di)_mm_setzero_si128());
    390 }
    391 
    392 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    393 _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
    394 {
    395   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
    396                                              (__v4di)_mm256_mul_epu32(__X, __Y),
    397                                              (__v4di)__W);
    398 }
    399 
    400 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    401 _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
    402 {
    403   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
    404                                              (__v4di)_mm256_mul_epu32(__X, __Y),
    405                                              (__v4di)_mm256_setzero_si256());
    406 }
    407 
    408 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    409 _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
    410 {
    411   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
    412                                              (__v2di)_mm_mul_epu32(__X, __Y),
    413                                              (__v2di)__W);
    414 }
    415 
    416 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    417 _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
    418 {
    419   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
    420                                              (__v2di)_mm_mul_epu32(__X, __Y),
    421                                              (__v2di)_mm_setzero_si128());
    422 }
    423 
    424 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    425 _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
    426 {
    427   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
    428                                              (__v8si)_mm256_mullo_epi32(__A, __B),
    429                                              (__v8si)_mm256_setzero_si256());
    430 }
    431 
    432 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    433 _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
    434 {
    435   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
    436                                              (__v8si)_mm256_mullo_epi32(__A, __B),
    437                                              (__v8si)__W);
    438 }
    439 
    440 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    441 _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
    442 {
    443   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
    444                                              (__v4si)_mm_mullo_epi32(__A, __B),
    445                                              (__v4si)_mm_setzero_si128());
    446 }
    447 
    448 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    449 _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
    450 {
    451   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
    452                                              (__v4si)_mm_mullo_epi32(__A, __B),
    453                                              (__v4si)__W);
    454 }
    455 
    456 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    457 _mm256_and_epi32(__m256i __a, __m256i __b)
    458 {
    459   return (__m256i)((__v8su)__a & (__v8su)__b);
    460 }
    461 
    462 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    463 _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
    464 {
    465   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
    466                                              (__v8si)_mm256_and_epi32(__A, __B),
    467                                              (__v8si)__W);
    468 }
    469 
    470 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    471 _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
    472 {
    473   return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B);
    474 }
    475 
    476 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    477 _mm_and_epi32(__m128i __a, __m128i __b)
    478 {
    479   return (__m128i)((__v4su)__a & (__v4su)__b);
    480 }
    481 
    482 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    483 _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
    484 {
    485   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
    486                                              (__v4si)_mm_and_epi32(__A, __B),
    487                                              (__v4si)__W);
    488 }
    489 
    490 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    491 _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
    492 {
    493   return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B);
    494 }
    495 
    496 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    497 _mm256_andnot_epi32(__m256i __A, __m256i __B)
    498 {
    499   return (__m256i)(~(__v8su)__A & (__v8su)__B);
    500 }
    501 
    502 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    503 _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
    504 {
    505   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
    506                                           (__v8si)_mm256_andnot_epi32(__A, __B),
    507                                           (__v8si)__W);
    508 }
    509 
    510 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    511 _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
    512 {
    513   return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(),
    514                                            __U, __A, __B);
    515 }
    516 
    517 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    518 _mm_andnot_epi32(__m128i __A, __m128i __B)
    519 {
    520   return (__m128i)(~(__v4su)__A & (__v4su)__B);
    521 }
    522 
    523 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    524 _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
    525 {
    526   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
    527                                              (__v4si)_mm_andnot_epi32(__A, __B),
    528                                              (__v4si)__W);
    529 }
    530 
    531 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    532 _mm_maskz_andnot_epi32(__mmask8 __U, __m128i __A, __m128i __B)
    533 {
    534   return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B);
    535 }
    536 
    537 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    538 _mm256_or_epi32(__m256i __a, __m256i __b)
    539 {
    540   return (__m256i)((__v8su)__a | (__v8su)__b);
    541 }
    542 
    543 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    544 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
    545 {
    546   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
    547                                              (__v8si)_mm256_or_epi32(__A, __B),
    548                                              (__v8si)__W);
    549 }
    550 
    551 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    552 _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
    553 {
    554   return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B);
    555 }
    556 
    557 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    558 _mm_or_epi32(__m128i __a, __m128i __b)
    559 {
    560   return (__m128i)((__v4su)__a | (__v4su)__b);
    561 }
    562 
    563 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    564 _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
    565 {
    566   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
    567                                              (__v4si)_mm_or_epi32(__A, __B),
    568                                              (__v4si)__W);
    569 }
    570 
    571 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    572 _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
    573 {
    574   return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B);
    575 }
    576 
    577 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    578 _mm256_xor_epi32(__m256i __a, __m256i __b)
    579 {
    580   return (__m256i)((__v8su)__a ^ (__v8su)__b);
    581 }
    582 
    583 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    584 _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
    585 {
    586   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
    587                                              (__v8si)_mm256_xor_epi32(__A, __B),
    588                                              (__v8si)__W);
    589 }
    590 
    591 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    592 _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
    593 {
    594   return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B);
    595 }
    596 
    597 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    598 _mm_xor_epi32(__m128i __a, __m128i __b)
    599 {
    600   return (__m128i)((__v4su)__a ^ (__v4su)__b);
    601 }
    602 
    603 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    604 _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
    605 {
    606   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
    607                                              (__v4si)_mm_xor_epi32(__A, __B),
    608                                              (__v4si)__W);
    609 }
    610 
    611 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    612 _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
    613 {
    614   return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B);
    615 }
    616 
    617 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    618 _mm256_and_epi64(__m256i __a, __m256i __b)
    619 {
    620   return (__m256i)((__v4du)__a & (__v4du)__b);
    621 }
    622 
    623 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    624 _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
    625 {
    626   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
    627                                              (__v4di)_mm256_and_epi64(__A, __B),
    628                                              (__v4di)__W);
    629 }
    630 
    631 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    632 _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
    633 {
    634   return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B);
    635 }
    636 
    637 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    638 _mm_and_epi64(__m128i __a, __m128i __b)
    639 {
    640   return (__m128i)((__v2du)__a & (__v2du)__b);
    641 }
    642 
    643 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    644 _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
    645 {
    646   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
    647                                              (__v2di)_mm_and_epi64(__A, __B),
    648                                              (__v2di)__W);
    649 }
    650 
    651 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    652 _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
    653 {
    654   return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B);
    655 }
    656 
    657 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    658 _mm256_andnot_epi64(__m256i __A, __m256i __B)
    659 {
    660   return (__m256i)(~(__v4du)__A & (__v4du)__B);
    661 }
    662 
    663 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    664 _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
    665 {
    666   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
    667                                           (__v4di)_mm256_andnot_epi64(__A, __B),
    668                                           (__v4di)__W);
    669 }
    670 
    671 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    672 _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
    673 {
    674   return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(),
    675                                            __U, __A, __B);
    676 }
    677 
    678 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    679 _mm_andnot_epi64(__m128i __A, __m128i __B)
    680 {
    681   return (__m128i)(~(__v2du)__A & (__v2du)__B);
    682 }
    683 
    684 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    685 _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
    686 {
    687   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
    688                                              (__v2di)_mm_andnot_epi64(__A, __B),
    689                                              (__v2di)__W);
    690 }
    691 
    692 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    693 _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
    694 {
    695   return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B);
    696 }
    697 
    698 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    699 _mm256_or_epi64(__m256i __a, __m256i __b)
    700 {
    701   return (__m256i)((__v4du)__a | (__v4du)__b);
    702 }
    703 
    704 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    705 _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
    706 {
    707   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
    708                                              (__v4di)_mm256_or_epi64(__A, __B),
    709                                              (__v4di)__W);
    710 }
    711 
    712 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    713 _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
    714 {
    715   return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B);
    716 }
    717 
    718 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    719 _mm_or_epi64(__m128i __a, __m128i __b)
    720 {
    721   return (__m128i)((__v2du)__a | (__v2du)__b);
    722 }
    723 
    724 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    725 _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
    726 {
    727   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
    728                                              (__v2di)_mm_or_epi64(__A, __B),
    729                                              (__v2di)__W);
    730 }
    731 
    732 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    733 _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
    734 {
    735   return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B);
    736 }
    737 
    738 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    739 _mm256_xor_epi64(__m256i __a, __m256i __b)
    740 {
    741   return (__m256i)((__v4du)__a ^ (__v4du)__b);
    742 }
    743 
    744 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    745 _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
    746 {
    747   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
    748                                              (__v4di)_mm256_xor_epi64(__A, __B),
    749                                              (__v4di)__W);
    750 }
    751 
    752 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    753 _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
    754 {
    755   return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B);
    756 }
    757 
    758 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    759 _mm_xor_epi64(__m128i __a, __m128i __b)
    760 {
    761   return (__m128i)((__v2du)__a ^ (__v2du)__b);
    762 }
    763 
    764 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    765 _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A,
    766         __m128i __B)
    767 {
    768   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
    769                                              (__v2di)_mm_xor_epi64(__A, __B),
    770                                              (__v2di)__W);
    771 }
    772 
    773 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    774 _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
    775 {
    776   return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B);
    777 }
    778 
    779 #define _mm_cmp_epi32_mask(a, b, p) \
    780   ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
    781                                          (__v4si)(__m128i)(b), (int)(p), \
    782                                          (__mmask8)-1))
    783 
    784 #define _mm_mask_cmp_epi32_mask(m, a, b, p) \
    785   ((__mmask8)__builtin_ia32_cmpd128_mask((__v4si)(__m128i)(a), \
    786                                          (__v4si)(__m128i)(b), (int)(p), \
    787                                          (__mmask8)(m)))
    788 
    789 #define _mm_cmp_epu32_mask(a, b, p) \
    790   ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
    791                                           (__v4si)(__m128i)(b), (int)(p), \
    792                                           (__mmask8)-1))
    793 
    794 #define _mm_mask_cmp_epu32_mask(m, a, b, p) \
    795   ((__mmask8)__builtin_ia32_ucmpd128_mask((__v4si)(__m128i)(a), \
    796                                           (__v4si)(__m128i)(b), (int)(p), \
    797                                           (__mmask8)(m)))
    798 
    799 #define _mm256_cmp_epi32_mask(a, b, p) \
    800   ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
    801                                          (__v8si)(__m256i)(b), (int)(p), \
    802                                          (__mmask8)-1))
    803 
    804 #define _mm256_mask_cmp_epi32_mask(m, a, b, p) \
    805   ((__mmask8)__builtin_ia32_cmpd256_mask((__v8si)(__m256i)(a), \
    806                                          (__v8si)(__m256i)(b), (int)(p), \
    807                                          (__mmask8)(m)))
    808 
    809 #define _mm256_cmp_epu32_mask(a, b, p) \
    810   ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
    811                                           (__v8si)(__m256i)(b), (int)(p), \
    812                                           (__mmask8)-1))
    813 
    814 #define _mm256_mask_cmp_epu32_mask(m, a, b, p) \
    815   ((__mmask8)__builtin_ia32_ucmpd256_mask((__v8si)(__m256i)(a), \
    816                                           (__v8si)(__m256i)(b), (int)(p), \
    817                                           (__mmask8)(m)))
    818 
    819 #define _mm_cmp_epi64_mask(a, b, p) \
    820   ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
    821                                          (__v2di)(__m128i)(b), (int)(p), \
    822                                          (__mmask8)-1))
    823 
    824 #define _mm_mask_cmp_epi64_mask(m, a, b, p) \
    825   ((__mmask8)__builtin_ia32_cmpq128_mask((__v2di)(__m128i)(a), \
    826                                          (__v2di)(__m128i)(b), (int)(p), \
    827                                          (__mmask8)(m)))
    828 
    829 #define _mm_cmp_epu64_mask(a, b, p) \
    830   ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
    831                                           (__v2di)(__m128i)(b), (int)(p), \
    832                                           (__mmask8)-1))
    833 
    834 #define _mm_mask_cmp_epu64_mask(m, a, b, p) \
    835   ((__mmask8)__builtin_ia32_ucmpq128_mask((__v2di)(__m128i)(a), \
    836                                           (__v2di)(__m128i)(b), (int)(p), \
    837                                           (__mmask8)(m)))
    838 
    839 #define _mm256_cmp_epi64_mask(a, b, p) \
    840   ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
    841                                          (__v4di)(__m256i)(b), (int)(p), \
    842                                          (__mmask8)-1))
    843 
    844 #define _mm256_mask_cmp_epi64_mask(m, a, b, p) \
    845   ((__mmask8)__builtin_ia32_cmpq256_mask((__v4di)(__m256i)(a), \
    846                                          (__v4di)(__m256i)(b), (int)(p), \
    847                                          (__mmask8)(m)))
    848 
    849 #define _mm256_cmp_epu64_mask(a, b, p) \
    850   ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
    851                                           (__v4di)(__m256i)(b), (int)(p), \
    852                                           (__mmask8)-1))
    853 
    854 #define _mm256_mask_cmp_epu64_mask(m, a, b, p) \
    855   ((__mmask8)__builtin_ia32_ucmpq256_mask((__v4di)(__m256i)(a), \
    856                                           (__v4di)(__m256i)(b), (int)(p), \
    857                                           (__mmask8)(m)))
    858 
    859 #define _mm256_cmp_ps_mask(a, b, p)  \
    860   ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
    861                                           (__v8sf)(__m256)(b), (int)(p), \
    862                                           (__mmask8)-1))
    863 
    864 #define _mm256_mask_cmp_ps_mask(m, a, b, p)  \
    865   ((__mmask8)__builtin_ia32_cmpps256_mask((__v8sf)(__m256)(a), \
    866                                           (__v8sf)(__m256)(b), (int)(p), \
    867                                           (__mmask8)(m)))
    868 
    869 #define _mm256_cmp_pd_mask(a, b, p)  \
    870   ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
    871                                           (__v4df)(__m256d)(b), (int)(p), \
    872                                           (__mmask8)-1))
    873 
    874 #define _mm256_mask_cmp_pd_mask(m, a, b, p)  \
    875   ((__mmask8)__builtin_ia32_cmppd256_mask((__v4df)(__m256d)(a), \
    876                                           (__v4df)(__m256d)(b), (int)(p), \
    877                                           (__mmask8)(m)))
    878 
    879 #define _mm_cmp_ps_mask(a, b, p)  \
    880   ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
    881                                           (__v4sf)(__m128)(b), (int)(p), \
    882                                           (__mmask8)-1))
    883 
    884 #define _mm_mask_cmp_ps_mask(m, a, b, p)  \
    885   ((__mmask8)__builtin_ia32_cmpps128_mask((__v4sf)(__m128)(a), \
    886                                           (__v4sf)(__m128)(b), (int)(p), \
    887                                           (__mmask8)(m)))
    888 
    889 #define _mm_cmp_pd_mask(a, b, p)  \
    890   ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
    891                                           (__v2df)(__m128d)(b), (int)(p), \
    892                                           (__mmask8)-1))
    893 
    894 #define _mm_mask_cmp_pd_mask(m, a, b, p)  \
    895   ((__mmask8)__builtin_ia32_cmppd128_mask((__v2df)(__m128d)(a), \
    896                                           (__v2df)(__m128d)(b), (int)(p), \
    897                                           (__mmask8)(m)))
    898 
    899 static __inline__ __m128d __DEFAULT_FN_ATTRS128
    900 _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
    901 {
    902   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
    903                     __builtin_ia32_vfmaddpd ((__v2df) __A,
    904                                              (__v2df) __B,
    905                                              (__v2df) __C),
    906                     (__v2df) __A);
    907 }
    908 
    909 static __inline__ __m128d __DEFAULT_FN_ATTRS128
    910 _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
    911 {
    912   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
    913                     __builtin_ia32_vfmaddpd ((__v2df) __A,
    914                                              (__v2df) __B,
    915                                              (__v2df) __C),
    916                     (__v2df) __C);
    917 }
    918 
    919 static __inline__ __m128d __DEFAULT_FN_ATTRS128
    920 _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
    921 {
    922   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
    923                     __builtin_ia32_vfmaddpd ((__v2df) __A,
    924                                              (__v2df) __B,
    925                                              (__v2df) __C),
    926                     (__v2df)_mm_setzero_pd());
    927 }
    928 
    929 static __inline__ __m128d __DEFAULT_FN_ATTRS128
    930 _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
    931 {
    932   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
    933                     __builtin_ia32_vfmaddpd ((__v2df) __A,
    934                                              (__v2df) __B,
    935                                              -(__v2df) __C),
    936                     (__v2df) __A);
    937 }
    938 
    939 static __inline__ __m128d __DEFAULT_FN_ATTRS128
    940 _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
    941 {
    942   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
    943                     __builtin_ia32_vfmaddpd ((__v2df) __A,
    944                                              (__v2df) __B,
    945                                              -(__v2df) __C),
    946                     (__v2df)_mm_setzero_pd());
    947 }
    948 
    949 static __inline__ __m128d __DEFAULT_FN_ATTRS128
    950 _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
    951 {
    952   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
    953                     __builtin_ia32_vfmaddpd (-(__v2df) __A,
    954                                              (__v2df) __B,
    955                                              (__v2df) __C),
    956                     (__v2df) __C);
    957 }
    958 
    959 static __inline__ __m128d __DEFAULT_FN_ATTRS128
    960 _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
    961 {
    962   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
    963                     __builtin_ia32_vfmaddpd (-(__v2df) __A,
    964                                              (__v2df) __B,
    965                                              (__v2df) __C),
    966                     (__v2df)_mm_setzero_pd());
    967 }
    968 
    969 static __inline__ __m128d __DEFAULT_FN_ATTRS128
    970 _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
    971 {
    972   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
    973                     __builtin_ia32_vfmaddpd (-(__v2df) __A,
    974                                              (__v2df) __B,
    975                                              -(__v2df) __C),
    976                     (__v2df)_mm_setzero_pd());
    977 }
    978 
    979 static __inline__ __m256d __DEFAULT_FN_ATTRS256
    980 _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
    981 {
    982   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
    983                     __builtin_ia32_vfmaddpd256 ((__v4df) __A,
    984                                                 (__v4df) __B,
    985                                                 (__v4df) __C),
    986                     (__v4df) __A);
    987 }
    988 
    989 static __inline__ __m256d __DEFAULT_FN_ATTRS256
    990 _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
    991 {
    992   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
    993                     __builtin_ia32_vfmaddpd256 ((__v4df) __A,
    994                                                 (__v4df) __B,
    995                                                 (__v4df) __C),
    996                     (__v4df) __C);
    997 }
    998 
    999 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   1000 _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
   1001 {
   1002   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
   1003                     __builtin_ia32_vfmaddpd256 ((__v4df) __A,
   1004                                                 (__v4df) __B,
   1005                                                 (__v4df) __C),
   1006                     (__v4df)_mm256_setzero_pd());
   1007 }
   1008 
   1009 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   1010 _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
   1011 {
   1012   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
   1013                     __builtin_ia32_vfmaddpd256 ((__v4df) __A,
   1014                                                 (__v4df) __B,
   1015                                                 -(__v4df) __C),
   1016                     (__v4df) __A);
   1017 }
   1018 
   1019 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   1020 _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
   1021 {
   1022   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
   1023                     __builtin_ia32_vfmaddpd256 ((__v4df) __A,
   1024                                                 (__v4df) __B,
   1025                                                 -(__v4df) __C),
   1026                     (__v4df)_mm256_setzero_pd());
   1027 }
   1028 
   1029 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   1030 _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
   1031 {
   1032   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
   1033                     __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
   1034                                                 (__v4df) __B,
   1035                                                 (__v4df) __C),
   1036                     (__v4df) __C);
   1037 }
   1038 
   1039 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   1040 _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
   1041 {
   1042   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
   1043                     __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
   1044                                                 (__v4df) __B,
   1045                                                 (__v4df) __C),
   1046                     (__v4df)_mm256_setzero_pd());
   1047 }
   1048 
   1049 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   1050 _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
   1051 {
   1052   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
   1053                     __builtin_ia32_vfmaddpd256 (-(__v4df) __A,
   1054                                                 (__v4df) __B,
   1055                                                 -(__v4df) __C),
   1056                     (__v4df)_mm256_setzero_pd());
   1057 }
   1058 
   1059 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   1060 _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
   1061 {
   1062   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
   1063                     __builtin_ia32_vfmaddps ((__v4sf) __A,
   1064                                              (__v4sf) __B,
   1065                                              (__v4sf) __C),
   1066                     (__v4sf) __A);
   1067 }
   1068 
   1069 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   1070 _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
   1071 {
   1072   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
   1073                     __builtin_ia32_vfmaddps ((__v4sf) __A,
   1074                                              (__v4sf) __B,
   1075                                              (__v4sf) __C),
   1076                     (__v4sf) __C);
   1077 }
   1078 
   1079 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   1080 _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
   1081 {
   1082   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
   1083                     __builtin_ia32_vfmaddps ((__v4sf) __A,
   1084                                              (__v4sf) __B,
   1085                                              (__v4sf) __C),
   1086                     (__v4sf)_mm_setzero_ps());
   1087 }
   1088 
   1089 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   1090 _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
   1091 {
   1092   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
   1093                     __builtin_ia32_vfmaddps ((__v4sf) __A,
   1094                                              (__v4sf) __B,
   1095                                              -(__v4sf) __C),
   1096                     (__v4sf) __A);
   1097 }
   1098 
   1099 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   1100 _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
   1101 {
   1102   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
   1103                     __builtin_ia32_vfmaddps ((__v4sf) __A,
   1104                                              (__v4sf) __B,
   1105                                              -(__v4sf) __C),
   1106                     (__v4sf)_mm_setzero_ps());
   1107 }
   1108 
   1109 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   1110 _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
   1111 {
   1112   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
   1113                     __builtin_ia32_vfmaddps (-(__v4sf) __A,
   1114                                              (__v4sf) __B,
   1115                                              (__v4sf) __C),
   1116                     (__v4sf) __C);
   1117 }
   1118 
   1119 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   1120 _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
   1121 {
   1122   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
   1123                     __builtin_ia32_vfmaddps (-(__v4sf) __A,
   1124                                              (__v4sf) __B,
   1125                                              (__v4sf) __C),
   1126                     (__v4sf)_mm_setzero_ps());
   1127 }
   1128 
   1129 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   1130 _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
   1131 {
   1132   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
   1133                     __builtin_ia32_vfmaddps (-(__v4sf) __A,
   1134                                              (__v4sf) __B,
   1135                                              -(__v4sf) __C),
   1136                     (__v4sf)_mm_setzero_ps());
   1137 }
   1138 
   1139 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   1140 _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
   1141 {
   1142   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
   1143                     __builtin_ia32_vfmaddps256 ((__v8sf) __A,
   1144                                                 (__v8sf) __B,
   1145                                                 (__v8sf) __C),
   1146                     (__v8sf) __A);
   1147 }
   1148 
   1149 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   1150 _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
   1151 {
   1152   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
   1153                     __builtin_ia32_vfmaddps256 ((__v8sf) __A,
   1154                                                 (__v8sf) __B,
   1155                                                 (__v8sf) __C),
   1156                     (__v8sf) __C);
   1157 }
   1158 
   1159 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   1160 _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
   1161 {
   1162   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
   1163                     __builtin_ia32_vfmaddps256 ((__v8sf) __A,
   1164                                                 (__v8sf) __B,
   1165                                                 (__v8sf) __C),
   1166                     (__v8sf)_mm256_setzero_ps());
   1167 }
   1168 
   1169 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   1170 _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
   1171 {
   1172   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
   1173                     __builtin_ia32_vfmaddps256 ((__v8sf) __A,
   1174                                                 (__v8sf) __B,
   1175                                                 -(__v8sf) __C),
   1176                     (__v8sf) __A);
   1177 }
   1178 
   1179 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   1180 _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
   1181 {
   1182   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
   1183                     __builtin_ia32_vfmaddps256 ((__v8sf) __A,
   1184                                                 (__v8sf) __B,
   1185                                                 -(__v8sf) __C),
   1186                     (__v8sf)_mm256_setzero_ps());
   1187 }
   1188 
   1189 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   1190 _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
   1191 {
   1192   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
   1193                     __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
   1194                                                 (__v8sf) __B,
   1195                                                 (__v8sf) __C),
   1196                     (__v8sf) __C);
   1197 }
   1198 
   1199 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   1200 _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
   1201 {
   1202   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
   1203                     __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
   1204                                                 (__v8sf) __B,
   1205                                                 (__v8sf) __C),
   1206                     (__v8sf)_mm256_setzero_ps());
   1207 }
   1208 
   1209 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   1210 _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
   1211 {
   1212   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
   1213                     __builtin_ia32_vfmaddps256 (-(__v8sf) __A,
   1214                                                 (__v8sf) __B,
   1215                                                 -(__v8sf) __C),
   1216                     (__v8sf)_mm256_setzero_ps());
   1217 }
   1218 
   1219 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   1220 _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
   1221 {
   1222   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
   1223                     __builtin_ia32_vfmaddsubpd ((__v2df) __A,
   1224                                                 (__v2df) __B,
   1225                                                 (__v2df) __C),
   1226                     (__v2df) __A);
   1227 }
   1228 
   1229 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   1230 _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
   1231 {
   1232   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
   1233                     __builtin_ia32_vfmaddsubpd ((__v2df) __A,
   1234                                                 (__v2df) __B,
   1235                                                 (__v2df) __C),
   1236                     (__v2df) __C);
   1237 }
   1238 
   1239 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   1240 _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
   1241 {
   1242   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
   1243                     __builtin_ia32_vfmaddsubpd ((__v2df) __A,
   1244                                                 (__v2df) __B,
   1245                                                 (__v2df) __C),
   1246                     (__v2df)_mm_setzero_pd());
   1247 }
   1248 
   1249 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   1250 _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
   1251 {
   1252   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
   1253                     __builtin_ia32_vfmaddsubpd ((__v2df) __A,
   1254                                                 (__v2df) __B,
   1255                                                 -(__v2df) __C),
   1256                     (__v2df) __A);
   1257 }
   1258 
   1259 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   1260 _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
   1261 {
   1262   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
   1263                     __builtin_ia32_vfmaddsubpd ((__v2df) __A,
   1264                                                 (__v2df) __B,
   1265                                                 -(__v2df) __C),
   1266                     (__v2df)_mm_setzero_pd());
   1267 }
   1268 
   1269 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   1270 _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
   1271 {
   1272   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
   1273                     __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
   1274                                                    (__v4df) __B,
   1275                                                    (__v4df) __C),
   1276                     (__v4df) __A);
   1277 }
   1278 
   1279 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   1280 _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
   1281 {
   1282   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
   1283                     __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
   1284                                                    (__v4df) __B,
   1285                                                    (__v4df) __C),
   1286                     (__v4df) __C);
   1287 }
   1288 
   1289 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   1290 _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
   1291 {
   1292   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
   1293                     __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
   1294                                                    (__v4df) __B,
   1295                                                    (__v4df) __C),
   1296                     (__v4df)_mm256_setzero_pd());
   1297 }
   1298 
   1299 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   1300 _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
   1301 {
   1302   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
   1303                     __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
   1304                                                    (__v4df) __B,
   1305                                                    -(__v4df) __C),
   1306                     (__v4df) __A);
   1307 }
   1308 
   1309 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   1310 _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
   1311 {
   1312   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
   1313                     __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
   1314                                                    (__v4df) __B,
   1315                                                    -(__v4df) __C),
   1316                     (__v4df)_mm256_setzero_pd());
   1317 }
   1318 
   1319 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   1320 _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
   1321 {
   1322   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
   1323                     __builtin_ia32_vfmaddsubps ((__v4sf) __A,
   1324                                                 (__v4sf) __B,
   1325                                                 (__v4sf) __C),
   1326                     (__v4sf) __A);
   1327 }
   1328 
   1329 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   1330 _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
   1331 {
   1332   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
   1333                     __builtin_ia32_vfmaddsubps ((__v4sf) __A,
   1334                                                 (__v4sf) __B,
   1335                                                 (__v4sf) __C),
   1336                     (__v4sf) __C);
   1337 }
   1338 
   1339 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   1340 _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
   1341 {
   1342   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
   1343                     __builtin_ia32_vfmaddsubps ((__v4sf) __A,
   1344                                                 (__v4sf) __B,
   1345                                                 (__v4sf) __C),
   1346                     (__v4sf)_mm_setzero_ps());
   1347 }
   1348 
   1349 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   1350 _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
   1351 {
   1352   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
   1353                     __builtin_ia32_vfmaddsubps ((__v4sf) __A,
   1354                                                 (__v4sf) __B,
   1355                                                 -(__v4sf) __C),
   1356                     (__v4sf) __A);
   1357 }
   1358 
   1359 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   1360 _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
   1361 {
   1362   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
   1363                     __builtin_ia32_vfmaddsubps ((__v4sf) __A,
   1364                                                 (__v4sf) __B,
   1365                                                 -(__v4sf) __C),
   1366                     (__v4sf)_mm_setzero_ps());
   1367 }
   1368 
   1369 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   1370 _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B,
   1371                          __m256 __C)
   1372 {
   1373   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
   1374                     __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
   1375                                                    (__v8sf) __B,
   1376                                                    (__v8sf) __C),
   1377                     (__v8sf) __A);
   1378 }
   1379 
   1380 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   1381 _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
   1382 {
   1383   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
   1384                     __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
   1385                                                    (__v8sf) __B,
   1386                                                    (__v8sf) __C),
   1387                     (__v8sf) __C);
   1388 }
   1389 
   1390 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   1391 _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
   1392 {
   1393   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
   1394                     __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
   1395                                                    (__v8sf) __B,
   1396                                                    (__v8sf) __C),
   1397                     (__v8sf)_mm256_setzero_ps());
   1398 }
   1399 
   1400 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   1401 _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
   1402 {
   1403   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
   1404                     __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
   1405                                                    (__v8sf) __B,
   1406                                                    -(__v8sf) __C),
   1407                     (__v8sf) __A);
   1408 }
   1409 
   1410 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   1411 _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
   1412 {
   1413   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
   1414                     __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
   1415                                                    (__v8sf) __B,
   1416                                                    -(__v8sf) __C),
   1417                     (__v8sf)_mm256_setzero_ps());
   1418 }
   1419 
   1420 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   1421 _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
   1422 {
   1423   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
   1424                     __builtin_ia32_vfmaddpd ((__v2df) __A,
   1425                                              (__v2df) __B,
   1426                                              -(__v2df) __C),
   1427                     (__v2df) __C);
   1428 }
   1429 
   1430 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   1431 _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
   1432 {
   1433   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
   1434                     __builtin_ia32_vfmaddpd256 ((__v4df) __A,
   1435                                                 (__v4df) __B,
   1436                                                 -(__v4df) __C),
   1437                     (__v4df) __C);
   1438 }
   1439 
   1440 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   1441 _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
   1442 {
   1443   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
   1444                     __builtin_ia32_vfmaddps ((__v4sf) __A,
   1445                                              (__v4sf) __B,
   1446                                              -(__v4sf) __C),
   1447                     (__v4sf) __C);
   1448 }
   1449 
   1450 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   1451 _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
   1452 {
   1453   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
   1454                     __builtin_ia32_vfmaddps256 ((__v8sf) __A,
   1455                                                 (__v8sf) __B,
   1456                                                 -(__v8sf) __C),
   1457                     (__v8sf) __C);
   1458 }
   1459 
   1460 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   1461 _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
   1462 {
   1463   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
   1464                     __builtin_ia32_vfmaddsubpd ((__v2df) __A,
   1465                                                 (__v2df) __B,
   1466                                                 -(__v2df) __C),
   1467                     (__v2df) __C);
   1468 }
   1469 
   1470 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   1471 _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
   1472 {
   1473   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
   1474                     __builtin_ia32_vfmaddsubpd256 ((__v4df) __A,
   1475                                                    (__v4df) __B,
   1476                                                    -(__v4df) __C),
   1477                     (__v4df) __C);
   1478 }
   1479 
   1480 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   1481 _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
   1482 {
   1483   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
   1484                     __builtin_ia32_vfmaddsubps ((__v4sf) __A,
   1485                                                 (__v4sf) __B,
   1486                                                 -(__v4sf) __C),
   1487                     (__v4sf) __C);
   1488 }
   1489 
   1490 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   1491 _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
   1492 {
   1493   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
   1494                     __builtin_ia32_vfmaddsubps256 ((__v8sf) __A,
   1495                                                    (__v8sf) __B,
   1496                                                    -(__v8sf) __C),
   1497                     (__v8sf) __C);
   1498 }
   1499 
   1500 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   1501 _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
   1502 {
   1503   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
   1504                     __builtin_ia32_vfmaddpd ((__v2df) __A,
   1505                                              -(__v2df) __B,
   1506                                              (__v2df) __C),
   1507                     (__v2df) __A);
   1508 }
   1509 
   1510 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   1511 _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
   1512 {
   1513   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
   1514                     __builtin_ia32_vfmaddpd256 ((__v4df) __A,
   1515                                                 -(__v4df) __B,
   1516                                                 (__v4df) __C),
   1517                     (__v4df) __A);
   1518 }
   1519 
   1520 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   1521 _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
   1522 {
   1523   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
   1524                     __builtin_ia32_vfmaddps ((__v4sf) __A,
   1525                                              -(__v4sf) __B,
   1526                                              (__v4sf) __C),
   1527                     (__v4sf) __A);
   1528 }
   1529 
   1530 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   1531 _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
   1532 {
   1533   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
   1534                     __builtin_ia32_vfmaddps256 ((__v8sf) __A,
   1535                                                 -(__v8sf) __B,
   1536                                                 (__v8sf) __C),
   1537                     (__v8sf) __A);
   1538 }
   1539 
   1540 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   1541 _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
   1542 {
   1543   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
   1544                     __builtin_ia32_vfmaddpd ((__v2df) __A,
   1545                                              -(__v2df) __B,
   1546                                              -(__v2df) __C),
   1547                     (__v2df) __A);
   1548 }
   1549 
   1550 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   1551 _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
   1552 {
   1553   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
   1554                     __builtin_ia32_vfmaddpd ((__v2df) __A,
   1555                                              -(__v2df) __B,
   1556                                              -(__v2df) __C),
   1557                     (__v2df) __C);
   1558 }
   1559 
   1560 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   1561 _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
   1562 {
   1563   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
   1564                     __builtin_ia32_vfmaddpd256 ((__v4df) __A,
   1565                                                 -(__v4df) __B,
   1566                                                 -(__v4df) __C),
   1567                     (__v4df) __A);
   1568 }
   1569 
   1570 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   1571 _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
   1572 {
   1573   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
   1574                     __builtin_ia32_vfmaddpd256 ((__v4df) __A,
   1575                                                 -(__v4df) __B,
   1576                                                 -(__v4df) __C),
   1577                     (__v4df) __C);
   1578 }
   1579 
   1580 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   1581 _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
   1582 {
   1583   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
   1584                     __builtin_ia32_vfmaddps ((__v4sf) __A,
   1585                                              -(__v4sf) __B,
   1586                                              -(__v4sf) __C),
   1587                     (__v4sf) __A);
   1588 }
   1589 
   1590 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   1591 _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
   1592 {
   1593   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
   1594                     __builtin_ia32_vfmaddps ((__v4sf) __A,
   1595                                              -(__v4sf) __B,
   1596                                              -(__v4sf) __C),
   1597                     (__v4sf) __C);
   1598 }
   1599 
   1600 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   1601 _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
   1602 {
   1603   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
   1604                     __builtin_ia32_vfmaddps256 ((__v8sf) __A,
   1605                                                 -(__v8sf) __B,
   1606                                                 -(__v8sf) __C),
   1607                     (__v8sf) __A);
   1608 }
   1609 
   1610 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   1611 _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
   1612 {
   1613   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
   1614                     __builtin_ia32_vfmaddps256 ((__v8sf) __A,
   1615                                                 -(__v8sf) __B,
   1616                                                 -(__v8sf) __C),
   1617                     (__v8sf) __C);
   1618 }
   1619 
   1620 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   1621 _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   1622   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   1623                                               (__v2df)_mm_add_pd(__A, __B),
   1624                                               (__v2df)__W);
   1625 }
   1626 
   1627 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   1628 _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   1629   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   1630                                               (__v2df)_mm_add_pd(__A, __B),
   1631                                               (__v2df)_mm_setzero_pd());
   1632 }
   1633 
   1634 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   1635 _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   1636   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   1637                                               (__v4df)_mm256_add_pd(__A, __B),
   1638                                               (__v4df)__W);
   1639 }
   1640 
   1641 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   1642 _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   1643   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   1644                                               (__v4df)_mm256_add_pd(__A, __B),
   1645                                               (__v4df)_mm256_setzero_pd());
   1646 }
   1647 
   1648 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   1649 _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   1650   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   1651                                              (__v4sf)_mm_add_ps(__A, __B),
   1652                                              (__v4sf)__W);
   1653 }
   1654 
   1655 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   1656 _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   1657   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   1658                                              (__v4sf)_mm_add_ps(__A, __B),
   1659                                              (__v4sf)_mm_setzero_ps());
   1660 }
   1661 
   1662 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   1663 _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   1664   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   1665                                              (__v8sf)_mm256_add_ps(__A, __B),
   1666                                              (__v8sf)__W);
   1667 }
   1668 
   1669 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   1670 _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   1671   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   1672                                              (__v8sf)_mm256_add_ps(__A, __B),
   1673                                              (__v8sf)_mm256_setzero_ps());
   1674 }
   1675 
   1676 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   1677 _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
   1678   return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
   1679                 (__v4si) __W,
   1680                 (__v4si) __A);
   1681 }
   1682 
   1683 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   1684 _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
   1685   return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
   1686                 (__v8si) __W,
   1687                 (__v8si) __A);
   1688 }
   1689 
   1690 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   1691 _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) {
   1692   return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
   1693                  (__v2df) __W,
   1694                  (__v2df) __A);
   1695 }
   1696 
   1697 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   1698 _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) {
   1699   return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
   1700                  (__v4df) __W,
   1701                  (__v4df) __A);
   1702 }
   1703 
   1704 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   1705 _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) {
   1706   return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
   1707                 (__v4sf) __W,
   1708                 (__v4sf) __A);
   1709 }
   1710 
   1711 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   1712 _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) {
   1713   return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
   1714                 (__v8sf) __W,
   1715                 (__v8sf) __A);
   1716 }
   1717 
   1718 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   1719 _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) {
   1720   return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
   1721                 (__v2di) __W,
   1722                 (__v2di) __A);
   1723 }
   1724 
   1725 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   1726 _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) {
   1727   return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
   1728                 (__v4di) __W,
   1729                 (__v4di) __A);
   1730 }
   1731 
   1732 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   1733 _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) {
   1734   return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
   1735                   (__v2df) __W,
   1736                   (__mmask8) __U);
   1737 }
   1738 
   1739 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   1740 _mm_maskz_compress_pd (__mmask8 __U, __m128d __A) {
   1741   return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
   1742                   (__v2df)
   1743                   _mm_setzero_pd (),
   1744                   (__mmask8) __U);
   1745 }
   1746 
   1747 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   1748 _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) {
   1749   return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
   1750                   (__v4df) __W,
   1751                   (__mmask8) __U);
   1752 }
   1753 
   1754 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   1755 _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) {
   1756   return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
   1757                   (__v4df)
   1758                   _mm256_setzero_pd (),
   1759                   (__mmask8) __U);
   1760 }
   1761 
   1762 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   1763 _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
   1764   return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
   1765                   (__v2di) __W,
   1766                   (__mmask8) __U);
   1767 }
   1768 
   1769 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   1770 _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) {
   1771   return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
   1772                   (__v2di)
   1773                   _mm_setzero_si128 (),
   1774                   (__mmask8) __U);
   1775 }
   1776 
   1777 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   1778 _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
   1779   return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
   1780                   (__v4di) __W,
   1781                   (__mmask8) __U);
   1782 }
   1783 
   1784 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   1785 _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) {
   1786   return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
   1787                   (__v4di)
   1788                   _mm256_setzero_si256 (),
   1789                   (__mmask8) __U);
   1790 }
   1791 
   1792 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   1793 _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) {
   1794   return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
   1795                  (__v4sf) __W,
   1796                  (__mmask8) __U);
   1797 }
   1798 
   1799 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   1800 _mm_maskz_compress_ps (__mmask8 __U, __m128 __A) {
   1801   return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
   1802                  (__v4sf)
   1803                  _mm_setzero_ps (),
   1804                  (__mmask8) __U);
   1805 }
   1806 
   1807 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   1808 _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) {
   1809   return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
   1810                  (__v8sf) __W,
   1811                  (__mmask8) __U);
   1812 }
   1813 
   1814 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   1815 _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) {
   1816   return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
   1817                  (__v8sf)
   1818                  _mm256_setzero_ps (),
   1819                  (__mmask8) __U);
   1820 }
   1821 
   1822 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   1823 _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
   1824   return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
   1825                   (__v4si) __W,
   1826                   (__mmask8) __U);
   1827 }
   1828 
   1829 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   1830 _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) {
   1831   return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
   1832                   (__v4si)
   1833                   _mm_setzero_si128 (),
   1834                   (__mmask8) __U);
   1835 }
   1836 
   1837 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   1838 _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
   1839   return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
   1840                   (__v8si) __W,
   1841                   (__mmask8) __U);
   1842 }
   1843 
   1844 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   1845 _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) {
   1846   return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
   1847                   (__v8si)
   1848                   _mm256_setzero_si256 (),
   1849                   (__mmask8) __U);
   1850 }
   1851 
   1852 static __inline__ void __DEFAULT_FN_ATTRS128
   1853 _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) {
   1854   __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
   1855             (__v2df) __A,
   1856             (__mmask8) __U);
   1857 }
   1858 
   1859 static __inline__ void __DEFAULT_FN_ATTRS256
   1860 _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) {
   1861   __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
   1862             (__v4df) __A,
   1863             (__mmask8) __U);
   1864 }
   1865 
   1866 static __inline__ void __DEFAULT_FN_ATTRS128
   1867 _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) {
   1868   __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
   1869             (__v2di) __A,
   1870             (__mmask8) __U);
   1871 }
   1872 
   1873 static __inline__ void __DEFAULT_FN_ATTRS256
   1874 _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) {
   1875   __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
   1876             (__v4di) __A,
   1877             (__mmask8) __U);
   1878 }
   1879 
   1880 static __inline__ void __DEFAULT_FN_ATTRS128
   1881 _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) {
   1882   __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
   1883             (__v4sf) __A,
   1884             (__mmask8) __U);
   1885 }
   1886 
   1887 static __inline__ void __DEFAULT_FN_ATTRS256
   1888 _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) {
   1889   __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
   1890             (__v8sf) __A,
   1891             (__mmask8) __U);
   1892 }
   1893 
   1894 static __inline__ void __DEFAULT_FN_ATTRS128
   1895 _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) {
   1896   __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
   1897             (__v4si) __A,
   1898             (__mmask8) __U);
   1899 }
   1900 
   1901 static __inline__ void __DEFAULT_FN_ATTRS256
   1902 _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) {
   1903   __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
   1904             (__v8si) __A,
   1905             (__mmask8) __U);
   1906 }
   1907 
   1908 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   1909 _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
   1910   return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
   1911                                               (__v2df)_mm_cvtepi32_pd(__A),
   1912                                               (__v2df)__W);
   1913 }
   1914 
   1915 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   1916 _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
   1917   return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
   1918                                               (__v2df)_mm_cvtepi32_pd(__A),
   1919                                               (__v2df)_mm_setzero_pd());
   1920 }
   1921 
   1922 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   1923 _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
   1924   return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
   1925                                               (__v4df)_mm256_cvtepi32_pd(__A),
   1926                                               (__v4df)__W);
   1927 }
   1928 
   1929 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   1930 _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
   1931   return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
   1932                                               (__v4df)_mm256_cvtepi32_pd(__A),
   1933                                               (__v4df)_mm256_setzero_pd());
   1934 }
   1935 
   1936 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   1937 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
   1938   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   1939                                              (__v4sf)_mm_cvtepi32_ps(__A),
   1940                                              (__v4sf)__W);
   1941 }
   1942 
   1943 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   1944 _mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A) {
   1945   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   1946                                              (__v4sf)_mm_cvtepi32_ps(__A),
   1947                                              (__v4sf)_mm_setzero_ps());
   1948 }
   1949 
   1950 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   1951 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
   1952   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   1953                                              (__v8sf)_mm256_cvtepi32_ps(__A),
   1954                                              (__v8sf)__W);
   1955 }
   1956 
   1957 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   1958 _mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A) {
   1959   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   1960                                              (__v8sf)_mm256_cvtepi32_ps(__A),
   1961                                              (__v8sf)_mm256_setzero_ps());
   1962 }
   1963 
   1964 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   1965 _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
   1966   return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
   1967                 (__v4si) __W,
   1968                 (__mmask8) __U);
   1969 }
   1970 
   1971 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   1972 _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) {
   1973   return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
   1974                 (__v4si)
   1975                 _mm_setzero_si128 (),
   1976                 (__mmask8) __U);
   1977 }
   1978 
   1979 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   1980 _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
   1981   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   1982                                              (__v4si)_mm256_cvtpd_epi32(__A),
   1983                                              (__v4si)__W);
   1984 }
   1985 
   1986 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   1987 _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) {
   1988   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   1989                                              (__v4si)_mm256_cvtpd_epi32(__A),
   1990                                              (__v4si)_mm_setzero_si128());
   1991 }
   1992 
   1993 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   1994 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
   1995   return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
   1996             (__v4sf) __W,
   1997             (__mmask8) __U);
   1998 }
   1999 
   2000 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   2001 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
   2002   return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
   2003             (__v4sf)
   2004             _mm_setzero_ps (),
   2005             (__mmask8) __U);
   2006 }
   2007 
   2008 static __inline__ __m128 __DEFAULT_FN_ATTRS256
   2009 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
   2010   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   2011                                              (__v4sf)_mm256_cvtpd_ps(__A),
   2012                                              (__v4sf)__W);
   2013 }
   2014 
   2015 static __inline__ __m128 __DEFAULT_FN_ATTRS256
   2016 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) {
   2017   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   2018                                              (__v4sf)_mm256_cvtpd_ps(__A),
   2019                                              (__v4sf)_mm_setzero_ps());
   2020 }
   2021 
   2022 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2023 _mm_cvtpd_epu32 (__m128d __A) {
   2024   return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
   2025                  (__v4si)
   2026                  _mm_setzero_si128 (),
   2027                  (__mmask8) -1);
   2028 }
   2029 
   2030 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2031 _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
   2032   return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
   2033                  (__v4si) __W,
   2034                  (__mmask8) __U);
   2035 }
   2036 
   2037 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2038 _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) {
   2039   return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
   2040                  (__v4si)
   2041                  _mm_setzero_si128 (),
   2042                  (__mmask8) __U);
   2043 }
   2044 
   2045 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   2046 _mm256_cvtpd_epu32 (__m256d __A) {
   2047   return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
   2048                  (__v4si)
   2049                  _mm_setzero_si128 (),
   2050                  (__mmask8) -1);
   2051 }
   2052 
   2053 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   2054 _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
   2055   return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
   2056                  (__v4si) __W,
   2057                  (__mmask8) __U);
   2058 }
   2059 
   2060 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   2061 _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) {
   2062   return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
   2063                  (__v4si)
   2064                  _mm_setzero_si128 (),
   2065                  (__mmask8) __U);
   2066 }
   2067 
   2068 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2069 _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
   2070   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   2071                                              (__v4si)_mm_cvtps_epi32(__A),
   2072                                              (__v4si)__W);
   2073 }
   2074 
   2075 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2076 _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) {
   2077   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   2078                                              (__v4si)_mm_cvtps_epi32(__A),
   2079                                              (__v4si)_mm_setzero_si128());
   2080 }
   2081 
   2082 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   2083 _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
   2084   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   2085                                              (__v8si)_mm256_cvtps_epi32(__A),
   2086                                              (__v8si)__W);
   2087 }
   2088 
   2089 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   2090 _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) {
   2091   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   2092                                              (__v8si)_mm256_cvtps_epi32(__A),
   2093                                              (__v8si)_mm256_setzero_si256());
   2094 }
   2095 
   2096 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   2097 _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) {
   2098   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   2099                                               (__v2df)_mm_cvtps_pd(__A),
   2100                                               (__v2df)__W);
   2101 }
   2102 
   2103 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   2104 _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
   2105   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   2106                                               (__v2df)_mm_cvtps_pd(__A),
   2107                                               (__v2df)_mm_setzero_pd());
   2108 }
   2109 
   2110 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   2111 _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) {
   2112   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   2113                                               (__v4df)_mm256_cvtps_pd(__A),
   2114                                               (__v4df)__W);
   2115 }
   2116 
   2117 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   2118 _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
   2119   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   2120                                               (__v4df)_mm256_cvtps_pd(__A),
   2121                                               (__v4df)_mm256_setzero_pd());
   2122 }
   2123 
   2124 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2125 _mm_cvtps_epu32 (__m128 __A) {
   2126   return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
   2127                  (__v4si)
   2128                  _mm_setzero_si128 (),
   2129                  (__mmask8) -1);
   2130 }
   2131 
   2132 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2133 _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
   2134   return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
   2135                  (__v4si) __W,
   2136                  (__mmask8) __U);
   2137 }
   2138 
   2139 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2140 _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) {
   2141   return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
   2142                  (__v4si)
   2143                  _mm_setzero_si128 (),
   2144                  (__mmask8) __U);
   2145 }
   2146 
   2147 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   2148 _mm256_cvtps_epu32 (__m256 __A) {
   2149   return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
   2150                  (__v8si)
   2151                  _mm256_setzero_si256 (),
   2152                  (__mmask8) -1);
   2153 }
   2154 
   2155 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   2156 _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
   2157   return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
   2158                  (__v8si) __W,
   2159                  (__mmask8) __U);
   2160 }
   2161 
   2162 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   2163 _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) {
   2164   return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
   2165                  (__v8si)
   2166                  _mm256_setzero_si256 (),
   2167                  (__mmask8) __U);
   2168 }
   2169 
   2170 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2171 _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
   2172   return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
   2173                  (__v4si) __W,
   2174                  (__mmask8) __U);
   2175 }
   2176 
   2177 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2178 _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) {
   2179   return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
   2180                  (__v4si)
   2181                  _mm_setzero_si128 (),
   2182                  (__mmask8) __U);
   2183 }
   2184 
   2185 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   2186 _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
   2187   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   2188                                              (__v4si)_mm256_cvttpd_epi32(__A),
   2189                                              (__v4si)__W);
   2190 }
   2191 
   2192 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   2193 _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) {
   2194   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   2195                                              (__v4si)_mm256_cvttpd_epi32(__A),
   2196                                              (__v4si)_mm_setzero_si128());
   2197 }
   2198 
   2199 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2200 _mm_cvttpd_epu32 (__m128d __A) {
   2201   return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
   2202                   (__v4si)
   2203                   _mm_setzero_si128 (),
   2204                   (__mmask8) -1);
   2205 }
   2206 
   2207 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2208 _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
   2209   return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
   2210                   (__v4si) __W,
   2211                   (__mmask8) __U);
   2212 }
   2213 
   2214 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2215 _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) {
   2216   return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
   2217                   (__v4si)
   2218                   _mm_setzero_si128 (),
   2219                   (__mmask8) __U);
   2220 }
   2221 
   2222 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   2223 _mm256_cvttpd_epu32 (__m256d __A) {
   2224   return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
   2225                   (__v4si)
   2226                   _mm_setzero_si128 (),
   2227                   (__mmask8) -1);
   2228 }
   2229 
   2230 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   2231 _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
   2232   return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
   2233                   (__v4si) __W,
   2234                   (__mmask8) __U);
   2235 }
   2236 
   2237 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   2238 _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) {
   2239   return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
   2240                   (__v4si)
   2241                   _mm_setzero_si128 (),
   2242                   (__mmask8) __U);
   2243 }
   2244 
   2245 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2246 _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
   2247   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   2248                                              (__v4si)_mm_cvttps_epi32(__A),
   2249                                              (__v4si)__W);
   2250 }
   2251 
   2252 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2253 _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) {
   2254   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   2255                                              (__v4si)_mm_cvttps_epi32(__A),
   2256                                              (__v4si)_mm_setzero_si128());
   2257 }
   2258 
   2259 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   2260 _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
   2261   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   2262                                              (__v8si)_mm256_cvttps_epi32(__A),
   2263                                              (__v8si)__W);
   2264 }
   2265 
   2266 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   2267 _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) {
   2268   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   2269                                              (__v8si)_mm256_cvttps_epi32(__A),
   2270                                              (__v8si)_mm256_setzero_si256());
   2271 }
   2272 
   2273 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2274 _mm_cvttps_epu32 (__m128 __A) {
   2275   return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
   2276                   (__v4si)
   2277                   _mm_setzero_si128 (),
   2278                   (__mmask8) -1);
   2279 }
   2280 
   2281 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2282 _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
   2283   return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
   2284                   (__v4si) __W,
   2285                   (__mmask8) __U);
   2286 }
   2287 
   2288 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2289 _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) {
   2290   return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
   2291                   (__v4si)
   2292                   _mm_setzero_si128 (),
   2293                   (__mmask8) __U);
   2294 }
   2295 
   2296 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   2297 _mm256_cvttps_epu32 (__m256 __A) {
   2298   return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
   2299                   (__v8si)
   2300                   _mm256_setzero_si256 (),
   2301                   (__mmask8) -1);
   2302 }
   2303 
   2304 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   2305 _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
   2306   return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
   2307                   (__v8si) __W,
   2308                   (__mmask8) __U);
   2309 }
   2310 
   2311 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   2312 _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) {
   2313   return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
   2314                   (__v8si)
   2315                   _mm256_setzero_si256 (),
   2316                   (__mmask8) __U);
   2317 }
   2318 
   2319 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   2320 _mm_cvtepu32_pd (__m128i __A) {
   2321   return (__m128d) __builtin_convertvector(
   2322       __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df);
   2323 }
   2324 
   2325 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   2326 _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
   2327   return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
   2328                                               (__v2df)_mm_cvtepu32_pd(__A),
   2329                                               (__v2df)__W);
   2330 }
   2331 
   2332 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   2333 _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
   2334   return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
   2335                                               (__v2df)_mm_cvtepu32_pd(__A),
   2336                                               (__v2df)_mm_setzero_pd());
   2337 }
   2338 
   2339 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   2340 _mm256_cvtepu32_pd (__m128i __A) {
   2341   return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
   2342 }
   2343 
   2344 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   2345 _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
   2346   return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
   2347                                               (__v4df)_mm256_cvtepu32_pd(__A),
   2348                                               (__v4df)__W);
   2349 }
   2350 
   2351 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   2352 _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
   2353   return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
   2354                                               (__v4df)_mm256_cvtepu32_pd(__A),
   2355                                               (__v4df)_mm256_setzero_pd());
   2356 }
   2357 
   2358 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   2359 _mm_cvtepu32_ps (__m128i __A) {
   2360   return (__m128)__builtin_convertvector((__v4su)__A, __v4sf);
   2361 }
   2362 
   2363 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   2364 _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
   2365   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   2366                                              (__v4sf)_mm_cvtepu32_ps(__A),
   2367                                              (__v4sf)__W);
   2368 }
   2369 
   2370 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   2371 _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) {
   2372   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   2373                                              (__v4sf)_mm_cvtepu32_ps(__A),
   2374                                              (__v4sf)_mm_setzero_ps());
   2375 }
   2376 
   2377 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   2378 _mm256_cvtepu32_ps (__m256i __A) {
   2379   return (__m256)__builtin_convertvector((__v8su)__A, __v8sf);
   2380 }
   2381 
   2382 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   2383 _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
   2384   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   2385                                              (__v8sf)_mm256_cvtepu32_ps(__A),
   2386                                              (__v8sf)__W);
   2387 }
   2388 
   2389 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   2390 _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) {
   2391   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   2392                                              (__v8sf)_mm256_cvtepu32_ps(__A),
   2393                                              (__v8sf)_mm256_setzero_ps());
   2394 }
   2395 
   2396 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   2397 _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   2398   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   2399                                               (__v2df)_mm_div_pd(__A, __B),
   2400                                               (__v2df)__W);
   2401 }
   2402 
   2403 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   2404 _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   2405   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   2406                                               (__v2df)_mm_div_pd(__A, __B),
   2407                                               (__v2df)_mm_setzero_pd());
   2408 }
   2409 
   2410 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   2411 _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   2412   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   2413                                               (__v4df)_mm256_div_pd(__A, __B),
   2414                                               (__v4df)__W);
   2415 }
   2416 
   2417 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   2418 _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   2419   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   2420                                               (__v4df)_mm256_div_pd(__A, __B),
   2421                                               (__v4df)_mm256_setzero_pd());
   2422 }
   2423 
   2424 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   2425 _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   2426   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   2427                                              (__v4sf)_mm_div_ps(__A, __B),
   2428                                              (__v4sf)__W);
   2429 }
   2430 
   2431 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   2432 _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   2433   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   2434                                              (__v4sf)_mm_div_ps(__A, __B),
   2435                                              (__v4sf)_mm_setzero_ps());
   2436 }
   2437 
   2438 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   2439 _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   2440   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   2441                                              (__v8sf)_mm256_div_ps(__A, __B),
   2442                                              (__v8sf)__W);
   2443 }
   2444 
   2445 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   2446 _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   2447   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   2448                                              (__v8sf)_mm256_div_ps(__A, __B),
   2449                                              (__v8sf)_mm256_setzero_ps());
   2450 }
   2451 
   2452 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   2453 _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) {
   2454   return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
   2455                 (__v2df) __W,
   2456                 (__mmask8) __U);
   2457 }
   2458 
   2459 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   2460 _mm_maskz_expand_pd (__mmask8 __U, __m128d __A) {
   2461   return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
   2462                  (__v2df)
   2463                  _mm_setzero_pd (),
   2464                  (__mmask8) __U);
   2465 }
   2466 
   2467 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   2468 _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) {
   2469   return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
   2470                 (__v4df) __W,
   2471                 (__mmask8) __U);
   2472 }
   2473 
   2474 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   2475 _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) {
   2476   return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
   2477                  (__v4df)
   2478                  _mm256_setzero_pd (),
   2479                  (__mmask8) __U);
   2480 }
   2481 
   2482 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2483 _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
   2484   return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
   2485                 (__v2di) __W,
   2486                 (__mmask8) __U);
   2487 }
   2488 
   2489 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2490 _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) {
   2491   return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
   2492                  (__v2di)
   2493                  _mm_setzero_si128 (),
   2494                  (__mmask8) __U);
   2495 }
   2496 
   2497 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   2498 _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
   2499   return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
   2500                 (__v4di) __W,
   2501                 (__mmask8) __U);
   2502 }
   2503 
   2504 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   2505 _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) {
   2506   return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
   2507                  (__v4di)
   2508                  _mm256_setzero_si256 (),
   2509                  (__mmask8) __U);
   2510 }
   2511 
   2512 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   2513 _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) {
   2514   return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P,
   2515               (__v2df) __W,
   2516               (__mmask8)
   2517               __U);
   2518 }
   2519 
   2520 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   2521 _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
   2522   return (__m128d) __builtin_ia32_expandloaddf128_mask ((const __v2df *) __P,
   2523                (__v2df)
   2524                _mm_setzero_pd (),
   2525                (__mmask8)
   2526                __U);
   2527 }
   2528 
   2529 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   2530 _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) {
   2531   return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P,
   2532               (__v4df) __W,
   2533               (__mmask8)
   2534               __U);
   2535 }
   2536 
   2537 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   2538 _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
   2539   return (__m256d) __builtin_ia32_expandloaddf256_mask ((const __v4df *) __P,
   2540                (__v4df)
   2541                _mm256_setzero_pd (),
   2542                (__mmask8)
   2543                __U);
   2544 }
   2545 
   2546 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2547 _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) {
   2548   return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P,
   2549               (__v2di) __W,
   2550               (__mmask8)
   2551               __U);
   2552 }
   2553 
   2554 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2555 _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
   2556   return (__m128i) __builtin_ia32_expandloaddi128_mask ((const __v2di *) __P,
   2557                (__v2di)
   2558                _mm_setzero_si128 (),
   2559                (__mmask8)
   2560                __U);
   2561 }
   2562 
   2563 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   2564 _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
   2565              void const *__P) {
   2566   return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P,
   2567               (__v4di) __W,
   2568               (__mmask8)
   2569               __U);
   2570 }
   2571 
   2572 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   2573 _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
   2574   return (__m256i) __builtin_ia32_expandloaddi256_mask ((const __v4di *) __P,
   2575                (__v4di)
   2576                _mm256_setzero_si256 (),
   2577                (__mmask8)
   2578                __U);
   2579 }
   2580 
   2581 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   2582 _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) {
   2583   return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P,
   2584                    (__v4sf) __W,
   2585                    (__mmask8) __U);
   2586 }
   2587 
   2588 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   2589 _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
   2590   return (__m128) __builtin_ia32_expandloadsf128_mask ((const __v4sf *) __P,
   2591               (__v4sf)
   2592               _mm_setzero_ps (),
   2593               (__mmask8)
   2594               __U);
   2595 }
   2596 
   2597 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   2598 _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) {
   2599   return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P,
   2600                    (__v8sf) __W,
   2601                    (__mmask8) __U);
   2602 }
   2603 
   2604 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   2605 _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
   2606   return (__m256) __builtin_ia32_expandloadsf256_mask ((const __v8sf *) __P,
   2607               (__v8sf)
   2608               _mm256_setzero_ps (),
   2609               (__mmask8)
   2610               __U);
   2611 }
   2612 
   2613 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2614 _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) {
   2615   return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P,
   2616               (__v4si) __W,
   2617               (__mmask8)
   2618               __U);
   2619 }
   2620 
   2621 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2622 _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
   2623   return (__m128i) __builtin_ia32_expandloadsi128_mask ((const __v4si *) __P,
   2624                (__v4si)
   2625                _mm_setzero_si128 (),
   2626                (__mmask8)     __U);
   2627 }
   2628 
   2629 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   2630 _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
   2631              void const *__P) {
   2632   return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P,
   2633               (__v8si) __W,
   2634               (__mmask8)
   2635               __U);
   2636 }
   2637 
   2638 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   2639 _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
   2640   return (__m256i) __builtin_ia32_expandloadsi256_mask ((const __v8si *) __P,
   2641                (__v8si)
   2642                _mm256_setzero_si256 (),
   2643                (__mmask8)
   2644                __U);
   2645 }
   2646 
   2647 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   2648 _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) {
   2649   return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
   2650                (__v4sf) __W,
   2651                (__mmask8) __U);
   2652 }
   2653 
   2654 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   2655 _mm_maskz_expand_ps (__mmask8 __U, __m128 __A) {
   2656   return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
   2657                 (__v4sf)
   2658                 _mm_setzero_ps (),
   2659                 (__mmask8) __U);
   2660 }
   2661 
   2662 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   2663 _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) {
   2664   return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
   2665                (__v8sf) __W,
   2666                (__mmask8) __U);
   2667 }
   2668 
   2669 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   2670 _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) {
   2671   return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
   2672                 (__v8sf)
   2673                 _mm256_setzero_ps (),
   2674                 (__mmask8) __U);
   2675 }
   2676 
   2677 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2678 _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
   2679   return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
   2680                 (__v4si) __W,
   2681                 (__mmask8) __U);
   2682 }
   2683 
   2684 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2685 _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) {
   2686   return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
   2687                  (__v4si)
   2688                  _mm_setzero_si128 (),
   2689                  (__mmask8) __U);
   2690 }
   2691 
   /* Merge-masked 256-bit expand on __v8si lanes: forwards __A, __W and
    * mask __U to __builtin_ia32_expandsi256_mask. __W is presumably the
    * pass-through vector for unselected lanes -- confirm. */
   2692 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   2693 _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
   2694   return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
   2695                 (__v8si) __W,
   2696                 (__mmask8) __U);
   2697 }
   2698 
   /* Zero-masked 256-bit expand on __v8si lanes: calls
    * __builtin_ia32_expandsi256_mask with __A, the vector returned by
    * _mm256_setzero_si256() as the second operand, and mask __U. */
   2699 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   2700 _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) {
   2701   return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
   2702                  (__v8si)
   2703                  _mm256_setzero_si256 (),
   2704                  (__mmask8) __U);
   2705 }
   2706 
   /* Unmasked 128-bit double getexp: calls __builtin_ia32_getexppd128_mask
    * on __A with a _mm_setzero_pd() second operand and an all-ones mask
    * ((__mmask8)-1), so no lane is masked off. */
   2707 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   2708 _mm_getexp_pd (__m128d __A) {
   2709   return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
   2710                 (__v2df)
   2711                 _mm_setzero_pd (),
   2712                 (__mmask8) -1);
   2713 }
   2714 
   /* Merge-masked 128-bit double getexp: forwards __A, __W and mask __U to
    * __builtin_ia32_getexppd128_mask. __W is presumably what masked-off
    * lanes keep -- confirm against the builtin. */
   2715 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   2716 _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) {
   2717   return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
   2718                 (__v2df) __W,
   2719                 (__mmask8) __U);
   2720 }
   2721 
   /* Zero-masked 128-bit double getexp: calls
    * __builtin_ia32_getexppd128_mask with __A, the vector returned by
    * _mm_setzero_pd() as the second operand, and mask __U. */
   2722 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   2723 _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) {
   2724   return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
   2725                 (__v2df)
   2726                 _mm_setzero_pd (),
   2727                 (__mmask8) __U);
   2728 }
   2729 
   /* Unmasked 256-bit double getexp: calls __builtin_ia32_getexppd256_mask
    * on __A with a _mm256_setzero_pd() second operand and an all-ones mask
    * ((__mmask8)-1), so no lane is masked off. */
   2730 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   2731 _mm256_getexp_pd (__m256d __A) {
   2732   return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
   2733                 (__v4df)
   2734                 _mm256_setzero_pd (),
   2735                 (__mmask8) -1);
   2736 }
   2737 
   /* Merge-masked 256-bit double getexp: forwards __A, __W and mask __U to
    * __builtin_ia32_getexppd256_mask. __W is presumably what masked-off
    * lanes keep -- confirm against the builtin. */
   2738 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   2739 _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) {
   2740   return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
   2741                 (__v4df) __W,
   2742                 (__mmask8) __U);
   2743 }
   2744 
   /* Zero-masked 256-bit double getexp: calls
    * __builtin_ia32_getexppd256_mask with __A, the vector returned by
    * _mm256_setzero_pd() as the second operand, and mask __U. */
   2745 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   2746 _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) {
   2747   return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
   2748                 (__v4df)
   2749                 _mm256_setzero_pd (),
   2750                 (__mmask8) __U);
   2751 }
   2752 
   /* Unmasked 128-bit float getexp: calls __builtin_ia32_getexpps128_mask
    * on __A with a _mm_setzero_ps() second operand and an all-ones mask
    * ((__mmask8)-1), so no lane is masked off. */
   2753 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   2754 _mm_getexp_ps (__m128 __A) {
   2755   return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
   2756                (__v4sf)
   2757                _mm_setzero_ps (),
   2758                (__mmask8) -1);
   2759 }
   2760 
   /* Merge-masked 128-bit float getexp: forwards __A, __W and mask __U to
    * __builtin_ia32_getexpps128_mask. __W is presumably what masked-off
    * lanes keep -- confirm against the builtin. */
   2761 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   2762 _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) {
   2763   return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
   2764                (__v4sf) __W,
   2765                (__mmask8) __U);
   2766 }
   2767 
   /* Zero-masked 128-bit float getexp: calls
    * __builtin_ia32_getexpps128_mask with __A, the vector returned by
    * _mm_setzero_ps() as the second operand, and mask __U. */
   2768 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   2769 _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) {
   2770   return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
   2771                (__v4sf)
   2772                _mm_setzero_ps (),
   2773                (__mmask8) __U);
   2774 }
   2775 
   /* Unmasked 256-bit float getexp: calls __builtin_ia32_getexpps256_mask
    * on __A with a _mm256_setzero_ps() second operand and an all-ones mask
    * ((__mmask8)-1), so no lane is masked off. */
   2776 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   2777 _mm256_getexp_ps (__m256 __A) {
   2778   return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
   2779                (__v8sf)
   2780                _mm256_setzero_ps (),
   2781                (__mmask8) -1);
   2782 }
   2783 
   /* Merge-masked 256-bit float getexp: forwards __A, __W and mask __U to
    * __builtin_ia32_getexpps256_mask. __W is presumably what masked-off
    * lanes keep -- confirm against the builtin. */
   2784 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   2785 _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) {
   2786   return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
   2787                (__v8sf) __W,
   2788                (__mmask8) __U);
   2789 }
   2790 
   /* Zero-masked 256-bit float getexp: calls
    * __builtin_ia32_getexpps256_mask with __A, the vector returned by
    * _mm256_setzero_ps() as the second operand, and mask __U. */
   2791 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   2792 _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) {
   2793   return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
   2794                (__v8sf)
   2795                _mm256_setzero_ps (),
   2796                (__mmask8) __U);
   2797 }
   2798 
   /* Merge-masked _mm_max_pd: evaluates _mm_max_pd(__A, __B) and hands the
    * result plus __W to __builtin_ia32_selectpd_128 under mask __U.
    * Presumably set mask bits take the max and clear bits keep __W. */
   2799 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   2800 _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   2801   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   2802                                               (__v2df)_mm_max_pd(__A, __B),
   2803                                               (__v2df)__W);
   2804 }
   2805 
   /* Zero-masked _mm_max_pd: __builtin_ia32_selectpd_128 chooses, under
    * mask __U, between _mm_max_pd(__A, __B) and the vector from
    * _mm_setzero_pd(). */
   2806 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   2807 _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   2808   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   2809                                               (__v2df)_mm_max_pd(__A, __B),
   2810                                               (__v2df)_mm_setzero_pd());
   2811 }
   2812 
   /* Merge-masked _mm256_max_pd: evaluates _mm256_max_pd(__A, __B) and hands
    * the result plus __W to __builtin_ia32_selectpd_256 under mask __U.
    * Presumably set mask bits take the max and clear bits keep __W. */
   2813 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   2814 _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   2815   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   2816                                               (__v4df)_mm256_max_pd(__A, __B),
   2817                                               (__v4df)__W);
   2818 }
   2819 
   /* Zero-masked _mm256_max_pd: __builtin_ia32_selectpd_256 chooses, under
    * mask __U, between _mm256_max_pd(__A, __B) and the vector from
    * _mm256_setzero_pd(). */
   2820 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   2821 _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   2822   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   2823                                               (__v4df)_mm256_max_pd(__A, __B),
   2824                                               (__v4df)_mm256_setzero_pd());
   2825 }
   2826 
   /* Merge-masked _mm_max_ps: evaluates _mm_max_ps(__A, __B) and hands the
    * result plus __W to __builtin_ia32_selectps_128 under mask __U.
    * Presumably set mask bits take the max and clear bits keep __W. */
   2827 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   2828 _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   2829   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   2830                                              (__v4sf)_mm_max_ps(__A, __B),
   2831                                              (__v4sf)__W);
   2832 }
   2833 
   /* Zero-masked _mm_max_ps: __builtin_ia32_selectps_128 chooses, under
    * mask __U, between _mm_max_ps(__A, __B) and the vector from
    * _mm_setzero_ps(). */
   2834 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   2835 _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   2836   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   2837                                              (__v4sf)_mm_max_ps(__A, __B),
   2838                                              (__v4sf)_mm_setzero_ps());
   2839 }
   2840 
   /* Merge-masked _mm256_max_ps: evaluates _mm256_max_ps(__A, __B) and hands
    * the result plus __W to __builtin_ia32_selectps_256 under mask __U.
    * Presumably set mask bits take the max and clear bits keep __W. */
   2841 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   2842 _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   2843   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   2844                                              (__v8sf)_mm256_max_ps(__A, __B),
   2845                                              (__v8sf)__W);
   2846 }
   2847 
   /* Zero-masked _mm256_max_ps: __builtin_ia32_selectps_256 chooses, under
    * mask __U, between _mm256_max_ps(__A, __B) and the vector from
    * _mm256_setzero_ps(). */
   2848 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   2849 _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   2850   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   2851                                              (__v8sf)_mm256_max_ps(__A, __B),
   2852                                              (__v8sf)_mm256_setzero_ps());
   2853 }
   2854 
   /* Merge-masked _mm_min_pd: evaluates _mm_min_pd(__A, __B) and hands the
    * result plus __W to __builtin_ia32_selectpd_128 under mask __U.
    * Presumably set mask bits take the min and clear bits keep __W. */
   2855 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   2856 _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   2857   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   2858                                               (__v2df)_mm_min_pd(__A, __B),
   2859                                               (__v2df)__W);
   2860 }
   2861 
   /* Zero-masked _mm_min_pd: __builtin_ia32_selectpd_128 chooses, under
    * mask __U, between _mm_min_pd(__A, __B) and the vector from
    * _mm_setzero_pd(). */
   2862 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   2863 _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   2864   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   2865                                               (__v2df)_mm_min_pd(__A, __B),
   2866                                               (__v2df)_mm_setzero_pd());
   2867 }
   2868 
   /* Merge-masked _mm256_min_pd: evaluates _mm256_min_pd(__A, __B) and hands
    * the result plus __W to __builtin_ia32_selectpd_256 under mask __U.
    * Presumably set mask bits take the min and clear bits keep __W. */
   2869 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   2870 _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   2871   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   2872                                               (__v4df)_mm256_min_pd(__A, __B),
   2873                                               (__v4df)__W);
   2874 }
   2875 
   /* Zero-masked _mm256_min_pd: __builtin_ia32_selectpd_256 chooses, under
    * mask __U, between _mm256_min_pd(__A, __B) and the vector from
    * _mm256_setzero_pd(). */
   2876 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   2877 _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   2878   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   2879                                               (__v4df)_mm256_min_pd(__A, __B),
   2880                                               (__v4df)_mm256_setzero_pd());
   2881 }
   2882 
   /* Merge-masked _mm_min_ps: evaluates _mm_min_ps(__A, __B) and hands the
    * result plus __W to __builtin_ia32_selectps_128 under mask __U.
    * Presumably set mask bits take the min and clear bits keep __W. */
   2883 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   2884 _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   2885   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   2886                                              (__v4sf)_mm_min_ps(__A, __B),
   2887                                              (__v4sf)__W);
   2888 }
   2889 
   /* Zero-masked _mm_min_ps: __builtin_ia32_selectps_128 chooses, under
    * mask __U, between _mm_min_ps(__A, __B) and the vector from
    * _mm_setzero_ps(). */
   2890 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   2891 _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   2892   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   2893                                              (__v4sf)_mm_min_ps(__A, __B),
   2894                                              (__v4sf)_mm_setzero_ps());
   2895 }
   2896 
   /* Merge-masked _mm256_min_ps: evaluates _mm256_min_ps(__A, __B) and hands
    * the result plus __W to __builtin_ia32_selectps_256 under mask __U.
    * Presumably set mask bits take the min and clear bits keep __W. */
   2897 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   2898 _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   2899   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   2900                                              (__v8sf)_mm256_min_ps(__A, __B),
   2901                                              (__v8sf)__W);
   2902 }
   2903 
   /* Zero-masked _mm256_min_ps: __builtin_ia32_selectps_256 chooses, under
    * mask __U, between _mm256_min_ps(__A, __B) and the vector from
    * _mm256_setzero_ps(). */
   2904 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   2905 _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   2906   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   2907                                              (__v8sf)_mm256_min_ps(__A, __B),
   2908                                              (__v8sf)_mm256_setzero_ps());
   2909 }
   2910 
   /* Merge-masked _mm_mul_pd: evaluates _mm_mul_pd(__A, __B) and hands the
    * result plus __W to __builtin_ia32_selectpd_128 under mask __U.
    * Presumably set mask bits take the product and clear bits keep __W. */
   2911 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   2912 _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   2913   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   2914                                               (__v2df)_mm_mul_pd(__A, __B),
   2915                                               (__v2df)__W);
   2916 }
   2917 
   /* Zero-masked _mm_mul_pd: __builtin_ia32_selectpd_128 chooses, under
    * mask __U, between _mm_mul_pd(__A, __B) and the vector from
    * _mm_setzero_pd(). */
   2918 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   2919 _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   2920   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   2921                                               (__v2df)_mm_mul_pd(__A, __B),
   2922                                               (__v2df)_mm_setzero_pd());
   2923 }
   2924 
   /* Merge-masked _mm256_mul_pd: evaluates _mm256_mul_pd(__A, __B) and hands
    * the result plus __W to __builtin_ia32_selectpd_256 under mask __U.
    * Presumably set mask bits take the product and clear bits keep __W. */
   2925 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   2926 _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   2927   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   2928                                               (__v4df)_mm256_mul_pd(__A, __B),
   2929                                               (__v4df)__W);
   2930 }
   2931 
   /* Zero-masked _mm256_mul_pd: __builtin_ia32_selectpd_256 chooses, under
    * mask __U, between _mm256_mul_pd(__A, __B) and the vector from
    * _mm256_setzero_pd(). */
   2932 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   2933 _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   2934   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   2935                                               (__v4df)_mm256_mul_pd(__A, __B),
   2936                                               (__v4df)_mm256_setzero_pd());
   2937 }
   2938 
   /* Merge-masked _mm_mul_ps: evaluates _mm_mul_ps(__A, __B) and hands the
    * result plus __W to __builtin_ia32_selectps_128 under mask __U.
    * Presumably set mask bits take the product and clear bits keep __W. */
   2939 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   2940 _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   2941   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   2942                                              (__v4sf)_mm_mul_ps(__A, __B),
   2943                                              (__v4sf)__W);
   2944 }
   2945 
   /* Zero-masked _mm_mul_ps: __builtin_ia32_selectps_128 chooses, under
    * mask __U, between _mm_mul_ps(__A, __B) and the vector from
    * _mm_setzero_ps(). */
   2946 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   2947 _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   2948   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   2949                                              (__v4sf)_mm_mul_ps(__A, __B),
   2950                                              (__v4sf)_mm_setzero_ps());
   2951 }
   2952 
   /* Merge-masked _mm256_mul_ps: evaluates _mm256_mul_ps(__A, __B) and hands
    * the result plus __W to __builtin_ia32_selectps_256 under mask __U.
    * Presumably set mask bits take the product and clear bits keep __W. */
   2953 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   2954 _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   2955   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   2956                                              (__v8sf)_mm256_mul_ps(__A, __B),
   2957                                              (__v8sf)__W);
   2958 }
   2959 
   /* Zero-masked _mm256_mul_ps: __builtin_ia32_selectps_256 chooses, under
    * mask __U, between _mm256_mul_ps(__A, __B) and the vector from
    * _mm256_setzero_ps(). */
   2960 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   2961 _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   2962   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   2963                                              (__v8sf)_mm256_mul_ps(__A, __B),
   2964                                              (__v8sf)_mm256_setzero_ps());
   2965 }
   2966 
   /* Merge-masked _mm_abs_epi32: evaluates _mm_abs_epi32(__A) and hands the
    * result plus __W to __builtin_ia32_selectd_128 under mask __U.
    * Presumably set mask bits take the abs result and clear bits keep __W. */
   2967 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2968 _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
   2969   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   2970                                              (__v4si)_mm_abs_epi32(__A),
   2971                                              (__v4si)__W);
   2972 }
   2973 
   /* Zero-masked _mm_abs_epi32: __builtin_ia32_selectd_128 chooses, under
    * mask __U, between _mm_abs_epi32(__A) and the vector from
    * _mm_setzero_si128(). */
   2974 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2975 _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) {
   2976   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   2977                                              (__v4si)_mm_abs_epi32(__A),
   2978                                              (__v4si)_mm_setzero_si128());
   2979 }
   2980 
   /* Merge-masked _mm256_abs_epi32: evaluates _mm256_abs_epi32(__A) and hands
    * the result plus __W to __builtin_ia32_selectd_256 under mask __U.
    * Presumably set mask bits take the abs result and clear bits keep __W. */
   2981 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   2982 _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
   2983   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   2984                                              (__v8si)_mm256_abs_epi32(__A),
   2985                                              (__v8si)__W);
   2986 }
   2987 
   /* Zero-masked _mm256_abs_epi32: __builtin_ia32_selectd_256 chooses, under
    * mask __U, between _mm256_abs_epi32(__A) and the vector from
    * _mm256_setzero_si256(). */
   2988 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   2989 _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) {
   2990   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   2991                                              (__v8si)_mm256_abs_epi32(__A),
   2992                                              (__v8si)_mm256_setzero_si256());
   2993 }
   2994 
   /* Applies __builtin_elementwise_abs to __A reinterpreted as __v2di and
    * returns the result as __m128i. */
   2995 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   2996 _mm_abs_epi64 (__m128i __A) {
   2997   return (__m128i)__builtin_elementwise_abs((__v2di)__A);
   2998 }
   2999 
   /* Merge-masked _mm_abs_epi64: evaluates _mm_abs_epi64(__A) and hands the
    * result plus __W to __builtin_ia32_selectq_128 under mask __U.
    * Presumably set mask bits take the abs result and clear bits keep __W. */
   3000 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3001 _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
   3002   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   3003                                              (__v2di)_mm_abs_epi64(__A),
   3004                                              (__v2di)__W);
   3005 }
   3006 
   /* Zero-masked _mm_abs_epi64: __builtin_ia32_selectq_128 chooses, under
    * mask __U, between _mm_abs_epi64(__A) and the vector from
    * _mm_setzero_si128(). */
   3007 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3008 _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) {
   3009   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   3010                                              (__v2di)_mm_abs_epi64(__A),
   3011                                              (__v2di)_mm_setzero_si128());
   3012 }
   3013 
   /* Applies __builtin_elementwise_abs to __A reinterpreted as __v4di and
    * returns the result as __m256i. */
   3014 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3015 _mm256_abs_epi64 (__m256i __A) {
   3016   return (__m256i)__builtin_elementwise_abs((__v4di)__A);
   3017 }
   3018 
   /* Merge-masked _mm256_abs_epi64: evaluates _mm256_abs_epi64(__A) and hands
    * the result plus __W to __builtin_ia32_selectq_256 under mask __U.
    * Presumably set mask bits take the abs result and clear bits keep __W. */
   3019 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3020 _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
   3021   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   3022                                              (__v4di)_mm256_abs_epi64(__A),
   3023                                              (__v4di)__W);
   3024 }
   3025 
   /* Zero-masked _mm256_abs_epi64: __builtin_ia32_selectq_256 chooses, under
    * mask __U, between _mm256_abs_epi64(__A) and the vector from
    * _mm256_setzero_si256(). */
   3026 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3027 _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) {
   3028   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   3029                                              (__v4di)_mm256_abs_epi64(__A),
   3030                                              (__v4di)_mm256_setzero_si256());
   3031 }
   3032 
   /* Zero-masked _mm_max_epi32: __builtin_ia32_selectd_128 chooses, under
    * mask __M, between _mm_max_epi32(__A, __B) and the vector from
    * _mm_setzero_si128(). */
   3033 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3034 _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
   3035   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
   3036                                              (__v4si)_mm_max_epi32(__A, __B),
   3037                                              (__v4si)_mm_setzero_si128());
   3038 }
   3039 
   /* Merge-masked _mm_max_epi32: evaluates _mm_max_epi32(__A, __B) and hands
    * the result plus __W to __builtin_ia32_selectd_128 under mask __M.
    * Presumably set mask bits take the max and clear bits keep __W. */
   3040 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3041 _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   3042   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
   3043                                              (__v4si)_mm_max_epi32(__A, __B),
   3044                                              (__v4si)__W);
   3045 }
   3046 
   /* Zero-masked _mm256_max_epi32: __builtin_ia32_selectd_256 chooses, under
    * mask __M, between _mm256_max_epi32(__A, __B) and the vector from
    * _mm256_setzero_si256(). */
   3047 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3048 _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
   3049   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
   3050                                              (__v8si)_mm256_max_epi32(__A, __B),
   3051                                              (__v8si)_mm256_setzero_si256());
   3052 }
   3053 
   /* Merge-masked _mm256_max_epi32: evaluates _mm256_max_epi32(__A, __B) and
    * hands the result plus __W to __builtin_ia32_selectd_256 under mask __M.
    * Presumably set mask bits take the max and clear bits keep __W. */
   3054 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3055 _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   3056   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
   3057                                              (__v8si)_mm256_max_epi32(__A, __B),
   3058                                              (__v8si)__W);
   3059 }
   3060 
   /* Applies __builtin_elementwise_max to __A and __B, both reinterpreted as
    * __v2di, and returns the result as __m128i. */
   3061 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3062 _mm_max_epi64 (__m128i __A, __m128i __B) {
   3063   return (__m128i)__builtin_elementwise_max((__v2di)__A, (__v2di)__B);
   3064 }
   3065 
   /* Zero-masked _mm_max_epi64: __builtin_ia32_selectq_128 chooses, under
    * mask __M, between _mm_max_epi64(__A, __B) and the vector from
    * _mm_setzero_si128(). */
   3066 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3067 _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
   3068   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
   3069                                              (__v2di)_mm_max_epi64(__A, __B),
   3070                                              (__v2di)_mm_setzero_si128());
   3071 }
   3072 
   /* Merge-masked _mm_max_epi64: evaluates _mm_max_epi64(__A, __B) and hands
    * the result plus __W to __builtin_ia32_selectq_128 under mask __M.
    * Presumably set mask bits take the max and clear bits keep __W. */
   3073 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3074 _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   3075   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
   3076                                              (__v2di)_mm_max_epi64(__A, __B),
   3077                                              (__v2di)__W);
   3078 }
   3079 
   /* Applies __builtin_elementwise_max to __A and __B, both reinterpreted as
    * __v4di, and returns the result as __m256i. */
   3080 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3081 _mm256_max_epi64 (__m256i __A, __m256i __B) {
   3082   return (__m256i)__builtin_elementwise_max((__v4di)__A, (__v4di)__B);
   3083 }
   3084 
   /* Zero-masked _mm256_max_epi64: __builtin_ia32_selectq_256 chooses, under
    * mask __M, between _mm256_max_epi64(__A, __B) and the vector from
    * _mm256_setzero_si256(). */
   3085 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3086 _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
   3087   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
   3088                                              (__v4di)_mm256_max_epi64(__A, __B),
   3089                                              (__v4di)_mm256_setzero_si256());
   3090 }
   3091 
   /* Merge-masked _mm256_max_epi64: evaluates _mm256_max_epi64(__A, __B) and
    * hands the result plus __W to __builtin_ia32_selectq_256 under mask __M.
    * Presumably set mask bits take the max and clear bits keep __W. */
   3092 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3093 _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   3094   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
   3095                                              (__v4di)_mm256_max_epi64(__A, __B),
   3096                                              (__v4di)__W);
   3097 }
   3098 
   /* Zero-masked _mm_max_epu32: __builtin_ia32_selectd_128 chooses, under
    * mask __M, between _mm_max_epu32(__A, __B) and the vector from
    * _mm_setzero_si128(). The __v4si cast only retypes the already-computed
    * result for the select builtin. */
   3099 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3100 _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
   3101   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
   3102                                              (__v4si)_mm_max_epu32(__A, __B),
   3103                                              (__v4si)_mm_setzero_si128());
   3104 }
   3105 
   /* Merge-masked _mm_max_epu32: evaluates _mm_max_epu32(__A, __B) and hands
    * the result plus __W to __builtin_ia32_selectd_128 under mask __M.
    * Presumably set mask bits take the max and clear bits keep __W. */
   3106 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3107 _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   3108   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
   3109                                              (__v4si)_mm_max_epu32(__A, __B),
   3110                                              (__v4si)__W);
   3111 }
   3112 
   /* Zero-masked _mm256_max_epu32: __builtin_ia32_selectd_256 chooses, under
    * mask __M, between _mm256_max_epu32(__A, __B) and the vector from
    * _mm256_setzero_si256(). The __v8si cast only retypes the
    * already-computed result for the select builtin. */
   3113 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3114 _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
   3115   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
   3116                                              (__v8si)_mm256_max_epu32(__A, __B),
   3117                                              (__v8si)_mm256_setzero_si256());
   3118 }
   3119 
   /* Merge-masked _mm256_max_epu32: evaluates _mm256_max_epu32(__A, __B) and
    * hands the result plus __W to __builtin_ia32_selectd_256 under mask __M.
    * Presumably set mask bits take the max and clear bits keep __W. */
   3120 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3121 _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   3122   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
   3123                                              (__v8si)_mm256_max_epu32(__A, __B),
   3124                                              (__v8si)__W);
   3125 }
   3126 
   /* Applies __builtin_elementwise_max to __A and __B, both reinterpreted as
    * __v2du (presumably the unsigned 64-bit lane type -- defined elsewhere),
    * and returns the result as __m128i. */
   3127 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3128 _mm_max_epu64 (__m128i __A, __m128i __B) {
   3129   return (__m128i)__builtin_elementwise_max((__v2du)__A, (__v2du)__B);
   3130 }
   3131 
   /* Zero-masked _mm_max_epu64: __builtin_ia32_selectq_128 chooses, under
    * mask __M, between _mm_max_epu64(__A, __B) and the vector from
    * _mm_setzero_si128(). The __v2di cast only retypes the already-computed
    * result for the select builtin. */
   3132 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3133 _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
   3134   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
   3135                                              (__v2di)_mm_max_epu64(__A, __B),
   3136                                              (__v2di)_mm_setzero_si128());
   3137 }
   3138 
   /* Merge-masked _mm_max_epu64: evaluates _mm_max_epu64(__A, __B) and hands
    * the result plus __W to __builtin_ia32_selectq_128 under mask __M.
    * Presumably set mask bits take the max and clear bits keep __W. */
   3139 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3140 _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   3141   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
   3142                                              (__v2di)_mm_max_epu64(__A, __B),
   3143                                              (__v2di)__W);
   3144 }
   3145 
   /* Applies __builtin_elementwise_max to __A and __B, both reinterpreted as
    * __v4du (presumably the unsigned 64-bit lane type -- defined elsewhere),
    * and returns the result as __m256i. */
   3146 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3147 _mm256_max_epu64 (__m256i __A, __m256i __B) {
   3148   return (__m256i)__builtin_elementwise_max((__v4du)__A, (__v4du)__B);
   3149 }
   3150 
   /* Zero-masked _mm256_max_epu64: __builtin_ia32_selectq_256 chooses, under
    * mask __M, between _mm256_max_epu64(__A, __B) and the vector from
    * _mm256_setzero_si256(). The __v4di cast only retypes the
    * already-computed result for the select builtin. */
   3151 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3152 _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
   3153   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
   3154                                              (__v4di)_mm256_max_epu64(__A, __B),
   3155                                              (__v4di)_mm256_setzero_si256());
   3156 }
   3157 
   /* Merge-masked _mm256_max_epu64: evaluates _mm256_max_epu64(__A, __B) and
    * hands the result plus __W to __builtin_ia32_selectq_256 under mask __M.
    * Presumably set mask bits take the max and clear bits keep __W. */
   3158 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3159 _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   3160   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
   3161                                              (__v4di)_mm256_max_epu64(__A, __B),
   3162                                              (__v4di)__W);
   3163 }
   3164 
   /* Zero-masked _mm_min_epi32: __builtin_ia32_selectd_128 chooses, under
    * mask __M, between _mm_min_epi32(__A, __B) and the vector from
    * _mm_setzero_si128(). */
   3165 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3166 _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
   3167   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
   3168                                              (__v4si)_mm_min_epi32(__A, __B),
   3169                                              (__v4si)_mm_setzero_si128());
   3170 }
   3171 
   /* Merge-masked _mm_min_epi32: evaluates _mm_min_epi32(__A, __B) and hands
    * the result plus __W to __builtin_ia32_selectd_128 under mask __M.
    * Presumably set mask bits take the min and clear bits keep __W. */
   3172 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3173 _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   3174   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
   3175                                              (__v4si)_mm_min_epi32(__A, __B),
   3176                                              (__v4si)__W);
   3177 }
   3178 
   /* Zero-masked _mm256_min_epi32: __builtin_ia32_selectd_256 chooses, under
    * mask __M, between _mm256_min_epi32(__A, __B) and the vector from
    * _mm256_setzero_si256(). */
   3179 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3180 _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
   3181   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
   3182                                              (__v8si)_mm256_min_epi32(__A, __B),
   3183                                              (__v8si)_mm256_setzero_si256());
   3184 }
   3185 
   /* Merge-masked _mm256_min_epi32: evaluates _mm256_min_epi32(__A, __B) and
    * hands the result plus __W to __builtin_ia32_selectd_256 under mask __M.
    * Presumably set mask bits take the min and clear bits keep __W. */
   3186 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3187 _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   3188   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
   3189                                              (__v8si)_mm256_min_epi32(__A, __B),
   3190                                              (__v8si)__W);
   3191 }
   3192 
   /* Applies __builtin_elementwise_min to __A and __B, both reinterpreted as
    * __v2di, and returns the result as __m128i. */
   3193 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3194 _mm_min_epi64 (__m128i __A, __m128i __B) {
   3195   return (__m128i)__builtin_elementwise_min((__v2di)__A, (__v2di)__B);
   3196 }
   3197 
   /* Merge-masked _mm_min_epi64: evaluates _mm_min_epi64(__A, __B) and hands
    * the result plus __W to __builtin_ia32_selectq_128 under mask __M.
    * Presumably set mask bits take the min and clear bits keep __W. */
   3198 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3199 _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   3200   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
   3201                                              (__v2di)_mm_min_epi64(__A, __B),
   3202                                              (__v2di)__W);
   3203 }
   3204 
   /* Zero-masked _mm_min_epi64: __builtin_ia32_selectq_128 chooses, under
    * mask __M, between _mm_min_epi64(__A, __B) and the vector from
    * _mm_setzero_si128(). */
   3205 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3206 _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
   3207   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
   3208                                              (__v2di)_mm_min_epi64(__A, __B),
   3209                                              (__v2di)_mm_setzero_si128());
   3210 }
   3211 
   /* Applies __builtin_elementwise_min to __A and __B, both reinterpreted as
    * __v4di, and returns the result as __m256i. */
   3212 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3213 _mm256_min_epi64 (__m256i __A, __m256i __B) {
   3214   return (__m256i)__builtin_elementwise_min((__v4di)__A, (__v4di)__B);
   3215 }
   3216 
   /* Merge-masked _mm256_min_epi64: evaluates _mm256_min_epi64(__A, __B) and
    * hands the result plus __W to __builtin_ia32_selectq_256 under mask __M.
    * Presumably set mask bits take the min and clear bits keep __W. */
   3217 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3218 _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   3219   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
   3220                                              (__v4di)_mm256_min_epi64(__A, __B),
   3221                                              (__v4di)__W);
   3222 }
   3223 
   /* Zero-masked _mm256_min_epi64: __builtin_ia32_selectq_256 chooses, under
    * mask __M, between _mm256_min_epi64(__A, __B) and the vector from
    * _mm256_setzero_si256(). */
   3224 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3225 _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
   3226   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
   3227                                              (__v4di)_mm256_min_epi64(__A, __B),
   3228                                              (__v4di)_mm256_setzero_si256());
   3229 }
   3230 
   /* Zero-masked _mm_min_epu32: __builtin_ia32_selectd_128 chooses, under
    * mask __M, between _mm_min_epu32(__A, __B) and the vector from
    * _mm_setzero_si128(). The __v4si cast only retypes the already-computed
    * result for the select builtin. */
   3231 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3232 _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
   3233   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
   3234                                              (__v4si)_mm_min_epu32(__A, __B),
   3235                                              (__v4si)_mm_setzero_si128());
   3236 }
   3237 
   /* Merge-masked _mm_min_epu32: evaluates _mm_min_epu32(__A, __B) and hands
    * the result plus __W to __builtin_ia32_selectd_128 under mask __M.
    * Presumably set mask bits take the min and clear bits keep __W. */
   3238 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3239 _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   3240   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
   3241                                              (__v4si)_mm_min_epu32(__A, __B),
   3242                                              (__v4si)__W);
   3243 }
   3244 
   /* Zero-masked _mm256_min_epu32: __builtin_ia32_selectd_256 chooses, under
    * mask __M, between _mm256_min_epu32(__A, __B) and the vector from
    * _mm256_setzero_si256(). The __v8si cast only retypes the
    * already-computed result for the select builtin. */
   3245 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3246 _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
   3247   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
   3248                                              (__v8si)_mm256_min_epu32(__A, __B),
   3249                                              (__v8si)_mm256_setzero_si256());
   3250 }
   3251 
   /* Merge-masked _mm256_min_epu32: evaluates _mm256_min_epu32(__A, __B) and
    * hands the result plus __W to __builtin_ia32_selectd_256 under mask __M.
    * Presumably set mask bits take the min and clear bits keep __W. */
   3252 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3253 _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   3254   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
   3255                                              (__v8si)_mm256_min_epu32(__A, __B),
   3256                                              (__v8si)__W);
   3257 }
   3258 
   /* Applies __builtin_elementwise_min to __A and __B, both reinterpreted as
    * __v2du (presumably the unsigned 64-bit lane type -- defined elsewhere),
    * and returns the result as __m128i. */
   3259 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3260 _mm_min_epu64 (__m128i __A, __m128i __B) {
   3261   return (__m128i)__builtin_elementwise_min((__v2du)__A, (__v2du)__B);
   3262 }
   3263 
   /* Merge-masked _mm_min_epu64: evaluates _mm_min_epu64(__A, __B) and hands
    * the result plus __W to __builtin_ia32_selectq_128 under mask __M.
    * Presumably set mask bits take the min and clear bits keep __W. */
   3264 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3265 _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   3266   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
   3267                                              (__v2di)_mm_min_epu64(__A, __B),
   3268                                              (__v2di)__W);
   3269 }
   3270 
   /* Zero-masked _mm_min_epu64: __builtin_ia32_selectq_128 chooses, under
    * mask __M, between _mm_min_epu64(__A, __B) and the vector from
    * _mm_setzero_si128(). The __v2di cast only retypes the already-computed
    * result for the select builtin. */
   3271 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3272 _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
   3273   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
   3274                                              (__v2di)_mm_min_epu64(__A, __B),
   3275                                              (__v2di)_mm_setzero_si128());
   3276 }
   3277 
   /* Applies __builtin_elementwise_min to __A and __B, both reinterpreted as
    * __v4du (presumably the unsigned 64-bit lane type -- defined elsewhere),
    * and returns the result as __m256i. */
   3278 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3279 _mm256_min_epu64 (__m256i __A, __m256i __B) {
   3280   return (__m256i)__builtin_elementwise_min((__v4du)__A, (__v4du)__B);
   3281 }
   3282 
   /* Merge-masked _mm256_min_epu64: evaluates _mm256_min_epu64(__A, __B) and
    * hands the result plus __W to __builtin_ia32_selectq_256 under mask __M.
    * Presumably set mask bits take the min and clear bits keep __W. */
   3283 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3284 _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   3285   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
   3286                                              (__v4di)_mm256_min_epu64(__A, __B),
   3287                                              (__v4di)__W);
   3288 }
   3289 
   /* Zero-masked _mm256_min_epu64: __builtin_ia32_selectq_256 chooses, under
    * mask __M, between _mm256_min_epu64(__A, __B) and the vector from
    * _mm256_setzero_si256(). The __v4di cast only retypes the
    * already-computed result for the select builtin. */
   3290 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3291 _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
   3292   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
   3293                                              (__v4di)_mm256_min_epu64(__A, __B),
   3294                                              (__v4di)_mm256_setzero_si256());
   3295 }
   3296 
   /* Unmasked 128-bit double roundscale. Expands to
    * __builtin_ia32_rndscalepd_128_mask with A cast to __v2df, imm cast to
    * int, a _mm_setzero_pd() vector operand and an all-ones mask.
    * NOTE(review): presumably a macro because imm must be a compile-time
    * constant -- confirm. */
   3297 #define _mm_roundscale_pd(A, imm) \
   3298   ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
   3299                                                (int)(imm), \
   3300                                                (__v2df)_mm_setzero_pd(), \
   3301                                                (__mmask8)-1))
   3302 
   3303 
   /* Merge-masked 128-bit double roundscale. Expands to
    * __builtin_ia32_rndscalepd_128_mask with A cast to __v2df, imm cast to
    * int, W as the vector operand and U as the mask. W is presumably what
    * masked-off lanes keep -- confirm against the builtin. */
   3304 #define _mm_mask_roundscale_pd(W, U, A, imm) \
   3305   ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
   3306                                                (int)(imm), \
   3307                                                (__v2df)(__m128d)(W), \
   3308                                                (__mmask8)(U)))
   3309 
   3310 
   /* Zero-masked 128-bit double roundscale. Expands to
    * __builtin_ia32_rndscalepd_128_mask with A cast to __v2df, imm cast to
    * int, a _mm_setzero_pd() vector operand and U as the mask. */
   3311 #define _mm_maskz_roundscale_pd(U, A, imm) \
   3312   ((__m128d)__builtin_ia32_rndscalepd_128_mask((__v2df)(__m128d)(A), \
   3313                                                (int)(imm), \
   3314                                                (__v2df)_mm_setzero_pd(), \
   3315                                                (__mmask8)(U)))
   3316 
   3317 
   /* Unmasked 256-bit double roundscale. Expands to
    * __builtin_ia32_rndscalepd_256_mask with A cast to __v4df, imm cast to
    * int, a _mm256_setzero_pd() vector operand and an all-ones mask. */
   3318 #define _mm256_roundscale_pd(A, imm) \
   3319   ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
   3320                                                (int)(imm), \
   3321                                                (__v4df)_mm256_setzero_pd(), \
   3322                                                (__mmask8)-1))
   3323 
   3324 
   3325 #define _mm256_mask_roundscale_pd(W, U, A, imm) \
   3326   ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
   3327                                                (int)(imm), \
   3328                                                (__v4df)(__m256d)(W), \
   3329                                                (__mmask8)(U)))
   3330 
   3331 
   3332 #define _mm256_maskz_roundscale_pd(U, A, imm)  \
   3333   ((__m256d)__builtin_ia32_rndscalepd_256_mask((__v4df)(__m256d)(A), \
   3334                                                (int)(imm), \
   3335                                                (__v4df)_mm256_setzero_pd(), \
   3336                                                (__mmask8)(U)))
   3337 
   3338 #define _mm_roundscale_ps(A, imm)  \
   3339   ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
   3340                                               (__v4sf)_mm_setzero_ps(), \
   3341                                               (__mmask8)-1))
   3342 
   3343 
   3344 #define _mm_mask_roundscale_ps(W, U, A, imm)  \
   3345   ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
   3346                                               (__v4sf)(__m128)(W), \
   3347                                               (__mmask8)(U)))
   3348 
   3349 
   3350 #define _mm_maskz_roundscale_ps(U, A, imm)  \
   3351   ((__m128)__builtin_ia32_rndscaleps_128_mask((__v4sf)(__m128)(A), (int)(imm), \
   3352                                               (__v4sf)_mm_setzero_ps(), \
   3353                                               (__mmask8)(U)))
   3354 
   3355 #define _mm256_roundscale_ps(A, imm)  \
   3356   ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
   3357                                               (__v8sf)_mm256_setzero_ps(), \
   3358                                               (__mmask8)-1))
   3359 
   3360 #define _mm256_mask_roundscale_ps(W, U, A, imm)  \
   3361   ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
   3362                                               (__v8sf)(__m256)(W), \
   3363                                               (__mmask8)(U)))
   3364 
   3365 
   3366 #define _mm256_maskz_roundscale_ps(U, A, imm)  \
   3367   ((__m256)__builtin_ia32_rndscaleps_256_mask((__v8sf)(__m256)(A), (int)(imm), \
   3368                                               (__v8sf)_mm256_setzero_ps(), \
   3369                                               (__mmask8)(U)))
   3370 
   3371 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   3372 _mm_scalef_pd (__m128d __A, __m128d __B) {
   3373   return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
   3374                 (__v2df) __B,
   3375                 (__v2df)
   3376                 _mm_setzero_pd (),
   3377                 (__mmask8) -1);
   3378 }
   3379 
   3380 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   3381 _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
   3382         __m128d __B) {
   3383   return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
   3384                 (__v2df) __B,
   3385                 (__v2df) __W,
   3386                 (__mmask8) __U);
   3387 }
   3388 
   3389 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   3390 _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) {
   3391   return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
   3392                 (__v2df) __B,
   3393                 (__v2df)
   3394                 _mm_setzero_pd (),
   3395                 (__mmask8) __U);
   3396 }
   3397 
   3398 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   3399 _mm256_scalef_pd (__m256d __A, __m256d __B) {
   3400   return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
   3401                 (__v4df) __B,
   3402                 (__v4df)
   3403                 _mm256_setzero_pd (),
   3404                 (__mmask8) -1);
   3405 }
   3406 
   3407 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   3408 _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
   3409            __m256d __B) {
   3410   return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
   3411                 (__v4df) __B,
   3412                 (__v4df) __W,
   3413                 (__mmask8) __U);
   3414 }
   3415 
   3416 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   3417 _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) {
   3418   return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
   3419                 (__v4df) __B,
   3420                 (__v4df)
   3421                 _mm256_setzero_pd (),
   3422                 (__mmask8) __U);
   3423 }
   3424 
   3425 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   3426 _mm_scalef_ps (__m128 __A, __m128 __B) {
   3427   return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
   3428                (__v4sf) __B,
   3429                (__v4sf)
   3430                _mm_setzero_ps (),
   3431                (__mmask8) -1);
   3432 }
   3433 
   3434 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   3435 _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   3436   return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
   3437                (__v4sf) __B,
   3438                (__v4sf) __W,
   3439                (__mmask8) __U);
   3440 }
   3441 
   3442 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   3443 _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) {
   3444   return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
   3445                (__v4sf) __B,
   3446                (__v4sf)
   3447                _mm_setzero_ps (),
   3448                (__mmask8) __U);
   3449 }
   3450 
   3451 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   3452 _mm256_scalef_ps (__m256 __A, __m256 __B) {
   3453   return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
   3454                (__v8sf) __B,
   3455                (__v8sf)
   3456                _mm256_setzero_ps (),
   3457                (__mmask8) -1);
   3458 }
   3459 
   3460 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   3461 _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
   3462            __m256 __B) {
   3463   return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
   3464                (__v8sf) __B,
   3465                (__v8sf) __W,
   3466                (__mmask8) __U);
   3467 }
   3468 
   3469 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   3470 _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
   3471   return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
   3472                (__v8sf) __B,
   3473                (__v8sf)
   3474                _mm256_setzero_ps (),
   3475                (__mmask8) __U);
   3476 }
   3477 
   3478 #define _mm_i64scatter_pd(addr, index, v1, scale) \
   3479   __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)-1, \
   3480                                (__v2di)(__m128i)(index), \
   3481                                (__v2df)(__m128d)(v1), (int)(scale))
   3482 
   3483 #define _mm_mask_i64scatter_pd(addr, mask, index, v1, scale) \
   3484   __builtin_ia32_scatterdiv2df((void *)(addr), (__mmask8)(mask), \
   3485                                (__v2di)(__m128i)(index), \
   3486                                (__v2df)(__m128d)(v1), (int)(scale))
   3487 
   3488 #define _mm_i64scatter_epi64(addr, index, v1, scale) \
   3489   __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)-1, \
   3490                                (__v2di)(__m128i)(index), \
   3491                                (__v2di)(__m128i)(v1), (int)(scale))
   3492 
   3493 #define _mm_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
   3494   __builtin_ia32_scatterdiv2di((void *)(addr), (__mmask8)(mask), \
   3495                                (__v2di)(__m128i)(index), \
   3496                                (__v2di)(__m128i)(v1), (int)(scale))
   3497 
   3498 #define _mm256_i64scatter_pd(addr, index, v1, scale) \
   3499   __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)-1, \
   3500                                (__v4di)(__m256i)(index), \
   3501                                (__v4df)(__m256d)(v1), (int)(scale))
   3502 
   3503 #define _mm256_mask_i64scatter_pd(addr, mask, index, v1, scale) \
   3504   __builtin_ia32_scatterdiv4df((void *)(addr), (__mmask8)(mask), \
   3505                                (__v4di)(__m256i)(index), \
   3506                                (__v4df)(__m256d)(v1), (int)(scale))
   3507 
   3508 #define _mm256_i64scatter_epi64(addr, index, v1, scale) \
   3509   __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)-1, \
   3510                                (__v4di)(__m256i)(index), \
   3511                                (__v4di)(__m256i)(v1), (int)(scale))
   3512 
   3513 #define _mm256_mask_i64scatter_epi64(addr, mask, index, v1, scale) \
   3514   __builtin_ia32_scatterdiv4di((void *)(addr), (__mmask8)(mask), \
   3515                                (__v4di)(__m256i)(index), \
   3516                                (__v4di)(__m256i)(v1), (int)(scale))
   3517 
   3518 #define _mm_i64scatter_ps(addr, index, v1, scale) \
   3519   __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)-1, \
   3520                                (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
   3521                                (int)(scale))
   3522 
   3523 #define _mm_mask_i64scatter_ps(addr, mask, index, v1, scale) \
   3524   __builtin_ia32_scatterdiv4sf((void *)(addr), (__mmask8)(mask), \
   3525                                (__v2di)(__m128i)(index), (__v4sf)(__m128)(v1), \
   3526                                (int)(scale))
   3527 
   3528 #define _mm_i64scatter_epi32(addr, index, v1, scale) \
   3529   __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)-1, \
   3530                                (__v2di)(__m128i)(index), \
   3531                                (__v4si)(__m128i)(v1), (int)(scale))
   3532 
   3533 #define _mm_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
   3534   __builtin_ia32_scatterdiv4si((void *)(addr), (__mmask8)(mask), \
   3535                                (__v2di)(__m128i)(index), \
   3536                                (__v4si)(__m128i)(v1), (int)(scale))
   3537 
   3538 #define _mm256_i64scatter_ps(addr, index, v1, scale) \
   3539   __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)-1, \
   3540                                (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
   3541                                (int)(scale))
   3542 
   3543 #define _mm256_mask_i64scatter_ps(addr, mask, index, v1, scale) \
   3544   __builtin_ia32_scatterdiv8sf((void *)(addr), (__mmask8)(mask), \
   3545                                (__v4di)(__m256i)(index), (__v4sf)(__m128)(v1), \
   3546                                (int)(scale))
   3547 
   3548 #define _mm256_i64scatter_epi32(addr, index, v1, scale) \
   3549   __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)-1, \
   3550                                (__v4di)(__m256i)(index), \
   3551                                (__v4si)(__m128i)(v1), (int)(scale))
   3552 
   3553 #define _mm256_mask_i64scatter_epi32(addr, mask, index, v1, scale) \
   3554   __builtin_ia32_scatterdiv8si((void *)(addr), (__mmask8)(mask), \
   3555                                (__v4di)(__m256i)(index), \
   3556                                (__v4si)(__m128i)(v1), (int)(scale))
   3557 
   3558 #define _mm_i32scatter_pd(addr, index, v1, scale) \
   3559   __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)-1, \
   3560                                (__v4si)(__m128i)(index), \
   3561                                (__v2df)(__m128d)(v1), (int)(scale))
   3562 
   3563 #define _mm_mask_i32scatter_pd(addr, mask, index, v1, scale) \
   3564     __builtin_ia32_scattersiv2df((void *)(addr), (__mmask8)(mask), \
   3565                                  (__v4si)(__m128i)(index), \
   3566                                  (__v2df)(__m128d)(v1), (int)(scale))
   3567 
   3568 #define _mm_i32scatter_epi64(addr, index, v1, scale) \
   3569     __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)-1, \
   3570                                  (__v4si)(__m128i)(index), \
   3571                                  (__v2di)(__m128i)(v1), (int)(scale))
   3572 
   3573 #define _mm_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
   3574     __builtin_ia32_scattersiv2di((void *)(addr), (__mmask8)(mask), \
   3575                                  (__v4si)(__m128i)(index), \
   3576                                  (__v2di)(__m128i)(v1), (int)(scale))
   3577 
   3578 #define _mm256_i32scatter_pd(addr, index, v1, scale) \
   3579     __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)-1, \
   3580                                  (__v4si)(__m128i)(index), \
   3581                                  (__v4df)(__m256d)(v1), (int)(scale))
   3582 
   3583 #define _mm256_mask_i32scatter_pd(addr, mask, index, v1, scale) \
   3584     __builtin_ia32_scattersiv4df((void *)(addr), (__mmask8)(mask), \
   3585                                  (__v4si)(__m128i)(index), \
   3586                                  (__v4df)(__m256d)(v1), (int)(scale))
   3587 
   3588 #define _mm256_i32scatter_epi64(addr, index, v1, scale) \
   3589     __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)-1, \
   3590                                  (__v4si)(__m128i)(index), \
   3591                                  (__v4di)(__m256i)(v1), (int)(scale))
   3592 
   3593 #define _mm256_mask_i32scatter_epi64(addr, mask, index, v1, scale) \
   3594     __builtin_ia32_scattersiv4di((void *)(addr), (__mmask8)(mask), \
   3595                                  (__v4si)(__m128i)(index), \
   3596                                  (__v4di)(__m256i)(v1), (int)(scale))
   3597 
   3598 #define _mm_i32scatter_ps(addr, index, v1, scale) \
   3599     __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)-1, \
   3600                                  (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
   3601                                  (int)(scale))
   3602 
   3603 #define _mm_mask_i32scatter_ps(addr, mask, index, v1, scale) \
   3604     __builtin_ia32_scattersiv4sf((void *)(addr), (__mmask8)(mask), \
   3605                                  (__v4si)(__m128i)(index), (__v4sf)(__m128)(v1), \
   3606                                  (int)(scale))
   3607 
   3608 #define _mm_i32scatter_epi32(addr, index, v1, scale) \
   3609     __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)-1, \
   3610                                  (__v4si)(__m128i)(index), \
   3611                                  (__v4si)(__m128i)(v1), (int)(scale))
   3612 
   3613 #define _mm_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
   3614     __builtin_ia32_scattersiv4si((void *)(addr), (__mmask8)(mask), \
   3615                                  (__v4si)(__m128i)(index), \
   3616                                  (__v4si)(__m128i)(v1), (int)(scale))
   3617 
   3618 #define _mm256_i32scatter_ps(addr, index, v1, scale) \
   3619     __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)-1, \
   3620                                  (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
   3621                                  (int)(scale))
   3622 
   3623 #define _mm256_mask_i32scatter_ps(addr, mask, index, v1, scale) \
   3624     __builtin_ia32_scattersiv8sf((void *)(addr), (__mmask8)(mask), \
   3625                                  (__v8si)(__m256i)(index), (__v8sf)(__m256)(v1), \
   3626                                  (int)(scale))
   3627 
   3628 #define _mm256_i32scatter_epi32(addr, index, v1, scale) \
   3629     __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)-1, \
   3630                                  (__v8si)(__m256i)(index), \
   3631                                  (__v8si)(__m256i)(v1), (int)(scale))
   3632 
   3633 #define _mm256_mask_i32scatter_epi32(addr, mask, index, v1, scale) \
   3634     __builtin_ia32_scattersiv8si((void *)(addr), (__mmask8)(mask), \
   3635                                  (__v8si)(__m256i)(index), \
   3636                                  (__v8si)(__m256i)(v1), (int)(scale))
   3637 
   3638   static __inline__ __m128d __DEFAULT_FN_ATTRS128
   3639   _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
   3640     return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   3641                                                 (__v2df)_mm_sqrt_pd(__A),
   3642                                                 (__v2df)__W);
   3643   }
   3644 
   3645   static __inline__ __m128d __DEFAULT_FN_ATTRS128
   3646   _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
   3647     return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   3648                                                 (__v2df)_mm_sqrt_pd(__A),
   3649                                                 (__v2df)_mm_setzero_pd());
   3650   }
   3651 
   3652   static __inline__ __m256d __DEFAULT_FN_ATTRS256
   3653   _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
   3654     return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   3655                                                 (__v4df)_mm256_sqrt_pd(__A),
   3656                                                 (__v4df)__W);
   3657   }
   3658 
   3659   static __inline__ __m256d __DEFAULT_FN_ATTRS256
   3660   _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
   3661     return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   3662                                                 (__v4df)_mm256_sqrt_pd(__A),
   3663                                                 (__v4df)_mm256_setzero_pd());
   3664   }
   3665 
   3666   static __inline__ __m128 __DEFAULT_FN_ATTRS128
   3667   _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
   3668     return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   3669                                                (__v4sf)_mm_sqrt_ps(__A),
   3670                                                (__v4sf)__W);
   3671   }
   3672 
   3673   static __inline__ __m128 __DEFAULT_FN_ATTRS128
   3674   _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
   3675     return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   3676                                                (__v4sf)_mm_sqrt_ps(__A),
   3677                                                (__v4sf)_mm_setzero_ps());
   3678   }
   3679 
   3680   static __inline__ __m256 __DEFAULT_FN_ATTRS256
   3681   _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) {
   3682     return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   3683                                                (__v8sf)_mm256_sqrt_ps(__A),
   3684                                                (__v8sf)__W);
   3685   }
   3686 
   3687   static __inline__ __m256 __DEFAULT_FN_ATTRS256
   3688   _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) {
   3689     return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   3690                                                (__v8sf)_mm256_sqrt_ps(__A),
   3691                                                (__v8sf)_mm256_setzero_ps());
   3692   }
   3693 
   3694   static __inline__ __m128d __DEFAULT_FN_ATTRS128
   3695   _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   3696     return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   3697                                                 (__v2df)_mm_sub_pd(__A, __B),
   3698                                                 (__v2df)__W);
   3699   }
   3700 
   3701   static __inline__ __m128d __DEFAULT_FN_ATTRS128
   3702   _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   3703     return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   3704                                                 (__v2df)_mm_sub_pd(__A, __B),
   3705                                                 (__v2df)_mm_setzero_pd());
   3706   }
   3707 
   3708   static __inline__ __m256d __DEFAULT_FN_ATTRS256
   3709   _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   3710     return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   3711                                                 (__v4df)_mm256_sub_pd(__A, __B),
   3712                                                 (__v4df)__W);
   3713   }
   3714 
   3715   static __inline__ __m256d __DEFAULT_FN_ATTRS256
   3716   _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   3717     return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   3718                                                 (__v4df)_mm256_sub_pd(__A, __B),
   3719                                                 (__v4df)_mm256_setzero_pd());
   3720   }
   3721 
   3722   static __inline__ __m128 __DEFAULT_FN_ATTRS128
   3723   _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   3724     return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   3725                                                (__v4sf)_mm_sub_ps(__A, __B),
   3726                                                (__v4sf)__W);
   3727   }
   3728 
   3729   static __inline__ __m128 __DEFAULT_FN_ATTRS128
   3730   _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   3731     return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   3732                                                (__v4sf)_mm_sub_ps(__A, __B),
   3733                                                (__v4sf)_mm_setzero_ps());
   3734   }
   3735 
   3736   static __inline__ __m256 __DEFAULT_FN_ATTRS256
   3737   _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   3738     return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   3739                                                (__v8sf)_mm256_sub_ps(__A, __B),
   3740                                                (__v8sf)__W);
   3741   }
   3742 
   3743   static __inline__ __m256 __DEFAULT_FN_ATTRS256
   3744   _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   3745     return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   3746                                                (__v8sf)_mm256_sub_ps(__A, __B),
   3747                                                (__v8sf)_mm256_setzero_ps());
   3748   }
   3749 
   3750   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3751   _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) {
   3752     return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I,
   3753                                                   (__v4si)__B);
   3754   }
   3755 
   3756   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3757   _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I,
   3758                               __m128i __B) {
   3759     return (__m128i)__builtin_ia32_selectd_128(__U,
   3760                                     (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
   3761                                     (__v4si)__A);
   3762   }
   3763 
   3764   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3765   _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U,
   3766                                __m128i __B) {
   3767     return (__m128i)__builtin_ia32_selectd_128(__U,
   3768                                     (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
   3769                                     (__v4si)__I);
   3770   }
   3771 
   3772   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3773   _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I,
   3774                                __m128i __B) {
   3775     return (__m128i)__builtin_ia32_selectd_128(__U,
   3776                                     (__v4si)_mm_permutex2var_epi32(__A, __I, __B),
   3777                                     (__v4si)_mm_setzero_si128());
   3778   }
   3779 
   3780   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3781   _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) {
   3782     return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I,
   3783                                                   (__v8si) __B);
   3784   }
   3785 
   3786   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3787   _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I,
   3788                                  __m256i __B) {
   3789     return (__m256i)__builtin_ia32_selectd_256(__U,
   3790                                  (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
   3791                                  (__v8si)__A);
   3792   }
   3793 
   3794   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3795   _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U,
   3796                                   __m256i __B) {
   3797     return (__m256i)__builtin_ia32_selectd_256(__U,
   3798                                  (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
   3799                                  (__v8si)__I);
   3800   }
   3801 
   3802   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3803   _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I,
   3804                                   __m256i __B) {
   3805     return (__m256i)__builtin_ia32_selectd_256(__U,
   3806                                  (__v8si)_mm256_permutex2var_epi32(__A, __I, __B),
   3807                                  (__v8si)_mm256_setzero_si256());
   3808   }
   3809 
   3810   static __inline__ __m128d __DEFAULT_FN_ATTRS128
   3811   _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) {
   3812     return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I,
   3813                                                    (__v2df)__B);
   3814   }
   3815 
   3816   static __inline__ __m128d __DEFAULT_FN_ATTRS128
   3817   _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) {
   3818     return (__m128d)__builtin_ia32_selectpd_128(__U,
   3819                                        (__v2df)_mm_permutex2var_pd(__A, __I, __B),
   3820                                        (__v2df)__A);
   3821   }
   3822 
   3823   static __inline__ __m128d __DEFAULT_FN_ATTRS128
   3824   _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) {
   3825     return (__m128d)__builtin_ia32_selectpd_128(__U,
   3826                                        (__v2df)_mm_permutex2var_pd(__A, __I, __B),
   3827                                        (__v2df)(__m128d)__I);
   3828   }
   3829 
   3830   static __inline__ __m128d __DEFAULT_FN_ATTRS128
   3831   _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) {
   3832     return (__m128d)__builtin_ia32_selectpd_128(__U,
   3833                                        (__v2df)_mm_permutex2var_pd(__A, __I, __B),
   3834                                        (__v2df)_mm_setzero_pd());
   3835   }
   3836 
   3837   static __inline__ __m256d __DEFAULT_FN_ATTRS256
   3838   _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) {
   3839     return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I,
   3840                                                    (__v4df)__B);
   3841   }
   3842 
   3843   static __inline__ __m256d __DEFAULT_FN_ATTRS256
   3844   _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I,
   3845                               __m256d __B) {
   3846     return (__m256d)__builtin_ia32_selectpd_256(__U,
   3847                                     (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
   3848                                     (__v4df)__A);
   3849   }
   3850 
   3851   static __inline__ __m256d __DEFAULT_FN_ATTRS256
   3852   _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U,
   3853                                __m256d __B) {
   3854     return (__m256d)__builtin_ia32_selectpd_256(__U,
   3855                                     (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
   3856                                     (__v4df)(__m256d)__I);
   3857   }
   3858 
   3859   static __inline__ __m256d __DEFAULT_FN_ATTRS256
   3860   _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I,
   3861                                __m256d __B) {
   3862     return (__m256d)__builtin_ia32_selectpd_256(__U,
   3863                                     (__v4df)_mm256_permutex2var_pd(__A, __I, __B),
   3864                                     (__v4df)_mm256_setzero_pd());
   3865   }
   3866 
   3867   static __inline__ __m128 __DEFAULT_FN_ATTRS128
   3868   _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) {
   3869     return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I,
   3870                                                   (__v4sf)__B);
   3871   }
   3872 
   3873   static __inline__ __m128 __DEFAULT_FN_ATTRS128
   3874   _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) {
   3875     return (__m128)__builtin_ia32_selectps_128(__U,
   3876                                        (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
   3877                                        (__v4sf)__A);
   3878   }
   3879 
   3880   static __inline__ __m128 __DEFAULT_FN_ATTRS128
   3881   _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) {
   3882     return (__m128)__builtin_ia32_selectps_128(__U,
   3883                                        (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
   3884                                        (__v4sf)(__m128)__I);
   3885   }
   3886 
   3887   static __inline__ __m128 __DEFAULT_FN_ATTRS128
   3888   _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) {
   3889     return (__m128)__builtin_ia32_selectps_128(__U,
   3890                                        (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
   3891                                        (__v4sf)_mm_setzero_ps());
   3892   }
   3893 
   3894   static __inline__ __m256 __DEFAULT_FN_ATTRS256
   3895   _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) {
   3896     return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I,
   3897                                                   (__v8sf) __B);
   3898   }
   3899 
   3900   static __inline__ __m256 __DEFAULT_FN_ATTRS256
   3901   _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) {
   3902     return (__m256)__builtin_ia32_selectps_256(__U,
   3903                                     (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
   3904                                     (__v8sf)__A);
   3905   }
   3906 
   3907   static __inline__ __m256 __DEFAULT_FN_ATTRS256
   3908   _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U,
   3909                                __m256 __B) {
   3910     return (__m256)__builtin_ia32_selectps_256(__U,
   3911                                     (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
   3912                                     (__v8sf)(__m256)__I);
   3913   }
   3914 
   3915   static __inline__ __m256 __DEFAULT_FN_ATTRS256
   3916   _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I,
   3917                                __m256 __B) {
   3918     return (__m256)__builtin_ia32_selectps_256(__U,
   3919                                     (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
   3920                                     (__v8sf)_mm256_setzero_ps());
   3921   }
   3922 
   /* Passes __A, __I and __B (each viewed as __v2di) to
    * __builtin_ia32_vpermi2varq128 and returns the result cast to __m128i. */
   3923   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3924   _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) {
   3925     return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I,
   3926                                                   (__v2di)__B);
   3927   }
   3928 
   /* Masked variant: __builtin_ia32_selectq_128 selects, under mask __U,
    * between _mm_permutex2var_epi64(__A, __I, __B) and __A itself
    * (presumably __A where the __U bit is clear -- TODO confirm). */
   3929   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3930   _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I,
   3931                               __m128i __B) {
   3932     return (__m128i)__builtin_ia32_selectq_128(__U,
   3933                                     (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
   3934                                     (__v2di)__A);
   3935   }
   3936 
   /* mask2 variant: __builtin_ia32_selectq_128 selects, under mask __U,
    * between _mm_permutex2var_epi64(__A, __I, __B) and the index vector __I
    * (presumably __I where the __U bit is clear -- TODO confirm). */
   3937   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3938   _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U,
   3939                                __m128i __B) {
   3940     return (__m128i)__builtin_ia32_selectq_128(__U,
   3941                                     (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
   3942                                     (__v2di)__I);
   3943   }
   3944 
   /* maskz variant: __builtin_ia32_selectq_128 selects, under mask __U,
    * between _mm_permutex2var_epi64(__A, __I, __B) and the vector returned by
    * _mm_setzero_si128() (presumably the latter where the __U bit is clear --
    * TODO confirm). */
   3945   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3946   _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I,
   3947                                __m128i __B) {
   3948     return (__m128i)__builtin_ia32_selectq_128(__U,
   3949                                     (__v2di)_mm_permutex2var_epi64(__A, __I, __B),
   3950                                     (__v2di)_mm_setzero_si128());
   3951   }
   3952 
   3953 
   /* Passes __A, __I and __B (each viewed as __v4di) to
    * __builtin_ia32_vpermi2varq256 and returns the result cast to __m256i. */
   3954   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3955   _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) {
   3956     return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I,
   3957                                                   (__v4di) __B);
   3958   }
   3959 
   /* Masked variant: __builtin_ia32_selectq_256 selects, under mask __U,
    * between _mm256_permutex2var_epi64(__A, __I, __B) and __A itself
    * (presumably __A where the __U bit is clear -- TODO confirm). */
   3960   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3961   _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I,
   3962                                  __m256i __B) {
   3963     return (__m256i)__builtin_ia32_selectq_256(__U,
   3964                                  (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
   3965                                  (__v4di)__A);
   3966   }
   3967 
   /* mask2 variant: __builtin_ia32_selectq_256 selects, under mask __U,
    * between _mm256_permutex2var_epi64(__A, __I, __B) and the index vector __I
    * (presumably __I where the __U bit is clear -- TODO confirm). */
   3968   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3969   _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U,
   3970                                   __m256i __B) {
   3971     return (__m256i)__builtin_ia32_selectq_256(__U,
   3972                                  (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
   3973                                  (__v4di)__I);
   3974   }
   3975 
   /* maskz variant: __builtin_ia32_selectq_256 selects, under mask __U,
    * between _mm256_permutex2var_epi64(__A, __I, __B) and the vector returned
    * by _mm256_setzero_si256() (presumably the latter where the __U bit is
    * clear -- TODO confirm). */
   3976   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   3977   _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I,
   3978                                   __m256i __B) {
   3979     return (__m256i)__builtin_ia32_selectq_256(__U,
   3980                                  (__v4di)_mm256_permutex2var_epi64(__A, __I, __B),
   3981                                  (__v4di)_mm256_setzero_si256());
   3982   }
   3983 
   /* Masked variant: __builtin_ia32_selectd_128 selects, under mask __U,
    * between _mm_cvtepi8_epi32(__A) and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   3984   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3985   _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
   3986   {
   3987     return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   3988                                                (__v4si)_mm_cvtepi8_epi32(__A),
   3989                                                (__v4si)__W);
   3990   }
   3991 
   /* maskz variant: __builtin_ia32_selectd_128 selects, under mask __U,
    * between _mm_cvtepi8_epi32(__A) and the vector from _mm_setzero_si128()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   3992   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   3993   _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
   3994   {
   3995     return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   3996                                                (__v4si)_mm_cvtepi8_epi32(__A),
   3997                                                (__v4si)_mm_setzero_si128());
   3998   }
   3999 
   /* Masked variant taking a 128-bit source __A and a 256-bit __W:
    * __builtin_ia32_selectd_256 selects, under mask __U, between
    * _mm256_cvtepi8_epi32(__A) and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4000   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4001   _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
   4002   {
   4003     return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4004                                                (__v8si)_mm256_cvtepi8_epi32(__A),
   4005                                                (__v8si)__W);
   4006   }
   4007 
   /* maskz variant taking a 128-bit source __A: __builtin_ia32_selectd_256
    * selects, under mask __U, between _mm256_cvtepi8_epi32(__A) and the vector
    * from _mm256_setzero_si256()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4008   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4009   _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
   4010   {
   4011     return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4012                                                (__v8si)_mm256_cvtepi8_epi32(__A),
   4013                                                (__v8si)_mm256_setzero_si256());
   4014   }
   4015 
   /* Masked variant: __builtin_ia32_selectq_128 selects, under mask __U,
    * between _mm_cvtepi8_epi64(__A) and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4016   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4017   _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
   4018   {
   4019     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4020                                                (__v2di)_mm_cvtepi8_epi64(__A),
   4021                                                (__v2di)__W);
   4022   }
   4023 
   /* maskz variant: __builtin_ia32_selectq_128 selects, under mask __U,
    * between _mm_cvtepi8_epi64(__A) and the vector from _mm_setzero_si128()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4024   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4025   _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
   4026   {
   4027     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4028                                                (__v2di)_mm_cvtepi8_epi64(__A),
   4029                                                (__v2di)_mm_setzero_si128());
   4030   }
   4031 
   /* Masked variant taking a 128-bit source __A and a 256-bit __W:
    * __builtin_ia32_selectq_256 selects, under mask __U, between
    * _mm256_cvtepi8_epi64(__A) and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4032   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4033   _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
   4034   {
   4035     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4036                                                (__v4di)_mm256_cvtepi8_epi64(__A),
   4037                                                (__v4di)__W);
   4038   }
   4039 
   /* maskz variant taking a 128-bit source __A: __builtin_ia32_selectq_256
    * selects, under mask __U, between _mm256_cvtepi8_epi64(__A) and the vector
    * from _mm256_setzero_si256()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4040   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4041   _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
   4042   {
   4043     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4044                                                (__v4di)_mm256_cvtepi8_epi64(__A),
   4045                                                (__v4di)_mm256_setzero_si256());
   4046   }
   4047 
   /* Masked variant: __builtin_ia32_selectq_128 selects, under mask __U,
    * between _mm_cvtepi32_epi64(__X) and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4048   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4049   _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
   4050   {
   4051     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4052                                                (__v2di)_mm_cvtepi32_epi64(__X),
   4053                                                (__v2di)__W);
   4054   }
   4055 
   /* maskz variant: __builtin_ia32_selectq_128 selects, under mask __U,
    * between _mm_cvtepi32_epi64(__X) and the vector from _mm_setzero_si128()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4056   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4057   _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
   4058   {
   4059     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4060                                                (__v2di)_mm_cvtepi32_epi64(__X),
   4061                                                (__v2di)_mm_setzero_si128());
   4062   }
   4063 
   /* Masked variant taking a 128-bit source __X and a 256-bit __W:
    * __builtin_ia32_selectq_256 selects, under mask __U, between
    * _mm256_cvtepi32_epi64(__X) and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4064   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4065   _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
   4066   {
   4067     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4068                                                (__v4di)_mm256_cvtepi32_epi64(__X),
   4069                                                (__v4di)__W);
   4070   }
   4071 
   /* maskz variant taking a 128-bit source __X: __builtin_ia32_selectq_256
    * selects, under mask __U, between _mm256_cvtepi32_epi64(__X) and the vector
    * from _mm256_setzero_si256()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4072   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4073   _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
   4074   {
   4075     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4076                                                (__v4di)_mm256_cvtepi32_epi64(__X),
   4077                                                (__v4di)_mm256_setzero_si256());
   4078   }
   4079 
   /* Masked variant: __builtin_ia32_selectd_128 selects, under mask __U,
    * between _mm_cvtepi16_epi32(__A) and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4080   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4081   _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
   4082   {
   4083     return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4084                                                (__v4si)_mm_cvtepi16_epi32(__A),
   4085                                                (__v4si)__W);
   4086   }
   4087 
   /* maskz variant: __builtin_ia32_selectd_128 selects, under mask __U,
    * between _mm_cvtepi16_epi32(__A) and the vector from _mm_setzero_si128()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4088   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4089   _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
   4090   {
   4091     return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4092                                                (__v4si)_mm_cvtepi16_epi32(__A),
   4093                                                (__v4si)_mm_setzero_si128());
   4094   }
   4095 
   /* Masked variant taking a 128-bit source __A and a 256-bit __W:
    * __builtin_ia32_selectd_256 selects, under mask __U, between
    * _mm256_cvtepi16_epi32(__A) and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4096   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4097   _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
   4098   {
   4099     return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4100                                                (__v8si)_mm256_cvtepi16_epi32(__A),
   4101                                                (__v8si)__W);
   4102   }
   4103 
   /* maskz variant taking a 128-bit source __A: __builtin_ia32_selectd_256
    * selects, under mask __U, between _mm256_cvtepi16_epi32(__A) and the vector
    * from _mm256_setzero_si256()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4104   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4105   _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
   4106   {
   4107     return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4108                                                (__v8si)_mm256_cvtepi16_epi32(__A),
   4109                                                (__v8si)_mm256_setzero_si256());
   4110   }
   4111 
   /* Masked variant: __builtin_ia32_selectq_128 selects, under mask __U,
    * between _mm_cvtepi16_epi64(__A) and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4112   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4113   _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
   4114   {
   4115     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4116                                                (__v2di)_mm_cvtepi16_epi64(__A),
   4117                                                (__v2di)__W);
   4118   }
   4119 
   /* maskz variant: __builtin_ia32_selectq_128 selects, under mask __U,
    * between _mm_cvtepi16_epi64(__A) and the vector from _mm_setzero_si128()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4120   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4121   _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
   4122   {
   4123     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4124                                                (__v2di)_mm_cvtepi16_epi64(__A),
   4125                                                (__v2di)_mm_setzero_si128());
   4126   }
   4127 
   /* Masked variant taking a 128-bit source __A and a 256-bit __W:
    * __builtin_ia32_selectq_256 selects, under mask __U, between
    * _mm256_cvtepi16_epi64(__A) and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4128   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4129   _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
   4130   {
   4131     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4132                                                (__v4di)_mm256_cvtepi16_epi64(__A),
   4133                                                (__v4di)__W);
   4134   }
   4135 
   /* maskz variant taking a 128-bit source __A: __builtin_ia32_selectq_256
    * selects, under mask __U, between _mm256_cvtepi16_epi64(__A) and the vector
    * from _mm256_setzero_si256()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4136   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4137   _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
   4138   {
   4139     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4140                                                (__v4di)_mm256_cvtepi16_epi64(__A),
   4141                                                (__v4di)_mm256_setzero_si256());
   4142   }
   4143 
   4144 
   /* Masked variant: __builtin_ia32_selectd_128 selects, under mask __U,
    * between _mm_cvtepu8_epi32(__A) and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4145   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4146   _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
   4147   {
   4148     return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4149                                                (__v4si)_mm_cvtepu8_epi32(__A),
   4150                                                (__v4si)__W);
   4151   }
   4152 
   /* maskz variant: __builtin_ia32_selectd_128 selects, under mask __U,
    * between _mm_cvtepu8_epi32(__A) and the vector from _mm_setzero_si128()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4153   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4154   _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
   4155   {
   4156     return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4157                                                (__v4si)_mm_cvtepu8_epi32(__A),
   4158                                                (__v4si)_mm_setzero_si128());
   4159   }
   4160 
   /* Masked variant taking a 128-bit source __A and a 256-bit __W:
    * __builtin_ia32_selectd_256 selects, under mask __U, between
    * _mm256_cvtepu8_epi32(__A) and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4161   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4162   _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
   4163   {
   4164     return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4165                                                (__v8si)_mm256_cvtepu8_epi32(__A),
   4166                                                (__v8si)__W);
   4167   }
   4168 
   /* maskz variant taking a 128-bit source __A: __builtin_ia32_selectd_256
    * selects, under mask __U, between _mm256_cvtepu8_epi32(__A) and the vector
    * from _mm256_setzero_si256()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4169   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4170   _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
   4171   {
   4172     return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4173                                                (__v8si)_mm256_cvtepu8_epi32(__A),
   4174                                                (__v8si)_mm256_setzero_si256());
   4175   }
   4176 
   /* Masked variant: __builtin_ia32_selectq_128 selects, under mask __U,
    * between _mm_cvtepu8_epi64(__A) and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4177   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4178   _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
   4179   {
   4180     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4181                                                (__v2di)_mm_cvtepu8_epi64(__A),
   4182                                                (__v2di)__W);
   4183   }
   4184 
   /* maskz variant: __builtin_ia32_selectq_128 selects, under mask __U,
    * between _mm_cvtepu8_epi64(__A) and the vector from _mm_setzero_si128()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4185   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4186   _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
   4187   {
   4188     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4189                                                (__v2di)_mm_cvtepu8_epi64(__A),
   4190                                                (__v2di)_mm_setzero_si128());
   4191   }
   4192 
   /* Masked variant taking a 128-bit source __A and a 256-bit __W:
    * __builtin_ia32_selectq_256 selects, under mask __U, between
    * _mm256_cvtepu8_epi64(__A) and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4193   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4194   _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
   4195   {
   4196     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4197                                                (__v4di)_mm256_cvtepu8_epi64(__A),
   4198                                                (__v4di)__W);
   4199   }
   4200 
   /* maskz variant taking a 128-bit source __A: __builtin_ia32_selectq_256
    * selects, under mask __U, between _mm256_cvtepu8_epi64(__A) and the vector
    * from _mm256_setzero_si256()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4201   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4202   _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
   4203   {
   4204     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4205                                                (__v4di)_mm256_cvtepu8_epi64(__A),
   4206                                                (__v4di)_mm256_setzero_si256());
   4207   }
   4208 
   /* Masked variant: __builtin_ia32_selectq_128 selects, under mask __U,
    * between _mm_cvtepu32_epi64(__X) and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4209   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4210   _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
   4211   {
   4212     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4213                                                (__v2di)_mm_cvtepu32_epi64(__X),
   4214                                                (__v2di)__W);
   4215   }
   4216 
   /* maskz variant: __builtin_ia32_selectq_128 selects, under mask __U,
    * between _mm_cvtepu32_epi64(__X) and the vector from _mm_setzero_si128()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4217   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4218   _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
   4219   {
   4220     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4221                                                (__v2di)_mm_cvtepu32_epi64(__X),
   4222                                                (__v2di)_mm_setzero_si128());
   4223   }
   4224 
   /* Masked variant taking a 128-bit source __X and a 256-bit __W:
    * __builtin_ia32_selectq_256 selects, under mask __U, between
    * _mm256_cvtepu32_epi64(__X) and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4225   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4226   _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
   4227   {
   4228     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4229                                                (__v4di)_mm256_cvtepu32_epi64(__X),
   4230                                                (__v4di)__W);
   4231   }
   4232 
   /* maskz variant taking a 128-bit source __X: __builtin_ia32_selectq_256
    * selects, under mask __U, between _mm256_cvtepu32_epi64(__X) and the vector
    * from _mm256_setzero_si256()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4233   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4234   _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
   4235   {
   4236     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4237                                                (__v4di)_mm256_cvtepu32_epi64(__X),
   4238                                                (__v4di)_mm256_setzero_si256());
   4239   }
   4240 
   /* Masked variant: __builtin_ia32_selectd_128 selects, under mask __U,
    * between _mm_cvtepu16_epi32(__A) and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4241   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4242   _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
   4243   {
   4244     return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4245                                                (__v4si)_mm_cvtepu16_epi32(__A),
   4246                                                (__v4si)__W);
   4247   }
   4248 
   /* maskz variant: __builtin_ia32_selectd_128 selects, under mask __U,
    * between _mm_cvtepu16_epi32(__A) and the vector from _mm_setzero_si128()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4249   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4250   _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
   4251   {
   4252     return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4253                                                (__v4si)_mm_cvtepu16_epi32(__A),
   4254                                                (__v4si)_mm_setzero_si128());
   4255   }
   4256 
   /* Masked variant taking a 128-bit source __A and a 256-bit __W:
    * __builtin_ia32_selectd_256 selects, under mask __U, between
    * _mm256_cvtepu16_epi32(__A) and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4257   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4258   _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
   4259   {
   4260     return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4261                                                (__v8si)_mm256_cvtepu16_epi32(__A),
   4262                                                (__v8si)__W);
   4263   }
   4264 
   /* maskz variant taking a 128-bit source __A: __builtin_ia32_selectd_256
    * selects, under mask __U, between _mm256_cvtepu16_epi32(__A) and the vector
    * from _mm256_setzero_si256()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4265   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4266   _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
   4267   {
   4268     return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4269                                                (__v8si)_mm256_cvtepu16_epi32(__A),
   4270                                                (__v8si)_mm256_setzero_si256());
   4271   }
   4272 
   /* Masked variant: __builtin_ia32_selectq_128 selects, under mask __U,
    * between _mm_cvtepu16_epi64(__A) and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4273   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4274   _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
   4275   {
   4276     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4277                                                (__v2di)_mm_cvtepu16_epi64(__A),
   4278                                                (__v2di)__W);
   4279   }
   4280 
   /* maskz variant: __builtin_ia32_selectq_128 selects, under mask __U,
    * between _mm_cvtepu16_epi64(__A) and the vector from _mm_setzero_si128()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4281   static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4282   _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
   4283   {
   4284     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4285                                                (__v2di)_mm_cvtepu16_epi64(__A),
   4286                                                (__v2di)_mm_setzero_si128());
   4287   }
   4288 
   /* Masked variant taking a 128-bit source __A and a 256-bit __W:
    * __builtin_ia32_selectq_256 selects, under mask __U, between
    * _mm256_cvtepu16_epi64(__A) and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4289   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4290   _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
   4291   {
   4292     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4293                                                (__v4di)_mm256_cvtepu16_epi64(__A),
   4294                                                (__v4di)__W);
   4295   }
   4296 
   /* maskz variant taking a 128-bit source __A: __builtin_ia32_selectq_256
    * selects, under mask __U, between _mm256_cvtepu16_epi64(__A) and the vector
    * from _mm256_setzero_si256()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4297   static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4298   _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
   4299   {
   4300     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4301                                                (__v4di)_mm256_cvtepu16_epi64(__A),
   4302                                                (__v4di)_mm256_setzero_si256());
   4303   }
   4304 
   4305 
   /* Expands to __builtin_ia32_prold128 applied to a (cast to __v4si) and b
    * (cast to int); the result is cast to __m128i.
    * NOTE(review): presumably a macro because the builtin needs b to be a
    * compile-time constant -- confirm. */
   4306 #define _mm_rol_epi32(a, b) \
   4307   ((__m128i)__builtin_ia32_prold128((__v4si)(__m128i)(a), (int)(b)))
   4308 
   /* Masked variant: __builtin_ia32_selectd_128 selects, under mask u (cast to
    * __mmask8), between _mm_rol_epi32(a, b) and w
    * (presumably w where the u bit is clear -- TODO confirm). */
   4309 #define _mm_mask_rol_epi32(w, u, a, b) \
   4310   ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
   4311                                        (__v4si)_mm_rol_epi32((a), (b)), \
   4312                                        (__v4si)(__m128i)(w)))
   4313 
   /* maskz variant: __builtin_ia32_selectd_128 selects, under mask u (cast to
    * __mmask8), between _mm_rol_epi32(a, b) and the vector from
    * _mm_setzero_si128()
    * (presumably the latter where the u bit is clear -- TODO confirm). */
   4314 #define _mm_maskz_rol_epi32(u, a, b) \
   4315   ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
   4316                                        (__v4si)_mm_rol_epi32((a), (b)), \
   4317                                        (__v4si)_mm_setzero_si128()))
   4318 
   /* Expands to __builtin_ia32_prold256 applied to a (cast to __v8si) and b
    * (cast to int); the result is cast to __m256i.
    * NOTE(review): presumably a macro because the builtin needs b to be a
    * compile-time constant -- confirm. */
   4319 #define _mm256_rol_epi32(a, b) \
   4320   ((__m256i)__builtin_ia32_prold256((__v8si)(__m256i)(a), (int)(b)))
   4321 
   /* Masked variant: __builtin_ia32_selectd_256 selects, under mask u (cast to
    * __mmask8), between _mm256_rol_epi32(a, b) and w
    * (presumably w where the u bit is clear -- TODO confirm). */
   4322 #define _mm256_mask_rol_epi32(w, u, a, b) \
   4323   ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
   4324                                        (__v8si)_mm256_rol_epi32((a), (b)), \
   4325                                        (__v8si)(__m256i)(w)))
   4326 
   /* maskz variant: __builtin_ia32_selectd_256 selects, under mask u (cast to
    * __mmask8), between _mm256_rol_epi32(a, b) and the vector from
    * _mm256_setzero_si256()
    * (presumably the latter where the u bit is clear -- TODO confirm). */
   4327 #define _mm256_maskz_rol_epi32(u, a, b) \
   4328   ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
   4329                                        (__v8si)_mm256_rol_epi32((a), (b)), \
   4330                                        (__v8si)_mm256_setzero_si256()))
   4331 
   /* Expands to __builtin_ia32_prolq128 applied to a (cast to __v2di) and b
    * (cast to int); the result is cast to __m128i.
    * NOTE(review): presumably a macro because the builtin needs b to be a
    * compile-time constant -- confirm. */
   4332 #define _mm_rol_epi64(a, b) \
   4333   ((__m128i)__builtin_ia32_prolq128((__v2di)(__m128i)(a), (int)(b)))
   4334 
   /* Masked variant: __builtin_ia32_selectq_128 selects, under mask u (cast to
    * __mmask8), between _mm_rol_epi64(a, b) and w
    * (presumably w where the u bit is clear -- TODO confirm). */
   4335 #define _mm_mask_rol_epi64(w, u, a, b) \
   4336   ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
   4337                                        (__v2di)_mm_rol_epi64((a), (b)), \
   4338                                        (__v2di)(__m128i)(w)))
   4339 
   /* maskz variant: __builtin_ia32_selectq_128 selects, under mask u (cast to
    * __mmask8), between _mm_rol_epi64(a, b) and the vector from
    * _mm_setzero_si128()
    * (presumably the latter where the u bit is clear -- TODO confirm). */
   4340 #define _mm_maskz_rol_epi64(u, a, b) \
   4341   ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
   4342                                        (__v2di)_mm_rol_epi64((a), (b)), \
   4343                                        (__v2di)_mm_setzero_si128()))
   4344 
   /* Expands to __builtin_ia32_prolq256 applied to a (cast to __v4di) and b
    * (cast to int); the result is cast to __m256i.
    * NOTE(review): presumably a macro because the builtin needs b to be a
    * compile-time constant -- confirm. */
   4345 #define _mm256_rol_epi64(a, b) \
   4346   ((__m256i)__builtin_ia32_prolq256((__v4di)(__m256i)(a), (int)(b)))
   4347 
   /* Masked variant: __builtin_ia32_selectq_256 selects, under mask u (cast to
    * __mmask8), between _mm256_rol_epi64(a, b) and w
    * (presumably w where the u bit is clear -- TODO confirm). */
   4348 #define _mm256_mask_rol_epi64(w, u, a, b) \
   4349   ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
   4350                                        (__v4di)_mm256_rol_epi64((a), (b)), \
   4351                                        (__v4di)(__m256i)(w)))
   4352 
   /* maskz variant: __builtin_ia32_selectq_256 selects, under mask u (cast to
    * __mmask8), between _mm256_rol_epi64(a, b) and the vector from
    * _mm256_setzero_si256()
    * (presumably the latter where the u bit is clear -- TODO confirm). */
   4353 #define _mm256_maskz_rol_epi64(u, a, b) \
   4354   ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
   4355                                        (__v4di)_mm256_rol_epi64((a), (b)), \
   4356                                        (__v4di)_mm256_setzero_si256()))
   4357 
   /* Passes __A and __B (both viewed as __v4si) to __builtin_ia32_prolvd128
    * and returns the result cast to __m128i. */
   4358 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4359 _mm_rolv_epi32 (__m128i __A, __m128i __B)
   4360 {
   4361   return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B);
   4362 }
   4363 
   /* Masked variant: __builtin_ia32_selectd_128 selects, under mask __U,
    * between _mm_rolv_epi32(__A, __B) and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4364 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4365 _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
   4366 {
   4367   return (__m128i)__builtin_ia32_selectd_128(__U,
   4368                                              (__v4si)_mm_rolv_epi32(__A, __B),
   4369                                              (__v4si)__W);
   4370 }
   4371 
   /* maskz variant: __builtin_ia32_selectd_128 selects, under mask __U,
    * between _mm_rolv_epi32(__A, __B) and the vector from _mm_setzero_si128()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4372 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4373 _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
   4374 {
   4375   return (__m128i)__builtin_ia32_selectd_128(__U,
   4376                                              (__v4si)_mm_rolv_epi32(__A, __B),
   4377                                              (__v4si)_mm_setzero_si128());
   4378 }
   4379 
   /* Passes __A and __B (both viewed as __v8si) to __builtin_ia32_prolvd256
    * and returns the result cast to __m256i. */
   4380 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4381 _mm256_rolv_epi32 (__m256i __A, __m256i __B)
   4382 {
   4383   return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B);
   4384 }
   4385 
   /* Masked variant: __builtin_ia32_selectd_256 selects, under mask __U,
    * between _mm256_rolv_epi32(__A, __B) and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4386 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4387 _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
   4388 {
   4389   return (__m256i)__builtin_ia32_selectd_256(__U,
   4390                                             (__v8si)_mm256_rolv_epi32(__A, __B),
   4391                                             (__v8si)__W);
   4392 }
   4393 
   /* maskz variant: __builtin_ia32_selectd_256 selects, under mask __U,
    * between _mm256_rolv_epi32(__A, __B) and the vector from
    * _mm256_setzero_si256()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4394 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4395 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
   4396 {
   4397   return (__m256i)__builtin_ia32_selectd_256(__U,
   4398                                             (__v8si)_mm256_rolv_epi32(__A, __B),
   4399                                             (__v8si)_mm256_setzero_si256());
   4400 }
   4401 
   /* Passes __A and __B (both viewed as __v2di) to __builtin_ia32_prolvq128
    * and returns the result cast to __m128i. */
   4402 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4403 _mm_rolv_epi64 (__m128i __A, __m128i __B)
   4404 {
   4405   return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B);
   4406 }
   4407 
   /* Masked variant: __builtin_ia32_selectq_128 selects, under mask __U,
    * between _mm_rolv_epi64(__A, __B) and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4408 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4409 _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
   4410 {
   4411   return (__m128i)__builtin_ia32_selectq_128(__U,
   4412                                              (__v2di)_mm_rolv_epi64(__A, __B),
   4413                                              (__v2di)__W);
   4414 }
   4415 
   /* maskz variant: __builtin_ia32_selectq_128 selects, under mask __U,
    * between _mm_rolv_epi64(__A, __B) and the vector from _mm_setzero_si128()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4416 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4417 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
   4418 {
   4419   return (__m128i)__builtin_ia32_selectq_128(__U,
   4420                                              (__v2di)_mm_rolv_epi64(__A, __B),
   4421                                              (__v2di)_mm_setzero_si128());
   4422 }
   4423 
   /* Passes __A and __B (both viewed as __v4di) to __builtin_ia32_prolvq256
    * and returns the result cast to __m256i. */
   4424 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4425 _mm256_rolv_epi64 (__m256i __A, __m256i __B)
   4426 {
   4427   return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B);
   4428 }
   4429 
   /* Masked variant: __builtin_ia32_selectq_256 selects, under mask __U,
    * between _mm256_rolv_epi64(__A, __B) and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4430 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4431 _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
   4432 {
   4433   return (__m256i)__builtin_ia32_selectq_256(__U,
   4434                                             (__v4di)_mm256_rolv_epi64(__A, __B),
   4435                                             (__v4di)__W);
   4436 }
   4437 
   /* maskz variant: __builtin_ia32_selectq_256 selects, under mask __U,
    * between _mm256_rolv_epi64(__A, __B) and the vector from
    * _mm256_setzero_si256()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4438 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4439 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
   4440 {
   4441   return (__m256i)__builtin_ia32_selectq_256(__U,
   4442                                             (__v4di)_mm256_rolv_epi64(__A, __B),
   4443                                             (__v4di)_mm256_setzero_si256());
   4444 }
   4445 
   /* Expands to __builtin_ia32_prord128 applied to a (cast to __v4si) and b
    * (cast to int); the result is cast to __m128i.
    * NOTE(review): presumably a macro because the builtin needs b to be a
    * compile-time constant -- confirm. */
   4446 #define _mm_ror_epi32(a, b) \
   4447   ((__m128i)__builtin_ia32_prord128((__v4si)(__m128i)(a), (int)(b)))
   4448 
   /* Masked variant: __builtin_ia32_selectd_128 selects, under mask u (cast to
    * __mmask8), between _mm_ror_epi32(a, b) and w
    * (presumably w where the u bit is clear -- TODO confirm). */
   4449 #define _mm_mask_ror_epi32(w, u, a, b) \
   4450   ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
   4451                                        (__v4si)_mm_ror_epi32((a), (b)), \
   4452                                        (__v4si)(__m128i)(w)))
   4453 
   /* maskz variant: __builtin_ia32_selectd_128 selects, under mask u (cast to
    * __mmask8), between _mm_ror_epi32(a, b) and the vector from
    * _mm_setzero_si128()
    * (presumably the latter where the u bit is clear -- TODO confirm). */
   4454 #define _mm_maskz_ror_epi32(u, a, b) \
   4455   ((__m128i)__builtin_ia32_selectd_128((__mmask8)(u), \
   4456                                        (__v4si)_mm_ror_epi32((a), (b)), \
   4457                                        (__v4si)_mm_setzero_si128()))
   4458 
   /* Expands to __builtin_ia32_prord256 applied to a (cast to __v8si) and b
    * (cast to int); the result is cast to __m256i.
    * NOTE(review): presumably a macro because the builtin needs b to be a
    * compile-time constant -- confirm. */
   4459 #define _mm256_ror_epi32(a, b) \
   4460   ((__m256i)__builtin_ia32_prord256((__v8si)(__m256i)(a), (int)(b)))
   4461 
   /* Masked variant: __builtin_ia32_selectd_256 selects, under mask u (cast to
    * __mmask8), between _mm256_ror_epi32(a, b) and w
    * (presumably w where the u bit is clear -- TODO confirm). */
   4462 #define _mm256_mask_ror_epi32(w, u, a, b) \
   4463   ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
   4464                                        (__v8si)_mm256_ror_epi32((a), (b)), \
   4465                                        (__v8si)(__m256i)(w)))
   4466 
   /* maskz variant: __builtin_ia32_selectd_256 selects, under mask u (cast to
    * __mmask8), between _mm256_ror_epi32(a, b) and the vector from
    * _mm256_setzero_si256()
    * (presumably the latter where the u bit is clear -- TODO confirm). */
   4467 #define _mm256_maskz_ror_epi32(u, a, b) \
   4468   ((__m256i)__builtin_ia32_selectd_256((__mmask8)(u), \
   4469                                        (__v8si)_mm256_ror_epi32((a), (b)), \
   4470                                        (__v8si)_mm256_setzero_si256()))
   4471 
   /* Expands to __builtin_ia32_prorq128 applied to a (cast to __v2di) and b
    * (cast to int); the result is cast to __m128i.
    * NOTE(review): presumably a macro because the builtin needs b to be a
    * compile-time constant -- confirm. */
   4472 #define _mm_ror_epi64(a, b) \
   4473   ((__m128i)__builtin_ia32_prorq128((__v2di)(__m128i)(a), (int)(b)))
   4474 
   /* Masked variant: __builtin_ia32_selectq_128 selects, under mask u (cast to
    * __mmask8), between _mm_ror_epi64(a, b) and w
    * (presumably w where the u bit is clear -- TODO confirm). */
   4475 #define _mm_mask_ror_epi64(w, u, a, b) \
   4476   ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
   4477                                        (__v2di)_mm_ror_epi64((a), (b)), \
   4478                                        (__v2di)(__m128i)(w)))
   4479 
   /* maskz variant: __builtin_ia32_selectq_128 selects, under mask u (cast to
    * __mmask8), between _mm_ror_epi64(a, b) and the vector from
    * _mm_setzero_si128()
    * (presumably the latter where the u bit is clear -- TODO confirm). */
   4480 #define _mm_maskz_ror_epi64(u, a, b) \
   4481   ((__m128i)__builtin_ia32_selectq_128((__mmask8)(u), \
   4482                                        (__v2di)_mm_ror_epi64((a), (b)), \
   4483                                        (__v2di)_mm_setzero_si128()))
   4484 
   /* Expands to __builtin_ia32_prorq256 applied to a (cast to __v4di) and b
    * (cast to int); the result is cast to __m256i.
    * NOTE(review): presumably a macro because the builtin needs b to be a
    * compile-time constant -- confirm. */
   4485 #define _mm256_ror_epi64(a, b) \
   4486   ((__m256i)__builtin_ia32_prorq256((__v4di)(__m256i)(a), (int)(b)))
   4487 
   /* Masked variant: __builtin_ia32_selectq_256 selects, under mask u (cast to
    * __mmask8), between _mm256_ror_epi64(a, b) and w
    * (presumably w where the u bit is clear -- TODO confirm). */
   4488 #define _mm256_mask_ror_epi64(w, u, a, b) \
   4489   ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
   4490                                        (__v4di)_mm256_ror_epi64((a), (b)), \
   4491                                        (__v4di)(__m256i)(w)))
   4492 
   /* maskz variant: __builtin_ia32_selectq_256 selects, under mask u (cast to
    * __mmask8), between _mm256_ror_epi64(a, b) and the vector from
    * _mm256_setzero_si256()
    * (presumably the latter where the u bit is clear -- TODO confirm). */
   4493 #define _mm256_maskz_ror_epi64(u, a, b) \
   4494   ((__m256i)__builtin_ia32_selectq_256((__mmask8)(u), \
   4495                                        (__v4di)_mm256_ror_epi64((a), (b)), \
   4496                                        (__v4di)_mm256_setzero_si256()))
   4497 
   /* Masked variant: __builtin_ia32_selectd_128 selects, under mask __U,
    * between _mm_sll_epi32(__A, __B) and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4498 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4499 _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
   4500 {
   4501   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4502                                              (__v4si)_mm_sll_epi32(__A, __B),
   4503                                              (__v4si)__W);
   4504 }
   4505 
   /* maskz variant: __builtin_ia32_selectd_128 selects, under mask __U,
    * between _mm_sll_epi32(__A, __B) and the vector from _mm_setzero_si128()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4506 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4507 _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
   4508 {
   4509   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4510                                              (__v4si)_mm_sll_epi32(__A, __B),
   4511                                              (__v4si)_mm_setzero_si128());
   4512 }
   4513 
   /* Masked variant with a 256-bit __A and a 128-bit __B:
    * __builtin_ia32_selectd_256 selects, under mask __U, between
    * _mm256_sll_epi32(__A, __B) and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4514 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4515 _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
   4516 {
   4517   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4518                                              (__v8si)_mm256_sll_epi32(__A, __B),
   4519                                              (__v8si)__W);
   4520 }
   4521 
   /* maskz variant with a 256-bit __A and a 128-bit __B:
    * __builtin_ia32_selectd_256 selects, under mask __U, between
    * _mm256_sll_epi32(__A, __B) and the vector from _mm256_setzero_si256()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4522 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4523 _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
   4524 {
   4525   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4526                                              (__v8si)_mm256_sll_epi32(__A, __B),
   4527                                              (__v8si)_mm256_setzero_si256());
   4528 }
   4529 
   /* Masked variant: the unsigned count __B is cast to int and handed to
    * _mm_slli_epi32 with __A; __builtin_ia32_selectd_128 then selects, under
    * mask __U, between that result and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4530 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4531 _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
   4532 {
   4533   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4534                                              (__v4si)_mm_slli_epi32(__A, (int)__B),
   4535                                              (__v4si)__W);
   4536 }
   4537 
   /* maskz variant: the unsigned count __B is cast to int and handed to
    * _mm_slli_epi32 with __A; __builtin_ia32_selectd_128 then selects, under
    * mask __U, between that result and the vector from _mm_setzero_si128()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4538 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4539 _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
   4540 {
   4541   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4542                                              (__v4si)_mm_slli_epi32(__A, (int)__B),
   4543                                              (__v4si)_mm_setzero_si128());
   4544 }
   4545 
   /* Masked variant: the unsigned count __B is cast to int and handed to
    * _mm256_slli_epi32 with __A; __builtin_ia32_selectd_256 then selects, under
    * mask __U, between that result and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4546 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4547 _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
   4548 {
   4549   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4550                                              (__v8si)_mm256_slli_epi32(__A, (int)__B),
   4551                                              (__v8si)__W);
   4552 }
   4553 
   /* maskz variant: the unsigned count __B is cast to int and handed to
    * _mm256_slli_epi32 with __A; __builtin_ia32_selectd_256 then selects, under
    * mask __U, between that result and the vector from _mm256_setzero_si256()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4554 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4555 _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
   4556 {
   4557   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4558                                              (__v8si)_mm256_slli_epi32(__A, (int)__B),
   4559                                              (__v8si)_mm256_setzero_si256());
   4560 }
   4561 
   /* Masked variant: __builtin_ia32_selectq_128 selects, under mask __U,
    * between _mm_sll_epi64(__A, __B) and __W
    * (presumably __W where the __U bit is clear -- TODO confirm). */
   4562 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4563 _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
   4564 {
   4565   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4566                                              (__v2di)_mm_sll_epi64(__A, __B),
   4567                                              (__v2di)__W);
   4568 }
   4569 
   /* maskz variant: __builtin_ia32_selectq_128 selects, under mask __U,
    * between _mm_sll_epi64(__A, __B) and the vector from _mm_setzero_si128()
    * (presumably the latter where the __U bit is clear -- TODO confirm). */
   4570 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4571 _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
   4572 {
   4573   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4574                                              (__v2di)_mm_sll_epi64(__A, __B),
   4575                                              (__v2di)_mm_setzero_si128());
   4576 }
   4577 
   4578 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4579 _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
   4580 {
   4581   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4582                                              (__v4di)_mm256_sll_epi64(__A, __B),
   4583                                              (__v4di)__W);
   4584 }
   4585 
   4586 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4587 _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
   4588 {
   4589   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4590                                              (__v4di)_mm256_sll_epi64(__A, __B),
   4591                                              (__v4di)_mm256_setzero_si256());
   4592 }
   4593 
   4594 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4595 _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
   4596 {
   4597   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4598                                              (__v2di)_mm_slli_epi64(__A, (int)__B),
   4599                                              (__v2di)__W);
   4600 }
   4601 
   4602 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4603 _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
   4604 {
   4605   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4606                                              (__v2di)_mm_slli_epi64(__A, (int)__B),
   4607                                              (__v2di)_mm_setzero_si128());
   4608 }
   4609 
   4610 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4611 _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
   4612 {
   4613   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4614                                              (__v4di)_mm256_slli_epi64(__A, (int)__B),
   4615                                              (__v4di)__W);
   4616 }
   4617 
   4618 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4619 _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
   4620 {
   4621   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4622                                              (__v4di)_mm256_slli_epi64(__A, (int)__B),
   4623                                              (__v4di)_mm256_setzero_si256());
   4624 }
   4625 
   4626 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4627 _mm_rorv_epi32 (__m128i __A, __m128i __B)
   4628 {
   4629   return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B);
   4630 }
   4631 
   4632 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4633 _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
   4634 {
   4635   return (__m128i)__builtin_ia32_selectd_128(__U,
   4636                                              (__v4si)_mm_rorv_epi32(__A, __B),
   4637                                              (__v4si)__W);
   4638 }
   4639 
   4640 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4641 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
   4642 {
   4643   return (__m128i)__builtin_ia32_selectd_128(__U,
   4644                                              (__v4si)_mm_rorv_epi32(__A, __B),
   4645                                              (__v4si)_mm_setzero_si128());
   4646 }
   4647 
   4648 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4649 _mm256_rorv_epi32 (__m256i __A, __m256i __B)
   4650 {
   4651   return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B);
   4652 }
   4653 
   4654 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4655 _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
   4656 {
   4657   return (__m256i)__builtin_ia32_selectd_256(__U,
   4658                                             (__v8si)_mm256_rorv_epi32(__A, __B),
   4659                                             (__v8si)__W);
   4660 }
   4661 
   4662 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4663 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
   4664 {
   4665   return (__m256i)__builtin_ia32_selectd_256(__U,
   4666                                             (__v8si)_mm256_rorv_epi32(__A, __B),
   4667                                             (__v8si)_mm256_setzero_si256());
   4668 }
   4669 
   4670 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4671 _mm_rorv_epi64 (__m128i __A, __m128i __B)
   4672 {
   4673   return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B);
   4674 }
   4675 
   4676 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4677 _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
   4678 {
   4679   return (__m128i)__builtin_ia32_selectq_128(__U,
   4680                                              (__v2di)_mm_rorv_epi64(__A, __B),
   4681                                              (__v2di)__W);
   4682 }
   4683 
   4684 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4685 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
   4686 {
   4687   return (__m128i)__builtin_ia32_selectq_128(__U,
   4688                                              (__v2di)_mm_rorv_epi64(__A, __B),
   4689                                              (__v2di)_mm_setzero_si128());
   4690 }
   4691 
   4692 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4693 _mm256_rorv_epi64 (__m256i __A, __m256i __B)
   4694 {
   4695   return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B);
   4696 }
   4697 
   4698 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4699 _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
   4700 {
   4701   return (__m256i)__builtin_ia32_selectq_256(__U,
   4702                                             (__v4di)_mm256_rorv_epi64(__A, __B),
   4703                                             (__v4di)__W);
   4704 }
   4705 
   4706 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4707 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
   4708 {
   4709   return (__m256i)__builtin_ia32_selectq_256(__U,
   4710                                             (__v4di)_mm256_rorv_epi64(__A, __B),
   4711                                             (__v4di)_mm256_setzero_si256());
   4712 }
   4713 
   4714 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4715 _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
   4716 {
   4717   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4718                                              (__v2di)_mm_sllv_epi64(__X, __Y),
   4719                                              (__v2di)__W);
   4720 }
   4721 
   4722 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4723 _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
   4724 {
   4725   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4726                                              (__v2di)_mm_sllv_epi64(__X, __Y),
   4727                                              (__v2di)_mm_setzero_si128());
   4728 }
   4729 
   4730 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4731 _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
   4732 {
   4733   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4734                                             (__v4di)_mm256_sllv_epi64(__X, __Y),
   4735                                             (__v4di)__W);
   4736 }
   4737 
   4738 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4739 _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
   4740 {
   4741   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4742                                             (__v4di)_mm256_sllv_epi64(__X, __Y),
   4743                                             (__v4di)_mm256_setzero_si256());
   4744 }
   4745 
   4746 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4747 _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
   4748 {
   4749   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4750                                              (__v4si)_mm_sllv_epi32(__X, __Y),
   4751                                              (__v4si)__W);
   4752 }
   4753 
   4754 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4755 _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
   4756 {
   4757   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4758                                              (__v4si)_mm_sllv_epi32(__X, __Y),
   4759                                              (__v4si)_mm_setzero_si128());
   4760 }
   4761 
   4762 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4763 _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
   4764 {
   4765   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4766                                             (__v8si)_mm256_sllv_epi32(__X, __Y),
   4767                                             (__v8si)__W);
   4768 }
   4769 
   4770 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4771 _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
   4772 {
   4773   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4774                                             (__v8si)_mm256_sllv_epi32(__X, __Y),
   4775                                             (__v8si)_mm256_setzero_si256());
   4776 }
   4777 
   4778 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4779 _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
   4780 {
   4781   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4782                                              (__v2di)_mm_srlv_epi64(__X, __Y),
   4783                                              (__v2di)__W);
   4784 }
   4785 
   4786 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4787 _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
   4788 {
   4789   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4790                                              (__v2di)_mm_srlv_epi64(__X, __Y),
   4791                                              (__v2di)_mm_setzero_si128());
   4792 }
   4793 
   4794 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4795 _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
   4796 {
   4797   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4798                                             (__v4di)_mm256_srlv_epi64(__X, __Y),
   4799                                             (__v4di)__W);
   4800 }
   4801 
   4802 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4803 _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
   4804 {
   4805   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4806                                             (__v4di)_mm256_srlv_epi64(__X, __Y),
   4807                                             (__v4di)_mm256_setzero_si256());
   4808 }
   4809 
   4810 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4811 _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
   4812 {
   4813   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4814                                             (__v4si)_mm_srlv_epi32(__X, __Y),
   4815                                             (__v4si)__W);
   4816 }
   4817 
   4818 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4819 _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
   4820 {
   4821   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4822                                             (__v4si)_mm_srlv_epi32(__X, __Y),
   4823                                             (__v4si)_mm_setzero_si128());
   4824 }
   4825 
   4826 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4827 _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
   4828 {
   4829   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4830                                             (__v8si)_mm256_srlv_epi32(__X, __Y),
   4831                                             (__v8si)__W);
   4832 }
   4833 
   4834 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4835 _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
   4836 {
   4837   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4838                                             (__v8si)_mm256_srlv_epi32(__X, __Y),
   4839                                             (__v8si)_mm256_setzero_si256());
   4840 }
   4841 
   4842 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4843 _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
   4844 {
   4845   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4846                                              (__v4si)_mm_srl_epi32(__A, __B),
   4847                                              (__v4si)__W);
   4848 }
   4849 
   4850 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4851 _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
   4852 {
   4853   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4854                                              (__v4si)_mm_srl_epi32(__A, __B),
   4855                                              (__v4si)_mm_setzero_si128());
   4856 }
   4857 
   4858 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4859 _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
   4860 {
   4861   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4862                                              (__v8si)_mm256_srl_epi32(__A, __B),
   4863                                              (__v8si)__W);
   4864 }
   4865 
   4866 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4867 _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
   4868 {
   4869   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4870                                              (__v8si)_mm256_srl_epi32(__A, __B),
   4871                                              (__v8si)_mm256_setzero_si256());
   4872 }
   4873 
   4874 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4875 _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
   4876 {
   4877   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4878                                              (__v4si)_mm_srli_epi32(__A, (int)__B),
   4879                                              (__v4si)__W);
   4880 }
   4881 
   4882 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4883 _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
   4884 {
   4885   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4886                                              (__v4si)_mm_srli_epi32(__A, (int)__B),
   4887                                              (__v4si)_mm_setzero_si128());
   4888 }
   4889 
   4890 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4891 _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
   4892 {
   4893   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4894                                              (__v8si)_mm256_srli_epi32(__A, (int)__B),
   4895                                              (__v8si)__W);
   4896 }
   4897 
   4898 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4899 _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
   4900 {
   4901   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4902                                              (__v8si)_mm256_srli_epi32(__A, (int)__B),
   4903                                              (__v8si)_mm256_setzero_si256());
   4904 }
   4905 
   4906 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4907 _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
   4908 {
   4909   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4910                                              (__v2di)_mm_srl_epi64(__A, __B),
   4911                                              (__v2di)__W);
   4912 }
   4913 
   4914 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4915 _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
   4916 {
   4917   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4918                                              (__v2di)_mm_srl_epi64(__A, __B),
   4919                                              (__v2di)_mm_setzero_si128());
   4920 }
   4921 
   4922 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4923 _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
   4924 {
   4925   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4926                                              (__v4di)_mm256_srl_epi64(__A, __B),
   4927                                              (__v4di)__W);
   4928 }
   4929 
   4930 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4931 _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
   4932 {
   4933   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4934                                              (__v4di)_mm256_srl_epi64(__A, __B),
   4935                                              (__v4di)_mm256_setzero_si256());
   4936 }
   4937 
   4938 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4939 _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
   4940 {
   4941   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4942                                              (__v2di)_mm_srli_epi64(__A, (int)__B),
   4943                                              (__v2di)__W);
   4944 }
   4945 
   4946 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4947 _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, unsigned int __B)
   4948 {
   4949   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   4950                                              (__v2di)_mm_srli_epi64(__A, (int)__B),
   4951                                              (__v2di)_mm_setzero_si128());
   4952 }
   4953 
   4954 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4955 _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
   4956 {
   4957   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4958                                              (__v4di)_mm256_srli_epi64(__A, (int)__B),
   4959                                              (__v4di)__W);
   4960 }
   4961 
   4962 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4963 _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, unsigned int __B)
   4964 {
   4965   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   4966                                              (__v4di)_mm256_srli_epi64(__A, (int)__B),
   4967                                              (__v4di)_mm256_setzero_si256());
   4968 }
   4969 
   4970 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4971 _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
   4972 {
   4973   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4974                                             (__v4si)_mm_srav_epi32(__X, __Y),
   4975                                             (__v4si)__W);
   4976 }
   4977 
   4978 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   4979 _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
   4980 {
   4981   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   4982                                             (__v4si)_mm_srav_epi32(__X, __Y),
   4983                                             (__v4si)_mm_setzero_si128());
   4984 }
   4985 
   4986 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4987 _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
   4988 {
   4989   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4990                                             (__v8si)_mm256_srav_epi32(__X, __Y),
   4991                                             (__v8si)__W);
   4992 }
   4993 
   4994 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   4995 _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
   4996 {
   4997   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   4998                                             (__v8si)_mm256_srav_epi32(__X, __Y),
   4999                                             (__v8si)_mm256_setzero_si256());
   5000 }
   5001 
   5002 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   5003 _mm_srav_epi64(__m128i __X, __m128i __Y)
   5004 {
   5005   return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y);
   5006 }
   5007 
   5008 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   5009 _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
   5010 {
   5011   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   5012                                              (__v2di)_mm_srav_epi64(__X, __Y),
   5013                                              (__v2di)__W);
   5014 }
   5015 
   5016 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   5017 _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
   5018 {
   5019   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   5020                                              (__v2di)_mm_srav_epi64(__X, __Y),
   5021                                              (__v2di)_mm_setzero_si128());
   5022 }
   5023 
   5024 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   5025 _mm256_srav_epi64(__m256i __X, __m256i __Y)
   5026 {
   5027   return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y);
   5028 }
   5029 
   5030 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   5031 _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
   5032 {
   5033   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   5034                                              (__v4di)_mm256_srav_epi64(__X, __Y),
   5035                                              (__v4di)__W);
   5036 }
   5037 
   5038 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   5039 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
   5040 {
   5041   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   5042                                              (__v4di)_mm256_srav_epi64(__X, __Y),
   5043                                              (__v4di)_mm256_setzero_si256());
   5044 }
   5045 
   5046 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   5047 _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
   5048 {
   5049   return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
   5050                  (__v4si) __A,
   5051                  (__v4si) __W);
   5052 }
   5053 
   5054 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   5055 _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
   5056 {
   5057   return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
   5058                  (__v4si) __A,
   5059                  (__v4si) _mm_setzero_si128 ());
   5060 }
   5061 
   5062 
   5063 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   5064 _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
   5065 {
   5066   return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
   5067                  (__v8si) __A,
   5068                  (__v8si) __W);
   5069 }
   5070 
   5071 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   5072 _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
   5073 {
   5074   return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
   5075                  (__v8si) __A,
   5076                  (__v8si) _mm256_setzero_si256 ());
   5077 }
   5078 
   5079 static __inline __m128i __DEFAULT_FN_ATTRS128
   5080 _mm_load_epi32 (void const *__P)
   5081 {
   5082   return *(const __m128i *) __P;
   5083 }
   5084 
   5085 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   5086 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
   5087 {
   5088   return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P,
   5089               (__v4si) __W,
   5090               (__mmask8)
   5091               __U);
   5092 }
   5093 
   5094 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   5095 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
   5096 {
   5097   return (__m128i) __builtin_ia32_movdqa32load128_mask ((const __v4si *) __P,
   5098               (__v4si)
   5099               _mm_setzero_si128 (),
   5100               (__mmask8)
   5101               __U);
   5102 }
   5103 
   5104 static __inline __m256i __DEFAULT_FN_ATTRS256
   5105 _mm256_load_epi32 (void const *__P)
   5106 {
   5107   return *(const __m256i *) __P;
   5108 }
   5109 
   5110 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   5111 _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
   5112 {
   5113   return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P,
   5114               (__v8si) __W,
   5115               (__mmask8)
   5116               __U);
   5117 }
   5118 
   5119 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   5120 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
   5121 {
   5122   return (__m256i) __builtin_ia32_movdqa32load256_mask ((const __v8si *) __P,
   5123               (__v8si)
   5124               _mm256_setzero_si256 (),
   5125               (__mmask8)
   5126               __U);
   5127 }
   5128 
   5129 static __inline void __DEFAULT_FN_ATTRS128
   5130 _mm_store_epi32 (void *__P, __m128i __A)
   5131 {
   5132   *(__m128i *) __P = __A;
   5133 }
   5134 
   5135 static __inline__ void __DEFAULT_FN_ATTRS128
   5136 _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
   5137 {
   5138   __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
   5139           (__v4si) __A,
   5140           (__mmask8) __U);
   5141 }
   5142 
   5143 static __inline void __DEFAULT_FN_ATTRS256
   5144 _mm256_store_epi32 (void *__P, __m256i __A)
   5145 {
   5146   *(__m256i *) __P = __A;
   5147 }
   5148 
   5149 static __inline__ void __DEFAULT_FN_ATTRS256
   5150 _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
   5151 {
   5152   __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
   5153           (__v8si) __A,
   5154           (__mmask8) __U);
   5155 }
   5156 
   5157 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   5158 _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
   5159 {
   5160   return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
   5161                  (__v2di) __A,
   5162                  (__v2di) __W);
   5163 }
   5164 
   5165 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   5166 _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
   5167 {
   5168   return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
   5169                  (__v2di) __A,
   5170                  (__v2di) _mm_setzero_si128 ());
   5171 }
   5172 
   5173 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   5174 _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
   5175 {
   5176   return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
   5177                  (__v4di) __A,
   5178                  (__v4di) __W);
   5179 }
   5180 
   5181 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   5182 _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
   5183 {
   5184   return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
   5185                  (__v4di) __A,
   5186                  (__v4di) _mm256_setzero_si256 ());
   5187 }
   5188 
   5189 static __inline __m128i __DEFAULT_FN_ATTRS128
   5190 _mm_load_epi64 (void const *__P)
   5191 {
   5192   return *(const __m128i *) __P;
   5193 }
   5194 
   5195 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   5196 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
   5197 {
   5198   return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P,
   5199               (__v2di) __W,
   5200               (__mmask8)
   5201               __U);
   5202 }
   5203 
   5204 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   5205 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
   5206 {
   5207   return (__m128i) __builtin_ia32_movdqa64load128_mask ((const __v2di *) __P,
   5208               (__v2di)
   5209               _mm_setzero_si128 (),
   5210               (__mmask8)
   5211               __U);
   5212 }
   5213 
   5214 static __inline __m256i __DEFAULT_FN_ATTRS256
   5215 _mm256_load_epi64 (void const *__P)
   5216 {
   5217   return *(const __m256i *) __P;
   5218 }
   5219 
   5220 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   5221 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
   5222 {
   5223   return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P,
   5224               (__v4di) __W,
   5225               (__mmask8)
   5226               __U);
   5227 }
   5228 
   5229 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   5230 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
   5231 {
   5232   return (__m256i) __builtin_ia32_movdqa64load256_mask ((const __v4di *) __P,
   5233               (__v4di)
   5234               _mm256_setzero_si256 (),
   5235               (__mmask8)
   5236               __U);
   5237 }
   5238 
   5239 static __inline void __DEFAULT_FN_ATTRS128
   5240 _mm_store_epi64 (void *__P, __m128i __A)
   5241 {
   5242   *(__m128i *) __P = __A;
   5243 }
   5244 
   5245 static __inline__ void __DEFAULT_FN_ATTRS128
   5246 _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
   5247 {
   5248   __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
   5249           (__v2di) __A,
   5250           (__mmask8) __U);
   5251 }
   5252 
   5253 static __inline void __DEFAULT_FN_ATTRS256
   5254 _mm256_store_epi64 (void *__P, __m256i __A)
   5255 {
   5256   *(__m256i *) __P = __A;
   5257 }
   5258 
   5259 static __inline__ void __DEFAULT_FN_ATTRS256
   5260 _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
   5261 {
   5262   __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
   5263           (__v4di) __A,
   5264           (__mmask8) __U);
   5265 }
   5266 
   5267 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   5268 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
   5269 {
   5270   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   5271                                               (__v2df)_mm_movedup_pd(__A),
   5272                                               (__v2df)__W);
   5273 }
   5274 
   5275 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   5276 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
   5277 {
   5278   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   5279                                               (__v2df)_mm_movedup_pd(__A),
   5280                                               (__v2df)_mm_setzero_pd());
   5281 }
   5282 
   5283 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   5284 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
   5285 {
   5286   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   5287                                               (__v4df)_mm256_movedup_pd(__A),
   5288                                               (__v4df)__W);
   5289 }
   5290 
   5291 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   5292 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
   5293 {
   5294   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   5295                                               (__v4df)_mm256_movedup_pd(__A),
   5296                                               (__v4df)_mm256_setzero_pd());
   5297 }
   5298 
   5299 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   5300 _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
   5301 {
   5302    return (__m128i)__builtin_ia32_selectd_128(__M,
   5303                                               (__v4si) _mm_set1_epi32(__A),
   5304                                               (__v4si)__O);
   5305 }
   5306 
   5307 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   5308 _mm_maskz_set1_epi32( __mmask8 __M, int __A)
   5309 {
   5310    return (__m128i)__builtin_ia32_selectd_128(__M,
   5311                                               (__v4si) _mm_set1_epi32(__A),
   5312                                               (__v4si)_mm_setzero_si128());
   5313 }
   5314 
   5315 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   5316 _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
   5317 {
   5318    return (__m256i)__builtin_ia32_selectd_256(__M,
   5319                                               (__v8si) _mm256_set1_epi32(__A),
   5320                                               (__v8si)__O);
   5321 }
   5322 
   5323 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   5324 _mm256_maskz_set1_epi32( __mmask8 __M, int __A)
   5325 {
   5326    return (__m256i)__builtin_ia32_selectd_256(__M,
   5327                                               (__v8si) _mm256_set1_epi32(__A),
   5328                                               (__v8si)_mm256_setzero_si256());
   5329 }
   5330 
   5331 
   5332 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   5333 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
   5334 {
   5335   return (__m128i) __builtin_ia32_selectq_128(__M,
   5336                                               (__v2di) _mm_set1_epi64x(__A),
   5337                                               (__v2di) __O);
   5338 }
   5339 
   5340 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   5341 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
   5342 {
   5343   return (__m128i) __builtin_ia32_selectq_128(__M,
   5344                                               (__v2di) _mm_set1_epi64x(__A),
   5345                                               (__v2di) _mm_setzero_si128());
   5346 }
   5347 
   5348 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   5349 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
   5350 {
   5351   return (__m256i) __builtin_ia32_selectq_256(__M,
   5352                                               (__v4di) _mm256_set1_epi64x(__A),
   5353                                               (__v4di) __O) ;
   5354 }
   5355 
   5356 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   5357 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
   5358 {
   5359    return (__m256i) __builtin_ia32_selectq_256(__M,
   5360                                                (__v4di) _mm256_set1_epi64x(__A),
   5361                                                (__v4di) _mm256_setzero_si256());
   5362 }
   5363 
   5364 #define _mm_fixupimm_pd(A, B, C, imm) \
   5365   ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
   5366                                               (__v2df)(__m128d)(B), \
   5367                                               (__v2di)(__m128i)(C), (int)(imm), \
   5368                                               (__mmask8)-1))
   5369 
   5370 #define _mm_mask_fixupimm_pd(A, U, B, C, imm) \
   5371   ((__m128d)__builtin_ia32_fixupimmpd128_mask((__v2df)(__m128d)(A), \
   5372                                               (__v2df)(__m128d)(B), \
   5373                                               (__v2di)(__m128i)(C), (int)(imm), \
   5374                                               (__mmask8)(U)))
   5375 
   5376 #define _mm_maskz_fixupimm_pd(U, A, B, C, imm) \
   5377   ((__m128d)__builtin_ia32_fixupimmpd128_maskz((__v2df)(__m128d)(A), \
   5378                                                (__v2df)(__m128d)(B), \
   5379                                                (__v2di)(__m128i)(C), \
   5380                                                (int)(imm), (__mmask8)(U)))
   5381 
   5382 #define _mm256_fixupimm_pd(A, B, C, imm) \
   5383   ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
   5384                                               (__v4df)(__m256d)(B), \
   5385                                               (__v4di)(__m256i)(C), (int)(imm), \
   5386                                               (__mmask8)-1))
   5387 
   5388 #define _mm256_mask_fixupimm_pd(A, U, B, C, imm) \
   5389   ((__m256d)__builtin_ia32_fixupimmpd256_mask((__v4df)(__m256d)(A), \
   5390                                               (__v4df)(__m256d)(B), \
   5391                                               (__v4di)(__m256i)(C), (int)(imm), \
   5392                                               (__mmask8)(U)))
   5393 
   5394 #define _mm256_maskz_fixupimm_pd(U, A, B, C, imm) \
   5395   ((__m256d)__builtin_ia32_fixupimmpd256_maskz((__v4df)(__m256d)(A), \
   5396                                                (__v4df)(__m256d)(B), \
   5397                                                (__v4di)(__m256i)(C), \
   5398                                                (int)(imm), (__mmask8)(U)))
   5399 
   5400 #define _mm_fixupimm_ps(A, B, C, imm) \
   5401   ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
   5402                                              (__v4sf)(__m128)(B), \
   5403                                              (__v4si)(__m128i)(C), (int)(imm), \
   5404                                              (__mmask8)-1))
   5405 
   5406 #define _mm_mask_fixupimm_ps(A, U, B, C, imm) \
   5407   ((__m128)__builtin_ia32_fixupimmps128_mask((__v4sf)(__m128)(A), \
   5408                                              (__v4sf)(__m128)(B), \
   5409                                              (__v4si)(__m128i)(C), (int)(imm), \
   5410                                              (__mmask8)(U)))
   5411 
   5412 #define _mm_maskz_fixupimm_ps(U, A, B, C, imm) \
   5413   ((__m128)__builtin_ia32_fixupimmps128_maskz((__v4sf)(__m128)(A), \
   5414                                               (__v4sf)(__m128)(B), \
   5415                                               (__v4si)(__m128i)(C), (int)(imm), \
   5416                                               (__mmask8)(U)))
   5417 
   5418 #define _mm256_fixupimm_ps(A, B, C, imm) \
   5419   ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
   5420                                              (__v8sf)(__m256)(B), \
   5421                                              (__v8si)(__m256i)(C), (int)(imm), \
   5422                                              (__mmask8)-1))
   5423 
   5424 #define _mm256_mask_fixupimm_ps(A, U, B, C, imm) \
   5425   ((__m256)__builtin_ia32_fixupimmps256_mask((__v8sf)(__m256)(A), \
   5426                                              (__v8sf)(__m256)(B), \
   5427                                              (__v8si)(__m256i)(C), (int)(imm), \
   5428                                              (__mmask8)(U)))
   5429 
   5430 #define _mm256_maskz_fixupimm_ps(U, A, B, C, imm) \
   5431   ((__m256)__builtin_ia32_fixupimmps256_maskz((__v8sf)(__m256)(A), \
   5432                                               (__v8sf)(__m256)(B), \
   5433                                               (__v8si)(__m256i)(C), (int)(imm), \
   5434                                               (__mmask8)(U)))
   5435 
   5436 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   5437 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
   5438 {
   5439   return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P,
   5440                (__v2df) __W,
   5441                (__mmask8) __U);
   5442 }
   5443 
   5444 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   5445 _mm_maskz_load_pd (__mmask8 __U, void const *__P)
   5446 {
   5447   return (__m128d) __builtin_ia32_loadapd128_mask ((const __v2df *) __P,
   5448                (__v2df)
   5449                _mm_setzero_pd (),
   5450                (__mmask8) __U);
   5451 }
   5452 
   5453 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   5454 _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
   5455 {
   5456   return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P,
   5457                (__v4df) __W,
   5458                (__mmask8) __U);
   5459 }
   5460 
   5461 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   5462 _mm256_maskz_load_pd (__mmask8 __U, void const *__P)
   5463 {
   5464   return (__m256d) __builtin_ia32_loadapd256_mask ((const __v4df *) __P,
   5465                (__v4df)
   5466                _mm256_setzero_pd (),
   5467                (__mmask8) __U);
   5468 }
   5469 
   5470 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   5471 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
   5472 {
   5473   return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P,
   5474               (__v4sf) __W,
   5475               (__mmask8) __U);
   5476 }
   5477 
   5478 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   5479 _mm_maskz_load_ps (__mmask8 __U, void const *__P)
   5480 {
   5481   return (__m128) __builtin_ia32_loadaps128_mask ((const __v4sf *) __P,
   5482               (__v4sf)
   5483               _mm_setzero_ps (),
   5484               (__mmask8) __U);
   5485 }
   5486 
   5487 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   5488 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
   5489 {
   5490   return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P,
   5491               (__v8sf) __W,
   5492               (__mmask8) __U);
   5493 }
   5494 
   5495 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   5496 _mm256_maskz_load_ps (__mmask8 __U, void const *__P)
   5497 {
   5498   return (__m256) __builtin_ia32_loadaps256_mask ((const __v8sf *) __P,
   5499               (__v8sf)
   5500               _mm256_setzero_ps (),
   5501               (__mmask8) __U);
   5502 }
   5503 
   5504 static __inline __m128i __DEFAULT_FN_ATTRS128
   5505 _mm_loadu_epi64 (void const *__P)
   5506 {
   5507   struct __loadu_epi64 {
   5508     __m128i_u __v;
   5509   } __attribute__((__packed__, __may_alias__));
   5510   return ((const struct __loadu_epi64*)__P)->__v;
   5511 }
   5512 
   5513 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   5514 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
   5515 {
   5516   return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P,
   5517                  (__v2di) __W,
   5518                  (__mmask8) __U);
   5519 }
   5520 
   5521 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   5522 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
   5523 {
   5524   return (__m128i) __builtin_ia32_loaddqudi128_mask ((const __v2di *) __P,
   5525                  (__v2di)
   5526                  _mm_setzero_si128 (),
   5527                  (__mmask8) __U);
   5528 }
   5529 
   5530 static __inline __m256i __DEFAULT_FN_ATTRS256
   5531 _mm256_loadu_epi64 (void const *__P)
   5532 {
   5533   struct __loadu_epi64 {
   5534     __m256i_u __v;
   5535   } __attribute__((__packed__, __may_alias__));
   5536   return ((const struct __loadu_epi64*)__P)->__v;
   5537 }
   5538 
   5539 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   5540 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
   5541 {
   5542   return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P,
   5543                  (__v4di) __W,
   5544                  (__mmask8) __U);
   5545 }
   5546 
   5547 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   5548 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
   5549 {
   5550   return (__m256i) __builtin_ia32_loaddqudi256_mask ((const __v4di *) __P,
   5551                  (__v4di)
   5552                  _mm256_setzero_si256 (),
   5553                  (__mmask8) __U);
   5554 }
   5555 
   5556 static __inline __m128i __DEFAULT_FN_ATTRS128
   5557 _mm_loadu_epi32 (void const *__P)
   5558 {
   5559   struct __loadu_epi32 {
   5560     __m128i_u __v;
   5561   } __attribute__((__packed__, __may_alias__));
   5562   return ((const struct __loadu_epi32*)__P)->__v;
   5563 }
   5564 
   5565 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   5566 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
   5567 {
   5568   return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P,
   5569                  (__v4si) __W,
   5570                  (__mmask8) __U);
   5571 }
   5572 
   5573 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   5574 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
   5575 {
   5576   return (__m128i) __builtin_ia32_loaddqusi128_mask ((const __v4si *) __P,
   5577                  (__v4si)
   5578                  _mm_setzero_si128 (),
   5579                  (__mmask8) __U);
   5580 }
   5581 
   5582 static __inline __m256i __DEFAULT_FN_ATTRS256
   5583 _mm256_loadu_epi32 (void const *__P)
   5584 {
   5585   struct __loadu_epi32 {
   5586     __m256i_u __v;
   5587   } __attribute__((__packed__, __may_alias__));
   5588   return ((const struct __loadu_epi32*)__P)->__v;
   5589 }
   5590 
   5591 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   5592 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
   5593 {
   5594   return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P,
   5595                  (__v8si) __W,
   5596                  (__mmask8) __U);
   5597 }
   5598 
   5599 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   5600 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
   5601 {
   5602   return (__m256i) __builtin_ia32_loaddqusi256_mask ((const __v8si *) __P,
   5603                  (__v8si)
   5604                  _mm256_setzero_si256 (),
   5605                  (__mmask8) __U);
   5606 }
   5607 
   5608 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   5609 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
   5610 {
   5611   return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P,
   5612                (__v2df) __W,
   5613                (__mmask8) __U);
   5614 }
   5615 
   5616 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   5617 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
   5618 {
   5619   return (__m128d) __builtin_ia32_loadupd128_mask ((const __v2df *) __P,
   5620                (__v2df)
   5621                _mm_setzero_pd (),
   5622                (__mmask8) __U);
   5623 }
   5624 
   5625 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   5626 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
   5627 {
   5628   return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P,
   5629                (__v4df) __W,
   5630                (__mmask8) __U);
   5631 }
   5632 
   5633 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   5634 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
   5635 {
   5636   return (__m256d) __builtin_ia32_loadupd256_mask ((const __v4df *) __P,
   5637                (__v4df)
   5638                _mm256_setzero_pd (),
   5639                (__mmask8) __U);
   5640 }
   5641 
   5642 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   5643 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
   5644 {
   5645   return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P,
   5646               (__v4sf) __W,
   5647               (__mmask8) __U);
   5648 }
   5649 
   5650 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   5651 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
   5652 {
   5653   return (__m128) __builtin_ia32_loadups128_mask ((const __v4sf *) __P,
   5654               (__v4sf)
   5655               _mm_setzero_ps (),
   5656               (__mmask8) __U);
   5657 }
   5658 
   5659 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   5660 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
   5661 {
   5662   return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P,
   5663               (__v8sf) __W,
   5664               (__mmask8) __U);
   5665 }
   5666 
   5667 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   5668 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
   5669 {
   5670   return (__m256) __builtin_ia32_loadups256_mask ((const __v8sf *) __P,
   5671               (__v8sf)
   5672               _mm256_setzero_ps (),
   5673               (__mmask8) __U);
   5674 }
   5675 
   5676 static __inline__ void __DEFAULT_FN_ATTRS128
   5677 _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
   5678 {
   5679   __builtin_ia32_storeapd128_mask ((__v2df *) __P,
   5680            (__v2df) __A,
   5681            (__mmask8) __U);
   5682 }
   5683 
   5684 static __inline__ void __DEFAULT_FN_ATTRS256
   5685 _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
   5686 {
   5687   __builtin_ia32_storeapd256_mask ((__v4df *) __P,
   5688            (__v4df) __A,
   5689            (__mmask8) __U);
   5690 }
   5691 
   5692 static __inline__ void __DEFAULT_FN_ATTRS128
   5693 _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
   5694 {
   5695   __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
   5696            (__v4sf) __A,
   5697            (__mmask8) __U);
   5698 }
   5699 
   5700 static __inline__ void __DEFAULT_FN_ATTRS256
   5701 _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
   5702 {
   5703   __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
   5704            (__v8sf) __A,
   5705            (__mmask8) __U);
   5706 }
   5707 
   5708 static __inline void __DEFAULT_FN_ATTRS128
   5709 _mm_storeu_epi64 (void *__P, __m128i __A)
   5710 {
   5711   struct __storeu_epi64 {
   5712     __m128i_u __v;
   5713   } __attribute__((__packed__, __may_alias__));
   5714   ((struct __storeu_epi64*)__P)->__v = __A;
   5715 }
   5716 
   5717 static __inline__ void __DEFAULT_FN_ATTRS128
   5718 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
   5719 {
   5720   __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
   5721              (__v2di) __A,
   5722              (__mmask8) __U);
   5723 }
   5724 
   5725 static __inline void __DEFAULT_FN_ATTRS256
   5726 _mm256_storeu_epi64 (void *__P, __m256i __A)
   5727 {
   5728   struct __storeu_epi64 {
   5729     __m256i_u __v;
   5730   } __attribute__((__packed__, __may_alias__));
   5731   ((struct __storeu_epi64*)__P)->__v = __A;
   5732 }
   5733 
   5734 static __inline__ void __DEFAULT_FN_ATTRS256
   5735 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
   5736 {
   5737   __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
   5738              (__v4di) __A,
   5739              (__mmask8) __U);
   5740 }
   5741 
   5742 static __inline void __DEFAULT_FN_ATTRS128
   5743 _mm_storeu_epi32 (void *__P, __m128i __A)
   5744 {
   5745   struct __storeu_epi32 {
   5746     __m128i_u __v;
   5747   } __attribute__((__packed__, __may_alias__));
   5748   ((struct __storeu_epi32*)__P)->__v = __A;
   5749 }
   5750 
   5751 static __inline__ void __DEFAULT_FN_ATTRS128
   5752 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
   5753 {
   5754   __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
   5755              (__v4si) __A,
   5756              (__mmask8) __U);
   5757 }
   5758 
   5759 static __inline void __DEFAULT_FN_ATTRS256
   5760 _mm256_storeu_epi32 (void *__P, __m256i __A)
   5761 {
   5762   struct __storeu_epi32 {
   5763     __m256i_u __v;
   5764   } __attribute__((__packed__, __may_alias__));
   5765   ((struct __storeu_epi32*)__P)->__v = __A;
   5766 }
   5767 
   5768 static __inline__ void __DEFAULT_FN_ATTRS256
   5769 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
   5770 {
   5771   __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
   5772              (__v8si) __A,
   5773              (__mmask8) __U);
   5774 }
   5775 
   5776 static __inline__ void __DEFAULT_FN_ATTRS128
   5777 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
   5778 {
   5779   __builtin_ia32_storeupd128_mask ((__v2df *) __P,
   5780            (__v2df) __A,
   5781            (__mmask8) __U);
   5782 }
   5783 
   5784 static __inline__ void __DEFAULT_FN_ATTRS256
   5785 _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
   5786 {
   5787   __builtin_ia32_storeupd256_mask ((__v4df *) __P,
   5788            (__v4df) __A,
   5789            (__mmask8) __U);
   5790 }
   5791 
   5792 static __inline__ void __DEFAULT_FN_ATTRS128
   5793 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
   5794 {
   5795   __builtin_ia32_storeups128_mask ((__v4sf *) __P,
   5796            (__v4sf) __A,
   5797            (__mmask8) __U);
   5798 }
   5799 
   5800 static __inline__ void __DEFAULT_FN_ATTRS256
   5801 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
   5802 {
   5803   __builtin_ia32_storeups256_mask ((__v8sf *) __P,
   5804            (__v8sf) __A,
   5805            (__mmask8) __U);
   5806 }
   5807 
   5808 
   5809 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   5810 _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   5811 {
   5812   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   5813                                               (__v2df)_mm_unpackhi_pd(__A, __B),
   5814                                               (__v2df)__W);
   5815 }
   5816 
   5817 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   5818 _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
   5819 {
   5820   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   5821                                               (__v2df)_mm_unpackhi_pd(__A, __B),
   5822                                               (__v2df)_mm_setzero_pd());
   5823 }
   5824 
   5825 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   5826 _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
   5827 {
   5828   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   5829                                            (__v4df)_mm256_unpackhi_pd(__A, __B),
   5830                                            (__v4df)__W);
   5831 }
   5832 
   5833 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   5834 _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
   5835 {
   5836   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   5837                                            (__v4df)_mm256_unpackhi_pd(__A, __B),
   5838                                            (__v4df)_mm256_setzero_pd());
   5839 }
   5840 
   5841 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   5842 _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   5843 {
   5844   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   5845                                              (__v4sf)_mm_unpackhi_ps(__A, __B),
   5846                                              (__v4sf)__W);
   5847 }
   5848 
   5849 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   5850 _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
   5851 {
   5852   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   5853                                              (__v4sf)_mm_unpackhi_ps(__A, __B),
   5854                                              (__v4sf)_mm_setzero_ps());
   5855 }
   5856 
   5857 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   5858 _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
   5859 {
   5860   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   5861                                            (__v8sf)_mm256_unpackhi_ps(__A, __B),
   5862                                            (__v8sf)__W);
   5863 }
   5864 
   5865 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   5866 _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
   5867 {
   5868   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   5869                                            (__v8sf)_mm256_unpackhi_ps(__A, __B),
   5870                                            (__v8sf)_mm256_setzero_ps());
   5871 }
   5872 
   5873 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   5874 _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
   5875 {
   5876   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   5877                                               (__v2df)_mm_unpacklo_pd(__A, __B),
   5878                                               (__v2df)__W);
   5879 }
   5880 
   5881 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   5882 _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
   5883 {
   5884   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   5885                                               (__v2df)_mm_unpacklo_pd(__A, __B),
   5886                                               (__v2df)_mm_setzero_pd());
   5887 }
   5888 
   5889 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   5890 _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
   5891 {
   5892   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   5893                                            (__v4df)_mm256_unpacklo_pd(__A, __B),
   5894                                            (__v4df)__W);
   5895 }
   5896 
   5897 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   5898 _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
   5899 {
   5900   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   5901                                            (__v4df)_mm256_unpacklo_pd(__A, __B),
   5902                                            (__v4df)_mm256_setzero_pd());
   5903 }
   5904 
   5905 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   5906 _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
   5907 {
   5908   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   5909                                              (__v4sf)_mm_unpacklo_ps(__A, __B),
   5910                                              (__v4sf)__W);
   5911 }
   5912 
   5913 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   5914 _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
   5915 {
   5916   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   5917                                              (__v4sf)_mm_unpacklo_ps(__A, __B),
   5918                                              (__v4sf)_mm_setzero_ps());
   5919 }
   5920 
   5921 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   5922 _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
   5923 {
   5924   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   5925                                            (__v8sf)_mm256_unpacklo_ps(__A, __B),
   5926                                            (__v8sf)__W);
   5927 }
   5928 
   5929 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   5930 _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
   5931 {
   5932   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   5933                                            (__v8sf)_mm256_unpacklo_ps(__A, __B),
   5934                                            (__v8sf)_mm256_setzero_ps());
   5935 }
   5936 
   5937 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   5938 _mm_rcp14_pd (__m128d __A)
   5939 {
   5940   return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
   5941                 (__v2df)
   5942                 _mm_setzero_pd (),
   5943                 (__mmask8) -1);
   5944 }
   5945 
   5946 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   5947 _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
   5948 {
   5949   return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
   5950                 (__v2df) __W,
   5951                 (__mmask8) __U);
   5952 }
   5953 
   5954 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   5955 _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
   5956 {
   5957   return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
   5958                 (__v2df)
   5959                 _mm_setzero_pd (),
   5960                 (__mmask8) __U);
   5961 }
   5962 
   5963 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   5964 _mm256_rcp14_pd (__m256d __A)
   5965 {
   5966   return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
   5967                 (__v4df)
   5968                 _mm256_setzero_pd (),
   5969                 (__mmask8) -1);
   5970 }
   5971 
   5972 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   5973 _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
   5974 {
   5975   return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
   5976                 (__v4df) __W,
   5977                 (__mmask8) __U);
   5978 }
   5979 
   5980 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   5981 _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
   5982 {
   5983   return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
   5984                 (__v4df)
   5985                 _mm256_setzero_pd (),
   5986                 (__mmask8) __U);
   5987 }
   5988 
   5989 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   5990 _mm_rcp14_ps (__m128 __A)
   5991 {
   5992   return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
   5993                (__v4sf)
   5994                _mm_setzero_ps (),
   5995                (__mmask8) -1);
   5996 }
   5997 
   5998 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   5999 _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
   6000 {
   6001   return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
   6002                (__v4sf) __W,
   6003                (__mmask8) __U);
   6004 }
   6005 
   6006 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   6007 _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
   6008 {
   6009   return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
   6010                (__v4sf)
   6011                _mm_setzero_ps (),
   6012                (__mmask8) __U);
   6013 }
   6014 
   6015 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   6016 _mm256_rcp14_ps (__m256 __A)
   6017 {
   6018   return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
   6019                (__v8sf)
   6020                _mm256_setzero_ps (),
   6021                (__mmask8) -1);
   6022 }
   6023 
   6024 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   6025 _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
   6026 {
   6027   return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
   6028                (__v8sf) __W,
   6029                (__mmask8) __U);
   6030 }
   6031 
   6032 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   6033 _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
   6034 {
   6035   return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
   6036                (__v8sf)
   6037                _mm256_setzero_ps (),
   6038                (__mmask8) __U);
   6039 }
   6040 
   6041 #define _mm_mask_permute_pd(W, U, X, C) \
   6042   ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
   6043                                         (__v2df)_mm_permute_pd((X), (C)), \
   6044                                         (__v2df)(__m128d)(W)))
   6045 
   6046 #define _mm_maskz_permute_pd(U, X, C) \
   6047   ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
   6048                                         (__v2df)_mm_permute_pd((X), (C)), \
   6049                                         (__v2df)_mm_setzero_pd()))
   6050 
   6051 #define _mm256_mask_permute_pd(W, U, X, C) \
   6052   ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
   6053                                         (__v4df)_mm256_permute_pd((X), (C)), \
   6054                                         (__v4df)(__m256d)(W)))
   6055 
   /* Expands to __builtin_ia32_selectpd_256 over mask U, the result of
    * _mm256_permute_pd((X), (C)), and _mm256_setzero_pd().
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6056 #define _mm256_maskz_permute_pd(U, X, C) \
   6057   ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
   6058                                         (__v4df)_mm256_permute_pd((X), (C)), \
   6059                                         (__v4df)_mm256_setzero_pd()))
   6060 
   /* Expands to __builtin_ia32_selectps_128 over mask U, the result of
    * _mm_permute_ps((X), (C)), and W cast to __v4sf.
    * NOTE(review): presumably lanes with a clear mask bit come from W - confirm. */
   6061 #define _mm_mask_permute_ps(W, U, X, C) \
   6062   ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
   6063                                        (__v4sf)_mm_permute_ps((X), (C)), \
   6064                                        (__v4sf)(__m128)(W)))
   6065 
   /* Expands to __builtin_ia32_selectps_128 over mask U, the result of
    * _mm_permute_ps((X), (C)), and _mm_setzero_ps().
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6066 #define _mm_maskz_permute_ps(U, X, C) \
   6067   ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
   6068                                        (__v4sf)_mm_permute_ps((X), (C)), \
   6069                                        (__v4sf)_mm_setzero_ps()))
   6070 
   /* Expands to __builtin_ia32_selectps_256 over mask U, the result of
    * _mm256_permute_ps((X), (C)), and W cast to __v8sf.
    * NOTE(review): presumably lanes with a clear mask bit come from W - confirm. */
   6071 #define _mm256_mask_permute_ps(W, U, X, C) \
   6072   ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
   6073                                        (__v8sf)_mm256_permute_ps((X), (C)), \
   6074                                        (__v8sf)(__m256)(W)))
   6075 
   /* Expands to __builtin_ia32_selectps_256 over mask U, the result of
    * _mm256_permute_ps((X), (C)), and _mm256_setzero_ps().
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6076 #define _mm256_maskz_permute_ps(U, X, C) \
   6077   ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
   6078                                        (__v8sf)_mm256_permute_ps((X), (C)), \
   6079                                        (__v8sf)_mm256_setzero_ps()))
   6080 
   /* Feeds mask __U, the result of _mm_permutevar_pd(__A, __C), and __W to
    * __builtin_ia32_selectpd_128.
    * NOTE(review): presumably lanes with a clear mask bit come from __W - confirm. */
   6081 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   6082 _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
   6083 {
   6084   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   6085                                             (__v2df)_mm_permutevar_pd(__A, __C),
   6086                                             (__v2df)__W);
   6087 }
   6088 
   /* Feeds mask __U, the result of _mm_permutevar_pd(__A, __C), and a zero
    * vector to __builtin_ia32_selectpd_128.
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6089 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   6090 _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
   6091 {
   6092   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
   6093                                             (__v2df)_mm_permutevar_pd(__A, __C),
   6094                                             (__v2df)_mm_setzero_pd());
   6095 }
   6096 
   /* Feeds mask __U, the result of _mm256_permutevar_pd(__A, __C), and __W to
    * __builtin_ia32_selectpd_256.
    * NOTE(review): presumably lanes with a clear mask bit come from __W - confirm. */
   6097 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   6098 _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
   6099 {
   6100   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   6101                                          (__v4df)_mm256_permutevar_pd(__A, __C),
   6102                                          (__v4df)__W);
   6103 }
   6104 
   /* Feeds mask __U, the result of _mm256_permutevar_pd(__A, __C), and a zero
    * vector to __builtin_ia32_selectpd_256.
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6105 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   6106 _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
   6107 {
   6108   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   6109                                          (__v4df)_mm256_permutevar_pd(__A, __C),
   6110                                          (__v4df)_mm256_setzero_pd());
   6111 }
   6112 
   /* Feeds mask __U, the result of _mm_permutevar_ps(__A, __C), and __W to
    * __builtin_ia32_selectps_128.
    * NOTE(review): presumably lanes with a clear mask bit come from __W - confirm. */
   6113 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   6114 _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
   6115 {
   6116   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   6117                                             (__v4sf)_mm_permutevar_ps(__A, __C),
   6118                                             (__v4sf)__W);
   6119 }
   6120 
   /* Feeds mask __U, the result of _mm_permutevar_ps(__A, __C), and a zero
    * vector to __builtin_ia32_selectps_128.
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6121 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   6122 _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
   6123 {
   6124   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   6125                                             (__v4sf)_mm_permutevar_ps(__A, __C),
   6126                                             (__v4sf)_mm_setzero_ps());
   6127 }
   6128 
   /* Feeds mask __U, the result of _mm256_permutevar_ps(__A, __C), and __W to
    * __builtin_ia32_selectps_256.
    * NOTE(review): presumably lanes with a clear mask bit come from __W - confirm. */
   6129 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   6130 _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
   6131 {
   6132   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   6133                                           (__v8sf)_mm256_permutevar_ps(__A, __C),
   6134                                           (__v8sf)__W);
   6135 }
   6136 
   /* Feeds mask __U, the result of _mm256_permutevar_ps(__A, __C), and a zero
    * vector to __builtin_ia32_selectps_256.
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6137 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   6138 _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
   6139 {
   6140   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   6141                                           (__v8sf)_mm256_permutevar_ps(__A, __C),
   6142                                           (__v8sf)_mm256_setzero_ps());
   6143 }
   6144 
   /* Returns _mm_cmpneq_epi32_mask of _mm_and_si128(__A, __B) against a zero
    * vector, i.e. a mask of the 32-bit lanes whose AND is non-zero. */
   6145 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
   6146 _mm_test_epi32_mask (__m128i __A, __m128i __B)
   6147 {
   6148   return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
   6149 }
   6150 
   /* Returns _mm_mask_cmpneq_epi32_mask, under input mask __U, of
    * _mm_and_si128(__A, __B) against a zero vector (32-bit lanes whose AND is
    * non-zero). */
   6151 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
   6152 _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
   6153 {
   6154   return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B),
   6155                                      _mm_setzero_si128());
   6156 }
   6157 
   /* Returns _mm256_cmpneq_epi32_mask of _mm256_and_si256(__A, __B) against a
    * zero vector, i.e. a mask of the 32-bit lanes whose AND is non-zero. */
   6158 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
   6159 _mm256_test_epi32_mask (__m256i __A, __m256i __B)
   6160 {
   6161   return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B),
   6162                                    _mm256_setzero_si256());
   6163 }
   6164 
   /* Returns _mm256_mask_cmpneq_epi32_mask, under input mask __U, of
    * _mm256_and_si256(__A, __B) against a zero vector (32-bit lanes whose AND
    * is non-zero). */
   6165 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
   6166 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
   6167 {
   6168   return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
   6169                                         _mm256_setzero_si256());
   6170 }
   6171 
   /* Returns _mm_cmpneq_epi64_mask of _mm_and_si128(__A, __B) against a zero
    * vector, i.e. a mask of the 64-bit lanes whose AND is non-zero. */
   6172 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
   6173 _mm_test_epi64_mask (__m128i __A, __m128i __B)
   6174 {
   6175   return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
   6176 }
   6177 
   /* Returns _mm_mask_cmpneq_epi64_mask, under input mask __U, of
    * _mm_and_si128(__A, __B) against a zero vector (64-bit lanes whose AND is
    * non-zero). */
   6178 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
   6179 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
   6180 {
   6181   return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B),
   6182                                      _mm_setzero_si128());
   6183 }
   6184 
   /* Returns _mm256_cmpneq_epi64_mask of _mm256_and_si256(__A, __B) against a
    * zero vector, i.e. a mask of the 64-bit lanes whose AND is non-zero. */
   6185 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
   6186 _mm256_test_epi64_mask (__m256i __A, __m256i __B)
   6187 {
   6188   return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B),
   6189                                    _mm256_setzero_si256());
   6190 }
   6191 
   /* Returns _mm256_mask_cmpneq_epi64_mask, under input mask __U, of
    * _mm256_and_si256(__A, __B) against a zero vector (64-bit lanes whose AND
    * is non-zero). */
   6192 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
   6193 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
   6194 {
   6195   return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
   6196                                         _mm256_setzero_si256());
   6197 }
   6198 
   /* Returns _mm_cmpeq_epi32_mask of _mm_and_si128(__A, __B) against a zero
    * vector, i.e. a mask of the 32-bit lanes whose AND is zero. */
   6199 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
   6200 _mm_testn_epi32_mask (__m128i __A, __m128i __B)
   6201 {
   6202   return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
   6203 }
   6204 
   /* Returns _mm_mask_cmpeq_epi32_mask, under input mask __U, of
    * _mm_and_si128(__A, __B) against a zero vector (32-bit lanes whose AND is
    * zero). */
   6205 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
   6206 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
   6207 {
   6208   return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, __B),
   6209                                     _mm_setzero_si128());
   6210 }
   6211 
   /* Returns _mm256_cmpeq_epi32_mask of _mm256_and_si256(__A, __B) against a
    * zero vector, i.e. a mask of the 32-bit lanes whose AND is zero. */
   6212 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
   6213 _mm256_testn_epi32_mask (__m256i __A, __m256i __B)
   6214 {
   6215   return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B),
   6216                                   _mm256_setzero_si256());
   6217 }
   6218 
   /* Returns _mm256_mask_cmpeq_epi32_mask, under input mask __U, of
    * _mm256_and_si256(__A, __B) against a zero vector (32-bit lanes whose AND
    * is zero). */
   6219 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
   6220 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
   6221 {
   6222   return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
   6223                                        _mm256_setzero_si256());
   6224 }
   6225 
   /* Returns _mm_cmpeq_epi64_mask of _mm_and_si128(__A, __B) against a zero
    * vector, i.e. a mask of the 64-bit lanes whose AND is zero. */
   6226 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
   6227 _mm_testn_epi64_mask (__m128i __A, __m128i __B)
   6228 {
   6229   return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
   6230 }
   6231 
   /* Returns _mm_mask_cmpeq_epi64_mask, under input mask __U, of
    * _mm_and_si128(__A, __B) against a zero vector (64-bit lanes whose AND is
    * zero). */
   6232 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
   6233 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
   6234 {
   6235   return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B),
   6236                                     _mm_setzero_si128());
   6237 }
   6238 
   /* Returns _mm256_cmpeq_epi64_mask of _mm256_and_si256(__A, __B) against a
    * zero vector, i.e. a mask of the 64-bit lanes whose AND is zero. */
   6239 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
   6240 _mm256_testn_epi64_mask (__m256i __A, __m256i __B)
   6241 {
   6242   return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B),
   6243                                   _mm256_setzero_si256());
   6244 }
   6245 
   /* Returns _mm256_mask_cmpeq_epi64_mask, under input mask __U, of
    * _mm256_and_si256(__A, __B) against a zero vector (64-bit lanes whose AND
    * is zero). */
   6246 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
   6247 _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
   6248 {
   6249   return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
   6250                                        _mm256_setzero_si256());
   6251 }
   6252 
   /* Feeds mask __U, the result of _mm_unpackhi_epi32(__A, __B), and __W to
    * __builtin_ia32_selectd_128.
    * NOTE(review): presumably lanes with a clear mask bit come from __W - confirm. */
   6253 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   6254 _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
   6255 {
   6256   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   6257                                            (__v4si)_mm_unpackhi_epi32(__A, __B),
   6258                                            (__v4si)__W);
   6259 }
   6260 
   /* Feeds mask __U, the result of _mm_unpackhi_epi32(__A, __B), and a zero
    * vector to __builtin_ia32_selectd_128.
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6261 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   6262 _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
   6263 {
   6264   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   6265                                            (__v4si)_mm_unpackhi_epi32(__A, __B),
   6266                                            (__v4si)_mm_setzero_si128());
   6267 }
   6268 
   /* Feeds mask __U, the result of _mm256_unpackhi_epi32(__A, __B), and __W to
    * __builtin_ia32_selectd_256.
    * NOTE(review): presumably lanes with a clear mask bit come from __W - confirm. */
   6269 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   6270 _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
   6271 {
   6272   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   6273                                         (__v8si)_mm256_unpackhi_epi32(__A, __B),
   6274                                         (__v8si)__W);
   6275 }
   6276 
   /* Feeds mask __U, the result of _mm256_unpackhi_epi32(__A, __B), and a zero
    * vector to __builtin_ia32_selectd_256.
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6277 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   6278 _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
   6279 {
   6280   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   6281                                         (__v8si)_mm256_unpackhi_epi32(__A, __B),
   6282                                         (__v8si)_mm256_setzero_si256());
   6283 }
   6284 
   /* Feeds mask __U, the result of _mm_unpackhi_epi64(__A, __B), and __W to
    * __builtin_ia32_selectq_128.
    * NOTE(review): presumably lanes with a clear mask bit come from __W - confirm. */
   6285 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   6286 _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
   6287 {
   6288   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   6289                                            (__v2di)_mm_unpackhi_epi64(__A, __B),
   6290                                            (__v2di)__W);
   6291 }
   6292 
   /* Feeds mask __U, the result of _mm_unpackhi_epi64(__A, __B), and a zero
    * vector to __builtin_ia32_selectq_128.
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6293 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   6294 _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
   6295 {
   6296   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   6297                                            (__v2di)_mm_unpackhi_epi64(__A, __B),
   6298                                            (__v2di)_mm_setzero_si128());
   6299 }
   6300 
   /* Feeds mask __U, the result of _mm256_unpackhi_epi64(__A, __B), and __W to
    * __builtin_ia32_selectq_256.
    * NOTE(review): presumably lanes with a clear mask bit come from __W - confirm. */
   6301 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   6302 _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
   6303 {
   6304   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   6305                                         (__v4di)_mm256_unpackhi_epi64(__A, __B),
   6306                                         (__v4di)__W);
   6307 }
   6308 
   /* Feeds mask __U, the result of _mm256_unpackhi_epi64(__A, __B), and a zero
    * vector to __builtin_ia32_selectq_256.
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6309 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   6310 _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
   6311 {
   6312   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   6313                                         (__v4di)_mm256_unpackhi_epi64(__A, __B),
   6314                                         (__v4di)_mm256_setzero_si256());
   6315 }
   6316 
   /* Feeds mask __U, the result of _mm_unpacklo_epi32(__A, __B), and __W to
    * __builtin_ia32_selectd_128.
    * NOTE(review): presumably lanes with a clear mask bit come from __W - confirm. */
   6317 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   6318 _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
   6319 {
   6320   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   6321                                            (__v4si)_mm_unpacklo_epi32(__A, __B),
   6322                                            (__v4si)__W);
   6323 }
   6324 
   /* Feeds mask __U, the result of _mm_unpacklo_epi32(__A, __B), and a zero
    * vector to __builtin_ia32_selectd_128.
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6325 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   6326 _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
   6327 {
   6328   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   6329                                            (__v4si)_mm_unpacklo_epi32(__A, __B),
   6330                                            (__v4si)_mm_setzero_si128());
   6331 }
   6332 
   /* Feeds mask __U, the result of _mm256_unpacklo_epi32(__A, __B), and __W to
    * __builtin_ia32_selectd_256.
    * NOTE(review): presumably lanes with a clear mask bit come from __W - confirm. */
   6333 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   6334 _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
   6335 {
   6336   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   6337                                         (__v8si)_mm256_unpacklo_epi32(__A, __B),
   6338                                         (__v8si)__W);
   6339 }
   6340 
   /* Feeds mask __U, the result of _mm256_unpacklo_epi32(__A, __B), and a zero
    * vector to __builtin_ia32_selectd_256.
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6341 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   6342 _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
   6343 {
   6344   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   6345                                         (__v8si)_mm256_unpacklo_epi32(__A, __B),
   6346                                         (__v8si)_mm256_setzero_si256());
   6347 }
   6348 
   /* Feeds mask __U, the result of _mm_unpacklo_epi64(__A, __B), and __W to
    * __builtin_ia32_selectq_128.
    * NOTE(review): presumably lanes with a clear mask bit come from __W - confirm. */
   6349 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   6350 _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
   6351 {
   6352   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   6353                                            (__v2di)_mm_unpacklo_epi64(__A, __B),
   6354                                            (__v2di)__W);
   6355 }
   6356 
   /* Feeds mask __U, the result of _mm_unpacklo_epi64(__A, __B), and a zero
    * vector to __builtin_ia32_selectq_128.
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6357 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   6358 _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
   6359 {
   6360   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   6361                                            (__v2di)_mm_unpacklo_epi64(__A, __B),
   6362                                            (__v2di)_mm_setzero_si128());
   6363 }
   6364 
   /* Feeds mask __U, the result of _mm256_unpacklo_epi64(__A, __B), and __W to
    * __builtin_ia32_selectq_256.
    * NOTE(review): presumably lanes with a clear mask bit come from __W - confirm. */
   6365 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   6366 _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
   6367 {
   6368   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   6369                                         (__v4di)_mm256_unpacklo_epi64(__A, __B),
   6370                                         (__v4di)__W);
   6371 }
   6372 
   /* Feeds mask __U, the result of _mm256_unpacklo_epi64(__A, __B), and a zero
    * vector to __builtin_ia32_selectq_256.
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6373 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   6374 _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
   6375 {
   6376   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   6377                                         (__v4di)_mm256_unpacklo_epi64(__A, __B),
   6378                                         (__v4di)_mm256_setzero_si256());
   6379 }
   6380 
   /* Feeds mask __U, the result of _mm_sra_epi32(__A, __B), and __W to
    * __builtin_ia32_selectd_128.
    * NOTE(review): presumably lanes with a clear mask bit come from __W - confirm. */
   6381 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   6382 _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
   6383 {
   6384   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   6385                                              (__v4si)_mm_sra_epi32(__A, __B),
   6386                                              (__v4si)__W);
   6387 }
   6388 
   /* Feeds mask __U, the result of _mm_sra_epi32(__A, __B), and a zero vector
    * to __builtin_ia32_selectd_128.
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6389 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   6390 _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
   6391 {
   6392   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   6393                                              (__v4si)_mm_sra_epi32(__A, __B),
   6394                                              (__v4si)_mm_setzero_si128());
   6395 }
   6396 
   /* Feeds mask __U, the result of _mm256_sra_epi32(__A, __B), and __W to
    * __builtin_ia32_selectd_256. Note that __B is a 128-bit vector (__m128i).
    * NOTE(review): presumably lanes with a clear mask bit come from __W - confirm. */
   6397 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   6398 _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
   6399 {
   6400   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   6401                                              (__v8si)_mm256_sra_epi32(__A, __B),
   6402                                              (__v8si)__W);
   6403 }
   6404 
   /* Feeds mask __U, the result of _mm256_sra_epi32(__A, __B), and a zero
    * vector to __builtin_ia32_selectd_256. __B is a 128-bit vector (__m128i).
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6405 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   6406 _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
   6407 {
   6408   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   6409                                              (__v8si)_mm256_sra_epi32(__A, __B),
   6410                                              (__v8si)_mm256_setzero_si256());
   6411 }
   6412 
   /* Feeds mask __U, the result of _mm_srai_epi32(__A, (int)__B), and __W to
    * __builtin_ia32_selectd_128. The unsigned count __B is cast to int.
    * NOTE(review): presumably lanes with a clear mask bit come from __W - confirm. */
   6413 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   6414 _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __B)
   6415 {
   6416   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   6417                                              (__v4si)_mm_srai_epi32(__A, (int)__B),
   6418                                              (__v4si)__W);
   6419 }
   6420 
   /* Feeds mask __U, the result of _mm_srai_epi32(__A, (int)__B), and a zero
    * vector to __builtin_ia32_selectd_128. The unsigned count __B is cast to int.
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6421 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   6422 _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, unsigned int __B)
   6423 {
   6424   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
   6425                                              (__v4si)_mm_srai_epi32(__A, (int)__B),
   6426                                              (__v4si)_mm_setzero_si128());
   6427 }
   6428 
   /* Feeds mask __U, the result of _mm256_srai_epi32(__A, (int)__B), and __W to
    * __builtin_ia32_selectd_256. The unsigned count __B is cast to int.
    * NOTE(review): presumably lanes with a clear mask bit come from __W - confirm. */
   6429 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   6430 _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, unsigned int __B)
   6431 {
   6432   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   6433                                              (__v8si)_mm256_srai_epi32(__A, (int)__B),
   6434                                              (__v8si)__W);
   6435 }
   6436 
   /* Feeds mask __U, the result of _mm256_srai_epi32(__A, (int)__B), and a zero
    * vector to __builtin_ia32_selectd_256. The unsigned count __B is cast to int.
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6437 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   6438 _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, unsigned int __B)
   6439 {
   6440   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
   6441                                              (__v8si)_mm256_srai_epi32(__A, (int)__B),
   6442                                              (__v8si)_mm256_setzero_si256());
   6443 }
   6444 
   /* Forwards to __builtin_ia32_psraq128 with both __A and __B cast to __v2di.
    * NOTE(review): presumably an arithmetic right shift of each 64-bit lane of
    * __A by the count held in __B - confirm. */
   6445 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   6446 _mm_sra_epi64(__m128i __A, __m128i __B)
   6447 {
   6448   return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B);
   6449 }
   6450 
   /* Feeds mask __U, the result of _mm_sra_epi64(__A, __B), and __W to
    * __builtin_ia32_selectq_128.
    * NOTE(review): presumably lanes with a clear mask bit come from __W - confirm. */
   6451 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   6452 _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
   6453 {
   6454   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   6455                                              (__v2di)_mm_sra_epi64(__A, __B),
   6456                                              (__v2di)__W);
   6457 }
   6458 
   /* Feeds mask __U, the result of _mm_sra_epi64(__A, __B), and a zero vector
    * to __builtin_ia32_selectq_128.
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6459 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   6460 _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
   6461 {
   6462   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   6463                                              (__v2di)_mm_sra_epi64(__A, __B),
   6464                                              (__v2di)_mm_setzero_si128());
   6465 }
   6466 
   /* Forwards to __builtin_ia32_psraq256 with __A cast to __v4di and the
    * 128-bit __B cast to __v2di.
    * NOTE(review): presumably an arithmetic right shift of each 64-bit lane of
    * __A by the count held in __B - confirm. */
   6467 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   6468 _mm256_sra_epi64(__m256i __A, __m128i __B)
   6469 {
   6470   return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B);
   6471 }
   6472 
   /* Feeds mask __U, the result of _mm256_sra_epi64(__A, __B), and __W to
    * __builtin_ia32_selectq_256. __B is a 128-bit vector (__m128i).
    * NOTE(review): presumably lanes with a clear mask bit come from __W - confirm. */
   6473 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   6474 _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
   6475 {
   6476   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   6477                                            (__v4di)_mm256_sra_epi64(__A, __B),
   6478                                            (__v4di)__W);
   6479 }
   6480 
   /* Feeds mask __U, the result of _mm256_sra_epi64(__A, __B), and a zero
    * vector to __builtin_ia32_selectq_256. __B is a 128-bit vector (__m128i).
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6481 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   6482 _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
   6483 {
   6484   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   6485                                            (__v4di)_mm256_sra_epi64(__A, __B),
   6486                                            (__v4di)_mm256_setzero_si256());
   6487 }
   6488 
   /* Forwards to __builtin_ia32_psraqi128 with __A cast to __v2di and the
    * unsigned count __imm cast to int.
    * NOTE(review): presumably an arithmetic right shift of each 64-bit lane by
    * __imm - confirm. */
   6489 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   6490 _mm_srai_epi64(__m128i __A, unsigned int __imm)
   6491 {
   6492   return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, (int)__imm);
   6493 }
   6494 
   /* Feeds mask __U, the result of _mm_srai_epi64(__A, __imm), and __W to
    * __builtin_ia32_selectq_128.
    * NOTE(review): presumably lanes with a clear mask bit come from __W - confirm. */
   6495 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   6496 _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, unsigned int __imm)
   6497 {
   6498   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   6499                                            (__v2di)_mm_srai_epi64(__A, __imm),
   6500                                            (__v2di)__W);
   6501 }
   6502 
   /* Feeds mask __U, the result of _mm_srai_epi64(__A, __imm), and a zero
    * vector to __builtin_ia32_selectq_128.
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6503 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   6504 _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, unsigned int __imm)
   6505 {
   6506   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
   6507                                            (__v2di)_mm_srai_epi64(__A, __imm),
   6508                                            (__v2di)_mm_setzero_si128());
   6509 }
   6510 
   /* Forwards to __builtin_ia32_psraqi256 with __A cast to __v4di and the
    * unsigned count __imm cast to int.
    * NOTE(review): presumably an arithmetic right shift of each 64-bit lane by
    * __imm - confirm. */
   6511 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   6512 _mm256_srai_epi64(__m256i __A, unsigned int __imm)
   6513 {
   6514   return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, (int)__imm);
   6515 }
   6516 
   /* Feeds mask __U, the result of _mm256_srai_epi64(__A, __imm), and __W to
    * __builtin_ia32_selectq_256.
    * NOTE(review): presumably lanes with a clear mask bit come from __W - confirm. */
   6517 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   6518 _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A,
   6519                        unsigned int __imm)
   6520 {
   6521   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   6522                                         (__v4di)_mm256_srai_epi64(__A, __imm),
   6523                                         (__v4di)__W);
   6524 }
   6525 
   /* Feeds mask __U, the result of _mm256_srai_epi64(__A, __imm), and a zero
    * vector to __builtin_ia32_selectq_256.
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6526 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   6527 _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, unsigned int __imm)
   6528 {
   6529   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
   6530                                         (__v4di)_mm256_srai_epi64(__A, __imm),
   6531                                         (__v4di)_mm256_setzero_si256());
   6532 }
   6533 
   /* Expands to __builtin_ia32_pternlogd128_mask with A, B, C cast to __v4si,
    * imm cast to unsigned char, and an all-ones mask ((__mmask8)-1).
    * NOTE(review): imm is presumably a truth table over the bits of A, B, C - confirm. */
   6534 #define _mm_ternarylogic_epi32(A, B, C, imm)                                   \
   6535   ((__m128i)__builtin_ia32_pternlogd128_mask(                                  \
   6536       (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C),        \
   6537       (unsigned char)(imm), (__mmask8)-1))
   6538 
   /* Expands to __builtin_ia32_pternlogd128_mask with A, B, C cast to __v4si,
    * imm cast to unsigned char, and mask U. Note the parameter order: the mask
    * U comes second, after A. */
   6539 #define _mm_mask_ternarylogic_epi32(A, U, B, C, imm)                           \
   6540   ((__m128i)__builtin_ia32_pternlogd128_mask(                                  \
   6541       (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C),        \
   6542       (unsigned char)(imm), (__mmask8)(U)))
   6543 
   /* Expands to __builtin_ia32_pternlogd128_maskz (the "maskz" builtin) with
    * A, B, C cast to __v4si, imm cast to unsigned char, and mask U.
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6544 #define _mm_maskz_ternarylogic_epi32(U, A, B, C, imm)                          \
   6545   ((__m128i)__builtin_ia32_pternlogd128_maskz(                                 \
   6546       (__v4si)(__m128i)(A), (__v4si)(__m128i)(B), (__v4si)(__m128i)(C),        \
   6547       (unsigned char)(imm), (__mmask8)(U)))
   6548 
   /* Expands to __builtin_ia32_pternlogd256_mask with A, B, C cast to __v8si,
    * imm cast to unsigned char, and an all-ones mask ((__mmask8)-1).
    * NOTE(review): imm is presumably a truth table over the bits of A, B, C - confirm. */
   6549 #define _mm256_ternarylogic_epi32(A, B, C, imm)                                \
   6550   ((__m256i)__builtin_ia32_pternlogd256_mask(                                  \
   6551       (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C),        \
   6552       (unsigned char)(imm), (__mmask8)-1))
   6553 
   /* Expands to __builtin_ia32_pternlogd256_mask with A, B, C cast to __v8si,
    * imm cast to unsigned char, and mask U. Note the parameter order: the mask
    * U comes second, after A. */
   6554 #define _mm256_mask_ternarylogic_epi32(A, U, B, C, imm)                        \
   6555   ((__m256i)__builtin_ia32_pternlogd256_mask(                                  \
   6556       (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C),        \
   6557       (unsigned char)(imm), (__mmask8)(U)))
   6558 
   /* Expands to __builtin_ia32_pternlogd256_maskz (the "maskz" builtin) with
    * A, B, C cast to __v8si, imm cast to unsigned char, and mask U.
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6559 #define _mm256_maskz_ternarylogic_epi32(U, A, B, C, imm)                       \
   6560   ((__m256i)__builtin_ia32_pternlogd256_maskz(                                 \
   6561       (__v8si)(__m256i)(A), (__v8si)(__m256i)(B), (__v8si)(__m256i)(C),        \
   6562       (unsigned char)(imm), (__mmask8)(U)))
   6563 
   /* Expands to __builtin_ia32_pternlogq128_mask with A, B, C cast to __v2di,
    * imm cast to unsigned char, and an all-ones mask ((__mmask8)-1).
    * NOTE(review): imm is presumably a truth table over the bits of A, B, C - confirm. */
   6564 #define _mm_ternarylogic_epi64(A, B, C, imm)                                   \
   6565   ((__m128i)__builtin_ia32_pternlogq128_mask(                                  \
   6566       (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C),        \
   6567       (unsigned char)(imm), (__mmask8)-1))
   6568 
   /* Expands to __builtin_ia32_pternlogq128_mask with A, B, C cast to __v2di,
    * imm cast to unsigned char, and mask U. Note the parameter order: the mask
    * U comes second, after A. */
   6569 #define _mm_mask_ternarylogic_epi64(A, U, B, C, imm)                           \
   6570   ((__m128i)__builtin_ia32_pternlogq128_mask(                                  \
   6571       (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C),        \
   6572       (unsigned char)(imm), (__mmask8)(U)))
   6573 
   /* Expands to __builtin_ia32_pternlogq128_maskz (the "maskz" builtin) with
    * A, B, C cast to __v2di, imm cast to unsigned char, and mask U.
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6574 #define _mm_maskz_ternarylogic_epi64(U, A, B, C, imm)                          \
   6575   ((__m128i)__builtin_ia32_pternlogq128_maskz(                                 \
   6576       (__v2di)(__m128i)(A), (__v2di)(__m128i)(B), (__v2di)(__m128i)(C),        \
   6577       (unsigned char)(imm), (__mmask8)(U)))
   6578 
   /* Expands to __builtin_ia32_pternlogq256_mask with A, B, C cast to __v4di,
    * imm cast to unsigned char, and an all-ones mask ((__mmask8)-1).
    * NOTE(review): imm is presumably a truth table over the bits of A, B, C - confirm. */
   6579 #define _mm256_ternarylogic_epi64(A, B, C, imm)                                \
   6580   ((__m256i)__builtin_ia32_pternlogq256_mask(                                  \
   6581       (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C),        \
   6582       (unsigned char)(imm), (__mmask8)-1))
   6583 
   /* Expands to __builtin_ia32_pternlogq256_mask with A, B, C cast to __v4di,
    * imm cast to unsigned char, and mask U. Note the parameter order: the mask
    * U comes second, after A. */
   6584 #define _mm256_mask_ternarylogic_epi64(A, U, B, C, imm)                        \
   6585   ((__m256i)__builtin_ia32_pternlogq256_mask(                                  \
   6586       (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C),        \
   6587       (unsigned char)(imm), (__mmask8)(U)))
   6588 
   /* Expands to __builtin_ia32_pternlogq256_maskz (the "maskz" builtin) with
    * A, B, C cast to __v4di, imm cast to unsigned char, and mask U.
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6589 #define _mm256_maskz_ternarylogic_epi64(U, A, B, C, imm)                       \
   6590   ((__m256i)__builtin_ia32_pternlogq256_maskz(                                 \
   6591       (__v4di)(__m256i)(A), (__v4di)(__m256i)(B), (__v4di)(__m256i)(C),        \
   6592       (unsigned char)(imm), (__mmask8)(U)))
   6593 
   /* Expands to __builtin_ia32_shuf_f32x4_256 with A and B cast to __v8sf and
    * imm cast to int.
    * NOTE(review): presumably imm selects 128-bit chunks from A and B - confirm. */
   6594 #define _mm256_shuffle_f32x4(A, B, imm) \
   6595   ((__m256)__builtin_ia32_shuf_f32x4_256((__v8sf)(__m256)(A), \
   6596                                          (__v8sf)(__m256)(B), (int)(imm)))
   6597 
   /* Expands to __builtin_ia32_selectps_256 over mask U, the result of
    * _mm256_shuffle_f32x4((A), (B), (imm)), and W cast to __v8sf.
    * NOTE(review): presumably lanes with a clear mask bit come from W - confirm. */
   6598 #define _mm256_mask_shuffle_f32x4(W, U, A, B, imm) \
   6599   ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
   6600                                        (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
   6601                                        (__v8sf)(__m256)(W)))
   6602 
   /* Expands to __builtin_ia32_selectps_256 over mask U, the result of
    * _mm256_shuffle_f32x4((A), (B), (imm)), and _mm256_setzero_ps().
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6603 #define _mm256_maskz_shuffle_f32x4(U, A, B, imm) \
   6604   ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
   6605                                        (__v8sf)_mm256_shuffle_f32x4((A), (B), (imm)), \
   6606                                        (__v8sf)_mm256_setzero_ps()))
   6607 
   /* Expands to __builtin_ia32_shuf_f64x2_256 with A and B cast to __v4df and
    * imm cast to int.
    * NOTE(review): presumably imm selects 128-bit chunks from A and B - confirm. */
   6608 #define _mm256_shuffle_f64x2(A, B, imm) \
   6609   ((__m256d)__builtin_ia32_shuf_f64x2_256((__v4df)(__m256d)(A), \
   6610                                           (__v4df)(__m256d)(B), (int)(imm)))
   6611 
   /* Expands to __builtin_ia32_selectpd_256 over mask U, the result of
    * _mm256_shuffle_f64x2((A), (B), (imm)), and W cast to __v4df.
    * NOTE(review): presumably lanes with a clear mask bit come from W - confirm. */
   6612 #define _mm256_mask_shuffle_f64x2(W, U, A, B, imm) \
   6613   ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
   6614                                        (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
   6615                                        (__v4df)(__m256d)(W)))
   6616 
   /* Expands to __builtin_ia32_selectpd_256 over mask U, the result of
    * _mm256_shuffle_f64x2((A), (B), (imm)), and _mm256_setzero_pd().
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6617 #define _mm256_maskz_shuffle_f64x2(U, A, B, imm) \
   6618   ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
   6619                                        (__v4df)_mm256_shuffle_f64x2((A), (B), (imm)), \
   6620                                        (__v4df)_mm256_setzero_pd()))
   6621 
   /* Expands to __builtin_ia32_shuf_i32x4_256 with A and B cast to __v8si and
    * imm cast to int.
    * NOTE(review): presumably imm selects 128-bit chunks from A and B - confirm. */
   6622 #define _mm256_shuffle_i32x4(A, B, imm) \
   6623   ((__m256i)__builtin_ia32_shuf_i32x4_256((__v8si)(__m256i)(A), \
   6624                                           (__v8si)(__m256i)(B), (int)(imm)))
   6625 
   /* Expands to __builtin_ia32_selectd_256 over mask U, the result of
    * _mm256_shuffle_i32x4((A), (B), (imm)), and W cast to __v8si.
    * NOTE(review): presumably lanes with a clear mask bit come from W - confirm. */
   6626 #define _mm256_mask_shuffle_i32x4(W, U, A, B, imm) \
   6627   ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
   6628                                        (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
   6629                                        (__v8si)(__m256i)(W)))
   6630 
   /* Expands to __builtin_ia32_selectd_256 over mask U, the result of
    * _mm256_shuffle_i32x4((A), (B), (imm)), and _mm256_setzero_si256().
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6631 #define _mm256_maskz_shuffle_i32x4(U, A, B, imm) \
   6632   ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
   6633                                        (__v8si)_mm256_shuffle_i32x4((A), (B), (imm)), \
   6634                                        (__v8si)_mm256_setzero_si256()))
   6635 
   /* Expands to __builtin_ia32_shuf_i64x2_256 with A and B cast to __v4di and
    * imm cast to int.
    * NOTE(review): presumably imm selects 128-bit chunks from A and B - confirm. */
   6636 #define _mm256_shuffle_i64x2(A, B, imm) \
   6637   ((__m256i)__builtin_ia32_shuf_i64x2_256((__v4di)(__m256i)(A), \
   6638                                           (__v4di)(__m256i)(B), (int)(imm)))
   6639 
   /* Expands to __builtin_ia32_selectq_256 over mask U, the result of
    * _mm256_shuffle_i64x2((A), (B), (imm)), and W cast to __v4di.
    * NOTE(review): presumably lanes with a clear mask bit come from W - confirm. */
   6640 #define _mm256_mask_shuffle_i64x2(W, U, A, B, imm) \
   6641   ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
   6642                                        (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
   6643                                        (__v4di)(__m256i)(W)))
   6644 
   6645 
   /* Expands to __builtin_ia32_selectq_256 over mask U, the result of
    * _mm256_shuffle_i64x2((A), (B), (imm)), and _mm256_setzero_si256().
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6646 #define _mm256_maskz_shuffle_i64x2(U, A, B, imm) \
   6647   ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
   6648                                        (__v4di)_mm256_shuffle_i64x2((A), (B), (imm)), \
   6649                                        (__v4di)_mm256_setzero_si256()))
   6650 
   /* Expands to __builtin_ia32_selectpd_128 over mask U, the result of
    * _mm_shuffle_pd((A), (B), (M)), and W cast to __v2df.
    * NOTE(review): presumably lanes with a clear mask bit come from W - confirm. */
   6651 #define _mm_mask_shuffle_pd(W, U, A, B, M) \
   6652   ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
   6653                                         (__v2df)_mm_shuffle_pd((A), (B), (M)), \
   6654                                         (__v2df)(__m128d)(W)))
   6655 
   /* Expands to __builtin_ia32_selectpd_128 over mask U, the result of
    * _mm_shuffle_pd((A), (B), (M)), and _mm_setzero_pd().
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6656 #define _mm_maskz_shuffle_pd(U, A, B, M) \
   6657   ((__m128d)__builtin_ia32_selectpd_128((__mmask8)(U), \
   6658                                         (__v2df)_mm_shuffle_pd((A), (B), (M)), \
   6659                                         (__v2df)_mm_setzero_pd()))
   6660 
   /* Expands to __builtin_ia32_selectpd_256 over mask U, the result of
    * _mm256_shuffle_pd((A), (B), (M)), and W cast to __v4df.
    * NOTE(review): presumably lanes with a clear mask bit come from W - confirm. */
   6661 #define _mm256_mask_shuffle_pd(W, U, A, B, M) \
   6662   ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
   6663                                         (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
   6664                                         (__v4df)(__m256d)(W)))
   6665 
   /* Expands to __builtin_ia32_selectpd_256 over mask U, the result of
    * _mm256_shuffle_pd((A), (B), (M)), and _mm256_setzero_pd().
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6666 #define _mm256_maskz_shuffle_pd(U, A, B, M) \
   6667   ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
   6668                                         (__v4df)_mm256_shuffle_pd((A), (B), (M)), \
   6669                                         (__v4df)_mm256_setzero_pd()))
   6670 
   /* Expands to __builtin_ia32_selectps_128 over mask U, the result of
    * _mm_shuffle_ps((A), (B), (M)), and W cast to __v4sf.
    * NOTE(review): presumably lanes with a clear mask bit come from W - confirm. */
   6671 #define _mm_mask_shuffle_ps(W, U, A, B, M) \
   6672   ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
   6673                                        (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
   6674                                        (__v4sf)(__m128)(W)))
   6675 
   /* Expands to __builtin_ia32_selectps_128 over mask U, the result of
    * _mm_shuffle_ps((A), (B), (M)), and _mm_setzero_ps().
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6676 #define _mm_maskz_shuffle_ps(U, A, B, M) \
   6677   ((__m128)__builtin_ia32_selectps_128((__mmask8)(U), \
   6678                                        (__v4sf)_mm_shuffle_ps((A), (B), (M)), \
   6679                                        (__v4sf)_mm_setzero_ps()))
   6680 
   /* Expands to __builtin_ia32_selectps_256 over mask U, the result of
    * _mm256_shuffle_ps((A), (B), (M)), and W cast to __v8sf.
    * NOTE(review): presumably lanes with a clear mask bit come from W - confirm. */
   6681 #define _mm256_mask_shuffle_ps(W, U, A, B, M) \
   6682   ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
   6683                                        (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
   6684                                        (__v8sf)(__m256)(W)))
   6685 
   /* Expands to __builtin_ia32_selectps_256 over mask U, the result of
    * _mm256_shuffle_ps((A), (B), (M)), and _mm256_setzero_ps().
    * NOTE(review): presumably lanes with a clear mask bit become zero - confirm. */
   6686 #define _mm256_maskz_shuffle_ps(U, A, B, M) \
   6687   ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
   6688                                        (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
   6689                                        (__v8sf)_mm256_setzero_ps()))
   6690 
   6691 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   6692 _mm_rsqrt14_pd (__m128d __A)
   6693 {
   6694   return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
   6695                  (__v2df)
   6696                  _mm_setzero_pd (),
   6697                  (__mmask8) -1);
   6698 }
   6699 
   6700 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   6701 _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
   6702 {
   6703   return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
   6704                  (__v2df) __W,
   6705                  (__mmask8) __U);
   6706 }
   6707 
   6708 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   6709 _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
   6710 {
   6711   return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
   6712                  (__v2df)
   6713                  _mm_setzero_pd (),
   6714                  (__mmask8) __U);
   6715 }
   6716 
   6717 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   6718 _mm256_rsqrt14_pd (__m256d __A)
   6719 {
   6720   return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
   6721                  (__v4df)
   6722                  _mm256_setzero_pd (),
   6723                  (__mmask8) -1);
   6724 }
   6725 
   6726 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   6727 _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
   6728 {
   6729   return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
   6730                  (__v4df) __W,
   6731                  (__mmask8) __U);
   6732 }
   6733 
   6734 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   6735 _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
   6736 {
   6737   return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
   6738                  (__v4df)
   6739                  _mm256_setzero_pd (),
   6740                  (__mmask8) __U);
   6741 }
   6742 
   6743 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   6744 _mm_rsqrt14_ps (__m128 __A)
   6745 {
   6746   return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
   6747                 (__v4sf)
   6748                 _mm_setzero_ps (),
   6749                 (__mmask8) -1);
   6750 }
   6751 
   6752 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   6753 _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
   6754 {
   6755   return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
   6756                 (__v4sf) __W,
   6757                 (__mmask8) __U);
   6758 }
   6759 
   6760 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   6761 _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
   6762 {
   6763   return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
   6764                 (__v4sf)
   6765                 _mm_setzero_ps (),
   6766                 (__mmask8) __U);
   6767 }
   6768 
   6769 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   6770 _mm256_rsqrt14_ps (__m256 __A)
   6771 {
   6772   return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
   6773                 (__v8sf)
   6774                 _mm256_setzero_ps (),
   6775                 (__mmask8) -1);
   6776 }
   6777 
   6778 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   6779 _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
   6780 {
   6781   return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
   6782                 (__v8sf) __W,
   6783                 (__mmask8) __U);
   6784 }
   6785 
   6786 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   6787 _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
   6788 {
   6789   return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
   6790                 (__v8sf)
   6791                 _mm256_setzero_ps (),
   6792                 (__mmask8) __U);
   6793 }
   6794 
   6795 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   6796 _mm256_broadcast_f32x4(__m128 __A)
   6797 {
   6798   return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
   6799                                          0, 1, 2, 3, 0, 1, 2, 3);
   6800 }
   6801 
   6802 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   6803 _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
   6804 {
   6805   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
   6806                                             (__v8sf)_mm256_broadcast_f32x4(__A),
   6807                                             (__v8sf)__O);
   6808 }
   6809 
   6810 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   6811 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
   6812 {
   6813   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
   6814                                             (__v8sf)_mm256_broadcast_f32x4(__A),
   6815                                             (__v8sf)_mm256_setzero_ps());
   6816 }
   6817 
   6818 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   6819 _mm256_broadcast_i32x4(__m128i __A)
   6820 {
   6821   return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
   6822                                           0, 1, 2, 3, 0, 1, 2, 3);
   6823 }
   6824 
   6825 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   6826 _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
   6827 {
   6828   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
   6829                                             (__v8si)_mm256_broadcast_i32x4(__A),
   6830                                             (__v8si)__O);
   6831 }
   6832 
   6833 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   6834 _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
   6835 {
   6836   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
   6837                                             (__v8si)_mm256_broadcast_i32x4(__A),
   6838                                             (__v8si)_mm256_setzero_si256());
   6839 }
   6840 
   6841 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   6842 _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
   6843 {
   6844   return (__m256d)__builtin_ia32_selectpd_256(__M,
   6845                                               (__v4df) _mm256_broadcastsd_pd(__A),
   6846                                               (__v4df) __O);
   6847 }
   6848 
   6849 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   6850 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
   6851 {
   6852   return (__m256d)__builtin_ia32_selectpd_256(__M,
   6853                                               (__v4df) _mm256_broadcastsd_pd(__A),
   6854                                               (__v4df) _mm256_setzero_pd());
   6855 }
   6856 
   6857 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   6858 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
   6859 {
   6860   return (__m128)__builtin_ia32_selectps_128(__M,
   6861                                              (__v4sf) _mm_broadcastss_ps(__A),
   6862                                              (__v4sf) __O);
   6863 }
   6864 
   6865 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   6866 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
   6867 {
   6868   return (__m128)__builtin_ia32_selectps_128(__M,
   6869                                              (__v4sf) _mm_broadcastss_ps(__A),
   6870                                              (__v4sf) _mm_setzero_ps());
   6871 }
   6872 
   6873 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   6874 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
   6875 {
   6876   return (__m256)__builtin_ia32_selectps_256(__M,
   6877                                              (__v8sf) _mm256_broadcastss_ps(__A),
   6878                                              (__v8sf) __O);
   6879 }
   6880 
   6881 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   6882 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
   6883 {
   6884   return (__m256)__builtin_ia32_selectps_256(__M,
   6885                                              (__v8sf) _mm256_broadcastss_ps(__A),
   6886                                              (__v8sf) _mm256_setzero_ps());
   6887 }
   6888 
   6889 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   6890 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
   6891 {
   6892   return (__m128i)__builtin_ia32_selectd_128(__M,
   6893                                              (__v4si) _mm_broadcastd_epi32(__A),
   6894                                              (__v4si) __O);
   6895 }
   6896 
   6897 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   6898 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
   6899 {
   6900   return (__m128i)__builtin_ia32_selectd_128(__M,
   6901                                              (__v4si) _mm_broadcastd_epi32(__A),
   6902                                              (__v4si) _mm_setzero_si128());
   6903 }
   6904 
   6905 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   6906 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
   6907 {
   6908   return (__m256i)__builtin_ia32_selectd_256(__M,
   6909                                              (__v8si) _mm256_broadcastd_epi32(__A),
   6910                                              (__v8si) __O);
   6911 }
   6912 
   6913 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   6914 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
   6915 {
   6916   return (__m256i)__builtin_ia32_selectd_256(__M,
   6917                                              (__v8si) _mm256_broadcastd_epi32(__A),
   6918                                              (__v8si) _mm256_setzero_si256());
   6919 }
   6920 
   6921 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   6922 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
   6923 {
   6924   return (__m128i)__builtin_ia32_selectq_128(__M,
   6925                                              (__v2di) _mm_broadcastq_epi64(__A),
   6926                                              (__v2di) __O);
   6927 }
   6928 
   6929 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   6930 _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
   6931 {
   6932   return (__m128i)__builtin_ia32_selectq_128(__M,
   6933                                              (__v2di) _mm_broadcastq_epi64(__A),
   6934                                              (__v2di) _mm_setzero_si128());
   6935 }
   6936 
   6937 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   6938 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
   6939 {
   6940   return (__m256i)__builtin_ia32_selectq_256(__M,
   6941                                              (__v4di) _mm256_broadcastq_epi64(__A),
   6942                                              (__v4di) __O);
   6943 }
   6944 
   6945 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   6946 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
   6947 {
   6948   return (__m256i)__builtin_ia32_selectq_256(__M,
   6949                                              (__v4di) _mm256_broadcastq_epi64(__A),
   6950                                              (__v4di) _mm256_setzero_si256());
   6951 }
   6952 
   6953 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   6954 _mm_cvtsepi32_epi8 (__m128i __A)
   6955 {
   6956   return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
   6957                (__v16qi)_mm_undefined_si128(),
   6958                (__mmask8) -1);
   6959 }
   6960 
   6961 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   6962 _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
   6963 {
   6964   return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
   6965                (__v16qi) __O, __M);
   6966 }
   6967 
   6968 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   6969 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
   6970 {
   6971   return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
   6972                (__v16qi) _mm_setzero_si128 (),
   6973                __M);
   6974 }
   6975 
   6976 static __inline__ void __DEFAULT_FN_ATTRS128
   6977 _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
   6978 {
   6979   __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
   6980 }
   6981 
   6982 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   6983 _mm256_cvtsepi32_epi8 (__m256i __A)
   6984 {
   6985   return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
   6986                (__v16qi)_mm_undefined_si128(),
   6987                (__mmask8) -1);
   6988 }
   6989 
   6990 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   6991 _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
   6992 {
   6993   return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
   6994                (__v16qi) __O, __M);
   6995 }
   6996 
   6997 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   6998 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
   6999 {
   7000   return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
   7001                (__v16qi) _mm_setzero_si128 (),
   7002                __M);
   7003 }
   7004 
   7005 static __inline__ void __DEFAULT_FN_ATTRS256
   7006 _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
   7007 {
   7008   __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
   7009 }
   7010 
   7011 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7012 _mm_cvtsepi32_epi16 (__m128i __A)
   7013 {
   7014   return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
   7015                (__v8hi)_mm_setzero_si128 (),
   7016                (__mmask8) -1);
   7017 }
   7018 
   7019 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7020 _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
   7021 {
   7022   return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
   7023                (__v8hi)__O,
   7024                __M);
   7025 }
   7026 
   7027 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7028 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
   7029 {
   7030   return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
   7031                (__v8hi) _mm_setzero_si128 (),
   7032                __M);
   7033 }
   7034 
   7035 static __inline__ void __DEFAULT_FN_ATTRS128
   7036 _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
   7037 {
   7038   __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
   7039 }
   7040 
   7041 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7042 _mm256_cvtsepi32_epi16 (__m256i __A)
   7043 {
   7044   return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
   7045                (__v8hi)_mm_undefined_si128(),
   7046                (__mmask8) -1);
   7047 }
   7048 
   7049 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7050 _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
   7051 {
   7052   return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
   7053                (__v8hi) __O, __M);
   7054 }
   7055 
   7056 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7057 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
   7058 {
   7059   return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
   7060                (__v8hi) _mm_setzero_si128 (),
   7061                __M);
   7062 }
   7063 
   7064 static __inline__ void __DEFAULT_FN_ATTRS256
   7065 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
   7066 {
   7067   __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
   7068 }
   7069 
   7070 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7071 _mm_cvtsepi64_epi8 (__m128i __A)
   7072 {
   7073   return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
   7074                (__v16qi)_mm_undefined_si128(),
   7075                (__mmask8) -1);
   7076 }
   7077 
   7078 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7079 _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
   7080 {
   7081   return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
   7082                (__v16qi) __O, __M);
   7083 }
   7084 
   7085 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7086 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
   7087 {
   7088   return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
   7089                (__v16qi) _mm_setzero_si128 (),
   7090                __M);
   7091 }
   7092 
   7093 static __inline__ void __DEFAULT_FN_ATTRS128
   7094 _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
   7095 {
   7096   __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
   7097 }
   7098 
   7099 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7100 _mm256_cvtsepi64_epi8 (__m256i __A)
   7101 {
   7102   return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
   7103                (__v16qi)_mm_undefined_si128(),
   7104                (__mmask8) -1);
   7105 }
   7106 
   7107 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7108 _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
   7109 {
   7110   return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
   7111                (__v16qi) __O, __M);
   7112 }
   7113 
   7114 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7115 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
   7116 {
   7117   return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
   7118                (__v16qi) _mm_setzero_si128 (),
   7119                __M);
   7120 }
   7121 
   7122 static __inline__ void __DEFAULT_FN_ATTRS256
   7123 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
   7124 {
   7125   __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
   7126 }
   7127 
   7128 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7129 _mm_cvtsepi64_epi32 (__m128i __A)
   7130 {
   7131   return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
   7132                (__v4si)_mm_undefined_si128(),
   7133                (__mmask8) -1);
   7134 }
   7135 
   7136 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7137 _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
   7138 {
   7139   return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
   7140                (__v4si) __O, __M);
   7141 }
   7142 
   7143 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7144 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
   7145 {
   7146   return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
   7147                (__v4si) _mm_setzero_si128 (),
   7148                __M);
   7149 }
   7150 
   7151 static __inline__ void __DEFAULT_FN_ATTRS128
   7152 _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
   7153 {
   7154   __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
   7155 }
   7156 
   7157 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7158 _mm256_cvtsepi64_epi32 (__m256i __A)
   7159 {
   7160   return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
   7161                (__v4si)_mm_undefined_si128(),
   7162                (__mmask8) -1);
   7163 }
   7164 
   7165 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7166 _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
   7167 {
   7168   return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
   7169                (__v4si)__O,
   7170                __M);
   7171 }
   7172 
   7173 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7174 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
   7175 {
   7176   return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
   7177                (__v4si) _mm_setzero_si128 (),
   7178                __M);
   7179 }
   7180 
   7181 static __inline__ void __DEFAULT_FN_ATTRS256
   7182 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
   7183 {
   7184   __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
   7185 }
   7186 
   7187 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7188 _mm_cvtsepi64_epi16 (__m128i __A)
   7189 {
   7190   return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
   7191                (__v8hi)_mm_undefined_si128(),
   7192                (__mmask8) -1);
   7193 }
   7194 
   7195 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7196 _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
   7197 {
   7198   return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
   7199                (__v8hi) __O, __M);
   7200 }
   7201 
   7202 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7203 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
   7204 {
   7205   return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
   7206                (__v8hi) _mm_setzero_si128 (),
   7207                __M);
   7208 }
   7209 
   7210 static __inline__ void __DEFAULT_FN_ATTRS128
   7211 _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
   7212 {
   7213   __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
   7214 }
   7215 
   7216 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7217 _mm256_cvtsepi64_epi16 (__m256i __A)
   7218 {
   7219   return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
   7220                (__v8hi)_mm_undefined_si128(),
   7221                (__mmask8) -1);
   7222 }
   7223 
   7224 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7225 _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
   7226 {
   7227   return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
   7228                (__v8hi) __O, __M);
   7229 }
   7230 
   7231 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7232 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
   7233 {
   7234   return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
   7235                (__v8hi) _mm_setzero_si128 (),
   7236                __M);
   7237 }
   7238 
   7239 static __inline__ void __DEFAULT_FN_ATTRS256
   7240 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
   7241 {
   7242   __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
   7243 }
   7244 
   7245 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7246 _mm_cvtusepi32_epi8 (__m128i __A)
   7247 {
   7248   return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
   7249                 (__v16qi)_mm_undefined_si128(),
   7250                 (__mmask8) -1);
   7251 }
   7252 
   7253 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7254 _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
   7255 {
   7256   return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
   7257                 (__v16qi) __O,
   7258                 __M);
   7259 }
   7260 
   7261 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7262 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
   7263 {
   7264   return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
   7265                 (__v16qi) _mm_setzero_si128 (),
   7266                 __M);
   7267 }
   7268 
   7269 static __inline__ void __DEFAULT_FN_ATTRS128
   7270 _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
   7271 {
   7272   __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
   7273 }
   7274 
   7275 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7276 _mm256_cvtusepi32_epi8 (__m256i __A)
   7277 {
   7278   return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
   7279                 (__v16qi)_mm_undefined_si128(),
   7280                 (__mmask8) -1);
   7281 }
   7282 
   7283 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7284 _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
   7285 {
   7286   return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
   7287                 (__v16qi) __O,
   7288                 __M);
   7289 }
   7290 
   7291 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7292 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
   7293 {
   7294   return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
   7295                 (__v16qi) _mm_setzero_si128 (),
   7296                 __M);
   7297 }
   7298 
   7299 static __inline__ void __DEFAULT_FN_ATTRS256
   7300 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
   7301 {
   7302   __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
   7303 }
   7304 
   7305 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7306 _mm_cvtusepi32_epi16 (__m128i __A)
   7307 {
   7308   return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
   7309                 (__v8hi)_mm_undefined_si128(),
   7310                 (__mmask8) -1);
   7311 }
   7312 
   7313 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7314 _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
   7315 {
   7316   return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
   7317                 (__v8hi) __O, __M);
   7318 }
   7319 
   7320 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7321 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
   7322 {
   7323   return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
   7324                 (__v8hi) _mm_setzero_si128 (),
   7325                 __M);
   7326 }
   7327 
   7328 static __inline__ void __DEFAULT_FN_ATTRS128
   7329 _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
   7330 {
   7331   __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
   7332 }
   7333 
   7334 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7335 _mm256_cvtusepi32_epi16 (__m256i __A)
   7336 {
   7337   return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
   7338                 (__v8hi) _mm_undefined_si128(),
   7339                 (__mmask8) -1);
   7340 }
   7341 
   7342 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7343 _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
   7344 {
   7345   return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
   7346                 (__v8hi) __O, __M);
   7347 }
   7348 
   7349 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7350 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
   7351 {
   7352   return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
   7353                 (__v8hi) _mm_setzero_si128 (),
   7354                 __M);
   7355 }
   7356 
   7357 static __inline__ void __DEFAULT_FN_ATTRS256
   7358 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
   7359 {
   7360   __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
   7361 }
   7362 
   7363 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7364 _mm_cvtusepi64_epi8 (__m128i __A)
   7365 {
   7366   return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
   7367                 (__v16qi)_mm_undefined_si128(),
   7368                 (__mmask8) -1);
   7369 }
   7370 
   7371 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7372 _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
   7373 {
   7374   return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
   7375                 (__v16qi) __O,
   7376                 __M);
   7377 }
   7378 
   7379 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7380 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
   7381 {
   7382   return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
   7383                 (__v16qi) _mm_setzero_si128 (),
   7384                 __M);
   7385 }
   7386 
   7387 static __inline__ void __DEFAULT_FN_ATTRS128
   7388 _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
   7389 {
   7390   __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
   7391 }
   7392 
   7393 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7394 _mm256_cvtusepi64_epi8 (__m256i __A)
   7395 {
   7396   return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
   7397                 (__v16qi)_mm_undefined_si128(),
   7398                 (__mmask8) -1);
   7399 }
   7400 
   7401 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7402 _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
   7403 {
   7404   return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
   7405                 (__v16qi) __O,
   7406                 __M);
   7407 }
   7408 
   7409 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7410 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
   7411 {
   7412   return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
   7413                 (__v16qi) _mm_setzero_si128 (),
   7414                 __M);
   7415 }
   7416 
   7417 static __inline__ void __DEFAULT_FN_ATTRS256
   7418 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
   7419 {
   7420   __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
   7421 }
   7422 
   7423 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7424 _mm_cvtusepi64_epi32 (__m128i __A)
   7425 {
   7426   return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
   7427                 (__v4si)_mm_undefined_si128(),
   7428                 (__mmask8) -1);
   7429 }
   7430 
   7431 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7432 _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
   7433 {
   7434   return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
   7435                 (__v4si) __O, __M);
   7436 }
   7437 
   7438 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7439 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
   7440 {
   7441   return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
   7442                 (__v4si) _mm_setzero_si128 (),
   7443                 __M);
   7444 }
   7445 
   7446 static __inline__ void __DEFAULT_FN_ATTRS128
   7447 _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
   7448 {
   7449   __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
   7450 }
   7451 
   7452 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7453 _mm256_cvtusepi64_epi32 (__m256i __A)
   7454 {
   7455   return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
   7456                 (__v4si)_mm_undefined_si128(),
   7457                 (__mmask8) -1);
   7458 }
   7459 
   7460 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7461 _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
   7462 {
   7463   return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
   7464                 (__v4si) __O, __M);
   7465 }
   7466 
   7467 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7468 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
   7469 {
   7470   return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
   7471                 (__v4si) _mm_setzero_si128 (),
   7472                 __M);
   7473 }
   7474 
   7475 static __inline__ void __DEFAULT_FN_ATTRS256
   7476 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
   7477 {
   7478   __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
   7479 }
   7480 
   7481 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7482 _mm_cvtusepi64_epi16 (__m128i __A)
   7483 {
   7484   return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
   7485                 (__v8hi)_mm_undefined_si128(),
   7486                 (__mmask8) -1);
   7487 }
   7488 
   7489 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7490 _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
   7491 {
   7492   return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
   7493                 (__v8hi) __O, __M);
   7494 }
   7495 
   7496 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7497 _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
   7498 {
   7499   return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
   7500                 (__v8hi) _mm_setzero_si128 (),
   7501                 __M);
   7502 }
   7503 
   7504 static __inline__ void __DEFAULT_FN_ATTRS128
   7505 _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
   7506 {
   7507   __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
   7508 }
   7509 
   7510 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7511 _mm256_cvtusepi64_epi16 (__m256i __A)
   7512 {
   7513   return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
   7514                 (__v8hi)_mm_undefined_si128(),
   7515                 (__mmask8) -1);
   7516 }
   7517 
   7518 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7519 _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
   7520 {
   7521   return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
   7522                 (__v8hi) __O, __M);
   7523 }
   7524 
   7525 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7526 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
   7527 {
   7528   return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
   7529                 (__v8hi) _mm_setzero_si128 (),
   7530                 __M);
   7531 }
   7532 
   7533 static __inline__ void __DEFAULT_FN_ATTRS256
   7534 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
   7535 {
   7536   __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
   7537 }
   7538 
   7539 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7540 _mm_cvtepi32_epi8 (__m128i __A)
   7541 {
   7542   return (__m128i)__builtin_shufflevector(
   7543       __builtin_convertvector((__v4si)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
   7544       2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
   7545 }
   7546 
   7547 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7548 _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
   7549 {
   7550   return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
   7551               (__v16qi) __O, __M);
   7552 }
   7553 
   7554 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7555 _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
   7556 {
   7557   return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
   7558               (__v16qi)
   7559               _mm_setzero_si128 (),
   7560               __M);
   7561 }
   7562 
   7563 static __inline__ void __DEFAULT_FN_ATTRS128
   7564 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
   7565 {
   7566   __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
   7567 }
   7568 
   7569 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7570 _mm256_cvtepi32_epi8 (__m256i __A)
   7571 {
   7572   return (__m128i)__builtin_shufflevector(
   7573       __builtin_convertvector((__v8si)__A, __v8qi),
   7574       (__v8qi){0, 0, 0, 0, 0, 0, 0, 0}, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
   7575       12, 13, 14, 15);
   7576 }
   7577 
   7578 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7579 _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
   7580 {
   7581   return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
   7582               (__v16qi) __O, __M);
   7583 }
   7584 
   7585 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7586 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
   7587 {
   7588   return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
   7589               (__v16qi) _mm_setzero_si128 (),
   7590               __M);
   7591 }
   7592 
   7593 static __inline__ void __DEFAULT_FN_ATTRS256
   7594 _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
   7595 {
   7596   __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
   7597 }
   7598 
   7599 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7600 _mm_cvtepi32_epi16 (__m128i __A)
   7601 {
   7602   return (__m128i)__builtin_shufflevector(
   7603       __builtin_convertvector((__v4si)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
   7604       2, 3, 4, 5, 6, 7);
   7605 }
   7606 
   7607 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7608 _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
   7609 {
   7610   return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
   7611               (__v8hi) __O, __M);
   7612 }
   7613 
   7614 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7615 _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
   7616 {
   7617   return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
   7618               (__v8hi) _mm_setzero_si128 (),
   7619               __M);
   7620 }
   7621 
   7622 static __inline__ void __DEFAULT_FN_ATTRS128
   7623 _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
   7624 {
   7625   __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
   7626 }
   7627 
   7628 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7629 _mm256_cvtepi32_epi16 (__m256i __A)
   7630 {
   7631   return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi);
   7632 }
   7633 
   7634 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7635 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
   7636 {
   7637   return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
   7638               (__v8hi) __O, __M);
   7639 }
   7640 
   7641 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7642 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
   7643 {
   7644   return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
   7645               (__v8hi) _mm_setzero_si128 (),
   7646               __M);
   7647 }
   7648 
   7649 static __inline__ void __DEFAULT_FN_ATTRS256
   7650 _mm256_mask_cvtepi32_storeu_epi16 (void *  __P, __mmask8 __M, __m256i __A)
   7651 {
   7652   __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
   7653 }
   7654 
   7655 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7656 _mm_cvtepi64_epi8 (__m128i __A)
   7657 {
   7658   return (__m128i)__builtin_shufflevector(
   7659       __builtin_convertvector((__v2di)__A, __v2qi), (__v2qi){0, 0}, 0, 1, 2, 3,
   7660       3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
   7661 }
   7662 
   7663 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7664 _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
   7665 {
   7666   return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
   7667               (__v16qi) __O, __M);
   7668 }
   7669 
   7670 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7671 _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
   7672 {
   7673   return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
   7674               (__v16qi) _mm_setzero_si128 (),
   7675               __M);
   7676 }
   7677 
   7678 static __inline__ void __DEFAULT_FN_ATTRS128
   7679 _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
   7680 {
   7681   __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
   7682 }
   7683 
   7684 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7685 _mm256_cvtepi64_epi8 (__m256i __A)
   7686 {
   7687   return (__m128i)__builtin_shufflevector(
   7688       __builtin_convertvector((__v4di)__A, __v4qi), (__v4qi){0, 0, 0, 0}, 0, 1,
   7689       2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7);
   7690 }
   7691 
   7692 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7693 _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
   7694 {
   7695   return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
   7696               (__v16qi) __O, __M);
   7697 }
   7698 
   7699 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7700 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
   7701 {
   7702   return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
   7703               (__v16qi) _mm_setzero_si128 (),
   7704               __M);
   7705 }
   7706 
   7707 static __inline__ void __DEFAULT_FN_ATTRS256
   7708 _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
   7709 {
   7710   __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
   7711 }
   7712 
   7713 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7714 _mm_cvtepi64_epi32 (__m128i __A)
   7715 {
   7716   return (__m128i)__builtin_shufflevector(
   7717       __builtin_convertvector((__v2di)__A, __v2si), (__v2si){0, 0}, 0, 1, 2, 3);
   7718 }
   7719 
   7720 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7721 _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
   7722 {
   7723   return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
   7724               (__v4si) __O, __M);
   7725 }
   7726 
   7727 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7728 _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
   7729 {
   7730   return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
   7731               (__v4si) _mm_setzero_si128 (),
   7732               __M);
   7733 }
   7734 
   7735 static __inline__ void __DEFAULT_FN_ATTRS128
   7736 _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
   7737 {
   7738   __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
   7739 }
   7740 
   7741 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7742 _mm256_cvtepi64_epi32 (__m256i __A)
   7743 {
   7744   return (__m128i)__builtin_convertvector((__v4di)__A, __v4si);
   7745 }
   7746 
   7747 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7748 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
   7749 {
   7750   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
   7751                                              (__v4si)_mm256_cvtepi64_epi32(__A),
   7752                                              (__v4si)__O);
   7753 }
   7754 
   7755 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7756 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
   7757 {
   7758   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
   7759                                              (__v4si)_mm256_cvtepi64_epi32(__A),
   7760                                              (__v4si)_mm_setzero_si128());
   7761 }
   7762 
   7763 static __inline__ void __DEFAULT_FN_ATTRS256
   7764 _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
   7765 {
   7766   __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
   7767 }
   7768 
   7769 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7770 _mm_cvtepi64_epi16 (__m128i __A)
   7771 {
   7772   return (__m128i)__builtin_shufflevector(
   7773       __builtin_convertvector((__v2di)__A, __v2hi), (__v2hi){0, 0}, 0, 1, 2, 3,
   7774       3, 3, 3, 3);
   7775 }
   7776 
   7777 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7778 _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
   7779 {
   7780   return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
   7781               (__v8hi)__O,
   7782               __M);
   7783 }
   7784 
   7785 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   7786 _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
   7787 {
   7788   return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
   7789               (__v8hi) _mm_setzero_si128 (),
   7790               __M);
   7791 }
   7792 
   7793 static __inline__ void __DEFAULT_FN_ATTRS128
   7794 _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
   7795 {
   7796   __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
   7797 }
   7798 
   7799 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7800 _mm256_cvtepi64_epi16 (__m256i __A)
   7801 {
   7802   return (__m128i)__builtin_shufflevector(
   7803       __builtin_convertvector((__v4di)__A, __v4hi), (__v4hi){0, 0, 0, 0}, 0, 1,
   7804       2, 3, 4, 5, 6, 7);
   7805 }
   7806 
   7807 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7808 _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
   7809 {
   7810   return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
   7811               (__v8hi) __O, __M);
   7812 }
   7813 
   7814 static __inline__ __m128i __DEFAULT_FN_ATTRS256
   7815 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
   7816 {
   7817   return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
   7818               (__v8hi) _mm_setzero_si128 (),
   7819               __M);
   7820 }
   7821 
   7822 static __inline__ void __DEFAULT_FN_ATTRS256
   7823 _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
   7824 {
   7825   __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
   7826 }
   7827 
   7828 #define _mm256_extractf32x4_ps(A, imm) \
   7829   ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
   7830                                                 (int)(imm), \
   7831                                                 (__v4sf)_mm_undefined_ps(), \
   7832                                                 (__mmask8)-1))
   7833 
   7834 #define _mm256_mask_extractf32x4_ps(W, U, A, imm) \
   7835   ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
   7836                                                 (int)(imm), \
   7837                                                 (__v4sf)(__m128)(W), \
   7838                                                 (__mmask8)(U)))
   7839 
   7840 #define _mm256_maskz_extractf32x4_ps(U, A, imm) \
   7841   ((__m128)__builtin_ia32_extractf32x4_256_mask((__v8sf)(__m256)(A), \
   7842                                                 (int)(imm), \
   7843                                                 (__v4sf)_mm_setzero_ps(), \
   7844                                                 (__mmask8)(U)))
   7845 
   7846 #define _mm256_extracti32x4_epi32(A, imm) \
   7847   ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
   7848                                                  (int)(imm), \
   7849                                                  (__v4si)_mm_undefined_si128(), \
   7850                                                  (__mmask8)-1))
   7851 
   7852 #define _mm256_mask_extracti32x4_epi32(W, U, A, imm) \
   7853   ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
   7854                                                  (int)(imm), \
   7855                                                  (__v4si)(__m128i)(W), \
   7856                                                  (__mmask8)(U)))
   7857 
   7858 #define _mm256_maskz_extracti32x4_epi32(U, A, imm) \
   7859   ((__m128i)__builtin_ia32_extracti32x4_256_mask((__v8si)(__m256i)(A), \
   7860                                                  (int)(imm), \
   7861                                                  (__v4si)_mm_setzero_si128(), \
   7862                                                  (__mmask8)(U)))
   7863 
   7864 #define _mm256_insertf32x4(A, B, imm) \
   7865   ((__m256)__builtin_ia32_insertf32x4_256((__v8sf)(__m256)(A), \
   7866                                           (__v4sf)(__m128)(B), (int)(imm)))
   7867 
   7868 #define _mm256_mask_insertf32x4(W, U, A, B, imm) \
   7869   ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
   7870                                   (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
   7871                                   (__v8sf)(__m256)(W)))
   7872 
   7873 #define _mm256_maskz_insertf32x4(U, A, B, imm) \
   7874   ((__m256)__builtin_ia32_selectps_256((__mmask8)(U), \
   7875                                   (__v8sf)_mm256_insertf32x4((A), (B), (imm)), \
   7876                                   (__v8sf)_mm256_setzero_ps()))
   7877 
   7878 #define _mm256_inserti32x4(A, B, imm) \
   7879   ((__m256i)__builtin_ia32_inserti32x4_256((__v8si)(__m256i)(A), \
   7880                                            (__v4si)(__m128i)(B), (int)(imm)))
   7881 
   7882 #define _mm256_mask_inserti32x4(W, U, A, B, imm) \
   7883   ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
   7884                                   (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
   7885                                   (__v8si)(__m256i)(W)))
   7886 
   7887 #define _mm256_maskz_inserti32x4(U, A, B, imm) \
   7888   ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
   7889                                   (__v8si)_mm256_inserti32x4((A), (B), (imm)), \
   7890                                   (__v8si)_mm256_setzero_si256()))
   7891 
   7892 #define _mm_getmant_pd(A, B, C) \
   7893   ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
   7894                                              (int)(((C)<<2) | (B)), \
   7895                                              (__v2df)_mm_setzero_pd(), \
   7896                                              (__mmask8)-1))
   7897 
   7898 #define _mm_mask_getmant_pd(W, U, A, B, C) \
   7899   ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
   7900                                              (int)(((C)<<2) | (B)), \
   7901                                              (__v2df)(__m128d)(W), \
   7902                                              (__mmask8)(U)))
   7903 
   7904 #define _mm_maskz_getmant_pd(U, A, B, C) \
   7905   ((__m128d)__builtin_ia32_getmantpd128_mask((__v2df)(__m128d)(A), \
   7906                                              (int)(((C)<<2) | (B)), \
   7907                                              (__v2df)_mm_setzero_pd(), \
   7908                                              (__mmask8)(U)))
   7909 
   7910 #define _mm256_getmant_pd(A, B, C) \
   7911   ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
   7912                                              (int)(((C)<<2) | (B)), \
   7913                                              (__v4df)_mm256_setzero_pd(), \
   7914                                              (__mmask8)-1))
   7915 
   7916 #define _mm256_mask_getmant_pd(W, U, A, B, C) \
   7917   ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
   7918                                              (int)(((C)<<2) | (B)), \
   7919                                              (__v4df)(__m256d)(W), \
   7920                                              (__mmask8)(U)))
   7921 
   7922 #define _mm256_maskz_getmant_pd(U, A, B, C) \
   7923   ((__m256d)__builtin_ia32_getmantpd256_mask((__v4df)(__m256d)(A), \
   7924                                              (int)(((C)<<2) | (B)), \
   7925                                              (__v4df)_mm256_setzero_pd(), \
   7926                                              (__mmask8)(U)))
   7927 
   7928 #define _mm_getmant_ps(A, B, C) \
   7929   ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
   7930                                             (int)(((C)<<2) | (B)), \
   7931                                             (__v4sf)_mm_setzero_ps(), \
   7932                                             (__mmask8)-1))
   7933 
   7934 #define _mm_mask_getmant_ps(W, U, A, B, C) \
   7935   ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
   7936                                             (int)(((C)<<2) | (B)), \
   7937                                             (__v4sf)(__m128)(W), \
   7938                                             (__mmask8)(U)))
   7939 
   7940 #define _mm_maskz_getmant_ps(U, A, B, C) \
   7941   ((__m128)__builtin_ia32_getmantps128_mask((__v4sf)(__m128)(A), \
   7942                                             (int)(((C)<<2) | (B)), \
   7943                                             (__v4sf)_mm_setzero_ps(), \
   7944                                             (__mmask8)(U)))
   7945 
   7946 #define _mm256_getmant_ps(A, B, C) \
   7947   ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
   7948                                             (int)(((C)<<2) | (B)), \
   7949                                             (__v8sf)_mm256_setzero_ps(), \
   7950                                             (__mmask8)-1))
   7951 
   7952 #define _mm256_mask_getmant_ps(W, U, A, B, C) \
   7953   ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
   7954                                             (int)(((C)<<2) | (B)), \
   7955                                             (__v8sf)(__m256)(W), \
   7956                                             (__mmask8)(U)))
   7957 
   7958 #define _mm256_maskz_getmant_ps(U, A, B, C) \
   7959   ((__m256)__builtin_ia32_getmantps256_mask((__v8sf)(__m256)(A), \
   7960                                             (int)(((C)<<2) | (B)), \
   7961                                             (__v8sf)_mm256_setzero_ps(), \
   7962                                             (__mmask8)(U)))
   7963 
   7964 #define _mm_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
   7965   ((__m128d)__builtin_ia32_gather3div2df((__v2df)(__m128d)(v1_old), \
   7966                                          (void const *)(addr), \
   7967                                          (__v2di)(__m128i)(index), \
   7968                                          (__mmask8)(mask), (int)(scale)))
   7969 
   7970 #define _mm_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
   7971   ((__m128i)__builtin_ia32_gather3div2di((__v2di)(__m128i)(v1_old), \
   7972                                          (void const *)(addr), \
   7973                                          (__v2di)(__m128i)(index), \
   7974                                          (__mmask8)(mask), (int)(scale)))
   7975 
   7976 #define _mm256_mmask_i64gather_pd(v1_old, mask, index, addr, scale) \
   7977   ((__m256d)__builtin_ia32_gather3div4df((__v4df)(__m256d)(v1_old), \
   7978                                          (void const *)(addr), \
   7979                                          (__v4di)(__m256i)(index), \
   7980                                          (__mmask8)(mask), (int)(scale)))
   7981 
   7982 #define _mm256_mmask_i64gather_epi64(v1_old, mask, index, addr, scale) \
   7983   ((__m256i)__builtin_ia32_gather3div4di((__v4di)(__m256i)(v1_old), \
   7984                                          (void const *)(addr), \
   7985                                          (__v4di)(__m256i)(index), \
   7986                                          (__mmask8)(mask), (int)(scale)))
   7987 
   7988 #define _mm_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
   7989   ((__m128)__builtin_ia32_gather3div4sf((__v4sf)(__m128)(v1_old), \
   7990                                         (void const *)(addr), \
   7991                                         (__v2di)(__m128i)(index), \
   7992                                         (__mmask8)(mask), (int)(scale)))
   7993 
   7994 #define _mm_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
   7995   ((__m128i)__builtin_ia32_gather3div4si((__v4si)(__m128i)(v1_old), \
   7996                                          (void const *)(addr), \
   7997                                          (__v2di)(__m128i)(index), \
   7998                                          (__mmask8)(mask), (int)(scale)))
   7999 
   8000 #define _mm256_mmask_i64gather_ps(v1_old, mask, index, addr, scale) \
   8001   ((__m128)__builtin_ia32_gather3div8sf((__v4sf)(__m128)(v1_old), \
   8002                                         (void const *)(addr), \
   8003                                         (__v4di)(__m256i)(index), \
   8004                                         (__mmask8)(mask), (int)(scale)))
   8005 
   8006 #define _mm256_mmask_i64gather_epi32(v1_old, mask, index, addr, scale) \
   8007   ((__m128i)__builtin_ia32_gather3div8si((__v4si)(__m128i)(v1_old), \
   8008                                          (void const *)(addr), \
   8009                                          (__v4di)(__m256i)(index), \
   8010                                          (__mmask8)(mask), (int)(scale)))
   8011 
   8012 #define _mm_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
   8013   ((__m128d)__builtin_ia32_gather3siv2df((__v2df)(__m128d)(v1_old), \
   8014                                          (void const *)(addr), \
   8015                                          (__v4si)(__m128i)(index), \
   8016                                          (__mmask8)(mask), (int)(scale)))
   8017 
   8018 #define _mm_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
   8019   ((__m128i)__builtin_ia32_gather3siv2di((__v2di)(__m128i)(v1_old), \
   8020                                          (void const *)(addr), \
   8021                                          (__v4si)(__m128i)(index), \
   8022                                          (__mmask8)(mask), (int)(scale)))
   8023 
   8024 #define _mm256_mmask_i32gather_pd(v1_old, mask, index, addr, scale) \
   8025   ((__m256d)__builtin_ia32_gather3siv4df((__v4df)(__m256d)(v1_old), \
   8026                                          (void const *)(addr), \
   8027                                          (__v4si)(__m128i)(index), \
   8028                                          (__mmask8)(mask), (int)(scale)))
   8029 
   8030 #define _mm256_mmask_i32gather_epi64(v1_old, mask, index, addr, scale) \
   8031   ((__m256i)__builtin_ia32_gather3siv4di((__v4di)(__m256i)(v1_old), \
   8032                                          (void const *)(addr), \
   8033                                          (__v4si)(__m128i)(index), \
   8034                                          (__mmask8)(mask), (int)(scale)))
   8035 
   8036 #define _mm_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
   8037   ((__m128)__builtin_ia32_gather3siv4sf((__v4sf)(__m128)(v1_old), \
   8038                                         (void const *)(addr), \
   8039                                         (__v4si)(__m128i)(index), \
   8040                                         (__mmask8)(mask), (int)(scale)))
   8041 
   8042 #define _mm_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
   8043   ((__m128i)__builtin_ia32_gather3siv4si((__v4si)(__m128i)(v1_old), \
   8044                                          (void const *)(addr), \
   8045                                          (__v4si)(__m128i)(index), \
   8046                                          (__mmask8)(mask), (int)(scale)))
   8047 
   8048 #define _mm256_mmask_i32gather_ps(v1_old, mask, index, addr, scale) \
   8049   ((__m256)__builtin_ia32_gather3siv8sf((__v8sf)(__m256)(v1_old), \
   8050                                         (void const *)(addr), \
   8051                                         (__v8si)(__m256i)(index), \
   8052                                         (__mmask8)(mask), (int)(scale)))
   8053 
   8054 #define _mm256_mmask_i32gather_epi32(v1_old, mask, index, addr, scale) \
   8055   ((__m256i)__builtin_ia32_gather3siv8si((__v8si)(__m256i)(v1_old), \
   8056                                          (void const *)(addr), \
   8057                                          (__v8si)(__m256i)(index), \
   8058                                          (__mmask8)(mask), (int)(scale)))
   8059 
   8060 #define _mm256_permutex_pd(X, C) \
   8061   ((__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(X), (int)(C)))
   8062 
   8063 #define _mm256_mask_permutex_pd(W, U, X, C) \
   8064   ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
   8065                                        (__v4df)_mm256_permutex_pd((X), (C)), \
   8066                                        (__v4df)(__m256d)(W)))
   8067 
   8068 #define _mm256_maskz_permutex_pd(U, X, C) \
   8069   ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
   8070                                         (__v4df)_mm256_permutex_pd((X), (C)), \
   8071                                         (__v4df)_mm256_setzero_pd()))
   8072 
   8073 #define _mm256_permutex_epi64(X, C) \
   8074   ((__m256i)__builtin_ia32_permdi256((__v4di)(__m256i)(X), (int)(C)))
   8075 
   8076 #define _mm256_mask_permutex_epi64(W, U, X, C) \
   8077   ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
   8078                                       (__v4di)_mm256_permutex_epi64((X), (C)), \
   8079                                       (__v4di)(__m256i)(W)))
   8080 
   8081 #define _mm256_maskz_permutex_epi64(U, X, C) \
   8082   ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
   8083                                       (__v4di)_mm256_permutex_epi64((X), (C)), \
   8084                                       (__v4di)_mm256_setzero_si256()))
   8085 
   8086 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   8087 _mm256_permutexvar_pd (__m256i __X, __m256d __Y)
   8088 {
   8089   return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X);
   8090 }
   8091 
   8092 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   8093 _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
   8094           __m256d __Y)
   8095 {
   8096   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   8097                                         (__v4df)_mm256_permutexvar_pd(__X, __Y),
   8098                                         (__v4df)__W);
   8099 }
   8100 
   8101 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   8102 _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
   8103 {
   8104   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
   8105                                         (__v4df)_mm256_permutexvar_pd(__X, __Y),
   8106                                         (__v4df)_mm256_setzero_pd());
   8107 }
   8108 
   8109 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   8110 _mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y)
   8111 {
   8112   return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X);
   8113 }
   8114 
   8115 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   8116 _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
   8117 {
   8118   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
   8119                                      (__v4di)_mm256_permutexvar_epi64(__X, __Y),
   8120                                      (__v4di)_mm256_setzero_si256());
   8121 }
   8122 
   8123 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   8124 _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
   8125              __m256i __Y)
   8126 {
   8127   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
   8128                                      (__v4di)_mm256_permutexvar_epi64(__X, __Y),
   8129                                      (__v4di)__W);
   8130 }
   8131 
   8132 #define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A))
   8133 
   8134 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   8135 _mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y)
   8136 {
   8137   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   8138                                         (__v8sf)_mm256_permutexvar_ps(__X, __Y),
   8139                                         (__v8sf)__W);
   8140 }
   8141 
   8142 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   8143 _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y)
   8144 {
   8145   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   8146                                         (__v8sf)_mm256_permutexvar_ps(__X, __Y),
   8147                                         (__v8sf)_mm256_setzero_ps());
   8148 }
   8149 
   8150 #define _mm256_permutexvar_epi32(A, B) _mm256_permutevar8x32_epi32((B), (A))
   8151 
   8152 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   8153 _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X,
   8154                               __m256i __Y)
   8155 {
   8156   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
   8157                                      (__v8si)_mm256_permutexvar_epi32(__X, __Y),
   8158                                      (__v8si)__W);
   8159 }
   8160 
   8161 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   8162 _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
   8163 {
   8164   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
   8165                                      (__v8si)_mm256_permutexvar_epi32(__X, __Y),
   8166                                      (__v8si)_mm256_setzero_si256());
   8167 }
   8168 
   8169 #define _mm_alignr_epi32(A, B, imm) \
   8170   ((__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \
   8171                                      (__v4si)(__m128i)(B), (int)(imm)))
   8172 
   8173 #define _mm_mask_alignr_epi32(W, U, A, B, imm) \
   8174   ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
   8175                                     (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
   8176                                     (__v4si)(__m128i)(W)))
   8177 
   8178 #define _mm_maskz_alignr_epi32(U, A, B, imm) \
   8179   ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
   8180                                     (__v4si)_mm_alignr_epi32((A), (B), (imm)), \
   8181                                     (__v4si)_mm_setzero_si128()))
   8182 
   8183 #define _mm256_alignr_epi32(A, B, imm) \
   8184   ((__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \
   8185                                      (__v8si)(__m256i)(B), (int)(imm)))
   8186 
   8187 #define _mm256_mask_alignr_epi32(W, U, A, B, imm) \
   8188   ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
   8189                                  (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
   8190                                  (__v8si)(__m256i)(W)))
   8191 
   8192 #define _mm256_maskz_alignr_epi32(U, A, B, imm) \
   8193   ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
   8194                                  (__v8si)_mm256_alignr_epi32((A), (B), (imm)), \
   8195                                  (__v8si)_mm256_setzero_si256()))
   8196 
   8197 #define _mm_alignr_epi64(A, B, imm) \
   8198   ((__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \
   8199                                      (__v2di)(__m128i)(B), (int)(imm)))
   8200 
   8201 #define _mm_mask_alignr_epi64(W, U, A, B, imm) \
   8202   ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
   8203                                     (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
   8204                                     (__v2di)(__m128i)(W)))
   8205 
   8206 #define _mm_maskz_alignr_epi64(U, A, B, imm) \
   8207   ((__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
   8208                                     (__v2di)_mm_alignr_epi64((A), (B), (imm)), \
   8209                                     (__v2di)_mm_setzero_si128()))
   8210 
   8211 #define _mm256_alignr_epi64(A, B, imm) \
   8212   ((__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \
   8213                                      (__v4di)(__m256i)(B), (int)(imm)))
   8214 
   8215 #define _mm256_mask_alignr_epi64(W, U, A, B, imm) \
   8216   ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
   8217                                  (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
   8218                                  (__v4di)(__m256i)(W)))
   8219 
   8220 #define _mm256_maskz_alignr_epi64(U, A, B, imm) \
   8221   ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
   8222                                  (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
   8223                                  (__v4di)_mm256_setzero_si256()))
   8224 
   8225 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   8226 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
   8227 {
   8228   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   8229                                              (__v4sf)_mm_movehdup_ps(__A),
   8230                                              (__v4sf)__W);
   8231 }
   8232 
   8233 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   8234 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
   8235 {
   8236   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   8237                                              (__v4sf)_mm_movehdup_ps(__A),
   8238                                              (__v4sf)_mm_setzero_ps());
   8239 }
   8240 
   8241 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   8242 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
   8243 {
   8244   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   8245                                              (__v8sf)_mm256_movehdup_ps(__A),
   8246                                              (__v8sf)__W);
   8247 }
   8248 
   8249 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   8250 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
   8251 {
   8252   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   8253                                              (__v8sf)_mm256_movehdup_ps(__A),
   8254                                              (__v8sf)_mm256_setzero_ps());
   8255 }
   8256 
   8257 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   8258 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
   8259 {
   8260   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   8261                                              (__v4sf)_mm_moveldup_ps(__A),
   8262                                              (__v4sf)__W);
   8263 }
   8264 
   8265 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   8266 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
   8267 {
   8268   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
   8269                                              (__v4sf)_mm_moveldup_ps(__A),
   8270                                              (__v4sf)_mm_setzero_ps());
   8271 }
   8272 
   8273 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   8274 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
   8275 {
   8276   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   8277                                              (__v8sf)_mm256_moveldup_ps(__A),
   8278                                              (__v8sf)__W);
   8279 }
   8280 
   8281 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   8282 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
   8283 {
   8284   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
   8285                                              (__v8sf)_mm256_moveldup_ps(__A),
   8286                                              (__v8sf)_mm256_setzero_ps());
   8287 }
   8288 
   8289 #define _mm256_mask_shuffle_epi32(W, U, A, I) \
   8290   ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
   8291                                        (__v8si)_mm256_shuffle_epi32((A), (I)), \
   8292                                        (__v8si)(__m256i)(W)))
   8293 
   8294 #define _mm256_maskz_shuffle_epi32(U, A, I) \
   8295   ((__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
   8296                                        (__v8si)_mm256_shuffle_epi32((A), (I)), \
   8297                                        (__v8si)_mm256_setzero_si256()))
   8298 
   8299 #define _mm_mask_shuffle_epi32(W, U, A, I) \
   8300   ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
   8301                                        (__v4si)_mm_shuffle_epi32((A), (I)), \
   8302                                        (__v4si)(__m128i)(W)))
   8303 
   8304 #define _mm_maskz_shuffle_epi32(U, A, I) \
   8305   ((__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
   8306                                        (__v4si)_mm_shuffle_epi32((A), (I)), \
   8307                                        (__v4si)_mm_setzero_si128()))
   8308 
   8309 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   8310 _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
   8311 {
   8312   return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
   8313               (__v2df) __A,
   8314               (__v2df) __W);
   8315 }
   8316 
   8317 static __inline__ __m128d __DEFAULT_FN_ATTRS128
   8318 _mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
   8319 {
   8320   return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
   8321               (__v2df) __A,
   8322               (__v2df) _mm_setzero_pd ());
   8323 }
   8324 
   8325 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   8326 _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
   8327 {
   8328   return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
   8329               (__v4df) __A,
   8330               (__v4df) __W);
   8331 }
   8332 
   8333 static __inline__ __m256d __DEFAULT_FN_ATTRS256
   8334 _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
   8335 {
   8336   return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
   8337               (__v4df) __A,
   8338               (__v4df) _mm256_setzero_pd ());
   8339 }
   8340 
   8341 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   8342 _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
   8343 {
   8344   return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
   8345              (__v4sf) __A,
   8346              (__v4sf) __W);
   8347 }
   8348 
   8349 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   8350 _mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
   8351 {
   8352   return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
   8353              (__v4sf) __A,
   8354              (__v4sf) _mm_setzero_ps ());
   8355 }
   8356 
   8357 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   8358 _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
   8359 {
   8360   return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
   8361              (__v8sf) __A,
   8362              (__v8sf) __W);
   8363 }
   8364 
   8365 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   8366 _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
   8367 {
   8368   return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
   8369              (__v8sf) __A,
   8370              (__v8sf) _mm256_setzero_ps ());
   8371 }
   8372 
   8373 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   8374 _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
   8375 {
   8376   return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
   8377              (__v4sf) __W,
   8378              (__mmask8) __U);
   8379 }
   8380 
   8381 static __inline__ __m128 __DEFAULT_FN_ATTRS128
   8382 _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
   8383 {
   8384   return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
   8385              (__v4sf)
   8386              _mm_setzero_ps (),
   8387              (__mmask8) __U);
   8388 }
   8389 
   8390 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   8391 _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
   8392 {
   8393   return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
   8394                 (__v8sf) __W,
   8395                 (__mmask8) __U);
   8396 }
   8397 
   8398 static __inline__ __m256 __DEFAULT_FN_ATTRS256
   8399 _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
   8400 {
   8401   return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
   8402                 (__v8sf)
   8403                 _mm256_setzero_ps (),
   8404                 (__mmask8) __U);
   8405 }
   8406 
   8407 #define _mm_mask_cvt_roundps_ph(W, U, A, I) \
   8408   ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
   8409                                           (__v8hi)(__m128i)(W), \
   8410                                           (__mmask8)(U)))
   8411 
   8412 #define _mm_maskz_cvt_roundps_ph(U, A, I) \
   8413   ((__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \
   8414                                           (__v8hi)_mm_setzero_si128(), \
   8415                                           (__mmask8)(U)))
   8416 
   8417 #define _mm_mask_cvtps_ph  _mm_mask_cvt_roundps_ph
   8418 #define _mm_maskz_cvtps_ph _mm_maskz_cvt_roundps_ph
   8419 
   8420 #define _mm256_mask_cvt_roundps_ph(W, U, A, I) \
   8421   ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
   8422                                              (__v8hi)(__m128i)(W), \
   8423                                              (__mmask8)(U)))
   8424 
   8425 #define _mm256_maskz_cvt_roundps_ph(U, A, I) \
   8426   ((__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \
   8427                                              (__v8hi)_mm_setzero_si128(), \
   8428                                              (__mmask8)(U)))
   8429 
   8430 #define _mm256_mask_cvtps_ph  _mm256_mask_cvt_roundps_ph
   8431 #define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph
   8432 
   8433 
   8434 #undef __DEFAULT_FN_ATTRS128
   8435 #undef __DEFAULT_FN_ATTRS256
   8436 
   8437 #endif /* __AVX512VLINTRIN_H */