zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

avx512vldqintrin.h (47827B) - Raw


      1 /*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ------------===
      2  *
      3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4  * See https://llvm.org/LICENSE.txt for license information.
      5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6  *
      7  *===-----------------------------------------------------------------------===
      8  */
      9 
     10 #ifndef __IMMINTRIN_H
     11 #error "Never use <avx512vldqintrin.h> directly; include <immintrin.h> instead."
     12 #endif
     13 
     14 #ifndef __AVX512VLDQINTRIN_H
     15 #define __AVX512VLDQINTRIN_H
     16 
     17 /* Define the default attributes for the functions in this file. */
     18 #define __DEFAULT_FN_ATTRS128                                                  \
     19   __attribute__((__always_inline__, __nodebug__,                               \
     20                  __target__("avx512vl,avx512dq,no-evex512"),                   \
     21                  __min_vector_width__(128)))
     22 #define __DEFAULT_FN_ATTRS256                                                  \
     23   __attribute__((__always_inline__, __nodebug__,                               \
     24                  __target__("avx512vl,avx512dq,no-evex512"),                   \
     25                  __min_vector_width__(256)))
     26 
     27 static __inline__ __m256i __DEFAULT_FN_ATTRS256
     28 _mm256_mullo_epi64 (__m256i __A, __m256i __B) {
     29   return (__m256i) ((__v4du) __A * (__v4du) __B);
     30 }
     31 
     32 static __inline__ __m256i __DEFAULT_FN_ATTRS256
     33 _mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
     34   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
     35                                              (__v4di)_mm256_mullo_epi64(__A, __B),
     36                                              (__v4di)__W);
     37 }
     38 
     39 static __inline__ __m256i __DEFAULT_FN_ATTRS256
     40 _mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
     41   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
     42                                              (__v4di)_mm256_mullo_epi64(__A, __B),
     43                                              (__v4di)_mm256_setzero_si256());
     44 }
     45 
     46 static __inline__ __m128i __DEFAULT_FN_ATTRS128
     47 _mm_mullo_epi64 (__m128i __A, __m128i __B) {
     48   return (__m128i) ((__v2du) __A * (__v2du) __B);
     49 }
     50 
     51 static __inline__ __m128i __DEFAULT_FN_ATTRS128
     52 _mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
     53   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
     54                                              (__v2di)_mm_mullo_epi64(__A, __B),
     55                                              (__v2di)__W);
     56 }
     57 
     58 static __inline__ __m128i __DEFAULT_FN_ATTRS128
     59 _mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
     60   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
     61                                              (__v2di)_mm_mullo_epi64(__A, __B),
     62                                              (__v2di)_mm_setzero_si128());
     63 }
     64 
     65 static __inline__ __m256d __DEFAULT_FN_ATTRS256
     66 _mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
     67   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
     68                                               (__v4df)_mm256_andnot_pd(__A, __B),
     69                                               (__v4df)__W);
     70 }
     71 
     72 static __inline__ __m256d __DEFAULT_FN_ATTRS256
     73 _mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B) {
     74   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
     75                                               (__v4df)_mm256_andnot_pd(__A, __B),
     76                                               (__v4df)_mm256_setzero_pd());
     77 }
     78 
     79 static __inline__ __m128d __DEFAULT_FN_ATTRS128
     80 _mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
     81   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
     82                                               (__v2df)_mm_andnot_pd(__A, __B),
     83                                               (__v2df)__W);
     84 }
     85 
     86 static __inline__ __m128d __DEFAULT_FN_ATTRS128
     87 _mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B) {
     88   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
     89                                               (__v2df)_mm_andnot_pd(__A, __B),
     90                                               (__v2df)_mm_setzero_pd());
     91 }
     92 
     93 static __inline__ __m256 __DEFAULT_FN_ATTRS256
     94 _mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
     95   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
     96                                              (__v8sf)_mm256_andnot_ps(__A, __B),
     97                                              (__v8sf)__W);
     98 }
     99 
    100 static __inline__ __m256 __DEFAULT_FN_ATTRS256
    101 _mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B) {
    102   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
    103                                              (__v8sf)_mm256_andnot_ps(__A, __B),
    104                                              (__v8sf)_mm256_setzero_ps());
    105 }
    106 
    107 static __inline__ __m128 __DEFAULT_FN_ATTRS128
    108 _mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
    109   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
    110                                              (__v4sf)_mm_andnot_ps(__A, __B),
    111                                              (__v4sf)__W);
    112 }
    113 
    114 static __inline__ __m128 __DEFAULT_FN_ATTRS128
    115 _mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B) {
    116   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
    117                                              (__v4sf)_mm_andnot_ps(__A, __B),
    118                                              (__v4sf)_mm_setzero_ps());
    119 }
    120 
    121 static __inline__ __m256d __DEFAULT_FN_ATTRS256
    122 _mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
    123   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
    124                                               (__v4df)_mm256_and_pd(__A, __B),
    125                                               (__v4df)__W);
    126 }
    127 
    128 static __inline__ __m256d __DEFAULT_FN_ATTRS256
    129 _mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B) {
    130   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
    131                                               (__v4df)_mm256_and_pd(__A, __B),
    132                                               (__v4df)_mm256_setzero_pd());
    133 }
    134 
    135 static __inline__ __m128d __DEFAULT_FN_ATTRS128
    136 _mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
    137   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
    138                                               (__v2df)_mm_and_pd(__A, __B),
    139                                               (__v2df)__W);
    140 }
    141 
    142 static __inline__ __m128d __DEFAULT_FN_ATTRS128
    143 _mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B) {
    144   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
    145                                               (__v2df)_mm_and_pd(__A, __B),
    146                                               (__v2df)_mm_setzero_pd());
    147 }
    148 
    149 static __inline__ __m256 __DEFAULT_FN_ATTRS256
    150 _mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
    151   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
    152                                              (__v8sf)_mm256_and_ps(__A, __B),
    153                                              (__v8sf)__W);
    154 }
    155 
    156 static __inline__ __m256 __DEFAULT_FN_ATTRS256
    157 _mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B) {
    158   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
    159                                              (__v8sf)_mm256_and_ps(__A, __B),
    160                                              (__v8sf)_mm256_setzero_ps());
    161 }
    162 
    163 static __inline__ __m128 __DEFAULT_FN_ATTRS128
    164 _mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
    165   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
    166                                              (__v4sf)_mm_and_ps(__A, __B),
    167                                              (__v4sf)__W);
    168 }
    169 
    170 static __inline__ __m128 __DEFAULT_FN_ATTRS128
    171 _mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B) {
    172   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
    173                                              (__v4sf)_mm_and_ps(__A, __B),
    174                                              (__v4sf)_mm_setzero_ps());
    175 }
    176 
    177 static __inline__ __m256d __DEFAULT_FN_ATTRS256
    178 _mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
    179   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
    180                                               (__v4df)_mm256_xor_pd(__A, __B),
    181                                               (__v4df)__W);
    182 }
    183 
    184 static __inline__ __m256d __DEFAULT_FN_ATTRS256
    185 _mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B) {
    186   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
    187                                               (__v4df)_mm256_xor_pd(__A, __B),
    188                                               (__v4df)_mm256_setzero_pd());
    189 }
    190 
    191 static __inline__ __m128d __DEFAULT_FN_ATTRS128
    192 _mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
    193   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
    194                                               (__v2df)_mm_xor_pd(__A, __B),
    195                                               (__v2df)__W);
    196 }
    197 
    198 static __inline__ __m128d __DEFAULT_FN_ATTRS128
    199 _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
    200   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
    201                                               (__v2df)_mm_xor_pd(__A, __B),
    202                                               (__v2df)_mm_setzero_pd());
    203 }
    204 
    205 static __inline__ __m256 __DEFAULT_FN_ATTRS256
    206 _mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
    207   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
    208                                              (__v8sf)_mm256_xor_ps(__A, __B),
    209                                              (__v8sf)__W);
    210 }
    211 
    212 static __inline__ __m256 __DEFAULT_FN_ATTRS256
    213 _mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B) {
    214   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
    215                                              (__v8sf)_mm256_xor_ps(__A, __B),
    216                                              (__v8sf)_mm256_setzero_ps());
    217 }
    218 
    219 static __inline__ __m128 __DEFAULT_FN_ATTRS128
    220 _mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
    221   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
    222                                              (__v4sf)_mm_xor_ps(__A, __B),
    223                                              (__v4sf)__W);
    224 }
    225 
    226 static __inline__ __m128 __DEFAULT_FN_ATTRS128
    227 _mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B) {
    228   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
    229                                              (__v4sf)_mm_xor_ps(__A, __B),
    230                                              (__v4sf)_mm_setzero_ps());
    231 }
    232 
    233 static __inline__ __m256d __DEFAULT_FN_ATTRS256
    234 _mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
    235   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
    236                                               (__v4df)_mm256_or_pd(__A, __B),
    237                                               (__v4df)__W);
    238 }
    239 
    240 static __inline__ __m256d __DEFAULT_FN_ATTRS256
    241 _mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B) {
    242   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
    243                                               (__v4df)_mm256_or_pd(__A, __B),
    244                                               (__v4df)_mm256_setzero_pd());
    245 }
    246 
    247 static __inline__ __m128d __DEFAULT_FN_ATTRS128
    248 _mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
    249   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
    250                                               (__v2df)_mm_or_pd(__A, __B),
    251                                               (__v2df)__W);
    252 }
    253 
    254 static __inline__ __m128d __DEFAULT_FN_ATTRS128
    255 _mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B) {
    256   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
    257                                               (__v2df)_mm_or_pd(__A, __B),
    258                                               (__v2df)_mm_setzero_pd());
    259 }
    260 
    261 static __inline__ __m256 __DEFAULT_FN_ATTRS256
    262 _mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
    263   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
    264                                              (__v8sf)_mm256_or_ps(__A, __B),
    265                                              (__v8sf)__W);
    266 }
    267 
    268 static __inline__ __m256 __DEFAULT_FN_ATTRS256
    269 _mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B) {
    270   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
    271                                              (__v8sf)_mm256_or_ps(__A, __B),
    272                                              (__v8sf)_mm256_setzero_ps());
    273 }
    274 
    275 static __inline__ __m128 __DEFAULT_FN_ATTRS128
    276 _mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
    277   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
    278                                              (__v4sf)_mm_or_ps(__A, __B),
    279                                              (__v4sf)__W);
    280 }
    281 
    282 static __inline__ __m128 __DEFAULT_FN_ATTRS128
    283 _mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B) {
    284   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
    285                                              (__v4sf)_mm_or_ps(__A, __B),
    286                                              (__v4sf)_mm_setzero_ps());
    287 }
    288 
    289 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    290 _mm_cvtpd_epi64 (__m128d __A) {
    291   return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
    292                 (__v2di) _mm_setzero_si128(),
    293                 (__mmask8) -1);
    294 }
    295 
    296 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    297 _mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
    298   return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
    299                 (__v2di) __W,
    300                 (__mmask8) __U);
    301 }
    302 
    303 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    304 _mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) {
    305   return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
    306                 (__v2di) _mm_setzero_si128(),
    307                 (__mmask8) __U);
    308 }
    309 
    310 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    311 _mm256_cvtpd_epi64 (__m256d __A) {
    312   return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
    313                 (__v4di) _mm256_setzero_si256(),
    314                 (__mmask8) -1);
    315 }
    316 
    317 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    318 _mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
    319   return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
    320                 (__v4di) __W,
    321                 (__mmask8) __U);
    322 }
    323 
    324 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    325 _mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) {
    326   return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
    327                 (__v4di) _mm256_setzero_si256(),
    328                 (__mmask8) __U);
    329 }
    330 
    331 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    332 _mm_cvtpd_epu64 (__m128d __A) {
    333   return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
    334                 (__v2di) _mm_setzero_si128(),
    335                 (__mmask8) -1);
    336 }
    337 
    338 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    339 _mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
    340   return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
    341                 (__v2di) __W,
    342                 (__mmask8) __U);
    343 }
    344 
    345 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    346 _mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) {
    347   return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
    348                 (__v2di) _mm_setzero_si128(),
    349                 (__mmask8) __U);
    350 }
    351 
    352 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    353 _mm256_cvtpd_epu64 (__m256d __A) {
    354   return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
    355                 (__v4di) _mm256_setzero_si256(),
    356                 (__mmask8) -1);
    357 }
    358 
    359 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    360 _mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
    361   return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
    362                 (__v4di) __W,
    363                 (__mmask8) __U);
    364 }
    365 
    366 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    367 _mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) {
    368   return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
    369                 (__v4di) _mm256_setzero_si256(),
    370                 (__mmask8) __U);
    371 }
    372 
    373 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    374 _mm_cvtps_epi64 (__m128 __A) {
    375   return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
    376                 (__v2di) _mm_setzero_si128(),
    377                 (__mmask8) -1);
    378 }
    379 
    380 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    381 _mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
    382   return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
    383                 (__v2di) __W,
    384                 (__mmask8) __U);
    385 }
    386 
    387 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    388 _mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
    389   return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
    390                 (__v2di) _mm_setzero_si128(),
    391                 (__mmask8) __U);
    392 }
    393 
    394 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    395 _mm256_cvtps_epi64 (__m128 __A) {
    396   return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
    397                 (__v4di) _mm256_setzero_si256(),
    398                 (__mmask8) -1);
    399 }
    400 
    401 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    402 _mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
    403   return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
    404                 (__v4di) __W,
    405                 (__mmask8) __U);
    406 }
    407 
    408 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    409 _mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
    410   return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
    411                 (__v4di) _mm256_setzero_si256(),
    412                 (__mmask8) __U);
    413 }
    414 
    415 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    416 _mm_cvtps_epu64 (__m128 __A) {
    417   return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
    418                 (__v2di) _mm_setzero_si128(),
    419                 (__mmask8) -1);
    420 }
    421 
    422 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    423 _mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
    424   return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
    425                 (__v2di) __W,
    426                 (__mmask8) __U);
    427 }
    428 
    429 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    430 _mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
    431   return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
    432                 (__v2di) _mm_setzero_si128(),
    433                 (__mmask8) __U);
    434 }
    435 
    436 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    437 _mm256_cvtps_epu64 (__m128 __A) {
    438   return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
    439                 (__v4di) _mm256_setzero_si256(),
    440                 (__mmask8) -1);
    441 }
    442 
    443 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    444 _mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
    445   return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
    446                 (__v4di) __W,
    447                 (__mmask8) __U);
    448 }
    449 
    450 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    451 _mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
    452   return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
    453                 (__v4di) _mm256_setzero_si256(),
    454                 (__mmask8) __U);
    455 }
    456 
    457 static __inline__ __m128d __DEFAULT_FN_ATTRS128
    458 _mm_cvtepi64_pd (__m128i __A) {
    459   return (__m128d)__builtin_convertvector((__v2di)__A, __v2df);
    460 }
    461 
    462 static __inline__ __m128d __DEFAULT_FN_ATTRS128
    463 _mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
    464   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
    465                                               (__v2df)_mm_cvtepi64_pd(__A),
    466                                               (__v2df)__W);
    467 }
    468 
    469 static __inline__ __m128d __DEFAULT_FN_ATTRS128
    470 _mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) {
    471   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
    472                                               (__v2df)_mm_cvtepi64_pd(__A),
    473                                               (__v2df)_mm_setzero_pd());
    474 }
    475 
    476 static __inline__ __m256d __DEFAULT_FN_ATTRS256
    477 _mm256_cvtepi64_pd (__m256i __A) {
    478   return (__m256d)__builtin_convertvector((__v4di)__A, __v4df);
    479 }
    480 
    481 static __inline__ __m256d __DEFAULT_FN_ATTRS256
    482 _mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
    483   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
    484                                               (__v4df)_mm256_cvtepi64_pd(__A),
    485                                               (__v4df)__W);
    486 }
    487 
    488 static __inline__ __m256d __DEFAULT_FN_ATTRS256
    489 _mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) {
    490   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
    491                                               (__v4df)_mm256_cvtepi64_pd(__A),
    492                                               (__v4df)_mm256_setzero_pd());
    493 }
    494 
    495 static __inline__ __m128 __DEFAULT_FN_ATTRS128
    496 _mm_cvtepi64_ps (__m128i __A) {
    497   return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
    498                 (__v4sf) _mm_setzero_ps(),
    499                 (__mmask8) -1);
    500 }
    501 
    502 static __inline__ __m128 __DEFAULT_FN_ATTRS128
    503 _mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
    504   return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
    505                 (__v4sf) __W,
    506                 (__mmask8) __U);
    507 }
    508 
    509 static __inline__ __m128 __DEFAULT_FN_ATTRS128
    510 _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) {
    511   return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
    512                 (__v4sf) _mm_setzero_ps(),
    513                 (__mmask8) __U);
    514 }
    515 
    516 static __inline__ __m128 __DEFAULT_FN_ATTRS256
    517 _mm256_cvtepi64_ps (__m256i __A) {
    518   return (__m128)__builtin_convertvector((__v4di)__A, __v4sf);
    519 }
    520 
    521 static __inline__ __m128 __DEFAULT_FN_ATTRS256
    522 _mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
    523   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
    524                                              (__v4sf)_mm256_cvtepi64_ps(__A),
    525                                              (__v4sf)__W);
    526 }
    527 
    528 static __inline__ __m128 __DEFAULT_FN_ATTRS256
    529 _mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) {
    530   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
    531                                              (__v4sf)_mm256_cvtepi64_ps(__A),
    532                                              (__v4sf)_mm_setzero_ps());
    533 }
    534 
    535 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    536 _mm_cvttpd_epi64 (__m128d __A) {
    537   return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
    538                 (__v2di) _mm_setzero_si128(),
    539                 (__mmask8) -1);
    540 }
    541 
    542 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    543 _mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
    544   return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
    545                 (__v2di) __W,
    546                 (__mmask8) __U);
    547 }
    548 
    549 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    550 _mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) {
    551   return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
    552                 (__v2di) _mm_setzero_si128(),
    553                 (__mmask8) __U);
    554 }
    555 
    556 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    557 _mm256_cvttpd_epi64 (__m256d __A) {
    558   return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
    559                 (__v4di) _mm256_setzero_si256(),
    560                 (__mmask8) -1);
    561 }
    562 
    563 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    564 _mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
    565   return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
    566                 (__v4di) __W,
    567                 (__mmask8) __U);
    568 }
    569 
    570 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    571 _mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) {
    572   return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
    573                 (__v4di) _mm256_setzero_si256(),
    574                 (__mmask8) __U);
    575 }
    576 
    577 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    578 _mm_cvttpd_epu64 (__m128d __A) {
    579   return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
    580                 (__v2di) _mm_setzero_si128(),
    581                 (__mmask8) -1);
    582 }
    583 
    584 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    585 _mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
    586   return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
    587                 (__v2di) __W,
    588                 (__mmask8) __U);
    589 }
    590 
    591 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    592 _mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) {
    593   return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
    594                 (__v2di) _mm_setzero_si128(),
    595                 (__mmask8) __U);
    596 }
    597 
    598 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    599 _mm256_cvttpd_epu64 (__m256d __A) {
    600   return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
    601                 (__v4di) _mm256_setzero_si256(),
    602                 (__mmask8) -1);
    603 }
    604 
    605 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    606 _mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
    607   return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
    608                 (__v4di) __W,
    609                 (__mmask8) __U);
    610 }
    611 
    612 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    613 _mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) {
    614   return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
    615                 (__v4di) _mm256_setzero_si256(),
    616                 (__mmask8) __U);
    617 }
    618 
    619 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    620 _mm_cvttps_epi64 (__m128 __A) {
    621   return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
    622                 (__v2di) _mm_setzero_si128(),
    623                 (__mmask8) -1);
    624 }
    625 
    626 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    627 _mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
    628   return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
    629                 (__v2di) __W,
    630                 (__mmask8) __U);
    631 }
    632 
    633 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    634 _mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
    635   return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
    636                 (__v2di) _mm_setzero_si128(),
    637                 (__mmask8) __U);
    638 }
    639 
    640 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    641 _mm256_cvttps_epi64 (__m128 __A) {
    642   return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
    643                 (__v4di) _mm256_setzero_si256(),
    644                 (__mmask8) -1);
    645 }
    646 
    647 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    648 _mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
    649   return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
    650                 (__v4di) __W,
    651                 (__mmask8) __U);
    652 }
    653 
    654 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    655 _mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
    656   return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
    657                 (__v4di) _mm256_setzero_si256(),
    658                 (__mmask8) __U);
    659 }
    660 
    661 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    662 _mm_cvttps_epu64 (__m128 __A) {
    663   return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
    664                 (__v2di) _mm_setzero_si128(),
    665                 (__mmask8) -1);
    666 }
    667 
    668 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    669 _mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
    670   return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
    671                 (__v2di) __W,
    672                 (__mmask8) __U);
    673 }
    674 
    675 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    676 _mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
    677   return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
    678                 (__v2di) _mm_setzero_si128(),
    679                 (__mmask8) __U);
    680 }
    681 
    682 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    683 _mm256_cvttps_epu64 (__m128 __A) {
    684   return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
    685                 (__v4di) _mm256_setzero_si256(),
    686                 (__mmask8) -1);
    687 }
    688 
    689 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    690 _mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
    691   return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
    692                 (__v4di) __W,
    693                 (__mmask8) __U);
    694 }
    695 
    696 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    697 _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
    698   return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
    699                 (__v4di) _mm256_setzero_si256(),
    700                 (__mmask8) __U);
    701 }
    702 
    703 static __inline__ __m128d __DEFAULT_FN_ATTRS128
    704 _mm_cvtepu64_pd (__m128i __A) {
    705   return (__m128d)__builtin_convertvector((__v2du)__A, __v2df);
    706 }
    707 
    708 static __inline__ __m128d __DEFAULT_FN_ATTRS128
    709 _mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
    710   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
    711                                               (__v2df)_mm_cvtepu64_pd(__A),
    712                                               (__v2df)__W);
    713 }
    714 
    715 static __inline__ __m128d __DEFAULT_FN_ATTRS128
    716 _mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) {
    717   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
    718                                               (__v2df)_mm_cvtepu64_pd(__A),
    719                                               (__v2df)_mm_setzero_pd());
    720 }
    721 
    722 static __inline__ __m256d __DEFAULT_FN_ATTRS256
    723 _mm256_cvtepu64_pd (__m256i __A) {
    724   return (__m256d)__builtin_convertvector((__v4du)__A, __v4df);
    725 }
    726 
    727 static __inline__ __m256d __DEFAULT_FN_ATTRS256
    728 _mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
    729   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
    730                                               (__v4df)_mm256_cvtepu64_pd(__A),
    731                                               (__v4df)__W);
    732 }
    733 
    734 static __inline__ __m256d __DEFAULT_FN_ATTRS256
    735 _mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) {
    736   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
    737                                               (__v4df)_mm256_cvtepu64_pd(__A),
    738                                               (__v4df)_mm256_setzero_pd());
    739 }
    740 
    741 static __inline__ __m128 __DEFAULT_FN_ATTRS128
    742 _mm_cvtepu64_ps (__m128i __A) {
    743   return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
    744                 (__v4sf) _mm_setzero_ps(),
    745                 (__mmask8) -1);
    746 }
    747 
    748 static __inline__ __m128 __DEFAULT_FN_ATTRS128
    749 _mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
    750   return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
    751                 (__v4sf) __W,
    752                 (__mmask8) __U);
    753 }
    754 
    755 static __inline__ __m128 __DEFAULT_FN_ATTRS128
    756 _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) {
    757   return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
    758                 (__v4sf) _mm_setzero_ps(),
    759                 (__mmask8) __U);
    760 }
    761 
    762 static __inline__ __m128 __DEFAULT_FN_ATTRS256
    763 _mm256_cvtepu64_ps (__m256i __A) {
    764   return (__m128)__builtin_convertvector((__v4du)__A, __v4sf);
    765 }
    766 
    767 static __inline__ __m128 __DEFAULT_FN_ATTRS256
    768 _mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
    769   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
    770                                              (__v4sf)_mm256_cvtepu64_ps(__A),
    771                                              (__v4sf)__W);
    772 }
    773 
    774 static __inline__ __m128 __DEFAULT_FN_ATTRS256
    775 _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) {
    776   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
    777                                              (__v4sf)_mm256_cvtepu64_ps(__A),
    778                                              (__v4sf)_mm_setzero_ps());
    779 }
    780 
    781 #define _mm_range_pd(A, B, C) \
    782   ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
    783                                            (__v2df)(__m128d)(B), (int)(C), \
    784                                            (__v2df)_mm_setzero_pd(), \
    785                                            (__mmask8)-1))
    786 
    787 #define _mm_mask_range_pd(W, U, A, B, C) \
    788   ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
    789                                            (__v2df)(__m128d)(B), (int)(C), \
    790                                            (__v2df)(__m128d)(W), \
    791                                            (__mmask8)(U)))
    792 
    793 #define _mm_maskz_range_pd(U, A, B, C) \
    794   ((__m128d)__builtin_ia32_rangepd128_mask((__v2df)(__m128d)(A), \
    795                                            (__v2df)(__m128d)(B), (int)(C), \
    796                                            (__v2df)_mm_setzero_pd(), \
    797                                            (__mmask8)(U)))
    798 
    799 #define _mm256_range_pd(A, B, C) \
    800   ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
    801                                            (__v4df)(__m256d)(B), (int)(C), \
    802                                            (__v4df)_mm256_setzero_pd(), \
    803                                            (__mmask8)-1))
    804 
    805 #define _mm256_mask_range_pd(W, U, A, B, C) \
    806   ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
    807                                            (__v4df)(__m256d)(B), (int)(C), \
    808                                            (__v4df)(__m256d)(W), \
    809                                            (__mmask8)(U)))
    810 
    811 #define _mm256_maskz_range_pd(U, A, B, C) \
    812   ((__m256d)__builtin_ia32_rangepd256_mask((__v4df)(__m256d)(A), \
    813                                            (__v4df)(__m256d)(B), (int)(C), \
    814                                            (__v4df)_mm256_setzero_pd(), \
    815                                            (__mmask8)(U)))
    816 
    817 #define _mm_range_ps(A, B, C) \
    818   ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
    819                                           (__v4sf)(__m128)(B), (int)(C), \
    820                                           (__v4sf)_mm_setzero_ps(), \
    821                                           (__mmask8)-1))
    822 
    823 #define _mm_mask_range_ps(W, U, A, B, C) \
    824   ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
    825                                           (__v4sf)(__m128)(B), (int)(C), \
    826                                           (__v4sf)(__m128)(W), (__mmask8)(U)))
    827 
    828 #define _mm_maskz_range_ps(U, A, B, C) \
    829   ((__m128)__builtin_ia32_rangeps128_mask((__v4sf)(__m128)(A), \
    830                                           (__v4sf)(__m128)(B), (int)(C), \
    831                                           (__v4sf)_mm_setzero_ps(), \
    832                                           (__mmask8)(U)))
    833 
    834 #define _mm256_range_ps(A, B, C) \
    835   ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
    836                                           (__v8sf)(__m256)(B), (int)(C), \
    837                                           (__v8sf)_mm256_setzero_ps(), \
    838                                           (__mmask8)-1))
    839 
    840 #define _mm256_mask_range_ps(W, U, A, B, C) \
    841   ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
    842                                           (__v8sf)(__m256)(B), (int)(C), \
    843                                           (__v8sf)(__m256)(W), (__mmask8)(U)))
    844 
    845 #define _mm256_maskz_range_ps(U, A, B, C) \
    846   ((__m256)__builtin_ia32_rangeps256_mask((__v8sf)(__m256)(A), \
    847                                           (__v8sf)(__m256)(B), (int)(C), \
    848                                           (__v8sf)_mm256_setzero_ps(), \
    849                                           (__mmask8)(U)))
    850 
    851 #define _mm_reduce_pd(A, B) \
    852   ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
    853                                             (__v2df)_mm_setzero_pd(), \
    854                                             (__mmask8)-1))
    855 
    856 #define _mm_mask_reduce_pd(W, U, A, B) \
    857   ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
    858                                             (__v2df)(__m128d)(W), \
    859                                             (__mmask8)(U)))
    860 
    861 #define _mm_maskz_reduce_pd(U, A, B) \
    862   ((__m128d)__builtin_ia32_reducepd128_mask((__v2df)(__m128d)(A), (int)(B), \
    863                                             (__v2df)_mm_setzero_pd(), \
    864                                             (__mmask8)(U)))
    865 
    866 #define _mm256_reduce_pd(A, B) \
    867   ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
    868                                             (__v4df)_mm256_setzero_pd(), \
    869                                             (__mmask8)-1))
    870 
    871 #define _mm256_mask_reduce_pd(W, U, A, B) \
    872   ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
    873                                             (__v4df)(__m256d)(W), \
    874                                             (__mmask8)(U)))
    875 
    876 #define _mm256_maskz_reduce_pd(U, A, B) \
    877   ((__m256d)__builtin_ia32_reducepd256_mask((__v4df)(__m256d)(A), (int)(B), \
    878                                             (__v4df)_mm256_setzero_pd(), \
    879                                             (__mmask8)(U)))
    880 
    881 #define _mm_reduce_ps(A, B) \
    882   ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
    883                                            (__v4sf)_mm_setzero_ps(), \
    884                                            (__mmask8)-1))
    885 
    886 #define _mm_mask_reduce_ps(W, U, A, B) \
    887   ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
    888                                            (__v4sf)(__m128)(W), \
    889                                            (__mmask8)(U)))
    890 
    891 #define _mm_maskz_reduce_ps(U, A, B) \
    892   ((__m128)__builtin_ia32_reduceps128_mask((__v4sf)(__m128)(A), (int)(B), \
    893                                            (__v4sf)_mm_setzero_ps(), \
    894                                            (__mmask8)(U)))
    895 
    896 #define _mm256_reduce_ps(A, B) \
    897   ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
    898                                            (__v8sf)_mm256_setzero_ps(), \
    899                                            (__mmask8)-1))
    900 
    901 #define _mm256_mask_reduce_ps(W, U, A, B) \
    902   ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
    903                                            (__v8sf)(__m256)(W), \
    904                                            (__mmask8)(U)))
    905 
    906 #define _mm256_maskz_reduce_ps(U, A, B) \
    907   ((__m256)__builtin_ia32_reduceps256_mask((__v8sf)(__m256)(A), (int)(B), \
    908                                            (__v8sf)_mm256_setzero_ps(), \
    909                                            (__mmask8)(U)))
    910 
    911 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
    912 _mm_movepi32_mask (__m128i __A)
    913 {
    914   return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
    915 }
    916 
    917 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
    918 _mm256_movepi32_mask (__m256i __A)
    919 {
    920   return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
    921 }
    922 
    923 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    924 _mm_movm_epi32 (__mmask8 __A)
    925 {
    926   return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
    927 }
    928 
    929 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    930 _mm256_movm_epi32 (__mmask8 __A)
    931 {
    932   return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
    933 }
    934 
    935 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    936 _mm_movm_epi64 (__mmask8 __A)
    937 {
    938   return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
    939 }
    940 
    941 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    942 _mm256_movm_epi64 (__mmask8 __A)
    943 {
    944   return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
    945 }
    946 
    947 static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
    948 _mm_movepi64_mask (__m128i __A)
    949 {
    950   return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
    951 }
    952 
    953 static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
    954 _mm256_movepi64_mask (__m256i __A)
    955 {
    956   return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
    957 }
    958 
    959 static __inline__ __m256 __DEFAULT_FN_ATTRS256
    960 _mm256_broadcast_f32x2 (__m128 __A)
    961 {
    962   return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
    963                                          0, 1, 0, 1, 0, 1, 0, 1);
    964 }
    965 
    966 static __inline__ __m256 __DEFAULT_FN_ATTRS256
    967 _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
    968 {
    969   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
    970                                              (__v8sf)_mm256_broadcast_f32x2(__A),
    971                                              (__v8sf)__O);
    972 }
    973 
    974 static __inline__ __m256 __DEFAULT_FN_ATTRS256
    975 _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
    976 {
    977   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
    978                                              (__v8sf)_mm256_broadcast_f32x2(__A),
    979                                              (__v8sf)_mm256_setzero_ps());
    980 }
    981 
    982 static __inline__ __m256d __DEFAULT_FN_ATTRS256
    983 _mm256_broadcast_f64x2(__m128d __A)
    984 {
    985   return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
    986                                           0, 1, 0, 1);
    987 }
    988 
    989 static __inline__ __m256d __DEFAULT_FN_ATTRS256
    990 _mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A)
    991 {
    992   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
    993                                             (__v4df)_mm256_broadcast_f64x2(__A),
    994                                             (__v4df)__O);
    995 }
    996 
    997 static __inline__ __m256d __DEFAULT_FN_ATTRS256
    998 _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
    999 {
   1000   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
   1001                                             (__v4df)_mm256_broadcast_f64x2(__A),
   1002                                             (__v4df)_mm256_setzero_pd());
   1003 }
   1004 
   1005 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   1006 _mm_broadcast_i32x2 (__m128i __A)
   1007 {
   1008   return (__m128i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
   1009                                           0, 1, 0, 1);
   1010 }
   1011 
   1012 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   1013 _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
   1014 {
   1015   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
   1016                                              (__v4si)_mm_broadcast_i32x2(__A),
   1017                                              (__v4si)__O);
   1018 }
   1019 
   1020 static __inline__ __m128i __DEFAULT_FN_ATTRS128
   1021 _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
   1022 {
   1023   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
   1024                                              (__v4si)_mm_broadcast_i32x2(__A),
   1025                                              (__v4si)_mm_setzero_si128());
   1026 }
   1027 
   1028 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   1029 _mm256_broadcast_i32x2 (__m128i __A)
   1030 {
   1031   return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
   1032                                           0, 1, 0, 1, 0, 1, 0, 1);
   1033 }
   1034 
   1035 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   1036 _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
   1037 {
   1038   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
   1039                                              (__v8si)_mm256_broadcast_i32x2(__A),
   1040                                              (__v8si)__O);
   1041 }
   1042 
   1043 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   1044 _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
   1045 {
   1046   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
   1047                                              (__v8si)_mm256_broadcast_i32x2(__A),
   1048                                              (__v8si)_mm256_setzero_si256());
   1049 }
   1050 
   1051 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   1052 _mm256_broadcast_i64x2(__m128i __A)
   1053 {
   1054   return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
   1055                                           0, 1, 0, 1);
   1056 }
   1057 
   1058 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   1059 _mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A)
   1060 {
   1061   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
   1062                                             (__v4di)_mm256_broadcast_i64x2(__A),
   1063                                             (__v4di)__O);
   1064 }
   1065 
   1066 static __inline__ __m256i __DEFAULT_FN_ATTRS256
   1067 _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
   1068 {
   1069   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
   1070                                             (__v4di)_mm256_broadcast_i64x2(__A),
   1071                                             (__v4di)_mm256_setzero_si256());
   1072 }
   1073 
   1074 #define _mm256_extractf64x2_pd(A, imm) \
   1075   ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
   1076                                                  (int)(imm), \
   1077                                                  (__v2df)_mm_undefined_pd(), \
   1078                                                  (__mmask8)-1))
   1079 
   1080 #define _mm256_mask_extractf64x2_pd(W, U, A, imm) \
   1081   ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
   1082                                                  (int)(imm), \
   1083                                                  (__v2df)(__m128d)(W), \
   1084                                                  (__mmask8)(U)))
   1085 
   1086 #define _mm256_maskz_extractf64x2_pd(U, A, imm) \
   1087   ((__m128d)__builtin_ia32_extractf64x2_256_mask((__v4df)(__m256d)(A), \
   1088                                                  (int)(imm), \
   1089                                                  (__v2df)_mm_setzero_pd(), \
   1090                                                  (__mmask8)(U)))
   1091 
   1092 #define _mm256_extracti64x2_epi64(A, imm) \
   1093   ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
   1094                                                 (int)(imm), \
   1095                                                 (__v2di)_mm_undefined_si128(), \
   1096                                                 (__mmask8)-1))
   1097 
   1098 #define _mm256_mask_extracti64x2_epi64(W, U, A, imm) \
   1099   ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
   1100                                                  (int)(imm), \
   1101                                                  (__v2di)(__m128i)(W), \
   1102                                                  (__mmask8)(U)))
   1103 
   1104 #define _mm256_maskz_extracti64x2_epi64(U, A, imm) \
   1105   ((__m128i)__builtin_ia32_extracti64x2_256_mask((__v4di)(__m256i)(A), \
   1106                                                  (int)(imm), \
   1107                                                  (__v2di)_mm_setzero_si128(), \
   1108                                                  (__mmask8)(U)))
   1109 
   1110 #define _mm256_insertf64x2(A, B, imm) \
   1111   ((__m256d)__builtin_ia32_insertf64x2_256((__v4df)(__m256d)(A), \
   1112                                            (__v2df)(__m128d)(B), (int)(imm)))
   1113 
   1114 #define _mm256_mask_insertf64x2(W, U, A, B, imm) \
   1115   ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
   1116                                   (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
   1117                                   (__v4df)(__m256d)(W)))
   1118 
   1119 #define _mm256_maskz_insertf64x2(U, A, B, imm) \
   1120   ((__m256d)__builtin_ia32_selectpd_256((__mmask8)(U), \
   1121                                   (__v4df)_mm256_insertf64x2((A), (B), (imm)), \
   1122                                   (__v4df)_mm256_setzero_pd()))
   1123 
   1124 #define _mm256_inserti64x2(A, B, imm) \
   1125   ((__m256i)__builtin_ia32_inserti64x2_256((__v4di)(__m256i)(A), \
   1126                                            (__v2di)(__m128i)(B), (int)(imm)))
   1127 
   1128 #define _mm256_mask_inserti64x2(W, U, A, B, imm) \
   1129   ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
   1130                                    (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
   1131                                    (__v4di)(__m256i)(W)))
   1132 
   1133 #define _mm256_maskz_inserti64x2(U, A, B, imm) \
   1134   ((__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
   1135                                    (__v4di)_mm256_inserti64x2((A), (B), (imm)), \
   1136                                    (__v4di)_mm256_setzero_si256()))
   1137 
   1138 #define _mm_mask_fpclass_pd_mask(U, A, imm) \
   1139   ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
   1140                                               (__mmask8)(U)))
   1141 
   1142 #define _mm_fpclass_pd_mask(A, imm) \
   1143   ((__mmask8)__builtin_ia32_fpclasspd128_mask((__v2df)(__m128d)(A), (int)(imm), \
   1144                                               (__mmask8)-1))
   1145 
   1146 #define _mm256_mask_fpclass_pd_mask(U, A, imm) \
   1147   ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
   1148                                               (__mmask8)(U)))
   1149 
   1150 #define _mm256_fpclass_pd_mask(A, imm) \
   1151   ((__mmask8)__builtin_ia32_fpclasspd256_mask((__v4df)(__m256d)(A), (int)(imm), \
   1152                                               (__mmask8)-1))
   1153 
   1154 #define _mm_mask_fpclass_ps_mask(U, A, imm) \
   1155   ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
   1156                                               (__mmask8)(U)))
   1157 
   1158 #define _mm_fpclass_ps_mask(A, imm) \
   1159   ((__mmask8)__builtin_ia32_fpclassps128_mask((__v4sf)(__m128)(A), (int)(imm), \
   1160                                               (__mmask8)-1))
   1161 
   1162 #define _mm256_mask_fpclass_ps_mask(U, A, imm) \
   1163   ((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
   1164                                               (__mmask8)(U)))
   1165 
   1166 #define _mm256_fpclass_ps_mask(A, imm) \
   1167   ((__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
   1168                                               (__mmask8)-1))
   1169 
   1170 #undef __DEFAULT_FN_ATTRS128
   1171 #undef __DEFAULT_FN_ATTRS256
   1172 
   1173 #endif