zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

avx512bwintrin.h (77845B) - Raw


      1 /*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------===
      2  *
      3  *
      4  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      5  * See https://llvm.org/LICENSE.txt for license information.
      6  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      7  *
      8  *===-----------------------------------------------------------------------===
      9  */
     10 #ifndef __IMMINTRIN_H
     11 #error "Never use <avx512bwintrin.h> directly; include <immintrin.h> instead."
     12 #endif
     13 
     14 #ifndef __AVX512BWINTRIN_H
     15 #define __AVX512BWINTRIN_H
     16 
     /* Integer types used for the 32-bit and 64-bit mask operands and results
      * of the functions and macros in this header. */
     17 typedef unsigned int __mmask32;
     18 typedef unsigned long long __mmask64;
     19 
     20 /* Define the default attributes for the functions in this file. */
     /* __DEFAULT_FN_ATTRS512: always-inline, nodebug, target "avx512bw,evex512",
      * minimum vector width 512; applied to the _mm512_* functions below.
      * __DEFAULT_FN_ATTRS: always-inline, nodebug, target "avx512bw,no-evex512";
      * applied to the mask-only helpers (_k*, _cvt*, _load_*, _store_*). */
     21 #define __DEFAULT_FN_ATTRS512                                                  \
     22   __attribute__((__always_inline__, __nodebug__,                               \
     23                  __target__("avx512bw,evex512"), __min_vector_width__(512)))
     24 #define __DEFAULT_FN_ATTRS                                                     \
     25   __attribute__((__always_inline__, __nodebug__,                               \
     26                  __target__("avx512bw,no-evex512")))
     27 
     28 static __inline __mmask32 __DEFAULT_FN_ATTRS
     29 _knot_mask32(__mmask32 __M)
     30 {
     31   return __builtin_ia32_knotsi(__M);
     32 }
     33 
     34 static __inline __mmask64 __DEFAULT_FN_ATTRS _knot_mask64(__mmask64 __M) {
     35   return __builtin_ia32_knotdi(__M);
     36 }
     37 
     38 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
     39 _kand_mask32(__mmask32 __A, __mmask32 __B)
     40 {
     41   return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B);
     42 }
     43 
     44 static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kand_mask64(__mmask64 __A,
     45                                                             __mmask64 __B) {
     46   return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B);
     47 }
     48 
     49 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
     50 _kandn_mask32(__mmask32 __A, __mmask32 __B)
     51 {
     52   return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B);
     53 }
     54 
     55 static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kandn_mask64(__mmask64 __A,
     56                                                              __mmask64 __B) {
     57   return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B);
     58 }
     59 
     60 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
     61 _kor_mask32(__mmask32 __A, __mmask32 __B)
     62 {
     63   return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B);
     64 }
     65 
     66 static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kor_mask64(__mmask64 __A,
     67                                                            __mmask64 __B) {
     68   return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B);
     69 }
     70 
     71 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
     72 _kxnor_mask32(__mmask32 __A, __mmask32 __B)
     73 {
     74   return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B);
     75 }
     76 
     77 static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxnor_mask64(__mmask64 __A,
     78                                                              __mmask64 __B) {
     79   return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B);
     80 }
     81 
     82 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
     83 _kxor_mask32(__mmask32 __A, __mmask32 __B)
     84 {
     85   return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B);
     86 }
     87 
     88 static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxor_mask64(__mmask64 __A,
     89                                                             __mmask64 __B) {
     90   return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
     91 }
     92 
     93 static __inline__ unsigned char __DEFAULT_FN_ATTRS
     94 _kortestc_mask32_u8(__mmask32 __A, __mmask32 __B)
     95 {
     96   return (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
     97 }
     98 
     99 static __inline__ unsigned char __DEFAULT_FN_ATTRS
    100 _kortestz_mask32_u8(__mmask32 __A, __mmask32 __B)
    101 {
    102   return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
    103 }
    104 
    105 static __inline__ unsigned char __DEFAULT_FN_ATTRS
    106 _kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
    107   *__C = (unsigned char)__builtin_ia32_kortestcsi(__A, __B);
    108   return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
    109 }
    110 
    111 static __inline__ unsigned char __DEFAULT_FN_ATTRS
    112 _kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) {
    113   return (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
    114 }
    115 
    116 static __inline__ unsigned char __DEFAULT_FN_ATTRS
    117 _kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) {
    118   return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
    119 }
    120 
    121 static __inline__ unsigned char __DEFAULT_FN_ATTRS
    122 _kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
    123   *__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
    124   return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
    125 }
    126 
    127 static __inline__ unsigned char __DEFAULT_FN_ATTRS
    128 _ktestc_mask32_u8(__mmask32 __A, __mmask32 __B)
    129 {
    130   return (unsigned char)__builtin_ia32_ktestcsi(__A, __B);
    131 }
    132 
    133 static __inline__ unsigned char __DEFAULT_FN_ATTRS
    134 _ktestz_mask32_u8(__mmask32 __A, __mmask32 __B)
    135 {
    136   return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
    137 }
    138 
    139 static __inline__ unsigned char __DEFAULT_FN_ATTRS
    140 _ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
    141   *__C = (unsigned char)__builtin_ia32_ktestcsi(__A, __B);
    142   return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
    143 }
    144 
    145 static __inline__ unsigned char __DEFAULT_FN_ATTRS
    146 _ktestc_mask64_u8(__mmask64 __A, __mmask64 __B) {
    147   return (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
    148 }
    149 
    150 static __inline__ unsigned char __DEFAULT_FN_ATTRS
    151 _ktestz_mask64_u8(__mmask64 __A, __mmask64 __B) {
    152   return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
    153 }
    154 
    155 static __inline__ unsigned char __DEFAULT_FN_ATTRS
    156 _ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
    157   *__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
    158   return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
    159 }
    160 
    161 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
    162 _kadd_mask32(__mmask32 __A, __mmask32 __B)
    163 {
    164   return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B);
    165 }
    166 
    167 static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kadd_mask64(__mmask64 __A,
    168                                                             __mmask64 __B) {
    169   return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B);
    170 }
    171 
    /* Mask shift helpers. Each casts A to the mask type and I to unsigned int
     * and forwards them to the matching kshift builtin. They are macros rather
     * than functions, presumably because the shift count must be a
     * compile-time constant -- TODO confirm. */
    #define _kshiftli_mask32(A, I) ((__mmask32)__builtin_ia32_kshiftlisi((__mmask32)(A), (unsigned int)(I)))

    #define _kshiftri_mask32(A, I) ((__mmask32)__builtin_ia32_kshiftrisi((__mmask32)(A), (unsigned int)(I)))

    #define _kshiftli_mask64(A, I) ((__mmask64)__builtin_ia32_kshiftlidi((__mmask64)(A), (unsigned int)(I)))

    #define _kshiftri_mask64(A, I) ((__mmask64)__builtin_ia32_kshiftridi((__mmask64)(A), (unsigned int)(I)))
    183 
    184 static __inline__ unsigned int __DEFAULT_FN_ATTRS
    185 _cvtmask32_u32(__mmask32 __A) {
    186   return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A);
    187 }
    188 
    189 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
    190 _cvtmask64_u64(__mmask64 __A) {
    191   return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A);
    192 }
    193 
    194 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
    195 _cvtu32_mask32(unsigned int __A) {
    196   return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A);
    197 }
    198 
    199 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
    200 _cvtu64_mask64(unsigned long long __A) {
    201   return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A);
    202 }
    203 
    204 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
    205 _load_mask32(__mmask32 *__A) {
    206   return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A);
    207 }
    208 
    209 static __inline__ __mmask64 __DEFAULT_FN_ATTRS _load_mask64(__mmask64 *__A) {
    210   return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A);
    211 }
    212 
    213 static __inline__ void __DEFAULT_FN_ATTRS
    214 _store_mask32(__mmask32 *__A, __mmask32 __B) {
    215   *(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B);
    216 }
    217 
    218 static __inline__ void __DEFAULT_FN_ATTRS _store_mask64(__mmask64 *__A,
    219                                                         __mmask64 __B) {
    220   *(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B);
    221 }
    222 
    223 /* Integer compare */
    224 
    /* Compare of a and b (cast to __v64qi) through __builtin_ia32_cmpb512_mask
     * with predicate p, yielding a __mmask64. The plain form passes
     * (__mmask64)-1 (all bits set) as the mask operand; the _mask_ form passes
     * the caller's m. */
    #define _mm512_cmp_epi8_mask(a, b, p) \
      ((__mmask64)__builtin_ia32_cmpb512_mask( \
          (__v64qi)(__m512i)(a), (__v64qi)(__m512i)(b), (int)(p), (__mmask64)-1))

    #define _mm512_mask_cmp_epi8_mask(m, a, b, p) \
      ((__mmask64)__builtin_ia32_cmpb512_mask( \
          (__v64qi)(__m512i)(a), (__v64qi)(__m512i)(b), (int)(p), (__mmask64)(m)))
    234 
    /* Compare of a and b (cast to __v64qi) through __builtin_ia32_ucmpb512_mask
     * with predicate p, yielding a __mmask64. The plain form passes
     * (__mmask64)-1 (all bits set) as the mask operand; the _mask_ form passes
     * the caller's m. */
    #define _mm512_cmp_epu8_mask(a, b, p) \
      ((__mmask64)__builtin_ia32_ucmpb512_mask( \
          (__v64qi)(__m512i)(a), (__v64qi)(__m512i)(b), (int)(p), (__mmask64)-1))

    #define _mm512_mask_cmp_epu8_mask(m, a, b, p) \
      ((__mmask64)__builtin_ia32_ucmpb512_mask( \
          (__v64qi)(__m512i)(a), (__v64qi)(__m512i)(b), (int)(p), (__mmask64)(m)))
    244 
    /* Compare of a and b (cast to __v32hi) through __builtin_ia32_cmpw512_mask
     * with predicate p, yielding a __mmask32. The plain form passes
     * (__mmask32)-1 (all bits set) as the mask operand; the _mask_ form passes
     * the caller's m. */
    #define _mm512_cmp_epi16_mask(a, b, p) \
      ((__mmask32)__builtin_ia32_cmpw512_mask( \
          (__v32hi)(__m512i)(a), (__v32hi)(__m512i)(b), (int)(p), (__mmask32)-1))

    #define _mm512_mask_cmp_epi16_mask(m, a, b, p) \
      ((__mmask32)__builtin_ia32_cmpw512_mask( \
          (__v32hi)(__m512i)(a), (__v32hi)(__m512i)(b), (int)(p), (__mmask32)(m)))
    254 
    /* Compare of a and b (cast to __v32hi) through __builtin_ia32_ucmpw512_mask
     * with predicate p, yielding a __mmask32. The plain form passes
     * (__mmask32)-1 (all bits set) as the mask operand; the _mask_ form passes
     * the caller's m. */
    #define _mm512_cmp_epu16_mask(a, b, p) \
      ((__mmask32)__builtin_ia32_ucmpw512_mask( \
          (__v32hi)(__m512i)(a), (__v32hi)(__m512i)(b), (int)(p), (__mmask32)-1))

    #define _mm512_mask_cmp_epu16_mask(m, a, b, p) \
      ((__mmask32)__builtin_ia32_ucmpw512_mask( \
          (__v32hi)(__m512i)(a), (__v32hi)(__m512i)(b), (int)(p), (__mmask32)(m)))
    264 
    /* Named shortcuts over _mm512_cmp_epi8_mask / _mm512_mask_cmp_epi8_mask:
     * each fixes the predicate to one _MM_CMPINT_* constant (EQ, GE, GT, LE,
     * LT, NE). */
    #define _mm512_cmpeq_epi8_mask(A, B) _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_EQ)
    #define _mm512_mask_cmpeq_epi8_mask(k, A, B) _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_EQ)
    #define _mm512_cmpge_epi8_mask(A, B) _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GE)
    #define _mm512_mask_cmpge_epi8_mask(k, A, B) _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GE)
    #define _mm512_cmpgt_epi8_mask(A, B) _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_GT)
    #define _mm512_mask_cmpgt_epi8_mask(k, A, B) _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_GT)
    #define _mm512_cmple_epi8_mask(A, B) _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LE)
    #define _mm512_mask_cmple_epi8_mask(k, A, B) _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LE)
    #define _mm512_cmplt_epi8_mask(A, B) _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_LT)
    #define _mm512_mask_cmplt_epi8_mask(k, A, B) _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_LT)
    #define _mm512_cmpneq_epi8_mask(A, B) _mm512_cmp_epi8_mask((A), (B), _MM_CMPINT_NE)
    #define _mm512_mask_cmpneq_epi8_mask(k, A, B) _mm512_mask_cmp_epi8_mask((k), (A), (B), _MM_CMPINT_NE)
    289 
    /* Named shortcuts over _mm512_cmp_epu8_mask / _mm512_mask_cmp_epu8_mask:
     * each fixes the predicate to one _MM_CMPINT_* constant (EQ, GE, GT, LE,
     * LT, NE). */
    #define _mm512_cmpeq_epu8_mask(A, B) _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_EQ)
    #define _mm512_mask_cmpeq_epu8_mask(k, A, B) _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_EQ)
    #define _mm512_cmpge_epu8_mask(A, B) _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GE)
    #define _mm512_mask_cmpge_epu8_mask(k, A, B) _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GE)
    #define _mm512_cmpgt_epu8_mask(A, B) _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_GT)
    #define _mm512_mask_cmpgt_epu8_mask(k, A, B) _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_GT)
    #define _mm512_cmple_epu8_mask(A, B) _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LE)
    #define _mm512_mask_cmple_epu8_mask(k, A, B) _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LE)
    #define _mm512_cmplt_epu8_mask(A, B) _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_LT)
    #define _mm512_mask_cmplt_epu8_mask(k, A, B) _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_LT)
    #define _mm512_cmpneq_epu8_mask(A, B) _mm512_cmp_epu8_mask((A), (B), _MM_CMPINT_NE)
    #define _mm512_mask_cmpneq_epu8_mask(k, A, B) _mm512_mask_cmp_epu8_mask((k), (A), (B), _MM_CMPINT_NE)
    314 
    /* Named shortcuts over _mm512_cmp_epi16_mask / _mm512_mask_cmp_epi16_mask:
     * each fixes the predicate to one _MM_CMPINT_* constant (EQ, GE, GT, LE,
     * LT, NE). */
    #define _mm512_cmpeq_epi16_mask(A, B) _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_EQ)
    #define _mm512_mask_cmpeq_epi16_mask(k, A, B) _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_EQ)
    #define _mm512_cmpge_epi16_mask(A, B) _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GE)
    #define _mm512_mask_cmpge_epi16_mask(k, A, B) _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GE)
    #define _mm512_cmpgt_epi16_mask(A, B) _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_GT)
    #define _mm512_mask_cmpgt_epi16_mask(k, A, B) _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_GT)
    #define _mm512_cmple_epi16_mask(A, B) _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LE)
    #define _mm512_mask_cmple_epi16_mask(k, A, B) _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LE)
    #define _mm512_cmplt_epi16_mask(A, B) _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_LT)
    #define _mm512_mask_cmplt_epi16_mask(k, A, B) _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_LT)
    #define _mm512_cmpneq_epi16_mask(A, B) _mm512_cmp_epi16_mask((A), (B), _MM_CMPINT_NE)
    #define _mm512_mask_cmpneq_epi16_mask(k, A, B) _mm512_mask_cmp_epi16_mask((k), (A), (B), _MM_CMPINT_NE)
    339 
    /* Named shortcuts over _mm512_cmp_epu16_mask / _mm512_mask_cmp_epu16_mask:
     * each fixes the predicate to one _MM_CMPINT_* constant (EQ, GE, GT, LE,
     * LT, NE). */
    #define _mm512_cmpeq_epu16_mask(A, B) _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_EQ)
    #define _mm512_mask_cmpeq_epu16_mask(k, A, B) _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_EQ)
    #define _mm512_cmpge_epu16_mask(A, B) _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GE)
    #define _mm512_mask_cmpge_epu16_mask(k, A, B) _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GE)
    #define _mm512_cmpgt_epu16_mask(A, B) _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_GT)
    #define _mm512_mask_cmpgt_epu16_mask(k, A, B) _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_GT)
    #define _mm512_cmple_epu16_mask(A, B) _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LE)
    #define _mm512_mask_cmple_epu16_mask(k, A, B) _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LE)
    #define _mm512_cmplt_epu16_mask(A, B) _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_LT)
    #define _mm512_mask_cmplt_epu16_mask(k, A, B) _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_LT)
    #define _mm512_cmpneq_epu16_mask(A, B) _mm512_cmp_epu16_mask((A), (B), _MM_CMPINT_NE)
    #define _mm512_mask_cmpneq_epu16_mask(k, A, B) _mm512_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)
    364 
    365 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    366 _mm512_add_epi8 (__m512i __A, __m512i __B) {
    367   return (__m512i) ((__v64qu) __A + (__v64qu) __B);
    368 }
    369 
    370 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    371 _mm512_mask_add_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
    372   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
    373                                              (__v64qi)_mm512_add_epi8(__A, __B),
    374                                              (__v64qi)__W);
    375 }
    376 
    377 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    378 _mm512_maskz_add_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
    379   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
    380                                              (__v64qi)_mm512_add_epi8(__A, __B),
    381                                              (__v64qi)_mm512_setzero_si512());
    382 }
    383 
    384 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    385 _mm512_sub_epi8 (__m512i __A, __m512i __B) {
    386   return (__m512i) ((__v64qu) __A - (__v64qu) __B);
    387 }
    388 
    389 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    390 _mm512_mask_sub_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
    391   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
    392                                              (__v64qi)_mm512_sub_epi8(__A, __B),
    393                                              (__v64qi)__W);
    394 }
    395 
    396 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    397 _mm512_maskz_sub_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
    398   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
    399                                              (__v64qi)_mm512_sub_epi8(__A, __B),
    400                                              (__v64qi)_mm512_setzero_si512());
    401 }
    402 
    403 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    404 _mm512_add_epi16 (__m512i __A, __m512i __B) {
    405   return (__m512i) ((__v32hu) __A + (__v32hu) __B);
    406 }
    407 
    408 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    409 _mm512_mask_add_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
    410   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
    411                                              (__v32hi)_mm512_add_epi16(__A, __B),
    412                                              (__v32hi)__W);
    413 }
    414 
    415 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    416 _mm512_maskz_add_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
    417   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
    418                                              (__v32hi)_mm512_add_epi16(__A, __B),
    419                                              (__v32hi)_mm512_setzero_si512());
    420 }
    421 
    422 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    423 _mm512_sub_epi16 (__m512i __A, __m512i __B) {
    424   return (__m512i) ((__v32hu) __A - (__v32hu) __B);
    425 }
    426 
    427 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    428 _mm512_mask_sub_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
    429   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
    430                                              (__v32hi)_mm512_sub_epi16(__A, __B),
    431                                              (__v32hi)__W);
    432 }
    433 
    434 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    435 _mm512_maskz_sub_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
    436   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
    437                                              (__v32hi)_mm512_sub_epi16(__A, __B),
    438                                              (__v32hi)_mm512_setzero_si512());
    439 }
    440 
    441 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    442 _mm512_mullo_epi16 (__m512i __A, __m512i __B) {
    443   return (__m512i) ((__v32hu) __A * (__v32hu) __B);
    444 }
    445 
    446 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    447 _mm512_mask_mullo_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
    448   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
    449                                              (__v32hi)_mm512_mullo_epi16(__A, __B),
    450                                              (__v32hi)__W);
    451 }
    452 
    453 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    454 _mm512_maskz_mullo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
    455   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
    456                                              (__v32hi)_mm512_mullo_epi16(__A, __B),
    457                                              (__v32hi)_mm512_setzero_si512());
    458 }
    459 
    460 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    461 _mm512_mask_blend_epi8 (__mmask64 __U, __m512i __A, __m512i __W)
    462 {
    463   return (__m512i) __builtin_ia32_selectb_512 ((__mmask64) __U,
    464               (__v64qi) __W,
    465               (__v64qi) __A);
    466 }
    467 
    468 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    469 _mm512_mask_blend_epi16 (__mmask32 __U, __m512i __A, __m512i __W)
    470 {
    471   return (__m512i) __builtin_ia32_selectw_512 ((__mmask32) __U,
    472               (__v32hi) __W,
    473               (__v32hi) __A);
    474 }
    475 
    476 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    477 _mm512_abs_epi8 (__m512i __A)
    478 {
    479   return (__m512i)__builtin_elementwise_abs((__v64qs)__A);
    480 }
    481 
    482 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    483 _mm512_mask_abs_epi8 (__m512i __W, __mmask64 __U, __m512i __A)
    484 {
    485   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
    486                                              (__v64qi)_mm512_abs_epi8(__A),
    487                                              (__v64qi)__W);
    488 }
    489 
    490 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    491 _mm512_maskz_abs_epi8 (__mmask64 __U, __m512i __A)
    492 {
    493   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
    494                                              (__v64qi)_mm512_abs_epi8(__A),
    495                                              (__v64qi)_mm512_setzero_si512());
    496 }
    497 
    498 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    499 _mm512_abs_epi16 (__m512i __A)
    500 {
    501   return (__m512i)__builtin_elementwise_abs((__v32hi)__A);
    502 }
    503 
    504 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    505 _mm512_mask_abs_epi16 (__m512i __W, __mmask32 __U, __m512i __A)
    506 {
    507   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
    508                                              (__v32hi)_mm512_abs_epi16(__A),
    509                                              (__v32hi)__W);
    510 }
    511 
    512 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    513 _mm512_maskz_abs_epi16 (__mmask32 __U, __m512i __A)
    514 {
    515   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
    516                                              (__v32hi)_mm512_abs_epi16(__A),
    517                                              (__v32hi)_mm512_setzero_si512());
    518 }
    519 
    520 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    521 _mm512_packs_epi32(__m512i __A, __m512i __B)
    522 {
    523   return (__m512i)__builtin_ia32_packssdw512((__v16si)__A, (__v16si)__B);
    524 }
    525 
    526 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    527 _mm512_maskz_packs_epi32(__mmask32 __M, __m512i __A, __m512i __B)
    528 {
    529   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
    530                                        (__v32hi)_mm512_packs_epi32(__A, __B),
    531                                        (__v32hi)_mm512_setzero_si512());
    532 }
    533 
    534 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    535 _mm512_mask_packs_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
    536 {
    537   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
    538                                        (__v32hi)_mm512_packs_epi32(__A, __B),
    539                                        (__v32hi)__W);
    540 }
    541 
    542 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    543 _mm512_packs_epi16(__m512i __A, __m512i __B)
    544 {
    545   return (__m512i)__builtin_ia32_packsswb512((__v32hi)__A, (__v32hi) __B);
    546 }
    547 
    548 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    549 _mm512_mask_packs_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
    550 {
    551   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
    552                                         (__v64qi)_mm512_packs_epi16(__A, __B),
    553                                         (__v64qi)__W);
    554 }
    555 
    556 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    557 _mm512_maskz_packs_epi16(__mmask64 __M, __m512i __A, __m512i __B)
    558 {
    559   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
    560                                         (__v64qi)_mm512_packs_epi16(__A, __B),
    561                                         (__v64qi)_mm512_setzero_si512());
    562 }
    563 
    564 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    565 _mm512_packus_epi32(__m512i __A, __m512i __B)
    566 {
    567   return (__m512i)__builtin_ia32_packusdw512((__v16si) __A, (__v16si) __B);
    568 }
    569 
    570 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    571 _mm512_maskz_packus_epi32(__mmask32 __M, __m512i __A, __m512i __B)
    572 {
    573   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
    574                                        (__v32hi)_mm512_packus_epi32(__A, __B),
    575                                        (__v32hi)_mm512_setzero_si512());
    576 }
    577 
    578 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    579 _mm512_mask_packus_epi32(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
    580 {
    581   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
    582                                        (__v32hi)_mm512_packus_epi32(__A, __B),
    583                                        (__v32hi)__W);
    584 }
    585 
    586 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    587 _mm512_packus_epi16(__m512i __A, __m512i __B)
    588 {
    589   return (__m512i)__builtin_ia32_packuswb512((__v32hi) __A, (__v32hi) __B);
    590 }
    591 
    592 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    593 _mm512_mask_packus_epi16(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
    594 {
    595   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
    596                                         (__v64qi)_mm512_packus_epi16(__A, __B),
    597                                         (__v64qi)__W);
    598 }
    599 
    600 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    601 _mm512_maskz_packus_epi16(__mmask64 __M, __m512i __A, __m512i __B)
    602 {
    603   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
    604                                         (__v64qi)_mm512_packus_epi16(__A, __B),
    605                                         (__v64qi)_mm512_setzero_si512());
    606 }
    607 
    608 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    609 _mm512_adds_epi8 (__m512i __A, __m512i __B)
    610 {
    611   return (__m512i)__builtin_elementwise_add_sat((__v64qs)__A, (__v64qs)__B);
    612 }
    613 
    614 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    615 _mm512_mask_adds_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
    616 {
    617   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
    618                                         (__v64qi)_mm512_adds_epi8(__A, __B),
    619                                         (__v64qi)__W);
    620 }
    621 
    622 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    623 _mm512_maskz_adds_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
    624 {
    625   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
    626                                         (__v64qi)_mm512_adds_epi8(__A, __B),
    627                                         (__v64qi)_mm512_setzero_si512());
    628 }
    629 
    630 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    631 _mm512_adds_epi16 (__m512i __A, __m512i __B)
    632 {
    633   return (__m512i)__builtin_elementwise_add_sat((__v32hi)__A, (__v32hi)__B);
    634 }
    635 
    636 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    637 _mm512_mask_adds_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
    638 {
    639   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
    640                                         (__v32hi)_mm512_adds_epi16(__A, __B),
    641                                         (__v32hi)__W);
    642 }
    643 
    644 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    645 _mm512_maskz_adds_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
    646 {
    647   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
    648                                         (__v32hi)_mm512_adds_epi16(__A, __B),
    649                                         (__v32hi)_mm512_setzero_si512());
    650 }
    651 
    652 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    653 _mm512_adds_epu8 (__m512i __A, __m512i __B)
    654 {
    655   return (__m512i)__builtin_elementwise_add_sat((__v64qu) __A, (__v64qu) __B);
    656 }
    657 
    658 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    659 _mm512_mask_adds_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
    660 {
    661   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
    662                                         (__v64qi)_mm512_adds_epu8(__A, __B),
    663                                         (__v64qi)__W);
    664 }
    665 
    666 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    667 _mm512_maskz_adds_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
    668 {
    669   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
    670                                         (__v64qi)_mm512_adds_epu8(__A, __B),
    671                                         (__v64qi)_mm512_setzero_si512());
    672 }
    673 
    /* Saturating add: applies __builtin_elementwise_add_sat to __A and __B, both
       reinterpreted as __v32hu, and returns the result as __m512i. */
    674 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    675 _mm512_adds_epu16 (__m512i __A, __m512i __B)
    676 {
    677   return (__m512i)__builtin_elementwise_add_sat((__v32hu) __A, (__v32hu) __B);
    678 }
    679 
    /* Masked form of _mm512_adds_epu16: __builtin_ia32_selectw_512 chooses,
       under mask __U, between _mm512_adds_epu16(__A, __B) and __W. */
    680 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    681 _mm512_mask_adds_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
    682 {
    683   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
    684                                         (__v32hi)_mm512_adds_epu16(__A, __B),
    685                                         (__v32hi)__W);
    686 }
    687 
    /* Masked form of _mm512_adds_epu16 whose alternative operand is the vector
       returned by _mm512_setzero_si512() (selection via
       __builtin_ia32_selectw_512 under mask __U). */
    688 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    689 _mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
    690 {
    691   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
    692                                         (__v32hi)_mm512_adds_epu16(__A, __B),
    693                                         (__v32hi)_mm512_setzero_si512());
    694 }
    695 
    /* Thin wrapper: passes __A and __B, viewed as __v64qi, to
       __builtin_ia32_pavgb512 and returns the result as __m512i.
       NOTE(review): presumably the rounded unsigned byte average -- confirm
       against the Intel intrinsics guide. */
    696 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    697 _mm512_avg_epu8 (__m512i __A, __m512i __B)
    698 {
    699   return (__m512i)__builtin_ia32_pavgb512((__v64qi)__A, (__v64qi)__B);
    700 }
    701 
    /* Masked form of _mm512_avg_epu8: __builtin_ia32_selectb_512 chooses, under
       mask __U, between _mm512_avg_epu8(__A, __B) and __W. */
    702 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    703 _mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
    704           __m512i __B)
    705 {
    706   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
    707               (__v64qi)_mm512_avg_epu8(__A, __B),
    708               (__v64qi)__W);
    709 }
    710 
    /* Masked form of _mm512_avg_epu8 whose alternative operand is the vector
       returned by _mm512_setzero_si512() (selection via
       __builtin_ia32_selectb_512 under mask __U). */
    711 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    712 _mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
    713 {
    714   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
    715               (__v64qi)_mm512_avg_epu8(__A, __B),
    716               (__v64qi)_mm512_setzero_si512());
    717 }
    718 
    /* Thin wrapper: passes __A and __B, viewed as __v32hi, to
       __builtin_ia32_pavgw512 and returns the result as __m512i.
       NOTE(review): presumably the rounded unsigned 16-bit average -- confirm
       against the Intel intrinsics guide. */
    719 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    720 _mm512_avg_epu16 (__m512i __A, __m512i __B)
    721 {
    722   return (__m512i)__builtin_ia32_pavgw512((__v32hi)__A, (__v32hi)__B);
    723 }
    724 
    /* Masked form of _mm512_avg_epu16: __builtin_ia32_selectw_512 chooses, under
       mask __U, between _mm512_avg_epu16(__A, __B) and __W. */
    725 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    726 _mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
    727            __m512i __B)
    728 {
    729   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
    730               (__v32hi)_mm512_avg_epu16(__A, __B),
    731               (__v32hi)__W);
    732 }
    733 
    /* Masked form of _mm512_avg_epu16 whose alternative operand is the vector
       returned by _mm512_setzero_si512() (selection via
       __builtin_ia32_selectw_512 under mask __U). */
    734 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    735 _mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
    736 {
    737   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
    738               (__v32hi)_mm512_avg_epu16(__A, __B),
    739               (__v32hi) _mm512_setzero_si512());
    740 }
    741 
    /* Element-wise maximum: applies __builtin_elementwise_max to __A and __B,
       both reinterpreted as __v64qs, and returns the result as __m512i. */
    742 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    743 _mm512_max_epi8 (__m512i __A, __m512i __B)
    744 {
    745   return (__m512i)__builtin_elementwise_max((__v64qs) __A, (__v64qs) __B);
    746 }
    747 
    /* Masked form of _mm512_max_epi8 whose alternative operand is the vector
       returned by _mm512_setzero_si512() (selection via
       __builtin_ia32_selectb_512 under mask __M). */
    748 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    749 _mm512_maskz_max_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
    750 {
    751   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
    752                                              (__v64qi)_mm512_max_epi8(__A, __B),
    753                                              (__v64qi)_mm512_setzero_si512());
    754 }
    755 
    /* Masked form of _mm512_max_epi8: __builtin_ia32_selectb_512 chooses, under
       mask __M, between _mm512_max_epi8(__A, __B) and __W. */
    756 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    757 _mm512_mask_max_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
    758 {
    759   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
    760                                              (__v64qi)_mm512_max_epi8(__A, __B),
    761                                              (__v64qi)__W);
    762 }
    763 
    /* Element-wise maximum: applies __builtin_elementwise_max to __A and __B,
       both reinterpreted as __v32hi, and returns the result as __m512i. */
    764 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    765 _mm512_max_epi16 (__m512i __A, __m512i __B)
    766 {
    767   return (__m512i)__builtin_elementwise_max((__v32hi) __A, (__v32hi) __B);
    768 }
    769 
    /* Masked form of _mm512_max_epi16 whose alternative operand is the vector
       returned by _mm512_setzero_si512() (selection via
       __builtin_ia32_selectw_512 under mask __M). */
    770 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    771 _mm512_maskz_max_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
    772 {
    773   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
    774                                             (__v32hi)_mm512_max_epi16(__A, __B),
    775                                             (__v32hi)_mm512_setzero_si512());
    776 }
    777 
    /* Masked form of _mm512_max_epi16: __builtin_ia32_selectw_512 chooses, under
       mask __M, between _mm512_max_epi16(__A, __B) and __W. */
    778 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    779 _mm512_mask_max_epi16 (__m512i __W, __mmask32 __M, __m512i __A,
    780            __m512i __B)
    781 {
    782   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
    783                                             (__v32hi)_mm512_max_epi16(__A, __B),
    784                                             (__v32hi)__W);
    785 }
    786 
    /* Element-wise maximum: applies __builtin_elementwise_max to __A and __B,
       both reinterpreted as __v64qu, and returns the result as __m512i. */
    787 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    788 _mm512_max_epu8 (__m512i __A, __m512i __B)
    789 {
    790   return (__m512i)__builtin_elementwise_max((__v64qu)__A, (__v64qu)__B);
    791 }
    792 
    /* Masked form of _mm512_max_epu8 whose alternative operand is the vector
       returned by _mm512_setzero_si512() (selection via
       __builtin_ia32_selectb_512 under mask __M). */
    793 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    794 _mm512_maskz_max_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
    795 {
    796   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
    797                                              (__v64qi)_mm512_max_epu8(__A, __B),
    798                                              (__v64qi)_mm512_setzero_si512());
    799 }
    800 
    /* Masked form of _mm512_max_epu8: __builtin_ia32_selectb_512 chooses, under
       mask __M, between _mm512_max_epu8(__A, __B) and __W. */
    801 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    802 _mm512_mask_max_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
    803 {
    804   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
    805                                              (__v64qi)_mm512_max_epu8(__A, __B),
    806                                              (__v64qi)__W);
    807 }
    808 
    /* Element-wise maximum: applies __builtin_elementwise_max to __A and __B,
       both reinterpreted as __v32hu, and returns the result as __m512i. */
    809 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    810 _mm512_max_epu16 (__m512i __A, __m512i __B)
    811 {
    812   return (__m512i)__builtin_elementwise_max((__v32hu)__A, (__v32hu)__B);
    813 }
    814 
    /* Masked form of _mm512_max_epu16 whose alternative operand is the vector
       returned by _mm512_setzero_si512() (selection via
       __builtin_ia32_selectw_512 under mask __M). */
    815 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    816 _mm512_maskz_max_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
    817 {
    818   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
    819                                             (__v32hi)_mm512_max_epu16(__A, __B),
    820                                             (__v32hi)_mm512_setzero_si512());
    821 }
    822 
    /* Masked form of _mm512_max_epu16: __builtin_ia32_selectw_512 chooses, under
       mask __M, between _mm512_max_epu16(__A, __B) and __W. */
    823 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    824 _mm512_mask_max_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
    825 {
    826   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
    827                                             (__v32hi)_mm512_max_epu16(__A, __B),
    828                                             (__v32hi)__W);
    829 }
    830 
    /* Element-wise minimum: applies __builtin_elementwise_min to __A and __B,
       both reinterpreted as __v64qs, and returns the result as __m512i. */
    831 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    832 _mm512_min_epi8 (__m512i __A, __m512i __B)
    833 {
    834   return (__m512i)__builtin_elementwise_min((__v64qs) __A, (__v64qs) __B);
    835 }
    836 
    /* Masked form of _mm512_min_epi8 whose alternative operand is the vector
       returned by _mm512_setzero_si512() (selection via
       __builtin_ia32_selectb_512 under mask __M). */
    837 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    838 _mm512_maskz_min_epi8 (__mmask64 __M, __m512i __A, __m512i __B)
    839 {
    840   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
    841                                              (__v64qi)_mm512_min_epi8(__A, __B),
    842                                              (__v64qi)_mm512_setzero_si512());
    843 }
    844 
    /* Masked form of _mm512_min_epi8: __builtin_ia32_selectb_512 chooses, under
       mask __M, between _mm512_min_epi8(__A, __B) and __W. */
    845 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    846 _mm512_mask_min_epi8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
    847 {
    848   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
    849                                              (__v64qi)_mm512_min_epi8(__A, __B),
    850                                              (__v64qi)__W);
    851 }
    852 
    /* Element-wise minimum: applies __builtin_elementwise_min to __A and __B,
       both reinterpreted as __v32hi, and returns the result as __m512i. */
    853 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    854 _mm512_min_epi16 (__m512i __A, __m512i __B)
    855 {
    856   return (__m512i)__builtin_elementwise_min((__v32hi) __A, (__v32hi) __B);
    857 }
    858 
    /* Masked form of _mm512_min_epi16 whose alternative operand is the vector
       returned by _mm512_setzero_si512() (selection via
       __builtin_ia32_selectw_512 under mask __M). */
    859 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    860 _mm512_maskz_min_epi16 (__mmask32 __M, __m512i __A, __m512i __B)
    861 {
    862   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
    863                                             (__v32hi)_mm512_min_epi16(__A, __B),
    864                                             (__v32hi)_mm512_setzero_si512());
    865 }
    866 
    /* Masked form of _mm512_min_epi16: __builtin_ia32_selectw_512 chooses, under
       mask __M, between _mm512_min_epi16(__A, __B) and __W. */
    867 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    868 _mm512_mask_min_epi16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
    869 {
    870   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
    871                                             (__v32hi)_mm512_min_epi16(__A, __B),
    872                                             (__v32hi)__W);
    873 }
    874 
    /* Element-wise minimum: applies __builtin_elementwise_min to __A and __B,
       both reinterpreted as __v64qu, and returns the result as __m512i. */
    875 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    876 _mm512_min_epu8 (__m512i __A, __m512i __B)
    877 {
    878   return (__m512i)__builtin_elementwise_min((__v64qu)__A, (__v64qu)__B);
    879 }
    880 
    /* Masked form of _mm512_min_epu8 whose alternative operand is the vector
       returned by _mm512_setzero_si512() (selection via
       __builtin_ia32_selectb_512 under mask __M). */
    881 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    882 _mm512_maskz_min_epu8 (__mmask64 __M, __m512i __A, __m512i __B)
    883 {
    884   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
    885                                              (__v64qi)_mm512_min_epu8(__A, __B),
    886                                              (__v64qi)_mm512_setzero_si512());
    887 }
    888 
    /* Masked form of _mm512_min_epu8: __builtin_ia32_selectb_512 chooses, under
       mask __M, between _mm512_min_epu8(__A, __B) and __W. */
    889 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    890 _mm512_mask_min_epu8 (__m512i __W, __mmask64 __M, __m512i __A, __m512i __B)
    891 {
    892   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
    893                                              (__v64qi)_mm512_min_epu8(__A, __B),
    894                                              (__v64qi)__W);
    895 }
    896 
    /* Element-wise minimum: applies __builtin_elementwise_min to __A and __B,
       both reinterpreted as __v32hu, and returns the result as __m512i. */
    897 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    898 _mm512_min_epu16 (__m512i __A, __m512i __B)
    899 {
    900   return (__m512i)__builtin_elementwise_min((__v32hu)__A, (__v32hu)__B);
    901 }
    902 
    /* Masked form of _mm512_min_epu16 whose alternative operand is the vector
       returned by _mm512_setzero_si512() (selection via
       __builtin_ia32_selectw_512 under mask __M). */
    903 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    904 _mm512_maskz_min_epu16 (__mmask32 __M, __m512i __A, __m512i __B)
    905 {
    906   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
    907                                             (__v32hi)_mm512_min_epu16(__A, __B),
    908                                             (__v32hi)_mm512_setzero_si512());
    909 }
    910 
    /* Masked form of _mm512_min_epu16: __builtin_ia32_selectw_512 chooses, under
       mask __M, between _mm512_min_epu16(__A, __B) and __W. */
    911 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    912 _mm512_mask_min_epu16 (__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
    913 {
    914   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M,
    915                                             (__v32hi)_mm512_min_epu16(__A, __B),
    916                                             (__v32hi)__W);
    917 }
    918 
    /* Thin wrapper: passes __A and __B, viewed as __v64qi, to
       __builtin_ia32_pshufb512 and returns the result as __m512i.
       NOTE(review): presumably __B holds the byte-shuffle control for __A --
       confirm against the Intel intrinsics guide. */
    919 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    920 _mm512_shuffle_epi8(__m512i __A, __m512i __B)
    921 {
    922   return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B);
    923 }
    924 
    /* Masked form of _mm512_shuffle_epi8: __builtin_ia32_selectb_512 chooses,
       under mask __U, between _mm512_shuffle_epi8(__A, __B) and __W. */
    925 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    926 _mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
    927 {
    928   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
    929                                          (__v64qi)_mm512_shuffle_epi8(__A, __B),
    930                                          (__v64qi)__W);
    931 }
    932 
    /* Masked form of _mm512_shuffle_epi8 whose alternative operand is the vector
       returned by _mm512_setzero_si512() (selection via
       __builtin_ia32_selectb_512 under mask __U). */
    933 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    934 _mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B)
    935 {
    936   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
    937                                          (__v64qi)_mm512_shuffle_epi8(__A, __B),
    938                                          (__v64qi)_mm512_setzero_si512());
    939 }
    940 
    /* Saturating subtract: applies __builtin_elementwise_sub_sat to __A and __B,
       both reinterpreted as __v64qs, and returns the result as __m512i. */
    941 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    942 _mm512_subs_epi8 (__m512i __A, __m512i __B)
    943 {
    944   return (__m512i)__builtin_elementwise_sub_sat((__v64qs)__A, (__v64qs)__B);
    945 }
    946 
    /* Masked form of _mm512_subs_epi8: __builtin_ia32_selectb_512 chooses, under
       mask __U, between _mm512_subs_epi8(__A, __B) and __W. */
    947 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    948 _mm512_mask_subs_epi8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
    949 {
    950   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
    951                                         (__v64qi)_mm512_subs_epi8(__A, __B),
    952                                         (__v64qi)__W);
    953 }
    954 
    /* Masked form of _mm512_subs_epi8 whose alternative operand is the vector
       returned by _mm512_setzero_si512() (selection via
       __builtin_ia32_selectb_512 under mask __U). */
    955 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    956 _mm512_maskz_subs_epi8 (__mmask64 __U, __m512i __A, __m512i __B)
    957 {
    958   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
    959                                         (__v64qi)_mm512_subs_epi8(__A, __B),
    960                                         (__v64qi)_mm512_setzero_si512());
    961 }
    962 
    /* Saturating subtract: applies __builtin_elementwise_sub_sat to __A and __B,
       both reinterpreted as __v32hi, and returns the result as __m512i. */
    963 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    964 _mm512_subs_epi16 (__m512i __A, __m512i __B)
    965 {
    966   return (__m512i)__builtin_elementwise_sub_sat((__v32hi)__A, (__v32hi)__B);
    967 }
    968 
    /* Masked form of _mm512_subs_epi16: __builtin_ia32_selectw_512 chooses,
       under mask __U, between _mm512_subs_epi16(__A, __B) and __W. */
    969 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    970 _mm512_mask_subs_epi16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
    971 {
    972   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
    973                                         (__v32hi)_mm512_subs_epi16(__A, __B),
    974                                         (__v32hi)__W);
    975 }
    976 
    /* Masked form of _mm512_subs_epi16 whose alternative operand is the vector
       returned by _mm512_setzero_si512() (selection via
       __builtin_ia32_selectw_512 under mask __U). */
    977 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    978 _mm512_maskz_subs_epi16 (__mmask32 __U, __m512i __A, __m512i __B)
    979 {
    980   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
    981                                         (__v32hi)_mm512_subs_epi16(__A, __B),
    982                                         (__v32hi)_mm512_setzero_si512());
    983 }
    984 
    /* Saturating subtract: applies __builtin_elementwise_sub_sat to __A and __B,
       both reinterpreted as __v64qu, and returns the result as __m512i. */
    985 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    986 _mm512_subs_epu8 (__m512i __A, __m512i __B)
    987 {
    988   return (__m512i)__builtin_elementwise_sub_sat((__v64qu) __A, (__v64qu) __B);
    989 }
    990 
    /* Masked form of _mm512_subs_epu8: __builtin_ia32_selectb_512 chooses, under
       mask __U, between _mm512_subs_epu8(__A, __B) and __W. */
    991 static __inline__ __m512i __DEFAULT_FN_ATTRS512
    992 _mm512_mask_subs_epu8 (__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
    993 {
    994   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
    995                                         (__v64qi)_mm512_subs_epu8(__A, __B),
    996                                         (__v64qi)__W);
    997 }
    998 
    /* Masked form of _mm512_subs_epu8 whose alternative operand is the vector
       returned by _mm512_setzero_si512() (selection via
       __builtin_ia32_selectb_512 under mask __U). */
    999 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1000 _mm512_maskz_subs_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
   1001 {
   1002   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
   1003                                         (__v64qi)_mm512_subs_epu8(__A, __B),
   1004                                         (__v64qi)_mm512_setzero_si512());
   1005 }
   1006 
   /* Saturating subtract: applies __builtin_elementwise_sub_sat to __A and __B,
      both reinterpreted as __v32hu, and returns the result as __m512i. */
   1007 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1008 _mm512_subs_epu16 (__m512i __A, __m512i __B)
   1009 {
   1010   return (__m512i)__builtin_elementwise_sub_sat((__v32hu) __A, (__v32hu) __B);
   1011 }
   1012 
   /* Masked form of _mm512_subs_epu16: __builtin_ia32_selectw_512 chooses,
      under mask __U, between _mm512_subs_epu16(__A, __B) and __W. */
   1013 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1014 _mm512_mask_subs_epu16 (__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
   1015 {
   1016   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
   1017                                         (__v32hi)_mm512_subs_epu16(__A, __B),
   1018                                         (__v32hi)__W);
   1019 }
   1020 
   /* Masked form of _mm512_subs_epu16 whose alternative operand is the vector
      returned by _mm512_setzero_si512() (selection via
      __builtin_ia32_selectw_512 under mask __U). */
   1021 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1022 _mm512_maskz_subs_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
   1023 {
   1024   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
   1025                                         (__v32hi)_mm512_subs_epu16(__A, __B),
   1026                                         (__v32hi)_mm512_setzero_si512());
   1027 }
   1028 
   /* Thin wrapper: passes __A, __I and __B (in that order, each viewed as
      __v32hi) to __builtin_ia32_vpermi2varhi512 and returns the result.
      NOTE(review): presumably __I is the index vector picking 16-bit elements
      out of __A and __B -- confirm against the Intel intrinsics guide. */
   1029 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1030 _mm512_permutex2var_epi16(__m512i __A, __m512i __I, __m512i __B)
   1031 {
   1032   return (__m512i)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
   1033                                                  (__v32hi)__B);
   1034 }
   1035 
   /* Masked form of _mm512_permutex2var_epi16 in which the alternative operand
      of __builtin_ia32_selectw_512 is the first source, __A (there is no
      separate pass-through parameter). */
   1036 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1037 _mm512_mask_permutex2var_epi16(__m512i __A, __mmask32 __U, __m512i __I,
   1038                                __m512i __B)
   1039 {
   1040   return (__m512i)__builtin_ia32_selectw_512(__U,
   1041                               (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
   1042                               (__v32hi)__A);
   1043 }
   1044 
   /* Masked form of _mm512_permutex2var_epi16 in which the alternative operand
      of __builtin_ia32_selectw_512 is __I rather than __A; note the mask is the
      third parameter here. */
   1045 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1046 _mm512_mask2_permutex2var_epi16(__m512i __A, __m512i __I, __mmask32 __U,
   1047                                 __m512i __B)
   1048 {
   1049   return (__m512i)__builtin_ia32_selectw_512(__U,
   1050                               (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
   1051                               (__v32hi)__I);
   1052 }
   1053 
   /* Masked form of _mm512_permutex2var_epi16 whose alternative operand is the
      vector returned by _mm512_setzero_si512() (selection via
      __builtin_ia32_selectw_512 under mask __U). */
   1054 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1055 _mm512_maskz_permutex2var_epi16(__mmask32 __U, __m512i __A, __m512i __I,
   1056                                 __m512i __B)
   1057 {
   1058   return (__m512i)__builtin_ia32_selectw_512(__U,
   1059                               (__v32hi)_mm512_permutex2var_epi16(__A, __I, __B),
   1060                               (__v32hi)_mm512_setzero_si512());
   1061 }
   1062 
   /* Thin wrapper: passes __A and __B, viewed as __v32hi, to
      __builtin_ia32_pmulhrsw512 and returns the result as __m512i.
      NOTE(review): presumably the rounded, scaled high half of the signed
      16-bit product -- confirm against the Intel intrinsics guide. */
   1063 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1064 _mm512_mulhrs_epi16(__m512i __A, __m512i __B)
   1065 {
   1066   return (__m512i)__builtin_ia32_pmulhrsw512((__v32hi)__A, (__v32hi)__B);
   1067 }
   1068 
   /* Masked form of _mm512_mulhrs_epi16: __builtin_ia32_selectw_512 chooses,
      under mask __U, between _mm512_mulhrs_epi16(__A, __B) and __W. */
   1069 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1070 _mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
   1071 {
   1072   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
   1073                                          (__v32hi)_mm512_mulhrs_epi16(__A, __B),
   1074                                          (__v32hi)__W);
   1075 }
   1076 
   /* Masked form of _mm512_mulhrs_epi16 whose alternative operand is the vector
      returned by _mm512_setzero_si512() (selection via
      __builtin_ia32_selectw_512 under mask __U). */
   1077 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1078 _mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B)
   1079 {
   1080   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
   1081                                          (__v32hi)_mm512_mulhrs_epi16(__A, __B),
   1082                                          (__v32hi)_mm512_setzero_si512());
   1083 }
   1084 
   /* Thin wrapper: passes __A and __B, viewed as __v32hi, to
      __builtin_ia32_pmulhw512 and returns the result as __m512i.
      NOTE(review): presumably the high 16 bits of each signed 16x16 product --
      confirm against the Intel intrinsics guide. */
   1085 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1086 _mm512_mulhi_epi16(__m512i __A, __m512i __B)
   1087 {
   1088   return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B);
   1089 }
   1090 
   /* Masked form of _mm512_mulhi_epi16: __builtin_ia32_selectw_512 chooses,
      under mask __U, between _mm512_mulhi_epi16(__A, __B) and __W. */
   1091 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1092 _mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A,
   1093        __m512i __B)
   1094 {
   1095   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
   1096                                           (__v32hi)_mm512_mulhi_epi16(__A, __B),
   1097                                           (__v32hi)__W);
   1098 }
   1099 
   /* Masked form of _mm512_mulhi_epi16 whose alternative operand is the vector
      returned by _mm512_setzero_si512() (selection via
      __builtin_ia32_selectw_512 under mask __U). */
   1100 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1101 _mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B)
   1102 {
   1103   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
   1104                                           (__v32hi)_mm512_mulhi_epi16(__A, __B),
   1105                                           (__v32hi)_mm512_setzero_si512());
   1106 }
   1107 
   /* Thin wrapper: passes __A and __B, viewed as __v32hi, to
      __builtin_ia32_pmulhuw512 and returns the result as __m512i.
      NOTE(review): presumably the high 16 bits of each unsigned 16x16 product
      -- confirm against the Intel intrinsics guide. */
   1108 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1109 _mm512_mulhi_epu16(__m512i __A, __m512i __B)
   1110 {
   1111   return (__m512i)__builtin_ia32_pmulhuw512((__v32hi) __A, (__v32hi) __B);
   1112 }
   1113 
   /* Masked form of _mm512_mulhi_epu16: __builtin_ia32_selectw_512 chooses,
      under mask __U, between _mm512_mulhi_epu16(__A, __B) and __W. */
   1114 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1115 _mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
   1116 {
   1117   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
   1118                                           (__v32hi)_mm512_mulhi_epu16(__A, __B),
   1119                                           (__v32hi)__W);
   1120 }
   1121 
   /* Masked form of _mm512_mulhi_epu16 whose alternative operand is the vector
      returned by _mm512_setzero_si512() (selection via
      __builtin_ia32_selectw_512 under mask __U). */
   1122 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1123 _mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
   1124 {
   1125   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
   1126                                           (__v32hi)_mm512_mulhi_epu16(__A, __B),
   1127                                           (__v32hi)_mm512_setzero_si512());
   1128 }
   1129 
   /* Thin wrapper: passes __X and __Y, viewed as __v64qi, to
      __builtin_ia32_pmaddubsw512 and returns the result as __m512i.
      NOTE(review): presumably multiplies byte pairs and adds adjacent products
      into saturated 16-bit results -- confirm against the Intel guide. */
   1130 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1131 _mm512_maddubs_epi16(__m512i __X, __m512i __Y) {
   1132   return (__m512i)__builtin_ia32_pmaddubsw512((__v64qi)__X, (__v64qi)__Y);
   1133 }
   1134 
   /* Masked form of _mm512_maddubs_epi16: the result is treated as __v32hi and
      __builtin_ia32_selectw_512 chooses, under the 32-bit mask __U, between it
      and __W. */
   1135 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1136 _mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X,
   1137                           __m512i __Y) {
   1138   return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
   1139                                         (__v32hi)_mm512_maddubs_epi16(__X, __Y),
   1140                                         (__v32hi)__W);
   1141 }
   1142 
   /* Masked form of _mm512_maddubs_epi16 whose alternative operand is the
      vector returned by _mm512_setzero_si512() (selection via
      __builtin_ia32_selectw_512 under mask __U). */
   1143 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1144 _mm512_maskz_maddubs_epi16(__mmask32 __U, __m512i __X, __m512i __Y) {
   1145   return (__m512i)__builtin_ia32_selectw_512((__mmask32) __U,
   1146                                         (__v32hi)_mm512_maddubs_epi16(__X, __Y),
   1147                                         (__v32hi)_mm512_setzero_si512());
   1148 }
   1149 
   /* Thin wrapper: passes __A and __B, viewed as __v32hi, to
      __builtin_ia32_pmaddwd512 and returns the result as __m512i.
      NOTE(review): presumably multiplies 16-bit pairs and adds adjacent
      products into 32-bit results -- confirm against the Intel guide. */
   1150 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1151 _mm512_madd_epi16(__m512i __A, __m512i __B) {
   1152   return (__m512i)__builtin_ia32_pmaddwd512((__v32hi)__A, (__v32hi)__B);
   1153 }
   1154 
   /* Masked form of _mm512_madd_epi16.  The result is selected as __v16si with
      __builtin_ia32_selectd_512, which is why the mask is the 16-bit __mmask16
      rather than __mmask32; the alternative operand is __W. */
   1155 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1156 _mm512_mask_madd_epi16(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) {
   1157   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   1158                                            (__v16si)_mm512_madd_epi16(__A, __B),
   1159                                            (__v16si)__W);
   1160 }
   1161 
   /* Masked form of _mm512_madd_epi16 selected as __v16si under the 16-bit mask
      __U; the alternative operand is the vector returned by
      _mm512_setzero_si512(). */
   1162 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1163 _mm512_maskz_madd_epi16(__mmask16 __U, __m512i __A, __m512i __B) {
   1164   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
   1165                                            (__v16si)_mm512_madd_epi16(__A, __B),
   1166                                            (__v16si)_mm512_setzero_si512());
   1167 }
   1168 
   /* Calls __builtin_ia32_pmovswb512_mask on __A (viewed as __v32hi) with the
      _mm256_setzero_si256() vector as second operand and an all-ones mask,
      returning a 256-bit result.
      NOTE(review): presumably a signed-saturating 16-bit to 8-bit narrowing --
      confirm against the Intel intrinsics guide. */
   1169 static __inline__ __m256i __DEFAULT_FN_ATTRS512
   1170 _mm512_cvtsepi16_epi8 (__m512i __A) {
   1171   return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
   1172                (__v32qi)_mm256_setzero_si256(),
   1173                (__mmask32) -1);
   1174 }
   1175 
   /* Calls __builtin_ia32_pmovswb512_mask on __A (viewed as __v32hi) with __O
      as second operand and __M as the mask.
      NOTE(review): presumably __O supplies the result bytes not selected by
      __M -- confirm. */
   1176 static __inline__ __m256i __DEFAULT_FN_ATTRS512
   1177 _mm512_mask_cvtsepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
   1178   return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
   1179                (__v32qi)__O,
   1180                __M);
   1181 }
   1182 
   /* Calls __builtin_ia32_pmovswb512_mask on __A (viewed as __v32hi) with the
      _mm256_setzero_si256() vector as second operand and __M as the mask. */
   1183 static __inline__ __m256i __DEFAULT_FN_ATTRS512
   1184 _mm512_maskz_cvtsepi16_epi8 (__mmask32 __M, __m512i __A) {
   1185   return (__m256i) __builtin_ia32_pmovswb512_mask ((__v32hi) __A,
   1186                (__v32qi) _mm256_setzero_si256(),
   1187                __M);
   1188 }
   1189 
   /* Calls __builtin_ia32_pmovuswb512_mask on __A (viewed as __v32hi) with the
      _mm256_setzero_si256() vector as second operand and an all-ones mask,
      returning a 256-bit result.
      NOTE(review): presumably an unsigned-saturating 16-bit to 8-bit narrowing
      -- confirm against the Intel intrinsics guide. */
   1190 static __inline__ __m256i __DEFAULT_FN_ATTRS512
   1191 _mm512_cvtusepi16_epi8 (__m512i __A) {
   1192   return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
   1193                 (__v32qi) _mm256_setzero_si256(),
   1194                 (__mmask32) -1);
   1195 }
   1196 
   /* Calls __builtin_ia32_pmovuswb512_mask on __A (viewed as __v32hi) with __O
      as second operand and __M as the mask.
      NOTE(review): presumably __O supplies the result bytes not selected by
      __M -- confirm. */
   1197 static __inline__ __m256i __DEFAULT_FN_ATTRS512
   1198 _mm512_mask_cvtusepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
   1199   return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
   1200                 (__v32qi) __O,
   1201                 __M);
   1202 }
   1203 
   /* Calls __builtin_ia32_pmovuswb512_mask on __A (viewed as __v32hi) with the
      _mm256_setzero_si256() vector as second operand and __M as the mask. */
   1204 static __inline__ __m256i __DEFAULT_FN_ATTRS512
   1205 _mm512_maskz_cvtusepi16_epi8 (__mmask32 __M, __m512i __A) {
   1206   return (__m256i) __builtin_ia32_pmovuswb512_mask ((__v32hi) __A,
   1207                 (__v32qi) _mm256_setzero_si256(),
   1208                 __M);
   1209 }
   1210 
   /* Calls __builtin_ia32_pmovwb512_mask on __A (viewed as __v32hi) with an
      all-ones mask.  Unlike the saturating variants, the second operand here
      is _mm256_undefined_si256().
      NOTE(review): presumably a truncating 16-bit to 8-bit narrowing in which
      the second operand is never selected because the mask is all ones --
      confirm. */
   1211 static __inline__ __m256i __DEFAULT_FN_ATTRS512
   1212 _mm512_cvtepi16_epi8 (__m512i __A) {
   1213   return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
   1214               (__v32qi) _mm256_undefined_si256(),
   1215               (__mmask32) -1);
   1216 }
   1217 
   /* Calls __builtin_ia32_pmovwb512_mask on __A (viewed as __v32hi) with __O as
      second operand and __M as the mask.
      NOTE(review): presumably __O supplies the result bytes not selected by
      __M -- confirm. */
   1218 static __inline__ __m256i __DEFAULT_FN_ATTRS512
   1219 _mm512_mask_cvtepi16_epi8 (__m256i __O, __mmask32 __M, __m512i __A) {
   1220   return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
   1221               (__v32qi) __O,
   1222               __M);
   1223 }
   1224 
   /* Calls __builtin_ia32_pmovwb512_mask on __A (viewed as __v32hi) with the
      _mm256_setzero_si256() vector as second operand and __M as the mask. */
   1225 static __inline__ __m256i __DEFAULT_FN_ATTRS512
   1226 _mm512_maskz_cvtepi16_epi8 (__mmask32 __M, __m512i __A) {
   1227   return (__m256i) __builtin_ia32_pmovwb512_mask ((__v32hi) __A,
   1228               (__v32qi) _mm256_setzero_si256(),
   1229               __M);
   1230 }
   1231 
   /* Forwards __P (cast to __v32qi *), __A (viewed as __v32hi) and the mask __M
      to __builtin_ia32_pmovwb512mem_mask; nothing is returned.
      NOTE(review): presumably stores the narrowed bytes selected by __M to
      memory at __P with no alignment requirement -- confirm. */
   1232 static __inline__ void __DEFAULT_FN_ATTRS512
   1233 _mm512_mask_cvtepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
   1234 {
   1235   __builtin_ia32_pmovwb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
   1236 }
   1237 
   /* Forwards __P (cast to __v32qi *), __A (viewed as __v32hi) and the mask __M
      to __builtin_ia32_pmovswb512mem_mask; nothing is returned.
      NOTE(review): presumably the signed-saturating counterpart of
      _mm512_mask_cvtepi16_storeu_epi8 -- confirm. */
   1238 static __inline__ void __DEFAULT_FN_ATTRS512
   1239 _mm512_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
   1240 {
   1241   __builtin_ia32_pmovswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
   1242 }
   1243 
   /* Forwards __P (cast to __v32qi *), __A (viewed as __v32hi) and the mask __M
      to __builtin_ia32_pmovuswb512mem_mask; nothing is returned.
      NOTE(review): presumably the unsigned-saturating counterpart of
      _mm512_mask_cvtepi16_storeu_epi8 -- confirm. */
   1244 static __inline__ void __DEFAULT_FN_ATTRS512
   1245 _mm512_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask32 __M, __m512i __A)
   1246 {
   1247   __builtin_ia32_pmovuswb512mem_mask ((__v32qi *) __P, (__v32hi) __A, __M);
   1248 }
   1249 
   /* Interleaves bytes of __A and __B with __builtin_shufflevector.  The index
      list walks 8..15, 24..31, 40..47 and 56..63 (the upper eight bytes of each
      16-byte group) and pairs every index k with 64+k, i.e. the same position
      in the second shuffle operand, so the output alternates __A, __B. */
   1250 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1251 _mm512_unpackhi_epi8(__m512i __A, __m512i __B) {
   1252   return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B,
   1253                                           8,  64+8,   9, 64+9,
   1254                                           10, 64+10, 11, 64+11,
   1255                                           12, 64+12, 13, 64+13,
   1256                                           14, 64+14, 15, 64+15,
   1257                                           24, 64+24, 25, 64+25,
   1258                                           26, 64+26, 27, 64+27,
   1259                                           28, 64+28, 29, 64+29,
   1260                                           30, 64+30, 31, 64+31,
   1261                                           40, 64+40, 41, 64+41,
   1262                                           42, 64+42, 43, 64+43,
   1263                                           44, 64+44, 45, 64+45,
   1264                                           46, 64+46, 47, 64+47,
   1265                                           56, 64+56, 57, 64+57,
   1266                                           58, 64+58, 59, 64+59,
   1267                                           60, 64+60, 61, 64+61,
   1268                                           62, 64+62, 63, 64+63);
   1269 }
   1270 
   /* Masked form of _mm512_unpackhi_epi8: __builtin_ia32_selectb_512 chooses,
      under mask __U, between _mm512_unpackhi_epi8(__A, __B) and __W. */
   1271 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1272 _mm512_mask_unpackhi_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
   1273   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
   1274                                         (__v64qi)_mm512_unpackhi_epi8(__A, __B),
   1275                                         (__v64qi)__W);
   1276 }
   1277 
   /* Masked form of _mm512_unpackhi_epi8 whose alternative operand is the
      vector returned by _mm512_setzero_si512() (selection via
      __builtin_ia32_selectb_512 under mask __U). */
   1278 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1279 _mm512_maskz_unpackhi_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
   1280   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
   1281                                         (__v64qi)_mm512_unpackhi_epi8(__A, __B),
   1282                                         (__v64qi)_mm512_setzero_si512());
   1283 }
   1284 
   /* Interleaves elements of __A and __B, both viewed as __v32hi, with
      __builtin_shufflevector.  The index list walks 4..7, 12..15, 20..23 and
      28..31 (the upper four elements of each group of eight) and pairs every
      index k with 32+k, the same position in the second shuffle operand. */
   1285 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1286 _mm512_unpackhi_epi16(__m512i __A, __m512i __B) {
   1287   return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B,
   1288                                           4,  32+4,   5, 32+5,
   1289                                           6,  32+6,   7, 32+7,
   1290                                           12, 32+12, 13, 32+13,
   1291                                           14, 32+14, 15, 32+15,
   1292                                           20, 32+20, 21, 32+21,
   1293                                           22, 32+22, 23, 32+23,
   1294                                           28, 32+28, 29, 32+29,
   1295                                           30, 32+30, 31, 32+31);
   1296 }
   1297 
   /* Masked form of _mm512_unpackhi_epi16: __builtin_ia32_selectw_512 chooses,
      under mask __U, between _mm512_unpackhi_epi16(__A, __B) and __W. */
   1298 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1299 _mm512_mask_unpackhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
   1300   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
   1301                                        (__v32hi)_mm512_unpackhi_epi16(__A, __B),
   1302                                        (__v32hi)__W);
   1303 }
   1304 
   /* Masked form of _mm512_unpackhi_epi16 whose alternative operand is the
      vector returned by _mm512_setzero_si512() (selection via
      __builtin_ia32_selectw_512 under mask __U). */
   1305 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1306 _mm512_maskz_unpackhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
   1307   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
   1308                                        (__v32hi)_mm512_unpackhi_epi16(__A, __B),
   1309                                        (__v32hi)_mm512_setzero_si512());
   1310 }
   1311 
   /* Interleaves bytes of __A and __B with __builtin_shufflevector.  The index
      list walks 0..7, 16..23, 32..39 and 48..55 (the lower eight bytes of each
      16-byte group) and pairs every index k with 64+k, i.e. the same position
      in the second shuffle operand, so the output alternates __A, __B. */
   1312 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1313 _mm512_unpacklo_epi8(__m512i __A, __m512i __B) {
   1314   return (__m512i)__builtin_shufflevector((__v64qi)__A, (__v64qi)__B,
   1315                                           0,  64+0,   1, 64+1,
   1316                                           2,  64+2,   3, 64+3,
   1317                                           4,  64+4,   5, 64+5,
   1318                                           6,  64+6,   7, 64+7,
   1319                                           16, 64+16, 17, 64+17,
   1320                                           18, 64+18, 19, 64+19,
   1321                                           20, 64+20, 21, 64+21,
   1322                                           22, 64+22, 23, 64+23,
   1323                                           32, 64+32, 33, 64+33,
   1324                                           34, 64+34, 35, 64+35,
   1325                                           36, 64+36, 37, 64+37,
   1326                                           38, 64+38, 39, 64+39,
   1327                                           48, 64+48, 49, 64+49,
   1328                                           50, 64+50, 51, 64+51,
   1329                                           52, 64+52, 53, 64+53,
   1330                                           54, 64+54, 55, 64+55);
   1331 }
   1332 
   /* Masked form of _mm512_unpacklo_epi8: __builtin_ia32_selectb_512 chooses,
      under mask __U, between _mm512_unpacklo_epi8(__A, __B) and __W. */
   1333 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1334 _mm512_mask_unpacklo_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
   1335   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
   1336                                         (__v64qi)_mm512_unpacklo_epi8(__A, __B),
   1337                                         (__v64qi)__W);
   1338 }
   1339 
   /* Masked form of _mm512_unpacklo_epi8 whose alternative operand is the
      vector returned by _mm512_setzero_si512() (selection via
      __builtin_ia32_selectb_512 under mask __U). */
   1340 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1341 _mm512_maskz_unpacklo_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
   1342   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
   1343                                         (__v64qi)_mm512_unpacklo_epi8(__A, __B),
   1344                                         (__v64qi)_mm512_setzero_si512());
   1345 }
   1346 
   /* Interleaves elements of __A and __B, both viewed as __v32hi, with
      __builtin_shufflevector.  The index list walks 0..3, 8..11, 16..19 and
      24..27 (the lower four elements of each group of eight) and pairs every
      index k with 32+k, the same position in the second shuffle operand. */
   1347 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1348 _mm512_unpacklo_epi16(__m512i __A, __m512i __B) {
   1349   return (__m512i)__builtin_shufflevector((__v32hi)__A, (__v32hi)__B,
   1350                                           0,  32+0,   1, 32+1,
   1351                                           2,  32+2,   3, 32+3,
   1352                                           8,  32+8,   9, 32+9,
   1353                                           10, 32+10, 11, 32+11,
   1354                                           16, 32+16, 17, 32+17,
   1355                                           18, 32+18, 19, 32+19,
   1356                                           24, 32+24, 25, 32+25,
   1357                                           26, 32+26, 27, 32+27);
   1358 }
   1359 
   1360 /* Merge-masked form: select (by __U) between the unpack-low result and __W. */
   1361 static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi16(
   1362     __m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
   1363   __v32hi __res = (__v32hi)_mm512_unpacklo_epi16(__A, __B);
   1364   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __res, (__v32hi)__W);
   1365 }
   1366 
   1367 /* Zero-masked form: select (by __U) between unpack-low and setzero results. */
   1368 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1369 _mm512_maskz_unpacklo_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
   1370   __v32hi __res = (__v32hi)_mm512_unpacklo_epi16(__A, __B);
   1371   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __res, (__v32hi)_mm512_setzero_si512());
   1372 }
   1373 
   1374 /* Convert each 8-bit element of __A to a 16-bit element. The source is viewed
   1375    as __v32qs rather than __v32qi because plain char may be signed or
   1376    unsigned, and this conversion must always sign-extend. */
   1377 static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi8_epi16(__m256i __A) {
   1378   __v32qs __bytes = (__v32qs)__A;
   1379   return (__m512i)__builtin_convertvector(__bytes, __v32hi);
   1380 }
   1381 
   1382 /* Merge-masked form: select (by __U) between the converted vector and __W. */
   1383 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1384 _mm512_mask_cvtepi8_epi16(__m512i __W, __mmask32 __U, __m256i __A)
   1385 {
   1386   __v32hi __res = (__v32hi)_mm512_cvtepi8_epi16(__A);
   1387   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __res, (__v32hi)__W);
   1388 }
   1389 
   1390 /* Zero-masked form: select (by __U) between converted and setzero vectors. */
   1391 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1392 _mm512_maskz_cvtepi8_epi16(__mmask32 __U, __m256i __A)
   1393 {
   1394   __v32hi __res = (__v32hi)_mm512_cvtepi8_epi16(__A);
   1395   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __res, (__v32hi)_mm512_setzero_si512());
   1396 }
   1397 
   1398 /* Convert __A, viewed as __v32qu, element-wise to __v32hi. */
   1399 static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu8_epi16(__m256i __A) {
   1400   __v32qu __bytes = (__v32qu)__A;
   1401   return (__m512i)__builtin_convertvector(__bytes, __v32hi);
   1402 }
   1403 
   1404 /* Merge-masked form: select (by __U) between the converted vector and __W. */
   1405 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1406 _mm512_mask_cvtepu8_epi16(__m512i __W, __mmask32 __U, __m256i __A)
   1407 {
   1408   __v32hi __res = (__v32hi)_mm512_cvtepu8_epi16(__A);
   1409   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __res, (__v32hi)__W);
   1410 }
   1411 
   1412 /* Zero-masked form: select (by __U) between converted and setzero vectors. */
   1413 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1414 _mm512_maskz_cvtepu8_epi16(__mmask32 __U, __m256i __A)
   1415 {
   1416   __v32hi __res = (__v32hi)_mm512_cvtepu8_epi16(__A);
   1417   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __res, (__v32hi)_mm512_setzero_si512());
   1418 }
   1419 
   1420 
   1421 /* a is viewed as __v32hi; imm8 reaches pshufhw512 as an int. */
   1422 #define _mm512_shufflehi_epi16(a, imm8) ((__m512i)__builtin_ia32_pshufhw512((__v32hi)(__m512i)(a), (int)(imm8)))
   1423 
   1424 /* Select (by u) between the shufflehi result and w, both viewed as __v32hi. */
   1425 #define _mm512_mask_shufflehi_epi16(w, u, a, imm8) \
   1426   ((__m512i)__builtin_ia32_selectw_512( \
   1427       (__mmask32)(u), (__v32hi)_mm512_shufflehi_epi16((a), (imm8)), \
   1428       (__v32hi)(__m512i)(w)))
   1429 
   1430 /* Select (by u) between the shufflehi result and _mm512_setzero_si512(). */
   1431 #define _mm512_maskz_shufflehi_epi16(u, a, imm8) \
   1432   ((__m512i)__builtin_ia32_selectw_512( \
   1433       (__mmask32)(u), (__v32hi)_mm512_shufflehi_epi16((a), (imm8)), \
   1434       (__v32hi)_mm512_setzero_si512()))
   1435 
   1436 /* a is viewed as __v32hi; imm8 reaches pshuflw512 as an int. */
   1437 #define _mm512_shufflelo_epi16(a, imm8) ((__m512i)__builtin_ia32_pshuflw512((__v32hi)(__m512i)(a), (int)(imm8)))
   1438 
   1439 
   1440 /* Select (by u) between the shufflelo result and w, both viewed as __v32hi. */
   1441 #define _mm512_mask_shufflelo_epi16(w, u, a, imm8) \
   1442   ((__m512i)__builtin_ia32_selectw_512( \
   1443       (__mmask32)(u), (__v32hi)_mm512_shufflelo_epi16((a), (imm8)), \
   1444       (__v32hi)(__m512i)(w)))
   1445 
   1446 
   1447 /* Select (by u) between the shufflelo result and _mm512_setzero_si512(). */
   1448 #define _mm512_maskz_shufflelo_epi16(u, a, imm8) \
   1449   ((__m512i)__builtin_ia32_selectw_512( \
   1450       (__mmask32)(u), (__v32hi)_mm512_shufflelo_epi16((a), (imm8)), \
   1451       (__v32hi)_mm512_setzero_si512()))
   1452 
   1453 /* Thin wrapper: passes __A and __B, viewed as __v32hi, to psllv32hi. */
   1454 static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi16(__m512i __A, __m512i __B) {
   1455   __v32hi __val = (__v32hi)__A, __cnt = (__v32hi)__B;
   1456   return (__m512i)__builtin_ia32_psllv32hi(__val, __cnt);
   1457 }
   1458 
   1459 /* Merge-masked form: select (by __U) between the sllv result and __W. */
   1460 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1461 _mm512_mask_sllv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
   1462 {
   1463   __v32hi __res = (__v32hi)_mm512_sllv_epi16(__A, __B);
   1464   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __res, (__v32hi)__W);
   1465 }
   1466 
   1467 /* Zero-masked form: select (by __U) between the sllv and setzero results. */
   1468 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1469 _mm512_maskz_sllv_epi16(__mmask32 __U, __m512i __A, __m512i __B)
   1470 {
   1471   __v32hi __res = (__v32hi)_mm512_sllv_epi16(__A, __B);
   1472   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __res, (__v32hi)_mm512_setzero_si512());
   1473 }
   1474 
   1475 /* Thin wrapper: passes __A (as __v32hi) and __B (as __v8hi) to psllw512. */
   1476 static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi16(__m512i __A, __m128i __B) {
   1477   __v32hi __val = (__v32hi)__A;
   1478   return (__m512i)__builtin_ia32_psllw512(__val, (__v8hi)__B);
   1479 }
   1480 
   1481 /* Merge-masked form: select (by __U) between the sll result and __W. */
   1482 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1483 _mm512_mask_sll_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
   1484 {
   1485   __v32hi __res = (__v32hi)_mm512_sll_epi16(__A, __B);
   1486   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __res, (__v32hi)__W);
   1487 }
   1488 
   1489 /* Zero-masked form: select (by __U) between the sll and setzero results. */
   1490 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1491 _mm512_maskz_sll_epi16(__mmask32 __U, __m512i __A, __m128i __B)
   1492 {
   1493   __v32hi __res = (__v32hi)_mm512_sll_epi16(__A, __B);
   1494   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __res, (__v32hi)_mm512_setzero_si512());
   1495 }
   1496 
   1497 /* Thin wrapper: passes __A (as __v32hi) and __B (cast to int) to psllwi512. */
   1498 static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi16(__m512i __A, unsigned int __B) {
   1499   __v32hi __val = (__v32hi)__A;
   1500   return (__m512i)__builtin_ia32_psllwi512(__val, (int)__B);
   1501 }
   1502 
   1503 /* Merge-masked form: runs _mm512_slli_epi16(__A, __B), then selects (by __U)
   1504    between that result and __W. */
   1505 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1506 _mm512_mask_slli_epi16(__m512i __W, __mmask32 __U, __m512i __A, unsigned int __B)
   1507 {
   1508   __v32hi __res = (__v32hi)_mm512_slli_epi16(__A, __B);
   1509   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __res, (__v32hi)__W);
   1510 }
   1511 
   1512 /* Zero-masked form: select (by __U) between the slli and setzero results. */
   1513 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1514 _mm512_maskz_slli_epi16(__mmask32 __U, __m512i __A, unsigned int __B)
   1515 {
   1516   __v32hi __res = (__v32hi)_mm512_slli_epi16(__A, __B);
   1517   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __res, (__v32hi)_mm512_setzero_si512());
   1518 }
   1519 
   1520 /* v is viewed as __v8di; n reaches pslldqi512_byteshift as an int. */
   1521 #define _mm512_bslli_epi128(v, n) ((__m512i)__builtin_ia32_pslldqi512_byteshift((__v8di)(__m512i)(v), (int)(n)))
   1522 
   1523 /* Thin wrapper: passes __A and __B, viewed as __v32hi, to psrlv32hi. */
   1524 static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi16(__m512i __A, __m512i __B) {
   1525   __v32hi __val = (__v32hi)__A, __cnt = (__v32hi)__B;
   1526   return (__m512i)__builtin_ia32_psrlv32hi(__val, __cnt);
   1527 }
   1528 
   1529 /* Merge-masked form: select (by __U) between the srlv result and __W. */
   1530 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1531 _mm512_mask_srlv_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
   1532 {
   1533   __v32hi __res = (__v32hi)_mm512_srlv_epi16(__A, __B);
   1534   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __res, (__v32hi)__W);
   1535 }
   1536 
   1537 /* Zero-masked form: select (by __U) between the srlv and setzero results. */
   1538 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1539 _mm512_maskz_srlv_epi16(__mmask32 __U, __m512i __A, __m512i __B)
   1540 {
   1541   __v32hi __res = (__v32hi)_mm512_srlv_epi16(__A, __B);
   1542   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __res, (__v32hi)_mm512_setzero_si512());
   1543 }
   1544 
   1545 /* Thin wrapper: passes __A and __B, viewed as __v32hi, to psrav32hi. */
   1546 static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi16(__m512i __A, __m512i __B) {
   1547   __v32hi __val = (__v32hi)__A, __cnt = (__v32hi)__B;
   1548   return (__m512i)__builtin_ia32_psrav32hi(__val, __cnt);
   1549 }
   1550 
   1551 /* Merge-masked form: select (by __U) between the srav result and __W. */
   1552 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1553 _mm512_mask_srav_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
   1554 {
   1555   __v32hi __res = (__v32hi)_mm512_srav_epi16(__A, __B);
   1556   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __res, (__v32hi)__W);
   1557 }
   1558 
   1559 /* Zero-masked form: select (by __U) between the srav and setzero results. */
   1560 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1561 _mm512_maskz_srav_epi16(__mmask32 __U, __m512i __A, __m512i __B)
   1562 {
   1563   __v32hi __res = (__v32hi)_mm512_srav_epi16(__A, __B);
   1564   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __res, (__v32hi)_mm512_setzero_si512());
   1565 }
   1566 
   1567 /* Thin wrapper: passes __A (as __v32hi) and __B (as __v8hi) to psraw512. */
   1568 static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi16(__m512i __A, __m128i __B) {
   1569   __v32hi __val = (__v32hi)__A;
   1570   return (__m512i)__builtin_ia32_psraw512(__val, (__v8hi)__B);
   1571 }
   1572 
   1573 /* Merge-masked form: select (by __U) between the sra result and __W. */
   1574 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1575 _mm512_mask_sra_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
   1576 {
   1577   __v32hi __res = (__v32hi)_mm512_sra_epi16(__A, __B);
   1578   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __res, (__v32hi)__W);
   1579 }
   1580 
   1581 /* Zero-masked form: select (by __U) between the sra and setzero results. */
   1582 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1583 _mm512_maskz_sra_epi16(__mmask32 __U, __m512i __A, __m128i __B)
   1584 {
   1585   __v32hi __res = (__v32hi)_mm512_sra_epi16(__A, __B);
   1586   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __res, (__v32hi)_mm512_setzero_si512());
   1587 }
   1588 
   1589 /* Thin wrapper: passes __A (as __v32hi) and __B (cast to int) to psrawi512. */
   1590 static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi16(__m512i __A, unsigned int __B) {
   1591   __v32hi __val = (__v32hi)__A;
   1592   return (__m512i)__builtin_ia32_psrawi512(__val, (int)__B);
   1593 }
   1594 
   1595 /* Merge-masked form: runs _mm512_srai_epi16(__A, __B), then selects (by __U)
   1596    between that result and __W. */
   1597 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1598 _mm512_mask_srai_epi16(__m512i __W, __mmask32 __U, __m512i __A, unsigned int __B)
   1599 {
   1600   __v32hi __res = (__v32hi)_mm512_srai_epi16(__A, __B);
   1601   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __res, (__v32hi)__W);
   1602 }
   1603 
   1604 /* Zero-masked form: select (by __U) between the srai and setzero results. */
   1605 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1606 _mm512_maskz_srai_epi16(__mmask32 __U, __m512i __A, unsigned int __B)
   1607 {
   1608   __v32hi __res = (__v32hi)_mm512_srai_epi16(__A, __B);
   1609   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __res, (__v32hi)_mm512_setzero_si512());
   1610 }
   1611 
   1612 /* Thin wrapper: passes __A (as __v32hi) and __B (as __v8hi) to psrlw512. */
   1613 static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi16(__m512i __A, __m128i __B) {
   1614   __v32hi __val = (__v32hi)__A;
   1615   return (__m512i)__builtin_ia32_psrlw512(__val, (__v8hi)__B);
   1616 }
   1617 
   1618 /* Merge-masked form: select (by __U) between the srl result and __W. */
   1619 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1620 _mm512_mask_srl_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m128i __B)
   1621 {
   1622   __v32hi __res = (__v32hi)_mm512_srl_epi16(__A, __B);
   1623   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __res, (__v32hi)__W);
   1624 }
   1625 
   1626 /* Zero-masked form: select (by __U) between the srl and setzero results. */
   1627 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1628 _mm512_maskz_srl_epi16(__mmask32 __U, __m512i __A, __m128i __B)
   1629 {
   1630   __v32hi __res = (__v32hi)_mm512_srl_epi16(__A, __B);
   1631   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __res, (__v32hi)_mm512_setzero_si512());
   1632 }
   1633 
   1634 /* Thin wrapper: passes __A (as __v32hi) and __B (cast to int) to psrlwi512. */
   1635 static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srli_epi16(__m512i __A, unsigned int __B) {
   1636   __v32hi __val = (__v32hi)__A;
   1637   return (__m512i)__builtin_ia32_psrlwi512(__val, (int)__B);
   1638 }
   1639 
   1640 /* Merge-masked form: runs _mm512_srli_epi16(__A, __B), then selects (by __U)
   1641    between that result and __W. */
   1642 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1643 _mm512_mask_srli_epi16(__m512i __W, __mmask32 __U, __m512i __A, unsigned int __B)
   1644 {
   1645   __v32hi __res = (__v32hi)_mm512_srli_epi16(__A, __B);
   1646   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __res, (__v32hi)__W);
   1647 }
   1648 
   1649 /* Zero-masked form. __B is a signed int here; cast to unsigned int for srli. */
   1650 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1651 _mm512_maskz_srli_epi16(__mmask32 __U, __m512i __A, int __B)
   1652 {
   1653   __v32hi __res = (__v32hi)_mm512_srli_epi16(__A, (unsigned int)__B);
   1654   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __res, (__v32hi)_mm512_setzero_si512());
   1655 }
   1656 
   1657 /* v is viewed as __v8di; n reaches psrldqi512_byteshift as an int. */
   1658 #define _mm512_bsrli_epi128(v, n) ((__m512i)__builtin_ia32_psrldqi512_byteshift((__v8di)(__m512i)(v), (int)(n)))
   1659 
   1660 /* Select (by __U) between __A and __W, both viewed as __v32hi. */
   1661 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1662 _mm512_mask_mov_epi16(__m512i __W, __mmask32 __U, __m512i __A)
   1663 {
   1664   __v32hi __src = (__v32hi)__A;
   1665   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __src, (__v32hi)__W);
   1666 }
   1667 
   1668 /* Select (by __U) between __A and _mm512_setzero_si512(), as __v32hi. */
   1669 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1670 _mm512_maskz_mov_epi16(__mmask32 __U, __m512i __A)
   1671 {
   1672   __v32hi __src = (__v32hi)__A;
   1673   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U, __src, (__v32hi)_mm512_setzero_si512());
   1674 }
   1675 
   1676 /* Select (by __U) between __A and __W, both viewed as __v64qi. */
   1677 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1678 _mm512_mask_mov_epi8(__m512i __W, __mmask64 __U, __m512i __A)
   1679 {
   1680   __v64qi __src = (__v64qi)__A;
   1681   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, __src, (__v64qi)__W);
   1682 }
   1683 
   1684 /* Select (by __U) between __A and _mm512_setzero_si512(), as __v64qi. */
   1685 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1686 _mm512_maskz_mov_epi8(__mmask64 __U, __m512i __A)
   1687 {
   1688   __v64qi __src = (__v64qi)__A;
   1689   return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U, __src, (__v64qi)_mm512_setzero_si512());
   1690 }
   1691 
   1692 /* Select (by __M) between _mm512_set1_epi8(__A) and __O, as __v64qi. */
   1693 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1694 _mm512_mask_set1_epi8(__m512i __O, __mmask64 __M, char __A)
   1695 {
   1696   __v64qi __fill = (__v64qi)_mm512_set1_epi8(__A);
   1697   return (__m512i)__builtin_ia32_selectb_512(__M, __fill, (__v64qi)__O);
   1698 }
   1699 
   1700 /* Select (by __M) between _mm512_set1_epi8(__A) and _mm512_setzero_si512(). */
   1701 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1702 _mm512_maskz_set1_epi8(__mmask64 __M, char __A)
   1703 {
   1704   __v64qi __fill = (__v64qi)_mm512_set1_epi8(__A);
   1705   return (__m512i)__builtin_ia32_selectb_512(__M, __fill, (__v64qi)_mm512_setzero_si512());
   1706 }
   1707 
   1708 /* Forwards both 64-bit masks, in order, to __builtin_ia32_kunpckdi. */
   1709 static __inline__ __mmask64 __DEFAULT_FN_ATTRS
   1710 _mm512_kunpackd(__mmask64 __A, __mmask64 __B) {
   1711   return (__mmask64)__builtin_ia32_kunpckdi((__mmask64)__A, (__mmask64)__B);
   1712 }
   1713 
   1714 /* Forwards both 32-bit masks, in order, to __builtin_ia32_kunpcksi. */
   1715 static __inline__ __mmask32 __DEFAULT_FN_ATTRS
   1716 _mm512_kunpackw(__mmask32 __A, __mmask32 __B)
   1717 {
   1718   return (__mmask32)__builtin_ia32_kunpcksi((__mmask32)__A, (__mmask32)__B);
   1719 }
   1720 
   1721 /* Reads a __m512i_u from __P through a packed, may_alias wrapper struct. */
   1722 static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi16(void const *__P) {
   1723   struct __loadu_epi16 {
   1724     __m512i_u __v;
   1725   } __attribute__((__packed__, __may_alias__));
   1726   const struct __loadu_epi16 *__src = (const struct __loadu_epi16 *)__P;
   1727   return __src->__v;
   1728 }
   1729 
   1730 /* Calls loaddquhi512_mask with __P (as const __v32hi *), __W and mask __U. */
   1731 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1732 _mm512_mask_loadu_epi16(__m512i __W, __mmask32 __U, void const *__P)
   1733 {
   1734   const __v32hi *__src = (const __v32hi *)__P;
   1735   return (__m512i)__builtin_ia32_loaddquhi512_mask(__src, (__v32hi)__W, (__mmask32)__U);
   1736 }
   1737 
   1738 /* Calls loaddquhi512_mask with __P (as const __v32hi *), the result of
   1739    _mm512_setzero_si512() and the mask __U. */
   1740 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1741 _mm512_maskz_loadu_epi16(__mmask32 __U, void const *__P) {
   1742   const __v32hi *__src = (const __v32hi *)__P;
   1743   __v32hi __zero = (__v32hi)_mm512_setzero_si512();
   1744   return (__m512i)__builtin_ia32_loaddquhi512_mask(__src, __zero, (__mmask32)__U);
   1745 }
   1746 
   1747 /* Reads a __m512i_u from __P through a packed, may_alias wrapper struct. */
   1748 static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi8(void const *__P) {
   1749   struct __loadu_epi8 {
   1750     __m512i_u __v;
   1751   } __attribute__((__packed__, __may_alias__));
   1752   const struct __loadu_epi8 *__src = (const struct __loadu_epi8 *)__P;
   1753   return __src->__v;
   1754 }
   1755 
   1756 /* Calls loaddquqi512_mask with __P (as const __v64qi *), __W and mask __U. */
   1757 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1758 _mm512_mask_loadu_epi8(__m512i __W, __mmask64 __U, void const *__P)
   1759 {
   1760   const __v64qi *__src = (const __v64qi *)__P;
   1761   return (__m512i)__builtin_ia32_loaddquqi512_mask(__src, (__v64qi)__W, (__mmask64)__U);
   1762 }
   1763 
   1764 /* Calls loaddquqi512_mask with __P (as const __v64qi *), the result of
   1765    _mm512_setzero_si512() and the mask __U. */
   1766 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1767 _mm512_maskz_loadu_epi8(__mmask64 __U, void const *__P) {
   1768   const __v64qi *__src = (const __v64qi *)__P;
   1769   __v64qi __zero = (__v64qi)_mm512_setzero_si512();
   1770   return (__m512i)__builtin_ia32_loaddquqi512_mask(__src, __zero, (__mmask64)__U);
   1771 }
   1772 
   1773 /* Writes __A to __P through a packed, may_alias wrapper struct. */
   1774 static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi16(void *__P, __m512i __A) {
   1775   struct __storeu_epi16 {
   1776     __m512i_u __v;
   1777   } __attribute__((__packed__, __may_alias__));
   1778   struct __storeu_epi16 *__dst = (struct __storeu_epi16 *)__P;
   1779   __dst->__v = __A;
   1780 }
   1781 
   1782 /* Calls storedquhi512_mask with __P (as __v32hi *), __A and the mask __U. */
   1783 static __inline__ void __DEFAULT_FN_ATTRS512
   1784 _mm512_mask_storeu_epi16(void *__P, __mmask32 __U, __m512i __A)
   1785 {
   1786   __v32hi *__dst = (__v32hi *)__P;
   1787   __builtin_ia32_storedquhi512_mask(__dst, (__v32hi)__A, (__mmask32)__U);
   1788 }
   1789 
   1790 /* Writes __A to __P through a packed, may_alias wrapper struct. */
   1791 static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi8(void *__P, __m512i __A) {
   1792   struct __storeu_epi8 {
   1793     __m512i_u __v;
   1794   } __attribute__((__packed__, __may_alias__));
   1795   struct __storeu_epi8 *__dst = (struct __storeu_epi8 *)__P;
   1796   __dst->__v = __A;
   1797 }
   1798 
   1799 /* Calls storedquqi512_mask with __P (as __v64qi *), __A and the mask __U. */
   1800 static __inline__ void __DEFAULT_FN_ATTRS512
   1801 _mm512_mask_storeu_epi8(void *__P, __mmask64 __U, __m512i __A)
   1802 {
   1803   __v64qi *__dst = (__v64qi *)__P;
   1804   __builtin_ia32_storedquqi512_mask(__dst, (__v64qi)__A, (__mmask64)__U);
   1805 }
   1806 
   1807 /* ANDs __A with __B, then runs cmpneq_epi8 against _mm512_setzero_si512(). */
   1808 static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
   1809 _mm512_test_epi8_mask(__m512i __A, __m512i __B) {
   1810   __m512i __both = _mm512_and_epi32(__A, __B);
   1811   return _mm512_cmpneq_epi8_mask(__both, _mm512_setzero_si512());
   1812 }
   1813 
   1814 /* As the unmasked test, but through the __U-masked cmpneq_epi8 helper. */
   1815 static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
   1816 _mm512_mask_test_epi8_mask(__mmask64 __U, __m512i __A, __m512i __B) {
   1817   __m512i __both = _mm512_and_epi32(__A, __B);
   1818   return _mm512_mask_cmpneq_epi8_mask(__U, __both, _mm512_setzero_si512());
   1819 }
   1820 
   1821 /* ANDs __A with __B, then runs cmpneq_epi16 against _mm512_setzero_si512(). */
   1822 static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
   1823 _mm512_test_epi16_mask(__m512i __A, __m512i __B) {
   1824   __m512i __both = _mm512_and_epi32(__A, __B);
   1825   return _mm512_cmpneq_epi16_mask(__both, _mm512_setzero_si512());
   1826 }
   1827 
   1828 /* As the unmasked test, but through the __U-masked cmpneq_epi16 helper. */
   1829 static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
   1830 _mm512_mask_test_epi16_mask(__mmask32 __U, __m512i __A, __m512i __B) {
   1831   __m512i __both = _mm512_and_epi32(__A, __B);
   1832   return _mm512_mask_cmpneq_epi16_mask(__U, __both, _mm512_setzero_si512());
   1833 }
   1834 
   1835 /* ANDs __A with __B, then runs cmpeq_epi8 against _mm512_setzero_si512(). */
   1836 static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 _mm512_testn_epi8_mask(__m512i __A, __m512i __B) {
   1837   __m512i __both = _mm512_and_epi32(__A, __B);
   1838   return _mm512_cmpeq_epi8_mask(__both, _mm512_setzero_si512());
   1839 }
   1840 
   1841 /* As the unmasked testn, but through the __U-masked cmpeq_epi8 helper. */
   1842 static __inline__ __mmask64 __DEFAULT_FN_ATTRS512
   1843 _mm512_mask_testn_epi8_mask(__mmask64 __U, __m512i __A, __m512i __B) {
   1844   __m512i __both = _mm512_and_epi32(__A, __B);
   1845   return _mm512_mask_cmpeq_epi8_mask(__U, __both, _mm512_setzero_si512());
   1846 }
   1847 
   1848 /* ANDs __A with __B, then runs cmpeq_epi16 against _mm512_setzero_si512(). */
   1849 static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
   1850 _mm512_testn_epi16_mask(__m512i __A, __m512i __B) {
   1851   __m512i __both = _mm512_and_epi32(__A, __B);
   1852   return _mm512_cmpeq_epi16_mask(__both, _mm512_setzero_si512());
   1853 }
   1854 
   1855 /* As the unmasked testn, but through the __U-masked cmpeq_epi16 helper. */
   1856 static __inline__ __mmask32 __DEFAULT_FN_ATTRS512
   1857 _mm512_mask_testn_epi16_mask(__mmask32 __U, __m512i __A, __m512i __B) {
   1858   __m512i __both = _mm512_and_epi32(__A, __B);
   1859   return _mm512_mask_cmpeq_epi16_mask(__U, __both, _mm512_setzero_si512());
   1860 }
   1861 
   1862 /* Thin wrapper: passes __A, viewed as __v64qi, to cvtb2mask512. */
   1863 static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 _mm512_movepi8_mask(__m512i __A) {
   1864   __v64qi __bytes = (__v64qi)__A;
   1865   return (__mmask64)__builtin_ia32_cvtb2mask512(__bytes);
   1866 }
   1867 
   1868 /* Thin wrapper: passes __A, viewed as __v32hi, to cvtw2mask512. */
   1869 static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 _mm512_movepi16_mask(__m512i __A) {
   1870   __v32hi __words = (__v32hi)__A;
   1871   return (__mmask32)__builtin_ia32_cvtw2mask512(__words);
   1872 }
   1873 
   1874 /* Thin wrapper: returns cvtmask2b512(__A) reinterpreted as __m512i. */
   1875 static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_movm_epi8(__mmask64 __A) {
   1876   __m512i __res = (__m512i)__builtin_ia32_cvtmask2b512(__A);
   1877   return __res;
   1878 }
   1879 
   1880 /* Thin wrapper: returns cvtmask2w512(__A) reinterpreted as __m512i. */
   1881 static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_movm_epi16(__mmask32 __A) {
   1882   __m512i __res = (__m512i)__builtin_ia32_cvtmask2w512(__A);
   1883   return __res;
   1884 }
   1885 
   1886 /* All 64 shuffle indices are 0, so every result byte is element 0 of __A. */
   1887 static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastb_epi8(__m128i __A) {
   1888   __v16qi __src = (__v16qi)__A;
   1889   return (__m512i)__builtin_shufflevector(__src, __src,
   1890       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   1891       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   1892       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   1893       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
   1894 }
   1895 
   1896 /* Select (by __M) between _mm512_broadcastb_epi8(__A) and __O, as __v64qi. */
   1897 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1898 _mm512_mask_broadcastb_epi8(__m512i __O, __mmask64 __M, __m128i __A)
   1899 {
   1900   __v64qi __fill = (__v64qi)_mm512_broadcastb_epi8(__A);
   1901   return (__m512i)__builtin_ia32_selectb_512(__M, __fill, (__v64qi)__O);
   1902 }
   1903 
   1904 /* Select (by __M) between the broadcastb result and _mm512_setzero_si512(). */
   1905 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1906 _mm512_maskz_broadcastb_epi8(__mmask64 __M, __m128i __A)
   1907 {
   1908   __v64qi __fill = (__v64qi)_mm512_broadcastb_epi8(__A);
   1909   return (__m512i)__builtin_ia32_selectb_512(__M, __fill, (__v64qi)_mm512_setzero_si512());
   1910 }
   1911 
   1912 /* Select (by __M) between _mm512_set1_epi16(__A) and __O, as __v32hi. */
   1913 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1914 _mm512_mask_set1_epi16(__m512i __O, __mmask32 __M, short __A)
   1915 {
   1916   __v32hi __fill = (__v32hi)_mm512_set1_epi16(__A);
   1917   return (__m512i)__builtin_ia32_selectw_512(__M, __fill, (__v32hi)__O);
   1918 }
   1919 
   1920 /* Select (by __M) between _mm512_set1_epi16(__A) and _mm512_setzero_si512(). */
   1921 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1922 _mm512_maskz_set1_epi16(__mmask32 __M, short __A)
   1923 {
   1924   __v32hi __fill = (__v32hi)_mm512_set1_epi16(__A);
   1925   return (__m512i)__builtin_ia32_selectw_512(__M, __fill, (__v32hi)_mm512_setzero_si512());
   1926 }
   1927 
   1928 /* All 32 shuffle indices are 0, so every 16-bit result is element 0 of __A. */
   1929 static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastw_epi16(__m128i __A) {
   1930   __v8hi __src = (__v8hi)__A;
   1931   return (__m512i)__builtin_shufflevector(__src, __src,
   1932       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
   1933       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
   1934 }
   1935 
   1936 /* Select (by __M) between _mm512_broadcastw_epi16(__A) and __O, as __v32hi. */
   1937 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1938 _mm512_mask_broadcastw_epi16(__m512i __O, __mmask32 __M, __m128i __A)
   1939 {
   1940   __v32hi __fill = (__v32hi)_mm512_broadcastw_epi16(__A);
   1941   return (__m512i)__builtin_ia32_selectw_512(__M, __fill, (__v32hi)__O);
   1942 }
   1943 
   1944 /* Select (by __M) between the broadcastw result and _mm512_setzero_si512(). */
   1945 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1946 _mm512_maskz_broadcastw_epi16(__mmask32 __M, __m128i __A)
   1947 {
   1948   __v32hi __fill = (__v32hi)_mm512_broadcastw_epi16(__A);
   1949   return (__m512i)__builtin_ia32_selectw_512(__M, __fill, (__v32hi)_mm512_setzero_si512());
   1950 }
   1951 
   1952 /* Note the operand order: permvarhi512 receives __B first and __A second. */
   1953 static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi16(__m512i __A, __m512i __B) {
   1954   __v32hi __first = (__v32hi)__B, __second = (__v32hi)__A;
   1955   return (__m512i)__builtin_ia32_permvarhi512(__first, __second);
   1956 }
   1957 
   1958 /* Zero-masked form: runs _mm512_permutexvar_epi16(__A, __B), then selects
   1959    (by __M) between that result and _mm512_setzero_si512(). */
   1960 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1961 _mm512_maskz_permutexvar_epi16(__mmask32 __M, __m512i __A, __m512i __B)
   1962 {
   1963   __v32hi __res = (__v32hi)_mm512_permutexvar_epi16(__A, __B);
   1964   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, __res, (__v32hi)_mm512_setzero_si512());
   1965 }
   1966 
   1967 /* Merge-masked form: runs _mm512_permutexvar_epi16(__A, __B), then selects
   1968    (by __M) between that result and __W. */
   1969 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   1970 _mm512_mask_permutexvar_epi16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B)
   1971 {
   1972   __v32hi __res = (__v32hi)_mm512_permutexvar_epi16(__A, __B);
   1973   return (__m512i)__builtin_ia32_selectw_512((__mmask32)__M, __res, (__v32hi)__W);
   1974 }
   1975 
   1976 /* a and b are viewed as __v64qi; n reaches palignr512 as an int. */
   1977 #define _mm512_alignr_epi8(a, b, n) \
   1978   ((__m512i)__builtin_ia32_palignr512((__v64qi)(__m512i)(a), (__v64qi)(__m512i)(b), (int)(n)))
   1979 
   1980 /* Select (by u) between the alignr result and w, both viewed as __v64qi. */
   1981 #define _mm512_mask_alignr_epi8(w, u, a, b, n) \
   1982   ((__m512i)__builtin_ia32_selectb_512((__mmask64)(u), \
   1983       (__v64qi)_mm512_alignr_epi8((a), (b), (int)(n)), (__v64qi)(__m512i)(w)))
   1984 
   1985 /* Select (by u) between the alignr result and _mm512_setzero_si512(). */
   1986 #define _mm512_maskz_alignr_epi8(u, a, b, n) \
   1987   ((__m512i)__builtin_ia32_selectb_512((__mmask64)(u), \
   1988       (__v64qi)_mm512_alignr_epi8((a), (b), (int)(n)), (__v64qi)(__m512i)_mm512_setzero_si512()))
   1989 
   1990 /* a and b are viewed as __v64qi; imm8 reaches dbpsadbw512 as an int. */
   1991 #define _mm512_dbsad_epu8(a, b, imm8) \
   1992   ((__m512i)__builtin_ia32_dbpsadbw512((__v64qi)(__m512i)(a), (__v64qi)(__m512i)(b), (int)(imm8)))
   1993 
   1994 /* Select (by u) between the dbsad result and w, both viewed as __v32hi. */
   1995 #define _mm512_mask_dbsad_epu8(w, u, a, b, imm8) \
   1996   ((__m512i)__builtin_ia32_selectw_512((__mmask32)(u), \
   1997       (__v32hi)_mm512_dbsad_epu8((a), (b), (imm8)), (__v32hi)(__m512i)(w)))
   1998 
   1999 /* Select (by u) between the dbsad result and _mm512_setzero_si512(). */
   2000 #define _mm512_maskz_dbsad_epu8(u, a, b, imm8) \
   2001   ((__m512i)__builtin_ia32_selectw_512((__mmask32)(u), \
   2002       (__v32hi)_mm512_dbsad_epu8((a), (b), (imm8)), (__v32hi)_mm512_setzero_si512()))
   2003 
   2004 /* Thin wrapper: passes __A and __B, viewed as __v64qi, to psadbw512. */
   2005 static __inline__ __m512i __DEFAULT_FN_ATTRS512
   2006 _mm512_sad_epu8(__m512i __A, __m512i __B) {
   2007   __v64qi __lhs = (__v64qi)__A, __rhs = (__v64qi)__B;
   2008   return (__m512i)__builtin_ia32_psadbw512(__lhs, __rhs);
   2009 }
   2010 
   2011 #undef __DEFAULT_FN_ATTRS512 /* attribute helper #defined near the top of this header */
   2012 #undef __DEFAULT_FN_ATTRS /* attribute helper #defined near the top of this header */
   2013 
   2014 #endif