zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

avx512vbmi2intrin.h (13493B) - Raw


      1 /*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------===
      2  *
      3  *
      4  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      5  * See https://llvm.org/LICENSE.txt for license information.
      6  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      7  *
      8  *===-----------------------------------------------------------------------===
      9  */
     10 #ifndef __IMMINTRIN_H
     11 #error "Never use <avx512vbmi2intrin.h> directly; include <immintrin.h> instead."
     12 #endif
     13 
     14 #ifndef __AVX512VBMI2INTRIN_H
     15 #define __AVX512VBMI2INTRIN_H
     16 
     /* Attribute set shared by every inline function defined in this header:
      * __always_inline__, __nodebug__, target features "avx512vbmi2,evex512"
      * and a minimum vector width of 512. */
     #define __DEFAULT_FN_ATTRS \
       __attribute__((__always_inline__, __nodebug__, \
                      __target__("avx512vbmi2,evex512"), \
                      __min_vector_width__(512)))
     19 
     20 
     21 static __inline__ __m512i __DEFAULT_FN_ATTRS
     22 _mm512_mask_compress_epi16(__m512i __S, __mmask32 __U, __m512i __D)
     23 {
     24   return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D,
     25               (__v32hi) __S,
     26               __U);
     27 }
     28 
     29 static __inline__ __m512i __DEFAULT_FN_ATTRS
     30 _mm512_maskz_compress_epi16(__mmask32 __U, __m512i __D)
     31 {
     32   return (__m512i) __builtin_ia32_compresshi512_mask ((__v32hi) __D,
     33               (__v32hi) _mm512_setzero_si512(),
     34               __U);
     35 }
     36 
     37 static __inline__ __m512i __DEFAULT_FN_ATTRS
     38 _mm512_mask_compress_epi8(__m512i __S, __mmask64 __U, __m512i __D)
     39 {
     40   return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D,
     41               (__v64qi) __S,
     42               __U);
     43 }
     44 
     45 static __inline__ __m512i __DEFAULT_FN_ATTRS
     46 _mm512_maskz_compress_epi8(__mmask64 __U, __m512i __D)
     47 {
     48   return (__m512i) __builtin_ia32_compressqi512_mask ((__v64qi) __D,
     49               (__v64qi) _mm512_setzero_si512(),
     50               __U);
     51 }
     52 
     53 static __inline__ void __DEFAULT_FN_ATTRS
     54 _mm512_mask_compressstoreu_epi16(void *__P, __mmask32 __U, __m512i __D)
     55 {
     56   __builtin_ia32_compressstorehi512_mask ((__v32hi *) __P, (__v32hi) __D,
     57               __U);
     58 }
     59 
     60 static __inline__ void __DEFAULT_FN_ATTRS
     61 _mm512_mask_compressstoreu_epi8(void *__P, __mmask64 __U, __m512i __D)
     62 {
     63   __builtin_ia32_compressstoreqi512_mask ((__v64qi *) __P, (__v64qi) __D,
     64               __U);
     65 }
     66 
     67 static __inline__ __m512i __DEFAULT_FN_ATTRS
     68 _mm512_mask_expand_epi16(__m512i __S, __mmask32 __U, __m512i __D)
     69 {
     70   return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D,
     71               (__v32hi) __S,
     72               __U);
     73 }
     74 
     75 static __inline__ __m512i __DEFAULT_FN_ATTRS
     76 _mm512_maskz_expand_epi16(__mmask32 __U, __m512i __D)
     77 {
     78   return (__m512i) __builtin_ia32_expandhi512_mask ((__v32hi) __D,
     79               (__v32hi) _mm512_setzero_si512(),
     80               __U);
     81 }
     82 
     83 static __inline__ __m512i __DEFAULT_FN_ATTRS
     84 _mm512_mask_expand_epi8(__m512i __S, __mmask64 __U, __m512i __D)
     85 {
     86   return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D,
     87               (__v64qi) __S,
     88               __U);
     89 }
     90 
     91 static __inline__ __m512i __DEFAULT_FN_ATTRS
     92 _mm512_maskz_expand_epi8(__mmask64 __U, __m512i __D)
     93 {
     94   return (__m512i) __builtin_ia32_expandqi512_mask ((__v64qi) __D,
     95               (__v64qi) _mm512_setzero_si512(),
     96               __U);
     97 }
     98 
     99 static __inline__ __m512i __DEFAULT_FN_ATTRS
    100 _mm512_mask_expandloadu_epi16(__m512i __S, __mmask32 __U, void const *__P)
    101 {
    102   return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P,
    103               (__v32hi) __S,
    104               __U);
    105 }
    106 
    107 static __inline__ __m512i __DEFAULT_FN_ATTRS
    108 _mm512_maskz_expandloadu_epi16(__mmask32 __U, void const *__P)
    109 {
    110   return (__m512i) __builtin_ia32_expandloadhi512_mask ((const __v32hi *)__P,
    111               (__v32hi) _mm512_setzero_si512(),
    112               __U);
    113 }
    114 
    115 static __inline__ __m512i __DEFAULT_FN_ATTRS
    116 _mm512_mask_expandloadu_epi8(__m512i __S, __mmask64 __U, void const *__P)
    117 {
    118   return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P,
    119               (__v64qi) __S,
    120               __U);
    121 }
    122 
    123 static __inline__ __m512i __DEFAULT_FN_ATTRS
    124 _mm512_maskz_expandloadu_epi8(__mmask64 __U, void const *__P)
    125 {
    126   return (__m512i) __builtin_ia32_expandloadqi512_mask ((const __v64qi *)__P,
    127               (__v64qi) _mm512_setzero_si512(),
    128               __U);
    129 }
    130 
    /* Passes A and B (each viewed as __v8di) and I (cast to int) to
     * __builtin_ia32_vpshldq512 and yields the result as __m512i.
     * NOTE(review): kept as a macro, presumably because I must be a
     * compile-time constant for the builtin -- confirm. */
    #define _mm512_shldi_epi64(A, B, I) \
      ((__m512i)__builtin_ia32_vpshldq512( \
          (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (int)(I)))
    134 
    /* Feeds the mask U (as __mmask8), the result of _mm512_shldi_epi64(A, B, I)
     * and S (both viewed as __v8di) to __builtin_ia32_selectq_512.
     * NOTE(review): presumably each lane takes the shifted value when its mask
     * bit is set and the lane of S otherwise -- confirm. */
    #define _mm512_mask_shldi_epi64(S, U, A, B, I) \
      ((__m512i)__builtin_ia32_selectq_512( \
          (__mmask8)(U), \
          (__v8di)_mm512_shldi_epi64((A), (B), (I)), \
          (__v8di)(__m512i)(S)))
    139 
    /* Feeds the mask U (as __mmask8), the result of _mm512_shldi_epi64(A, B, I)
     * and the result of _mm512_setzero_si512() (both viewed as __v8di) to
     * __builtin_ia32_selectq_512.
     * NOTE(review): presumably lanes whose mask bit is clear come out as the
     * zero operand's lanes -- confirm. */
    #define _mm512_maskz_shldi_epi64(U, A, B, I) \
      ((__m512i)__builtin_ia32_selectq_512( \
          (__mmask8)(U), \
          (__v8di)_mm512_shldi_epi64((A), (B), (I)), \
          (__v8di)_mm512_setzero_si512()))
    144 
    /* Passes A and B (each viewed as __v16si) and I (cast to int) to
     * __builtin_ia32_vpshldd512 and yields the result as __m512i.
     * NOTE(review): kept as a macro, presumably because I must be a
     * compile-time constant for the builtin -- confirm. */
    #define _mm512_shldi_epi32(A, B, I) \
      ((__m512i)__builtin_ia32_vpshldd512( \
          (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (int)(I)))
    148 
    /* Feeds the mask U (as __mmask16), the result of
     * _mm512_shldi_epi32(A, B, I) and S (both viewed as __v16si) to
     * __builtin_ia32_selectd_512.
     * NOTE(review): presumably each lane takes the shifted value when its mask
     * bit is set and the lane of S otherwise -- confirm. */
    #define _mm512_mask_shldi_epi32(S, U, A, B, I) \
      ((__m512i)__builtin_ia32_selectd_512( \
          (__mmask16)(U), \
          (__v16si)_mm512_shldi_epi32((A), (B), (I)), \
          (__v16si)(__m512i)(S)))
    153 
    /* Feeds the mask U (as __mmask16), the result of
     * _mm512_shldi_epi32(A, B, I) and the result of _mm512_setzero_si512()
     * (both viewed as __v16si) to __builtin_ia32_selectd_512.
     * NOTE(review): presumably lanes whose mask bit is clear come out as the
     * zero operand's lanes -- confirm. */
    #define _mm512_maskz_shldi_epi32(U, A, B, I) \
      ((__m512i)__builtin_ia32_selectd_512( \
          (__mmask16)(U), \
          (__v16si)_mm512_shldi_epi32((A), (B), (I)), \
          (__v16si)_mm512_setzero_si512()))
    158 
    /* Passes A and B (each viewed as __v32hi) and I (cast to int) to
     * __builtin_ia32_vpshldw512 and yields the result as __m512i.
     * NOTE(review): kept as a macro, presumably because I must be a
     * compile-time constant for the builtin -- confirm. */
    #define _mm512_shldi_epi16(A, B, I) \
      ((__m512i)__builtin_ia32_vpshldw512( \
          (__v32hi)(__m512i)(A), (__v32hi)(__m512i)(B), (int)(I)))
    162 
    /* Feeds the mask U (as __mmask32), the result of
     * _mm512_shldi_epi16(A, B, I) and S (both viewed as __v32hi) to
     * __builtin_ia32_selectw_512.
     * NOTE(review): presumably each lane takes the shifted value when its mask
     * bit is set and the lane of S otherwise -- confirm. */
    #define _mm512_mask_shldi_epi16(S, U, A, B, I) \
      ((__m512i)__builtin_ia32_selectw_512( \
          (__mmask32)(U), \
          (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \
          (__v32hi)(__m512i)(S)))
    167 
    /* Feeds the mask U (as __mmask32), the result of
     * _mm512_shldi_epi16(A, B, I) and the result of _mm512_setzero_si512()
     * (both viewed as __v32hi) to __builtin_ia32_selectw_512.
     * NOTE(review): presumably lanes whose mask bit is clear come out as the
     * zero operand's lanes -- confirm. */
    #define _mm512_maskz_shldi_epi16(U, A, B, I) \
      ((__m512i)__builtin_ia32_selectw_512( \
          (__mmask32)(U), \
          (__v32hi)_mm512_shldi_epi16((A), (B), (I)), \
          (__v32hi)_mm512_setzero_si512()))
    172 
    /* Passes A and B (each viewed as __v8di) and I (cast to int) to
     * __builtin_ia32_vpshrdq512 and yields the result as __m512i.
     * NOTE(review): kept as a macro, presumably because I must be a
     * compile-time constant for the builtin -- confirm. */
    #define _mm512_shrdi_epi64(A, B, I) \
      ((__m512i)__builtin_ia32_vpshrdq512( \
          (__v8di)(__m512i)(A), (__v8di)(__m512i)(B), (int)(I)))
    176 
    /* Feeds the mask U (as __mmask8), the result of _mm512_shrdi_epi64(A, B, I)
     * and S (both viewed as __v8di) to __builtin_ia32_selectq_512.
     * NOTE(review): presumably each lane takes the shifted value when its mask
     * bit is set and the lane of S otherwise -- confirm. */
    #define _mm512_mask_shrdi_epi64(S, U, A, B, I) \
      ((__m512i)__builtin_ia32_selectq_512( \
          (__mmask8)(U), \
          (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \
          (__v8di)(__m512i)(S)))
    181 
    /* Feeds the mask U (as __mmask8), the result of _mm512_shrdi_epi64(A, B, I)
     * and the result of _mm512_setzero_si512() (both viewed as __v8di) to
     * __builtin_ia32_selectq_512.
     * NOTE(review): presumably lanes whose mask bit is clear come out as the
     * zero operand's lanes -- confirm. */
    #define _mm512_maskz_shrdi_epi64(U, A, B, I) \
      ((__m512i)__builtin_ia32_selectq_512( \
          (__mmask8)(U), \
          (__v8di)_mm512_shrdi_epi64((A), (B), (I)), \
          (__v8di)_mm512_setzero_si512()))
    186 
    /* Passes A and B (each viewed as __v16si) and I (cast to int) to
     * __builtin_ia32_vpshrdd512 and yields the result as __m512i.
     * NOTE(review): kept as a macro, presumably because I must be a
     * compile-time constant for the builtin -- confirm. */
    #define _mm512_shrdi_epi32(A, B, I) \
      ((__m512i)__builtin_ia32_vpshrdd512( \
          (__v16si)(__m512i)(A), (__v16si)(__m512i)(B), (int)(I)))
    190 
    /* Feeds the mask U (as __mmask16), the result of
     * _mm512_shrdi_epi32(A, B, I) and S (both viewed as __v16si) to
     * __builtin_ia32_selectd_512.
     * NOTE(review): presumably each lane takes the shifted value when its mask
     * bit is set and the lane of S otherwise -- confirm. */
    #define _mm512_mask_shrdi_epi32(S, U, A, B, I) \
      ((__m512i)__builtin_ia32_selectd_512( \
          (__mmask16)(U), \
          (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \
          (__v16si)(__m512i)(S)))
    195 
    /* Feeds the mask U (as __mmask16), the result of
     * _mm512_shrdi_epi32(A, B, I) and the result of _mm512_setzero_si512()
     * (both viewed as __v16si) to __builtin_ia32_selectd_512.
     * NOTE(review): presumably lanes whose mask bit is clear come out as the
     * zero operand's lanes -- confirm. */
    #define _mm512_maskz_shrdi_epi32(U, A, B, I) \
      ((__m512i)__builtin_ia32_selectd_512( \
          (__mmask16)(U), \
          (__v16si)_mm512_shrdi_epi32((A), (B), (I)), \
          (__v16si)_mm512_setzero_si512()))
    200 
    /* Passes A and B (each viewed as __v32hi) and I (cast to int) to
     * __builtin_ia32_vpshrdw512 and yields the result as __m512i.
     * NOTE(review): kept as a macro, presumably because I must be a
     * compile-time constant for the builtin -- confirm. */
    #define _mm512_shrdi_epi16(A, B, I) \
      ((__m512i)__builtin_ia32_vpshrdw512( \
          (__v32hi)(__m512i)(A), (__v32hi)(__m512i)(B), (int)(I)))
    204 
    /* Feeds the mask U (as __mmask32), the result of
     * _mm512_shrdi_epi16(A, B, I) and S (both viewed as __v32hi) to
     * __builtin_ia32_selectw_512.
     * NOTE(review): presumably each lane takes the shifted value when its mask
     * bit is set and the lane of S otherwise -- confirm. */
    #define _mm512_mask_shrdi_epi16(S, U, A, B, I) \
      ((__m512i)__builtin_ia32_selectw_512( \
          (__mmask32)(U), \
          (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \
          (__v32hi)(__m512i)(S)))
    209 
    /* Feeds the mask U (as __mmask32), the result of
     * _mm512_shrdi_epi16(A, B, I) and the result of _mm512_setzero_si512()
     * (both viewed as __v32hi) to __builtin_ia32_selectw_512.
     * NOTE(review): presumably lanes whose mask bit is clear come out as the
     * zero operand's lanes -- confirm. */
    #define _mm512_maskz_shrdi_epi16(U, A, B, I) \
      ((__m512i)__builtin_ia32_selectw_512( \
          (__mmask32)(U), \
          (__v32hi)_mm512_shrdi_epi16((A), (B), (I)), \
          (__v32hi)_mm512_setzero_si512()))
    214 
    215 static __inline__ __m512i __DEFAULT_FN_ATTRS
    216 _mm512_shldv_epi64(__m512i __A, __m512i __B, __m512i __C)
    217 {
    218   return (__m512i)__builtin_ia32_vpshldvq512((__v8di)__A, (__v8di)__B,
    219                                              (__v8di)__C);
    220 }
    221 
    222 static __inline__ __m512i __DEFAULT_FN_ATTRS
    223 _mm512_mask_shldv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C)
    224 {
    225   return (__m512i)__builtin_ia32_selectq_512(__U,
    226                                       (__v8di)_mm512_shldv_epi64(__A, __B, __C),
    227                                       (__v8di)__A);
    228 }
    229 
    230 static __inline__ __m512i __DEFAULT_FN_ATTRS
    231 _mm512_maskz_shldv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C)
    232 {
    233   return (__m512i)__builtin_ia32_selectq_512(__U,
    234                                       (__v8di)_mm512_shldv_epi64(__A, __B, __C),
    235                                       (__v8di)_mm512_setzero_si512());
    236 }
    237 
    238 static __inline__ __m512i __DEFAULT_FN_ATTRS
    239 _mm512_shldv_epi32(__m512i __A, __m512i __B, __m512i __C)
    240 {
    241   return (__m512i)__builtin_ia32_vpshldvd512((__v16si)__A, (__v16si)__B,
    242                                              (__v16si)__C);
    243 }
    244 
    245 static __inline__ __m512i __DEFAULT_FN_ATTRS
    246 _mm512_mask_shldv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C)
    247 {
    248   return (__m512i)__builtin_ia32_selectd_512(__U,
    249                                      (__v16si)_mm512_shldv_epi32(__A, __B, __C),
    250                                      (__v16si)__A);
    251 }
    252 
    253 static __inline__ __m512i __DEFAULT_FN_ATTRS
    254 _mm512_maskz_shldv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C)
    255 {
    256   return (__m512i)__builtin_ia32_selectd_512(__U,
    257                                      (__v16si)_mm512_shldv_epi32(__A, __B, __C),
    258                                      (__v16si)_mm512_setzero_si512());
    259 }
    260 
    261 static __inline__ __m512i __DEFAULT_FN_ATTRS
    262 _mm512_shldv_epi16(__m512i __A, __m512i __B, __m512i __C)
    263 {
    264   return (__m512i)__builtin_ia32_vpshldvw512((__v32hi)__A, (__v32hi)__B,
    265                                              (__v32hi)__C);
    266 }
    267 
    268 static __inline__ __m512i __DEFAULT_FN_ATTRS
    269 _mm512_mask_shldv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C)
    270 {
    271   return (__m512i)__builtin_ia32_selectw_512(__U,
    272                                      (__v32hi)_mm512_shldv_epi16(__A, __B, __C),
    273                                      (__v32hi)__A);
    274 }
    275 
    276 static __inline__ __m512i __DEFAULT_FN_ATTRS
    277 _mm512_maskz_shldv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C)
    278 {
    279   return (__m512i)__builtin_ia32_selectw_512(__U,
    280                                      (__v32hi)_mm512_shldv_epi16(__A, __B, __C),
    281                                      (__v32hi)_mm512_setzero_si512());
    282 }
    283 
    284 static __inline__ __m512i __DEFAULT_FN_ATTRS
    285 _mm512_shrdv_epi64(__m512i __A, __m512i __B, __m512i __C)
    286 {
    287   return (__m512i)__builtin_ia32_vpshrdvq512((__v8di)__A, (__v8di)__B,
    288                                              (__v8di)__C);
    289 }
    290 
    291 static __inline__ __m512i __DEFAULT_FN_ATTRS
    292 _mm512_mask_shrdv_epi64(__m512i __A, __mmask8 __U, __m512i __B, __m512i __C)
    293 {
    294   return (__m512i)__builtin_ia32_selectq_512(__U,
    295                                       (__v8di)_mm512_shrdv_epi64(__A, __B, __C),
    296                                       (__v8di)__A);
    297 }
    298 
    299 static __inline__ __m512i __DEFAULT_FN_ATTRS
    300 _mm512_maskz_shrdv_epi64(__mmask8 __U, __m512i __A, __m512i __B, __m512i __C)
    301 {
    302   return (__m512i)__builtin_ia32_selectq_512(__U,
    303                                       (__v8di)_mm512_shrdv_epi64(__A, __B, __C),
    304                                       (__v8di)_mm512_setzero_si512());
    305 }
    306 
    307 static __inline__ __m512i __DEFAULT_FN_ATTRS
    308 _mm512_shrdv_epi32(__m512i __A, __m512i __B, __m512i __C)
    309 {
    310   return (__m512i)__builtin_ia32_vpshrdvd512((__v16si)__A, (__v16si)__B,
    311                                              (__v16si)__C);
    312 }
    313 
    314 static __inline__ __m512i __DEFAULT_FN_ATTRS
    315 _mm512_mask_shrdv_epi32(__m512i __A, __mmask16 __U, __m512i __B, __m512i __C)
    316 {
    317   return (__m512i) __builtin_ia32_selectd_512(__U,
    318                                      (__v16si)_mm512_shrdv_epi32(__A, __B, __C),
    319                                      (__v16si)__A);
    320 }
    321 
    322 static __inline__ __m512i __DEFAULT_FN_ATTRS
    323 _mm512_maskz_shrdv_epi32(__mmask16 __U, __m512i __A, __m512i __B, __m512i __C)
    324 {
    325   return (__m512i) __builtin_ia32_selectd_512(__U,
    326                                      (__v16si)_mm512_shrdv_epi32(__A, __B, __C),
    327                                      (__v16si)_mm512_setzero_si512());
    328 }
    329 
    330 static __inline__ __m512i __DEFAULT_FN_ATTRS
    331 _mm512_shrdv_epi16(__m512i __A, __m512i __B, __m512i __C)
    332 {
    333   return (__m512i)__builtin_ia32_vpshrdvw512((__v32hi)__A, (__v32hi)__B,
    334                                              (__v32hi)__C);
    335 }
    336 
    337 static __inline__ __m512i __DEFAULT_FN_ATTRS
    338 _mm512_mask_shrdv_epi16(__m512i __A, __mmask32 __U, __m512i __B, __m512i __C)
    339 {
    340   return (__m512i)__builtin_ia32_selectw_512(__U,
    341                                      (__v32hi)_mm512_shrdv_epi16(__A, __B, __C),
    342                                      (__v32hi)__A);
    343 }
    344 
    345 static __inline__ __m512i __DEFAULT_FN_ATTRS
    346 _mm512_maskz_shrdv_epi16(__mmask32 __U, __m512i __A, __m512i __B, __m512i __C)
    347 {
    348   return (__m512i)__builtin_ia32_selectw_512(__U,
    349                                      (__v32hi)_mm512_shrdv_epi16(__A, __B, __C),
    350                                      (__v32hi)_mm512_setzero_si512());
    351 }
    352 
    353 
    354 #undef __DEFAULT_FN_ATTRS
    355 
    356 #endif
    357