zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

avx10_2convertintrin.h (24299B) - Raw


      1 /*===--------------- avx10_2convertintrin.h - AVX10_2CONVERT ---------------===
      2  *
      3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4  * See https://llvm.org/LICENSE.txt for license information.
      5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6  *
      7  *===-----------------------------------------------------------------------===
      8  */
      9 #ifndef __IMMINTRIN_H
     10 #error                                                                         \
     11     "Never use <avx10_2convertintrin.h> directly; include <immintrin.h> instead."
     12 #endif // __IMMINTRIN_H
     13 
     14 #ifdef __SSE2__
     15 
     16 #ifndef __AVX10_2CONVERTINTRIN_H
     17 #define __AVX10_2CONVERTINTRIN_H
     18 
     /* Attribute bundles applied to every intrinsic defined in this file: always
      * inline, no debug info, body compiled for the "avx10.2-256" target, and a
      * declared minimum vector width of 128 or 256 bits respectively. */
     #define __DEFAULT_FN_ATTRS128                                                  \
       __attribute__((__always_inline__, __nodebug__,                               \
                      __target__("avx10.2-256"), __min_vector_width__(128)))
     #define __DEFAULT_FN_ATTRS256                                                  \
       __attribute__((__always_inline__, __nodebug__,                               \
                      __target__("avx10.2-256"), __min_vector_width__(256)))
     26 
     27 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtx2ps_ph(__m128 __A,
     28                                                                __m128 __B) {
     29   return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask(
     30       (__v4sf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)(-1));
     31 }
     32 
     33 static __inline__ __m128h __DEFAULT_FN_ATTRS128
     34 _mm_mask_cvtx2ps_ph(__m128h __W, __mmask8 __U, __m128 __A, __m128 __B) {
     35   return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask(
     36       (__v4sf)__A, (__v4sf)__B, (__v8hf)__W, (__mmask8)__U);
     37 }
     38 
     39 static __inline__ __m128h __DEFAULT_FN_ATTRS128
     40 _mm_maskz_cvtx2ps_ph(__mmask8 __U, __m128 __A, __m128 __B) {
     41   return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask(
     42       (__v4sf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U);
     43 }
     44 
     45 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtx2ps_ph(__m256 __A,
     46                                                                   __m256 __B) {
     47   return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask(
     48       (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)(-1),
     49       _MM_FROUND_CUR_DIRECTION);
     50 }
     51 
     52 static __inline__ __m256h __DEFAULT_FN_ATTRS256
     53 _mm256_mask_cvtx2ps_ph(__m256h __W, __mmask16 __U, __m256 __A, __m256 __B) {
     54   return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask(
     55       (__v8sf)__A, (__v8sf)__B, (__v16hf)__W, (__mmask16)__U,
     56       _MM_FROUND_CUR_DIRECTION);
     57 }
     58 
     59 static __inline__ __m256h __DEFAULT_FN_ATTRS256
     60 _mm256_maskz_cvtx2ps_ph(__mmask16 __U, __m256 __A, __m256 __B) {
     61   return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask(
     62       (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U,
     63       _MM_FROUND_CUR_DIRECTION);
     64 }
     65 
     /* Unmasked vcvt2ps2phx256 call with a caller-supplied rounding argument R.
      * The mask is all ones and an undefined vector fills the third operand.
      * Kept as a macro so R reaches the builtin as written by the caller. */
     #define _mm256_cvtx_round2ps_ph(A, B, R)                                       \
       ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask(                                \
           (__v8sf)(A), (__v8sf)(B),                                                \
           (__v16hf)_mm256_undefined_ph(),                                          \
           (__mmask16)-1, (const int)(R)))
     70 
     /* Masked vcvt2ps2phx256 call with a caller-supplied rounding argument R:
      * W is the third operand and U the mask operand of the builtin. */
     #define _mm256_mask_cvtx_round2ps_ph(W, U, A, B, R)                            \
       ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask(                                \
           (__v8sf)(A), (__v8sf)(B),                                                \
           (__v16hf)(W), (__mmask16)(U),                                            \
           (const int)(R)))
     74 
     /* Zero-masked vcvt2ps2phx256 call with a caller-supplied rounding argument
      * R: a zero vector is the third operand and U the mask operand. */
     #define _mm256_maskz_cvtx_round2ps_ph(U, A, B, R)                              \
       ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask(                                \
           (__v8sf)(A), (__v8sf)(B),                                                \
           (__v16hf)(_mm256_setzero_ph()), (__mmask16)(U),                          \
           (const int)(R)))
     79 
     80 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtbiasph_bf8(__m128i __A,
     81                                                                   __m128h __B) {
     82   return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask(
     83       (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1);
     84 }
     85 
     86 static __inline__ __m128i __DEFAULT_FN_ATTRS128
     87 _mm_mask_cvtbiasph_bf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) {
     88   return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask(
     89       (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U);
     90 }
     91 
     92 static __inline__ __m128i __DEFAULT_FN_ATTRS128
     93 _mm_maskz_cvtbiasph_bf8(__mmask8 __U, __m128i __A, __m128h __B) {
     94   return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask(
     95       (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
     96       (__mmask8)__U);
     97 }
     98 
     99 static __inline__ __m128i __DEFAULT_FN_ATTRS256
    100 _mm256_cvtbiasph_bf8(__m256i __A, __m256h __B) {
    101   return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask(
    102       (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(),
    103       (__mmask16)-1);
    104 }
    105 
    106 static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiasph_bf8(
    107     __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) {
    108   return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask(
    109       (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U);
    110 }
    111 
    112 static __inline__ __m128i __DEFAULT_FN_ATTRS256
    113 _mm256_maskz_cvtbiasph_bf8(__mmask16 __U, __m256i __A, __m256h __B) {
    114   return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask(
    115       (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
    116       (__mmask16)__U);
    117 }
    118 
    119 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    120 _mm_cvtbiassph_bf8(__m128i __A, __m128h __B) {
    121   return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask(
    122       (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1);
    123 }
    124 
    125 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    126 _mm_mask_cvtbiassph_bf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) {
    127   return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask(
    128       (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U);
    129 }
    130 
    131 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    132 _mm_maskz_cvtbiassph_bf8(__mmask8 __U, __m128i __A, __m128h __B) {
    133   return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask(
    134       (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
    135       (__mmask8)__U);
    136 }
    137 
    138 static __inline__ __m128i __DEFAULT_FN_ATTRS256
    139 _mm256_cvtbiassph_bf8(__m256i __A, __m256h __B) {
    140   return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask(
    141       (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(),
    142       (__mmask16)-1);
    143 }
    144 
    145 static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiassph_bf8(
    146     __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) {
    147   return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask(
    148       (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U);
    149 }
    150 
    151 static __inline__ __m128i __DEFAULT_FN_ATTRS256
    152 _mm256_maskz_cvtbiassph_bf8(__mmask16 __U, __m256i __A, __m256h __B) {
    153   return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask(
    154       (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
    155       (__mmask16)__U);
    156 }
    157 
    158 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtbiasph_hf8(__m128i __A,
    159                                                                   __m128h __B) {
    160   return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask(
    161       (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1);
    162 }
    163 
    164 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    165 _mm_mask_cvtbiasph_hf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) {
    166   return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask(
    167       (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U);
    168 }
    169 
    170 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    171 _mm_maskz_cvtbiasph_hf8(__mmask8 __U, __m128i __A, __m128h __B) {
    172   return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask(
    173       (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
    174       (__mmask8)__U);
    175 }
    176 
    177 static __inline__ __m128i __DEFAULT_FN_ATTRS256
    178 _mm256_cvtbiasph_hf8(__m256i __A, __m256h __B) {
    179   return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask(
    180       (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(),
    181       (__mmask16)-1);
    182 }
    183 
    184 static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiasph_hf8(
    185     __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) {
    186   return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask(
    187       (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U);
    188 }
    189 
    190 static __inline__ __m128i __DEFAULT_FN_ATTRS256
    191 _mm256_maskz_cvtbiasph_hf8(__mmask16 __U, __m256i __A, __m256h __B) {
    192   return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask(
    193       (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
    194       (__mmask16)__U);
    195 }
    196 
    197 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    198 _mm_cvtbiassph_hf8(__m128i __A, __m128h __B) {
    199   return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask(
    200       (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1);
    201 }
    202 
    203 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    204 _mm_mask_cvtbiassph_hf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) {
    205   return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask(
    206       (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U);
    207 }
    208 
    209 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    210 _mm_maskz_cvtbiassph_hf8(__mmask8 __U, __m128i __A, __m128h __B) {
    211   return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask(
    212       (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
    213       (__mmask8)__U);
    214 }
    215 
    216 static __inline__ __m128i __DEFAULT_FN_ATTRS256
    217 _mm256_cvtbiassph_hf8(__m256i __A, __m256h __B) {
    218   return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask(
    219       (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(),
    220       (__mmask16)-1);
    221 }
    222 
    223 static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiassph_hf8(
    224     __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) {
    225   return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask(
    226       (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U);
    227 }
    228 
    229 static __inline__ __m128i __DEFAULT_FN_ATTRS256
    230 _mm256_maskz_cvtbiassph_hf8(__mmask16 __U, __m256i __A, __m256h __B) {
    231   return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask(
    232       (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(),
    233       (__mmask16)__U);
    234 }
    235 
    236 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvt2ph_bf8(__m128h __A,
    237                                                                __m128h __B) {
    238   return (__m128i)__builtin_ia32_vcvt2ph2bf8_128((__v8hf)(__A), (__v8hf)(__B));
    239 }
    240 
    241 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    242 _mm_mask_cvt2ph_bf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) {
    243   return (__m128i)__builtin_ia32_selectb_128(
    244       (__mmask16)__U, (__v16qi)_mm_cvt2ph_bf8(__A, __B), (__v16qi)__W);
    245 }
    246 
    247 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    248 _mm_maskz_cvt2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) {
    249   return (__m128i)__builtin_ia32_selectb_128(
    250       (__mmask16)__U, (__v16qi)_mm_cvt2ph_bf8(__A, __B),
    251       (__v16qi)(__m128i)_mm_setzero_si128());
    252 }
    253 
    254 static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvt2ph_bf8(__m256h __A,
    255                                                                   __m256h __B) {
    256   return (__m256i)__builtin_ia32_vcvt2ph2bf8_256((__v16hf)(__A),
    257                                                  (__v16hf)(__B));
    258 }
    259 
    260 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    261 _mm256_mask_cvt2ph_bf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) {
    262   return (__m256i)__builtin_ia32_selectb_256(
    263       (__mmask16)__U, (__v32qi)_mm256_cvt2ph_bf8(__A, __B), (__v32qi)__W);
    264 }
    265 
    266 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    267 _mm256_maskz_cvt2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) {
    268   return (__m256i)__builtin_ia32_selectb_256(
    269       (__mmask16)__U, (__v32qi)_mm256_cvt2ph_bf8(__A, __B),
    270       (__v32qi)(__m256i)_mm256_setzero_si256());
    271 }
    272 
    273 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvts2ph_bf8(__m128h __A,
    274                                                                 __m128h __B) {
    275   return (__m128i)__builtin_ia32_vcvt2ph2bf8s_128((__v8hf)(__A), (__v8hf)(__B));
    276 }
    277 
    278 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    279 _mm_mask_cvts2ph_bf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) {
    280   return (__m128i)__builtin_ia32_selectb_128(
    281       (__mmask16)__U, (__v16qi)_mm_cvts2ph_bf8(__A, __B), (__v16qi)__W);
    282 }
    283 
    284 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    285 _mm_maskz_cvts2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) {
    286   return (__m128i)__builtin_ia32_selectb_128(
    287       (__mmask16)__U, (__v16qi)_mm_cvts2ph_bf8(__A, __B),
    288       (__v16qi)(__m128i)_mm_setzero_si128());
    289 }
    290 
    291 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    292 _mm256_cvts2ph_bf8(__m256h __A, __m256h __B) {
    293   return (__m256i)__builtin_ia32_vcvt2ph2bf8s_256((__v16hf)(__A),
    294                                                   (__v16hf)(__B));
    295 }
    296 
    297 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    298 _mm256_mask_cvts2ph_bf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) {
    299   return (__m256i)__builtin_ia32_selectb_256(
    300       (__mmask16)__U, (__v32qi)_mm256_cvts2ph_bf8(__A, __B), (__v32qi)__W);
    301 }
    302 
    303 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    304 _mm256_maskz_cvts2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) {
    305   return (__m256i)__builtin_ia32_selectb_256(
    306       (__mmask16)__U, (__v32qi)_mm256_cvts2ph_bf8(__A, __B),
    307       (__v32qi)(__m256i)_mm256_setzero_si256());
    308 }
    309 
    310 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvt2ph_hf8(__m128h __A,
    311                                                                __m128h __B) {
    312   return (__m128i)__builtin_ia32_vcvt2ph2hf8_128((__v8hf)(__A), (__v8hf)(__B));
    313 }
    314 
    315 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    316 _mm_mask_cvt2ph_hf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) {
    317   return (__m128i)__builtin_ia32_selectb_128(
    318       (__mmask16)__U, (__v16qi)_mm_cvt2ph_hf8(__A, __B), (__v16qi)__W);
    319 }
    320 
    321 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    322 _mm_maskz_cvt2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) {
    323   return (__m128i)__builtin_ia32_selectb_128(
    324       (__mmask16)__U, (__v16qi)_mm_cvt2ph_hf8(__A, __B),
    325       (__v16qi)(__m128i)_mm_setzero_si128());
    326 }
    327 
    328 static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvt2ph_hf8(__m256h __A,
    329                                                                   __m256h __B) {
    330   return (__m256i)__builtin_ia32_vcvt2ph2hf8_256((__v16hf)(__A),
    331                                                  (__v16hf)(__B));
    332 }
    333 
    334 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    335 _mm256_mask_cvt2ph_hf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) {
    336   return (__m256i)__builtin_ia32_selectb_256(
    337       (__mmask16)__U, (__v32qi)_mm256_cvt2ph_hf8(__A, __B), (__v32qi)__W);
    338 }
    339 
    340 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    341 _mm256_maskz_cvt2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) {
    342   return (__m256i)__builtin_ia32_selectb_256(
    343       (__mmask16)__U, (__v32qi)_mm256_cvt2ph_hf8(__A, __B),
    344       (__v32qi)(__m256i)_mm256_setzero_si256());
    345 }
    346 
    347 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvts2ph_hf8(__m128h __A,
    348                                                                 __m128h __B) {
    349   return (__m128i)__builtin_ia32_vcvt2ph2hf8s_128((__v8hf)(__A), (__v8hf)(__B));
    350 }
    351 
    352 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    353 _mm_mask_cvts2ph_hf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) {
    354   return (__m128i)__builtin_ia32_selectb_128(
    355       (__mmask16)__U, (__v16qi)_mm_cvts2ph_hf8(__A, __B), (__v16qi)__W);
    356 }
    357 
    358 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    359 _mm_maskz_cvts2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) {
    360   return (__m128i)__builtin_ia32_selectb_128(
    361       (__mmask16)__U, (__v16qi)_mm_cvts2ph_hf8(__A, __B),
    362       (__v16qi)(__m128i)_mm_setzero_si128());
    363 }
    364 
    365 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    366 _mm256_cvts2ph_hf8(__m256h __A, __m256h __B) {
    367   return (__m256i)__builtin_ia32_vcvt2ph2hf8s_256((__v16hf)(__A),
    368                                                   (__v16hf)(__B));
    369 }
    370 
    371 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    372 _mm256_mask_cvts2ph_hf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) {
    373   return (__m256i)__builtin_ia32_selectb_256(
    374       (__mmask16)__U, (__v32qi)_mm256_cvts2ph_hf8(__A, __B), (__v32qi)__W);
    375 }
    376 
    377 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    378 _mm256_maskz_cvts2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) {
    379   return (__m256i)__builtin_ia32_selectb_256(
    380       (__mmask16)__U, (__v32qi)_mm256_cvts2ph_hf8(__A, __B),
    381       (__v32qi)(__m256i)_mm256_setzero_si256());
    382 }
    383 
    384 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvthf8(__m128i __A) {
    385   return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask(
    386       (__v16qi)__A, (__v8hf)(__m128h)_mm_undefined_ph(), (__mmask8)-1);
    387 }
    388 
    389 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvthf8(__m128h __W,
    390                                                                 __mmask8 __U,
    391                                                                 __m128i __A) {
    392   return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask(
    393       (__v16qi)__A, (__v8hf)(__m128h)__W, (__mmask8)__U);
    394 }
    395 
    396 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvthf8(__mmask8 __U,
    397                                                                  __m128i __A) {
    398   return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask(
    399       (__v16qi)__A, (__v8hf)(__m128h)_mm_setzero_ph(), (__mmask8)__U);
    400 }
    401 
    402 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvthf8(__m128i __A) {
    403   return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask(
    404       (__v16qi)__A, (__v16hf)(__m256h)_mm256_undefined_ph(), (__mmask16)-1);
    405 }
    406 
    407 static __inline__ __m256h __DEFAULT_FN_ATTRS256
    408 _mm256_mask_cvthf8(__m256h __W, __mmask16 __U, __m128i __A) {
    409   return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask(
    410       (__v16qi)__A, (__v16hf)(__m256h)__W, (__mmask16)__U);
    411 }
    412 
    413 static __inline__ __m256h __DEFAULT_FN_ATTRS256
    414 _mm256_maskz_cvthf8(__mmask16 __U, __m128i __A) {
    415   return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask(
    416       (__v16qi)__A, (__v16hf)(__m256h)_mm256_setzero_ph(), (__mmask16)__U);
    417 }
    418 
    419 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_bf8(__m128h __A) {
    420   return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask(
    421       (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1);
    422 }
    423 
    424 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    425 _mm_mask_cvtph_bf8(__m128i __W, __mmask8 __U, __m128h __A) {
    426   return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask(
    427       (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U);
    428 }
    429 
    430 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    431 _mm_maskz_cvtph_bf8(__mmask8 __U, __m128h __A) {
    432   return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask(
    433       (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U);
    434 }
    435 
    436 static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtph_bf8(__m256h __A) {
    437   return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask(
    438       (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1);
    439 }
    440 
    441 static __inline__ __m128i __DEFAULT_FN_ATTRS256
    442 _mm256_mask_cvtph_bf8(__m128i __W, __mmask16 __U, __m256h __A) {
    443   return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask(
    444       (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U);
    445 }
    446 
    447 static __inline__ __m128i __DEFAULT_FN_ATTRS256
    448 _mm256_maskz_cvtph_bf8(__mmask16 __U, __m256h __A) {
    449   return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask(
    450       (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U);
    451 }
    452 
    453 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsph_bf8(__m128h __A) {
    454   return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask(
    455       (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1);
    456 }
    457 
    458 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    459 _mm_mask_cvtsph_bf8(__m128i __W, __mmask8 __U, __m128h __A) {
    460   return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask(
    461       (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U);
    462 }
    463 
    464 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    465 _mm_maskz_cvtsph_bf8(__mmask8 __U, __m128h __A) {
    466   return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask(
    467       (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U);
    468 }
    469 
    470 static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsph_bf8(__m256h __A) {
    471   return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask(
    472       (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1);
    473 }
    474 
    475 static __inline__ __m128i __DEFAULT_FN_ATTRS256
    476 _mm256_mask_cvtsph_bf8(__m128i __W, __mmask16 __U, __m256h __A) {
    477   return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask(
    478       (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U);
    479 }
    480 
    481 static __inline__ __m128i __DEFAULT_FN_ATTRS256
    482 _mm256_maskz_cvtsph_bf8(__mmask16 __U, __m256h __A) {
    483   return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask(
    484       (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U);
    485 }
    486 
    487 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_hf8(__m128h __A) {
    488   return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask(
    489       (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1);
    490 }
    491 
    492 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    493 _mm_mask_cvtph_hf8(__m128i __W, __mmask8 __U, __m128h __A) {
    494   return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask(
    495       (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U);
    496 }
    497 
    498 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    499 _mm_maskz_cvtph_hf8(__mmask8 __U, __m128h __A) {
    500   return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask(
    501       (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U);
    502 }
    503 
    504 static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtph_hf8(__m256h __A) {
    505   return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask(
    506       (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1);
    507 }
    508 
    509 static __inline__ __m128i __DEFAULT_FN_ATTRS256
    510 _mm256_mask_cvtph_hf8(__m128i __W, __mmask16 __U, __m256h __A) {
    511   return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask(
    512       (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U);
    513 }
    514 
    515 static __inline__ __m128i __DEFAULT_FN_ATTRS256
    516 _mm256_maskz_cvtph_hf8(__mmask16 __U, __m256h __A) {
    517   return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask(
    518       (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U);
    519 }
    520 
    521 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsph_hf8(__m128h __A) {
    522   return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask(
    523       (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1);
    524 }
    525 
    526 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    527 _mm_mask_cvtsph_hf8(__m128i __W, __mmask8 __U, __m128h __A) {
    528   return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask(
    529       (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U);
    530 }
    531 
    532 static __inline__ __m128i __DEFAULT_FN_ATTRS128
    533 _mm_maskz_cvtsph_hf8(__mmask8 __U, __m128h __A) {
    534   return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask(
    535       (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U);
    536 }
    537 
    538 static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsph_hf8(__m256h __A) {
    539   return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask(
    540       (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1);
    541 }
    542 
    543 static __inline__ __m128i __DEFAULT_FN_ATTRS256
    544 _mm256_mask_cvtsph_hf8(__m128i __W, __mmask16 __U, __m256h __A) {
    545   return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask(
    546       (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U);
    547 }
    548 
    549 static __inline__ __m128i __DEFAULT_FN_ATTRS256
    550 _mm256_maskz_cvtsph_hf8(__mmask16 __U, __m256h __A) {
    551   return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask(
    552       (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U);
    553 }
    554 
    555 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtbf8_ph(__m128i __A) {
    556   return _mm_castsi128_ph(_mm_slli_epi16(_mm_cvtepi8_epi16(__A), 8));
    557 }
    558 
    559 static __inline__ __m128h __DEFAULT_FN_ATTRS128
    560 _mm_mask_cvtbf8_ph(__m128h __S, __mmask8 __U, __m128i __A) {
    561   return _mm_castsi128_ph(
    562       _mm_mask_slli_epi16((__m128i)__S, __U, _mm_cvtepi8_epi16(__A), 8));
    563 }
    564 
    565 static __inline__ __m128h __DEFAULT_FN_ATTRS128
    566 _mm_maskz_cvtbf8_ph(__mmask8 __U, __m128i __A) {
    567   return _mm_castsi128_ph(_mm_slli_epi16(_mm_maskz_cvtepi8_epi16(__U, __A), 8));
    568 }
    569 
    570 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtbf8_ph(__m128i __A) {
    571   return _mm256_castsi256_ph(_mm256_slli_epi16(_mm256_cvtepi8_epi16(__A), 8));
    572 }
    573 
    574 static __inline__ __m256h __DEFAULT_FN_ATTRS256
    575 _mm256_mask_cvtbf8_ph(__m256h __S, __mmask16 __U, __m128i __A) {
    576   return _mm256_castsi256_ph(
    577       _mm256_mask_slli_epi16((__m256i)__S, __U, _mm256_cvtepi8_epi16(__A), 8));
    578 }
    579 
    580 static __inline__ __m256h __DEFAULT_FN_ATTRS256
    581 _mm256_maskz_cvtbf8_ph(__mmask16 __U, __m128i __A) {
    582   return _mm256_castsi256_ph(
    583       _mm256_slli_epi16(_mm256_maskz_cvtepi8_epi16(__U, __A), 8));
    584 }
    585 
    586 #undef __DEFAULT_FN_ATTRS128
    587 #undef __DEFAULT_FN_ATTRS256
    588 
    589 #endif // __AVX10_2CONVERTINTRIN_H
    590 #endif // __SSE2__