avx10_2convertintrin.h (24299B) - Raw
1 /*===--------------- avx10_2convertintrin.h - AVX10_2CONVERT ---------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 #ifndef __IMMINTRIN_H 10 #error \ 11 "Never use <avx10_2convertintrin.h> directly; include <immintrin.h> instead." 12 #endif // __IMMINTRIN_H 13 14 #ifdef __SSE2__ 15 16 #ifndef __AVX10_2CONVERTINTRIN_H 17 #define __AVX10_2CONVERTINTRIN_H 18 19 /* Define the default attributes for the functions in this file. */ 20 #define __DEFAULT_FN_ATTRS128 \ 21 __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ 22 __min_vector_width__(128))) 23 #define __DEFAULT_FN_ATTRS256 \ 24 __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-256"), \ 25 __min_vector_width__(256))) 26 27 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtx2ps_ph(__m128 __A, 28 __m128 __B) { 29 return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask( 30 (__v4sf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)(-1)); 31 } 32 33 static __inline__ __m128h __DEFAULT_FN_ATTRS128 34 _mm_mask_cvtx2ps_ph(__m128h __W, __mmask8 __U, __m128 __A, __m128 __B) { 35 return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask( 36 (__v4sf)__A, (__v4sf)__B, (__v8hf)__W, (__mmask8)__U); 37 } 38 39 static __inline__ __m128h __DEFAULT_FN_ATTRS128 40 _mm_maskz_cvtx2ps_ph(__mmask8 __U, __m128 __A, __m128 __B) { 41 return (__m128h)__builtin_ia32_vcvt2ps2phx128_mask( 42 (__v4sf)__A, (__v4sf)__B, (__v8hf)_mm_setzero_ph(), (__mmask8)__U); 43 } 44 45 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtx2ps_ph(__m256 __A, 46 __m256 __B) { 47 return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask( 48 (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)(-1), 49 _MM_FROUND_CUR_DIRECTION); 50 } 51 52 static __inline__ __m256h __DEFAULT_FN_ATTRS256 53 _mm256_mask_cvtx2ps_ph(__m256h __W, __mmask16 __U, __m256 __A, __m256 __B) { 54 return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask( 55 (__v8sf)__A, (__v8sf)__B, (__v16hf)__W, (__mmask16)__U, 56 _MM_FROUND_CUR_DIRECTION); 57 } 58 59 static __inline__ __m256h __DEFAULT_FN_ATTRS256 60 _mm256_maskz_cvtx2ps_ph(__mmask16 __U, __m256 __A, __m256 __B) { 61 return (__m256h)__builtin_ia32_vcvt2ps2phx256_mask( 62 (__v8sf)__A, (__v8sf)__B, (__v16hf)_mm256_setzero_ph(), (__mmask16)__U, 63 _MM_FROUND_CUR_DIRECTION); 64 } 65 66 #define _mm256_cvtx_round2ps_ph(A, B, R) \ 67 ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask( \ 68 (__v8sf)(A), (__v8sf)(B), (__v16hf)_mm256_undefined_ph(), \ 69 (__mmask16)(-1), (const int)(R))) 70 71 #define _mm256_mask_cvtx_round2ps_ph(W, U, A, B, R) \ 72 ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask( \ 73 (__v8sf)(A), (__v8sf)(B), (__v16hf)(W), (__mmask16)(U), (const int)(R))) 74 75 #define _mm256_maskz_cvtx_round2ps_ph(U, A, B, R) \ 76 ((__m256h)__builtin_ia32_vcvt2ps2phx256_mask( \ 77 (__v8sf)(A), (__v8sf)(B), (__v16hf)(_mm256_setzero_ph()), \ 78 (__mmask16)(U), (const int)(R))) 79 80 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtbiasph_bf8(__m128i __A, 81 __m128h __B) { 82 return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask( 83 (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1); 84 } 85 86 static __inline__ __m128i __DEFAULT_FN_ATTRS128 87 _mm_mask_cvtbiasph_bf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { 88 return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask( 89 (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); 90 } 91 92 static __inline__ __m128i __DEFAULT_FN_ATTRS128 93 _mm_maskz_cvtbiasph_bf8(__mmask8 __U, __m128i __A, __m128h __B) { 94 return (__m128i)__builtin_ia32_vcvtbiasph2bf8_128_mask( 95 (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), 96 (__mmask8)__U); 97 } 98 99 static __inline__ __m128i __DEFAULT_FN_ATTRS256 100 _mm256_cvtbiasph_bf8(__m256i __A, __m256h __B) { 101 return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask( 102 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(), 103 (__mmask16)-1); 104 } 105 106 static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiasph_bf8( 107 __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { 108 return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask( 109 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U); 110 } 111 112 static __inline__ __m128i __DEFAULT_FN_ATTRS256 113 _mm256_maskz_cvtbiasph_bf8(__mmask16 __U, __m256i __A, __m256h __B) { 114 return (__m128i)__builtin_ia32_vcvtbiasph2bf8_256_mask( 115 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), 116 (__mmask16)__U); 117 } 118 119 static __inline__ __m128i __DEFAULT_FN_ATTRS128 120 _mm_cvtbiassph_bf8(__m128i __A, __m128h __B) { 121 return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask( 122 (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1); 123 } 124 125 static __inline__ __m128i __DEFAULT_FN_ATTRS128 126 _mm_mask_cvtbiassph_bf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { 127 return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask( 128 (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); 129 } 130 131 static __inline__ __m128i __DEFAULT_FN_ATTRS128 132 _mm_maskz_cvtbiassph_bf8(__mmask8 __U, __m128i __A, __m128h __B) { 133 return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_128_mask( 134 (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), 135 (__mmask8)__U); 136 } 137 138 static __inline__ __m128i __DEFAULT_FN_ATTRS256 139 _mm256_cvtbiassph_bf8(__m256i __A, __m256h __B) { 140 return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask( 141 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(), 142 (__mmask16)-1); 143 } 144 145 static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiassph_bf8( 146 __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { 147 return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask( 148 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U); 149 } 150 151 static __inline__ __m128i __DEFAULT_FN_ATTRS256 152 _mm256_maskz_cvtbiassph_bf8(__mmask16 __U, __m256i __A, __m256h __B) { 153 return (__m128i)__builtin_ia32_vcvtbiasph2bf8s_256_mask( 154 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), 155 (__mmask16)__U); 156 } 157 158 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtbiasph_hf8(__m128i __A, 159 __m128h __B) { 160 return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask( 161 (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1); 162 } 163 164 static __inline__ __m128i __DEFAULT_FN_ATTRS128 165 _mm_mask_cvtbiasph_hf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { 166 return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask( 167 (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); 168 } 169 170 static __inline__ __m128i __DEFAULT_FN_ATTRS128 171 _mm_maskz_cvtbiasph_hf8(__mmask8 __U, __m128i __A, __m128h __B) { 172 return (__m128i)__builtin_ia32_vcvtbiasph2hf8_128_mask( 173 (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), 174 (__mmask8)__U); 175 } 176 177 static __inline__ __m128i __DEFAULT_FN_ATTRS256 178 _mm256_cvtbiasph_hf8(__m256i __A, __m256h __B) { 179 return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask( 180 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(), 181 (__mmask16)-1); 182 } 183 184 static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiasph_hf8( 185 __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { 186 return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask( 187 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U); 188 } 189 190 static __inline__ __m128i __DEFAULT_FN_ATTRS256 191 _mm256_maskz_cvtbiasph_hf8(__mmask16 __U, __m256i __A, __m256h __B) { 192 return (__m128i)__builtin_ia32_vcvtbiasph2hf8_256_mask( 193 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), 194 (__mmask16)__U); 195 } 196 197 static __inline__ __m128i __DEFAULT_FN_ATTRS128 198 _mm_cvtbiassph_hf8(__m128i __A, __m128h __B) { 199 return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask( 200 (__v16qi)__A, (__v8hf)__B, (__v16qi)_mm_undefined_si128(), (__mmask8)-1); 201 } 202 203 static __inline__ __m128i __DEFAULT_FN_ATTRS128 204 _mm_mask_cvtbiassph_hf8(__m128i __W, __mmask8 __U, __m128i __A, __m128h __B) { 205 return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask( 206 (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)__W, (__mmask8)__U); 207 } 208 209 static __inline__ __m128i __DEFAULT_FN_ATTRS128 210 _mm_maskz_cvtbiassph_hf8(__mmask8 __U, __m128i __A, __m128h __B) { 211 return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_128_mask( 212 (__v16qi)__A, (__v8hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), 213 (__mmask8)__U); 214 } 215 216 static __inline__ __m128i __DEFAULT_FN_ATTRS256 217 _mm256_cvtbiassph_hf8(__m256i __A, __m256h __B) { 218 return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask( 219 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_undefined_si128(), 220 (__mmask16)-1); 221 } 222 223 static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtbiassph_hf8( 224 __m128i __W, __mmask16 __U, __m256i __A, __m256h __B) { 225 return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask( 226 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)__W, (__mmask16)__U); 227 } 228 229 static __inline__ __m128i __DEFAULT_FN_ATTRS256 230 _mm256_maskz_cvtbiassph_hf8(__mmask16 __U, __m256i __A, __m256h __B) { 231 return (__m128i)__builtin_ia32_vcvtbiasph2hf8s_256_mask( 232 (__v32qi)__A, (__v16hf)__B, (__v16qi)(__m128i)_mm_setzero_si128(), 233 (__mmask16)__U); 234 } 235 236 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvt2ph_bf8(__m128h __A, 237 __m128h __B) { 238 return (__m128i)__builtin_ia32_vcvt2ph2bf8_128((__v8hf)(__A), (__v8hf)(__B)); 239 } 240 241 static __inline__ __m128i __DEFAULT_FN_ATTRS128 242 _mm_mask_cvt2ph_bf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { 243 return (__m128i)__builtin_ia32_selectb_128( 244 (__mmask16)__U, (__v16qi)_mm_cvt2ph_bf8(__A, __B), (__v16qi)__W); 245 } 246 247 static __inline__ __m128i __DEFAULT_FN_ATTRS128 248 _mm_maskz_cvt2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) { 249 return (__m128i)__builtin_ia32_selectb_128( 250 (__mmask16)__U, (__v16qi)_mm_cvt2ph_bf8(__A, __B), 251 (__v16qi)(__m128i)_mm_setzero_si128()); 252 } 253 254 static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvt2ph_bf8(__m256h __A, 255 __m256h __B) { 256 return (__m256i)__builtin_ia32_vcvt2ph2bf8_256((__v16hf)(__A), 257 (__v16hf)(__B)); 258 } 259 260 static __inline__ __m256i __DEFAULT_FN_ATTRS256 261 _mm256_mask_cvt2ph_bf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { 262 return (__m256i)__builtin_ia32_selectb_256( 263 (__mmask16)__U, (__v32qi)_mm256_cvt2ph_bf8(__A, __B), (__v32qi)__W); 264 } 265 266 static __inline__ __m256i __DEFAULT_FN_ATTRS256 267 _mm256_maskz_cvt2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) { 268 return (__m256i)__builtin_ia32_selectb_256( 269 (__mmask16)__U, (__v32qi)_mm256_cvt2ph_bf8(__A, __B), 270 (__v32qi)(__m256i)_mm256_setzero_si256()); 271 } 272 273 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvts2ph_bf8(__m128h __A, 274 __m128h __B) { 275 return (__m128i)__builtin_ia32_vcvt2ph2bf8s_128((__v8hf)(__A), (__v8hf)(__B)); 276 } 277 278 static __inline__ __m128i __DEFAULT_FN_ATTRS128 279 _mm_mask_cvts2ph_bf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { 280 return (__m128i)__builtin_ia32_selectb_128( 281 (__mmask16)__U, (__v16qi)_mm_cvts2ph_bf8(__A, __B), (__v16qi)__W); 282 } 283 284 static __inline__ __m128i __DEFAULT_FN_ATTRS128 285 _mm_maskz_cvts2ph_bf8(__mmask16 __U, __m128h __A, __m128h __B) { 286 return (__m128i)__builtin_ia32_selectb_128( 287 (__mmask16)__U, (__v16qi)_mm_cvts2ph_bf8(__A, __B), 288 (__v16qi)(__m128i)_mm_setzero_si128()); 289 } 290 291 static __inline__ __m256i __DEFAULT_FN_ATTRS256 292 _mm256_cvts2ph_bf8(__m256h __A, __m256h __B) { 293 return (__m256i)__builtin_ia32_vcvt2ph2bf8s_256((__v16hf)(__A), 294 (__v16hf)(__B)); 295 } 296 297 static __inline__ __m256i __DEFAULT_FN_ATTRS256 298 _mm256_mask_cvts2ph_bf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { 299 return (__m256i)__builtin_ia32_selectb_256( 300 (__mmask16)__U, (__v32qi)_mm256_cvts2ph_bf8(__A, __B), (__v32qi)__W); 301 } 302 303 static __inline__ __m256i __DEFAULT_FN_ATTRS256 304 _mm256_maskz_cvts2ph_bf8(__mmask32 __U, __m256h __A, __m256h __B) { 305 return (__m256i)__builtin_ia32_selectb_256( 306 (__mmask16)__U, (__v32qi)_mm256_cvts2ph_bf8(__A, __B), 307 (__v32qi)(__m256i)_mm256_setzero_si256()); 308 } 309 310 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvt2ph_hf8(__m128h __A, 311 __m128h __B) { 312 return (__m128i)__builtin_ia32_vcvt2ph2hf8_128((__v8hf)(__A), (__v8hf)(__B)); 313 } 314 315 static __inline__ __m128i __DEFAULT_FN_ATTRS128 316 _mm_mask_cvt2ph_hf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { 317 return (__m128i)__builtin_ia32_selectb_128( 318 (__mmask16)__U, (__v16qi)_mm_cvt2ph_hf8(__A, __B), (__v16qi)__W); 319 } 320 321 static __inline__ __m128i __DEFAULT_FN_ATTRS128 322 _mm_maskz_cvt2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) { 323 return (__m128i)__builtin_ia32_selectb_128( 324 (__mmask16)__U, (__v16qi)_mm_cvt2ph_hf8(__A, __B), 325 (__v16qi)(__m128i)_mm_setzero_si128()); 326 } 327 328 static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvt2ph_hf8(__m256h __A, 329 __m256h __B) { 330 return (__m256i)__builtin_ia32_vcvt2ph2hf8_256((__v16hf)(__A), 331 (__v16hf)(__B)); 332 } 333 334 static __inline__ __m256i __DEFAULT_FN_ATTRS256 335 _mm256_mask_cvt2ph_hf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { 336 return (__m256i)__builtin_ia32_selectb_256( 337 (__mmask16)__U, (__v32qi)_mm256_cvt2ph_hf8(__A, __B), (__v32qi)__W); 338 } 339 340 static __inline__ __m256i __DEFAULT_FN_ATTRS256 341 _mm256_maskz_cvt2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) { 342 return (__m256i)__builtin_ia32_selectb_256( 343 (__mmask16)__U, (__v32qi)_mm256_cvt2ph_hf8(__A, __B), 344 (__v32qi)(__m256i)_mm256_setzero_si256()); 345 } 346 347 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvts2ph_hf8(__m128h __A, 348 __m128h __B) { 349 return (__m128i)__builtin_ia32_vcvt2ph2hf8s_128((__v8hf)(__A), (__v8hf)(__B)); 350 } 351 352 static __inline__ __m128i __DEFAULT_FN_ATTRS128 353 _mm_mask_cvts2ph_hf8(__m128i __W, __mmask16 __U, __m128h __A, __m128h __B) { 354 return (__m128i)__builtin_ia32_selectb_128( 355 (__mmask16)__U, (__v16qi)_mm_cvts2ph_hf8(__A, __B), (__v16qi)__W); 356 } 357 358 static __inline__ __m128i __DEFAULT_FN_ATTRS128 359 _mm_maskz_cvts2ph_hf8(__mmask16 __U, __m128h __A, __m128h __B) { 360 return (__m128i)__builtin_ia32_selectb_128( 361 (__mmask16)__U, (__v16qi)_mm_cvts2ph_hf8(__A, __B), 362 (__v16qi)(__m128i)_mm_setzero_si128()); 363 } 364 365 static __inline__ __m256i __DEFAULT_FN_ATTRS256 366 _mm256_cvts2ph_hf8(__m256h __A, __m256h __B) { 367 return (__m256i)__builtin_ia32_vcvt2ph2hf8s_256((__v16hf)(__A), 368 (__v16hf)(__B)); 369 } 370 371 static __inline__ __m256i __DEFAULT_FN_ATTRS256 372 _mm256_mask_cvts2ph_hf8(__m256i __W, __mmask32 __U, __m256h __A, __m256h __B) { 373 return (__m256i)__builtin_ia32_selectb_256( 374 (__mmask16)__U, (__v32qi)_mm256_cvts2ph_hf8(__A, __B), (__v32qi)__W); 375 } 376 377 static __inline__ __m256i __DEFAULT_FN_ATTRS256 378 _mm256_maskz_cvts2ph_hf8(__mmask32 __U, __m256h __A, __m256h __B) { 379 return (__m256i)__builtin_ia32_selectb_256( 380 (__mmask16)__U, (__v32qi)_mm256_cvts2ph_hf8(__A, __B), 381 (__v32qi)(__m256i)_mm256_setzero_si256()); 382 } 383 384 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvthf8(__m128i __A) { 385 return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask( 386 (__v16qi)__A, (__v8hf)(__m128h)_mm_undefined_ph(), (__mmask8)-1); 387 } 388 389 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_mask_cvthf8(__m128h __W, 390 __mmask8 __U, 391 __m128i __A) { 392 return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask( 393 (__v16qi)__A, (__v8hf)(__m128h)__W, (__mmask8)__U); 394 } 395 396 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_maskz_cvthf8(__mmask8 __U, 397 __m128i __A) { 398 return (__m128h)__builtin_ia32_vcvthf8_2ph128_mask( 399 (__v16qi)__A, (__v8hf)(__m128h)_mm_setzero_ph(), (__mmask8)__U); 400 } 401 402 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvthf8(__m128i __A) { 403 return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask( 404 (__v16qi)__A, (__v16hf)(__m256h)_mm256_undefined_ph(), (__mmask16)-1); 405 } 406 407 static __inline__ __m256h __DEFAULT_FN_ATTRS256 408 _mm256_mask_cvthf8(__m256h __W, __mmask16 __U, __m128i __A) { 409 return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask( 410 (__v16qi)__A, (__v16hf)(__m256h)__W, (__mmask16)__U); 411 } 412 413 static __inline__ __m256h __DEFAULT_FN_ATTRS256 414 _mm256_maskz_cvthf8(__mmask16 __U, __m128i __A) { 415 return (__m256h)__builtin_ia32_vcvthf8_2ph256_mask( 416 (__v16qi)__A, (__v16hf)(__m256h)_mm256_setzero_ph(), (__mmask16)__U); 417 } 418 419 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_bf8(__m128h __A) { 420 return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask( 421 (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1); 422 } 423 424 static __inline__ __m128i __DEFAULT_FN_ATTRS128 425 _mm_mask_cvtph_bf8(__m128i __W, __mmask8 __U, __m128h __A) { 426 return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask( 427 (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U); 428 } 429 430 static __inline__ __m128i __DEFAULT_FN_ATTRS128 431 _mm_maskz_cvtph_bf8(__mmask8 __U, __m128h __A) { 432 return (__m128i)__builtin_ia32_vcvtph2bf8_128_mask( 433 (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); 434 } 435 436 static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtph_bf8(__m256h __A) { 437 return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask( 438 (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); 439 } 440 441 static __inline__ __m128i __DEFAULT_FN_ATTRS256 442 _mm256_mask_cvtph_bf8(__m128i __W, __mmask16 __U, __m256h __A) { 443 return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask( 444 (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U); 445 } 446 447 static __inline__ __m128i __DEFAULT_FN_ATTRS256 448 _mm256_maskz_cvtph_bf8(__mmask16 __U, __m256h __A) { 449 return (__m128i)__builtin_ia32_vcvtph2bf8_256_mask( 450 (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); 451 } 452 453 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsph_bf8(__m128h __A) { 454 return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask( 455 (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1); 456 } 457 458 static __inline__ __m128i __DEFAULT_FN_ATTRS128 459 _mm_mask_cvtsph_bf8(__m128i __W, __mmask8 __U, __m128h __A) { 460 return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask( 461 (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U); 462 } 463 464 static __inline__ __m128i __DEFAULT_FN_ATTRS128 465 _mm_maskz_cvtsph_bf8(__mmask8 __U, __m128h __A) { 466 return (__m128i)__builtin_ia32_vcvtph2bf8s_128_mask( 467 (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); 468 } 469 470 static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsph_bf8(__m256h __A) { 471 return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask( 472 (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); 473 } 474 475 static __inline__ __m128i __DEFAULT_FN_ATTRS256 476 _mm256_mask_cvtsph_bf8(__m128i __W, __mmask16 __U, __m256h __A) { 477 return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask( 478 (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U); 479 } 480 481 static __inline__ __m128i __DEFAULT_FN_ATTRS256 482 _mm256_maskz_cvtsph_bf8(__mmask16 __U, __m256h __A) { 483 return (__m128i)__builtin_ia32_vcvtph2bf8s_256_mask( 484 (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); 485 } 486 487 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtph_hf8(__m128h __A) { 488 return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask( 489 (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1); 490 } 491 492 static __inline__ __m128i __DEFAULT_FN_ATTRS128 493 _mm_mask_cvtph_hf8(__m128i __W, __mmask8 __U, __m128h __A) { 494 return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask( 495 (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U); 496 } 497 498 static __inline__ __m128i __DEFAULT_FN_ATTRS128 499 _mm_maskz_cvtph_hf8(__mmask8 __U, __m128h __A) { 500 return (__m128i)__builtin_ia32_vcvtph2hf8_128_mask( 501 (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); 502 } 503 504 static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtph_hf8(__m256h __A) { 505 return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask( 506 (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); 507 } 508 509 static __inline__ __m128i __DEFAULT_FN_ATTRS256 510 _mm256_mask_cvtph_hf8(__m128i __W, __mmask16 __U, __m256h __A) { 511 return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask( 512 (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U); 513 } 514 515 static __inline__ __m128i __DEFAULT_FN_ATTRS256 516 _mm256_maskz_cvtph_hf8(__mmask16 __U, __m256h __A) { 517 return (__m128i)__builtin_ia32_vcvtph2hf8_256_mask( 518 (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); 519 } 520 521 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsph_hf8(__m128h __A) { 522 return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask( 523 (__v8hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask8)-1); 524 } 525 526 static __inline__ __m128i __DEFAULT_FN_ATTRS128 527 _mm_mask_cvtsph_hf8(__m128i __W, __mmask8 __U, __m128h __A) { 528 return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask( 529 (__v8hf)__A, (__v16qi)(__m128i)__W, (__mmask8)__U); 530 } 531 532 static __inline__ __m128i __DEFAULT_FN_ATTRS128 533 _mm_maskz_cvtsph_hf8(__mmask8 __U, __m128h __A) { 534 return (__m128i)__builtin_ia32_vcvtph2hf8s_128_mask( 535 (__v8hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask8)__U); 536 } 537 538 static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsph_hf8(__m256h __A) { 539 return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask( 540 (__v16hf)__A, (__v16qi)(__m128i)_mm_undefined_si128(), (__mmask16)-1); 541 } 542 543 static __inline__ __m128i __DEFAULT_FN_ATTRS256 544 _mm256_mask_cvtsph_hf8(__m128i __W, __mmask16 __U, __m256h __A) { 545 return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask( 546 (__v16hf)__A, (__v16qi)(__m128i)__W, (__mmask16)__U); 547 } 548 549 static __inline__ __m128i __DEFAULT_FN_ATTRS256 550 _mm256_maskz_cvtsph_hf8(__mmask16 __U, __m256h __A) { 551 return (__m128i)__builtin_ia32_vcvtph2hf8s_256_mask( 552 (__v16hf)__A, (__v16qi)(__m128i)_mm_setzero_si128(), (__mmask16)__U); 553 } 554 555 static __inline__ __m128h __DEFAULT_FN_ATTRS128 _mm_cvtbf8_ph(__m128i __A) { 556 return _mm_castsi128_ph(_mm_slli_epi16(_mm_cvtepi8_epi16(__A), 8)); 557 } 558 559 static __inline__ __m128h __DEFAULT_FN_ATTRS128 560 _mm_mask_cvtbf8_ph(__m128h __S, __mmask8 __U, __m128i __A) { 561 return _mm_castsi128_ph( 562 _mm_mask_slli_epi16((__m128i)__S, __U, _mm_cvtepi8_epi16(__A), 8)); 563 } 564 565 static __inline__ __m128h __DEFAULT_FN_ATTRS128 566 _mm_maskz_cvtbf8_ph(__mmask8 __U, __m128i __A) { 567 return _mm_castsi128_ph(_mm_slli_epi16(_mm_maskz_cvtepi8_epi16(__U, __A), 8)); 568 } 569 570 static __inline__ __m256h __DEFAULT_FN_ATTRS256 _mm256_cvtbf8_ph(__m128i __A) { 571 return _mm256_castsi256_ph(_mm256_slli_epi16(_mm256_cvtepi8_epi16(__A), 8)); 572 } 573 574 static __inline__ __m256h __DEFAULT_FN_ATTRS256 575 _mm256_mask_cvtbf8_ph(__m256h __S, __mmask16 __U, __m128i __A) { 576 return _mm256_castsi256_ph( 577 _mm256_mask_slli_epi16((__m256i)__S, __U, _mm256_cvtepi8_epi16(__A), 8)); 578 } 579 580 static __inline__ __m256h __DEFAULT_FN_ATTRS256 581 _mm256_maskz_cvtbf8_ph(__mmask16 __U, __m128i __A) { 582 return _mm256_castsi256_ph( 583 _mm256_slli_epi16(_mm256_maskz_cvtepi8_epi16(__U, __A), 8)); 584 } 585 586 #undef __DEFAULT_FN_ATTRS128 587 #undef __DEFAULT_FN_ATTRS256 588 589 #endif // __AVX10_2CONVERTINTRIN_H 590 #endif // __SSE2__