avx10_2_512bf16intrin.h (23418B) - Raw
1 /*===----------- avx10_2_512bf16intrin.h - AVX10-BF16 intrinsics ---------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 #ifndef __IMMINTRIN_H 10 #error \ 11 "Never use <avx10_2_512bf16intrin.h> directly; include <immintrin.h> instead." 12 #endif 13 14 #ifdef __SSE2__ 15 16 #ifndef __AVX10_2_512BF16INTRIN_H 17 #define __AVX10_2_512BF16INTRIN_H 18 19 /* Define the default attributes for the functions in this file. */ 20 typedef __bf16 __m512bh_u __attribute__((__vector_size__(64), __aligned__(1))); 21 22 /* Define the default attributes for the functions in this file. */ 23 #define __DEFAULT_FN_ATTRS512 \ 24 __attribute__((__always_inline__, __nodebug__, __target__("avx10.2-512"), \ 25 __min_vector_width__(512))) 26 27 static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_setzero_pbh(void) { 28 return __builtin_bit_cast(__m512bh, _mm512_setzero_ps()); 29 } 30 31 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_undefined_pbh(void) { 32 return (__m512bh)__builtin_ia32_undef512(); 33 } 34 35 static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_set1_pbh(__bf16 bf) { 36 return (__m512bh)(__v32bf){bf, bf, bf, bf, bf, bf, bf, bf, bf, bf, bf, 37 bf, bf, bf, bf, bf, bf, bf, bf, bf, bf, bf, 38 bf, bf, bf, bf, bf, bf, bf, bf, bf, bf}; 39 } 40 41 static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_set_pbh( 42 __bf16 bf1, __bf16 bf2, __bf16 bf3, __bf16 bf4, __bf16 bf5, __bf16 bf6, 43 __bf16 bf7, __bf16 bf8, __bf16 bf9, __bf16 bf10, __bf16 bf11, __bf16 bf12, 44 __bf16 bf13, __bf16 bf14, __bf16 bf15, __bf16 bf16, __bf16 bf17, 45 __bf16 bf18, __bf16 bf19, __bf16 bf20, __bf16 bf21, __bf16 bf22, 46 __bf16 bf23, __bf16 bf24, __bf16 bf25, __bf16 bf26, __bf16 bf27, 47 __bf16 bf28, __bf16 bf29, __bf16 bf30, __bf16 bf31, __bf16 bf32) { 48 return (__m512bh)(__v32bf){bf32, bf31, bf30, bf29, bf28, bf27, bf26, bf25, 49 bf24, bf23, bf22, bf21, bf20, bf19, bf18, bf17, 50 bf16, bf15, bf14, bf13, bf12, bf11, bf10, bf9, 51 bf8, bf7, bf6, bf5, bf4, bf3, bf2, bf1}; 52 } 53 54 #define _mm512_setr_pbh(bf1, bf2, bf3, bf4, bf5, bf6, bf7, bf8, bf9, bf10, \ 55 bf11, bf12, bf13, bf14, bf15, bf16, bf17, bf18, bf19, \ 56 bf20, bf21, bf22, bf23, bf24, bf25, bf26, bf27, bf28, \ 57 bf29, bf30, bf31, bf32) \ 58 _mm512_set_pbh((bf32), (bf31), (bf30), (bf29), (bf28), (bf27), (bf26), \ 59 (bf25), (bf24), (bf23), (bf22), (bf21), (bf20), (bf19), \ 60 (bf18), (bf17), (bf16), (bf15), (bf14), (bf13), (bf12), \ 61 (bf11), (bf10), (bf9), (bf8), (bf7), (bf6), (bf5), (bf4), \ 62 (bf3), (bf2), (bf1)) 63 64 static __inline__ __m512 __DEFAULT_FN_ATTRS512 65 _mm512_castbf16_ps(__m512bh __a) { 66 return (__m512)__a; 67 } 68 69 static __inline__ __m512d __DEFAULT_FN_ATTRS512 70 _mm512_castbf16_pd(__m512bh __a) { 71 return (__m512d)__a; 72 } 73 74 static __inline__ __m512i __DEFAULT_FN_ATTRS512 75 _mm512_castbf16_si512(__m512bh __a) { 76 return (__m512i)__a; 77 } 78 79 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_castps_pbh(__m512 __a) { 80 return (__m512bh)__a; 81 } 82 83 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 84 _mm512_castpd_pbh(__m512d __a) { 85 return (__m512bh)__a; 86 } 87 88 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 89 _mm512_castsi512_pbh(__m512i __a) { 90 return (__m512bh)__a; 91 } 92 93 static __inline__ __m128bh __DEFAULT_FN_ATTRS512 94 _mm512_castbf16512_pbh128(__m512bh __a) { 95 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7); 96 } 97 98 static __inline__ __m256bh __DEFAULT_FN_ATTRS512 99 _mm512_castbf16512_pbh256(__m512bh __a) { 100 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 101 12, 13, 14, 15); 102 } 103 104 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 105 _mm512_castbf16128_pbh512(__m128bh __a) { 106 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1, 107 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 108 -1, -1, -1, -1, -1, -1, -1, -1, -1); 109 } 110 111 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 112 _mm512_castbf16256_pbh512(__m256bh __a) { 113 return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 114 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, 115 -1, -1, -1, -1, -1, -1, -1, -1); 116 } 117 118 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 119 _mm512_zextbf16128_pbh512(__m128bh __a) { 120 return __builtin_shufflevector( 121 __a, (__v8bf)_mm_setzero_pbh(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 122 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15, 8, 9, 10, 11, 12, 13, 14, 15); 123 } 124 125 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 126 _mm512_zextbf16256_pbh512(__m256bh __a) { 127 return __builtin_shufflevector(__a, (__v16bf)_mm256_setzero_pbh(), 0, 1, 2, 3, 128 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 129 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 130 29, 30, 31); 131 } 132 133 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_abs_pbh(__m512bh __A) { 134 return (__m512bh)_mm512_and_epi32(_mm512_set1_epi32(0x7FFF7FFF), 135 (__m512i)__A); 136 } 137 138 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 139 _mm512_load_pbh(void const *__p) { 140 return *(const __m512bh *)__p; 141 } 142 143 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 144 _mm512_loadu_pbh(void const *__p) { 145 struct __loadu_pbh { 146 __m512bh_u __v; 147 } __attribute__((__packed__, __may_alias__)); 148 return ((const struct __loadu_pbh *)__p)->__v; 149 } 150 151 static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_store_pbh(void *__P, 152 __m512bh __A) { 153 *(__m512bh *)__P = __A; 154 } 155 156 static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_storeu_pbh(void *__P, 157 __m512bh __A) { 158 struct __storeu_pbh { 159 __m512bh_u __v; 160 } __attribute__((__packed__, __may_alias__)); 161 ((struct __storeu_pbh *)__P)->__v = __A; 162 } 163 164 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 165 _mm512_mask_blend_pbh(__mmask32 __U, __m512bh __A, __m512bh __W) { 166 return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, (__v32bf)__W, 167 (__v32bf)__A); 168 } 169 170 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 171 _mm512_permutex2var_pbh(__m512bh __A, __m512i __I, __m512bh __B) { 172 return (__m512bh)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I, 173 (__v32hi)__B); 174 } 175 176 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 177 _mm512_permutexvar_pbh(__m512i __A, __m512bh __B) { 178 return (__m512bh)__builtin_ia32_permvarhi512((__v32hi)__B, (__v32hi)__A); 179 } 180 181 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_add_pbh(__m512bh __A, 182 __m512bh __B) { 183 return (__m512bh)((__v32bf)__A + (__v32bf)__B); 184 } 185 186 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 187 _mm512_mask_add_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { 188 return (__m512bh)__builtin_ia32_selectpbf_512( 189 (__mmask32)__U, (__v32bf)_mm512_add_pbh(__A, __B), (__v32bf)__W); 190 } 191 192 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 193 _mm512_maskz_add_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { 194 return (__m512bh)__builtin_ia32_selectpbf_512( 195 (__mmask32)__U, (__v32bf)_mm512_add_pbh(__A, __B), 196 (__v32bf)_mm512_setzero_pbh()); 197 } 198 199 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_sub_pbh(__m512bh __A, 200 __m512bh __B) { 201 return (__m512bh)((__v32bf)__A - (__v32bf)__B); 202 } 203 204 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 205 _mm512_mask_sub_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { 206 return (__m512bh)__builtin_ia32_selectpbf_512( 207 (__mmask32)__U, (__v32bf)_mm512_sub_pbh(__A, __B), (__v32bf)__W); 208 } 209 210 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 211 _mm512_maskz_sub_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { 212 return (__m512bh)__builtin_ia32_selectpbf_512( 213 (__mmask32)__U, (__v32bf)_mm512_sub_pbh(__A, __B), 214 (__v32bf)_mm512_setzero_pbh()); 215 } 216 217 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mul_pbh(__m512bh __A, 218 __m512bh __B) { 219 return (__m512bh)((__v32bf)__A * (__v32bf)__B); 220 } 221 222 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 223 _mm512_mask_mul_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { 224 return (__m512bh)__builtin_ia32_selectpbf_512( 225 (__mmask32)__U, (__v32bf)_mm512_mul_pbh(__A, __B), (__v32bf)__W); 226 } 227 228 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 229 _mm512_maskz_mul_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { 230 return (__m512bh)__builtin_ia32_selectpbf_512( 231 (__mmask32)__U, (__v32bf)_mm512_mul_pbh(__A, __B), 232 (__v32bf)_mm512_setzero_pbh()); 233 } 234 235 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_div_pbh(__m512bh __A, 236 __m512bh __B) { 237 return (__m512bh)((__v32bf)__A / (__v32bf)__B); 238 } 239 240 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 241 _mm512_mask_div_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { 242 return (__m512bh)__builtin_ia32_selectpbf_512( 243 (__mmask32)__U, (__v32bf)_mm512_div_pbh(__A, __B), (__v32bf)__W); 244 } 245 246 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 247 _mm512_maskz_div_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { 248 return (__m512bh)__builtin_ia32_selectpbf_512( 249 (__mmask32)__U, (__v32bf)_mm512_div_pbh(__A, __B), 250 (__v32bf)_mm512_setzero_pbh()); 251 } 252 253 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_max_pbh(__m512bh __A, 254 __m512bh __B) { 255 return (__m512bh)__builtin_ia32_vmaxbf16512((__v32bf)__A, (__v32bf)__B); 256 } 257 258 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 259 _mm512_mask_max_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { 260 return (__m512bh)__builtin_ia32_selectpbf_512( 261 (__mmask32)__U, (__v32bf)_mm512_max_pbh(__A, __B), (__v32bf)__W); 262 } 263 264 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 265 _mm512_maskz_max_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { 266 return (__m512bh)__builtin_ia32_selectpbf_512( 267 (__mmask32)__U, (__v32bf)_mm512_max_pbh(__A, __B), 268 (__v32bf)_mm512_setzero_pbh()); 269 } 270 271 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_min_pbh(__m512bh __A, 272 __m512bh __B) { 273 return (__m512bh)__builtin_ia32_vminbf16512((__v32bf)__A, (__v32bf)__B); 274 } 275 276 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 277 _mm512_mask_min_pbh(__m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { 278 return (__m512bh)__builtin_ia32_selectpbf_512( 279 (__mmask32)__U, (__v32bf)_mm512_min_pbh(__A, __B), (__v32bf)__W); 280 } 281 282 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 283 _mm512_maskz_min_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { 284 return (__m512bh)__builtin_ia32_selectpbf_512( 285 (__mmask32)__U, (__v32bf)_mm512_min_pbh(__A, __B), 286 (__v32bf)_mm512_setzero_pbh()); 287 } 288 289 #define _mm512_cmp_pbh_mask(__A, __B, __P) \ 290 ((__mmask32)__builtin_ia32_vcmpbf16512_mask((__v32bf)(__m512bh)(__A), \ 291 (__v32bf)(__m512bh)(__B), \ 292 (int)(__P), (__mmask32) - 1)) 293 294 #define _mm512_mask_cmp_pbh_mask(__U, __A, __B, __P) \ 295 ((__mmask32)__builtin_ia32_vcmpbf16512_mask((__v32bf)(__m512bh)(__A), \ 296 (__v32bf)(__m512bh)(__B), \ 297 (int)(__P), (__mmask32)(__U))) 298 299 #define _mm512_mask_fpclass_pbh_mask(__U, __A, imm) \ 300 ((__mmask32)__builtin_ia32_vfpclassbf16512_mask( \ 301 (__v32bf)(__m512bh)(__A), (int)(imm), (__mmask32)(__U))) 302 303 #define _mm512_fpclass_pbh_mask(__A, imm) \ 304 ((__mmask32)__builtin_ia32_vfpclassbf16512_mask( \ 305 (__v32bf)(__m512bh)(__A), (int)(imm), (__mmask32) - 1)) 306 307 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 308 _mm512_scalef_pbh(__m512bh __A, __m512bh __B) { 309 return (__m512bh)__builtin_ia32_vscalefbf16512_mask( 310 (__v32bf)__A, (__v32bf)__B, (__v32bf)_mm512_undefined_pbh(), 311 (__mmask32)-1); 312 } 313 314 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pbh( 315 __m512bh __W, __mmask32 __U, __m512bh __A, __m512bh __B) { 316 return (__m512bh)__builtin_ia32_vscalefbf16512_mask( 317 (__v32bf)__A, (__v32bf)__B, (__v32bf)__W, (__mmask32)__U); 318 } 319 320 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 321 _mm512_maskz_scalef_pbh(__mmask32 __U, __m512bh __A, __m512bh __B) { 322 return (__m512bh)__builtin_ia32_vscalefbf16512_mask( 323 (__v32bf)__A, (__v32bf)__B, (__v32bf)_mm512_setzero_pbh(), 324 (__mmask32)__U); 325 } 326 327 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_rcp_pbh(__m512bh __A) { 328 return (__m512bh)__builtin_ia32_vrcpbf16512_mask( 329 (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1); 330 } 331 332 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 333 _mm512_mask_rcp_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) { 334 return (__m512bh)__builtin_ia32_vrcpbf16512_mask((__v32bf)__A, (__v32bf)__W, 335 (__mmask32)__U); 336 } 337 338 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 339 _mm512_maskz_rcp_pbh(__mmask32 __U, __m512bh __A) { 340 return (__m512bh)__builtin_ia32_vrcpbf16512_mask( 341 (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U); 342 } 343 344 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 345 _mm512_getexp_pbh(__m512bh __A) { 346 return (__m512bh)__builtin_ia32_vgetexpbf16512_mask( 347 (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1); 348 } 349 350 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 351 _mm512_mask_getexp_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) { 352 return (__m512bh)__builtin_ia32_vgetexpbf16512_mask( 353 (__v32bf)__A, (__v32bf)__W, (__mmask32)__U); 354 } 355 356 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 357 _mm512_maskz_getexp_pbh(__mmask32 __U, __m512bh __A) { 358 return (__m512bh)__builtin_ia32_vgetexpbf16512_mask( 359 (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U); 360 } 361 362 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 363 _mm512_rsqrt_pbh(__m512bh __A) { 364 return (__m512bh)__builtin_ia32_vrsqrtbf16512_mask( 365 (__v32bf)__A, (__v32bf)_mm512_undefined_pbh(), (__mmask32)-1); 366 } 367 368 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 369 _mm512_mask_rsqrt_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) { 370 return (__m512bh)__builtin_ia32_vrsqrtbf16512_mask((__v32bf)__A, (__v32bf)__W, 371 (__mmask32)__U); 372 } 373 374 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 375 _mm512_maskz_rsqrt_pbh(__mmask32 __U, __m512bh __A) { 376 return (__m512bh)__builtin_ia32_vrsqrtbf16512_mask( 377 (__v32bf)__A, (__v32bf)_mm512_setzero_pbh(), (__mmask32)__U); 378 } 379 380 #define _mm512_reduce_pbh(__A, imm) \ 381 ((__m512bh)__builtin_ia32_vreducebf16512_mask( \ 382 (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_undefined_pbh(), \ 383 (__mmask32) - 1)) 384 385 #define _mm512_mask_reduce_pbh(__W, __U, __A, imm) \ 386 ((__m512bh)__builtin_ia32_vreducebf16512_mask( \ 387 (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)(__m512bh)(__W), \ 388 (__mmask32)(__U))) 389 390 #define _mm512_maskz_reduce_pbh(__U, __A, imm) \ 391 ((__m512bh)__builtin_ia32_vreducebf16512_mask( \ 392 (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \ 393 (__mmask32)(__U))) 394 395 #define _mm512_roundscale_pbh(__A, imm) \ 396 ((__m512bh)__builtin_ia32_vrndscalebf16_mask( \ 397 (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \ 398 (__mmask32) - 1)) 399 400 #define _mm512_mask_roundscale_pbh(__W, __U, __A, imm) \ 401 ((__m512bh)__builtin_ia32_vrndscalebf16_mask( \ 402 (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)(__m512bh)(__W), \ 403 (__mmask32)(__U))) 404 405 #define _mm512_maskz_roundscale_pbh(__U, __A, imm) \ 406 ((__m512bh)__builtin_ia32_vrndscalebf16_mask( \ 407 (__v32bf)(__m512bh)(__A), (int)(imm), (__v32bf)_mm512_setzero_pbh(), \ 408 (__mmask32)(__U))) 409 410 #define _mm512_getmant_pbh(__A, __B, __C) \ 411 ((__m512bh)__builtin_ia32_vgetmantbf16512_mask( \ 412 (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)), \ 413 (__v32bf)_mm512_undefined_pbh(), (__mmask32) - 1)) 414 415 #define _mm512_mask_getmant_pbh(__W, __U, __A, __B, __C) \ 416 ((__m512bh)__builtin_ia32_vgetmantbf16512_mask( \ 417 (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)), \ 418 (__v32bf)(__m512bh)(__W), (__mmask32)(__U))) 419 420 #define _mm512_maskz_getmant_pbh(__U, __A, __B, __C) \ 421 ((__m512bh)__builtin_ia32_vgetmantbf16512_mask( \ 422 (__v32bf)(__m512bh)(__A), (int)(((__C) << 2) | (__B)), \ 423 (__v32bf)_mm512_setzero_pbh(), (__mmask32)(__U))) 424 425 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_sqrt_pbh(__m512bh __A) { 426 return (__m512bh)__builtin_ia32_vsqrtbf16512((__v32bf)__A); 427 } 428 429 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 430 _mm512_mask_sqrt_pbh(__m512bh __W, __mmask32 __U, __m512bh __A) { 431 return (__m512bh)__builtin_ia32_selectpbf_512( 432 (__mmask32)__U, (__v32bf)_mm512_sqrt_pbh(__A), (__v32bf)__W); 433 } 434 435 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 436 _mm512_maskz_sqrt_pbh(__mmask32 __U, __m512bh __A) { 437 return (__m512bh)__builtin_ia32_selectpbf_512((__mmask32)__U, 438 (__v32bf)_mm512_sqrt_pbh(__A), 439 (__v32bf)_mm512_setzero_pbh()); 440 } 441 442 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 443 _mm512_fmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { 444 return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, (__v32bf)__B, 445 (__v32bf)__C); 446 } 447 448 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 449 _mm512_mask_fmadd_pbh(__m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) { 450 return (__m512bh)__builtin_ia32_selectpbf_512( 451 (__mmask32)__U, 452 _mm512_fmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)__A); 453 } 454 455 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_pbh( 456 __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) { 457 return (__m512bh)__builtin_ia32_selectpbf_512( 458 (__mmask32)__U, 459 _mm512_fmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)__C); 460 } 461 462 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pbh( 463 __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) { 464 return (__m512bh)__builtin_ia32_selectpbf_512( 465 (__mmask32)__U, 466 _mm512_fmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), 467 (__v32bf)_mm512_setzero_pbh()); 468 } 469 470 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 471 _mm512_fmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { 472 return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, (__v32bf)__B, 473 -(__v32bf)__C); 474 } 475 476 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 477 _mm512_mask_fmsub_pbh(__m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) { 478 return (__m512bh)__builtin_ia32_selectpbf_512( 479 (__mmask32)__U, 480 _mm512_fmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)__A); 481 } 482 483 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_pbh( 484 __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) { 485 return (__m512bh)__builtin_ia32_selectpbf_512( 486 (__mmask32)__U, 487 _mm512_fmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), (__v32bf)__C); 488 } 489 490 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pbh( 491 __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) { 492 return (__m512bh)__builtin_ia32_selectpbf_512( 493 (__mmask32)__U, 494 _mm512_fmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), 495 (__v32bf)_mm512_setzero_pbh()); 496 } 497 498 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 499 _mm512_fnmadd_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { 500 return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, -(__v32bf)__B, 501 (__v32bf)__C); 502 } 503 504 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_pbh( 505 __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) { 506 return (__m512bh)__builtin_ia32_selectpbf_512( 507 (__mmask32)__U, 508 _mm512_fnmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), 509 (__v32bf)__A); 510 } 511 512 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_pbh( 513 __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) { 514 return (__m512bh)__builtin_ia32_selectpbf_512( 515 (__mmask32)__U, 516 _mm512_fnmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), 517 (__v32bf)__C); 518 } 519 520 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pbh( 521 __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) { 522 return (__m512bh)__builtin_ia32_selectpbf_512( 523 (__mmask32)__U, 524 _mm512_fnmadd_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), 525 (__v32bf)_mm512_setzero_pbh()); 526 } 527 528 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 529 _mm512_fnmsub_pbh(__m512bh __A, __m512bh __B, __m512bh __C) { 530 return (__m512bh)__builtin_ia32_vfmaddnepbh512((__v32bf)__A, -(__v32bf)__B, 531 -(__v32bf)__C); 532 } 533 534 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_pbh( 535 __m512bh __A, __mmask32 __U, __m512bh __B, __m512bh __C) { 536 return (__m512bh)__builtin_ia32_selectpbf_512( 537 (__mmask32)__U, 538 _mm512_fnmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), 539 (__v32bf)__A); 540 } 541 542 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_pbh( 543 __m512bh __A, __m512bh __B, __m512bh __C, __mmask32 __U) { 544 return (__m512bh)__builtin_ia32_selectpbf_512( 545 (__mmask32)__U, 546 _mm512_fnmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), 547 (__v32bf)__C); 548 } 549 550 static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_pbh( 551 __mmask32 __U, __m512bh __A, __m512bh __B, __m512bh __C) { 552 return (__m512bh)__builtin_ia32_selectpbf_512( 553 (__mmask32)__U, 554 _mm512_fnmsub_pbh((__v32bf)__A, (__v32bf)__B, (__v32bf)__C), 555 (__v32bf)_mm512_setzero_pbh()); 556 } 557 558 #undef __DEFAULT_FN_ATTRS512 559 560 #endif 561 #endif