zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

immintrin.h (26068B) - Raw


      1 /*===---- immintrin.h - Intel intrinsics -----------------------------------===
      2  *
      3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4  * See https://llvm.org/LICENSE.txt for license information.
      5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6  *
      7  *===-----------------------------------------------------------------------===
      8  */
      9 
     10 #ifndef __IMMINTRIN_H
     11 #define __IMMINTRIN_H
     12 
     13 #if !defined(__i386__) && !defined(__x86_64__)
     14 #error "This header is only meant to be used on x86 and x64 architecture"
     15 #endif
     16 
     17 #include <x86gprintrin.h>
     18 
     19 #if !defined(__SCE__) || __has_feature(modules) || defined(__MMX__)
     20 #include <mmintrin.h>
     21 #endif
     22 
     23 #if !defined(__SCE__) || __has_feature(modules) || defined(__SSE__)
     24 #include <xmmintrin.h>
     25 #endif
     26 
     27 #if !defined(__SCE__) || __has_feature(modules) || defined(__SSE2__)
     28 #include <emmintrin.h>
     29 #endif
     30 
     31 #if !defined(__SCE__) || __has_feature(modules) || defined(__SSE3__)
     32 #include <pmmintrin.h>
     33 #endif
     34 
     35 #if !defined(__SCE__) || __has_feature(modules) || defined(__SSSE3__)
     36 #include <tmmintrin.h>
     37 #endif
     38 
     39 #if !defined(__SCE__) || __has_feature(modules) ||                             \
     40     (defined(__SSE4_2__) || defined(__SSE4_1__))
     41 #include <smmintrin.h>
     42 #endif
     43 
     44 #if !defined(__SCE__) || __has_feature(modules) ||                             \
     45     (defined(__AES__) || defined(__PCLMUL__))
     46 #include <wmmintrin.h>
     47 #endif
     48 
     49 #if !defined(__SCE__) || __has_feature(modules) || defined(__CLFLUSHOPT__)
     50 #include <clflushoptintrin.h>
     51 #endif
     52 
     53 #if !defined(__SCE__) || __has_feature(modules) || defined(__CLWB__)
     54 #include <clwbintrin.h>
     55 #endif
     56 
     57 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX__)
     58 #include <avxintrin.h>
     59 #endif
     60 
     61 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX2__)
     62 #include <avx2intrin.h>
     63 #endif
     64 
     65 #if !defined(__SCE__) || __has_feature(modules) || defined(__F16C__)
     66 #include <f16cintrin.h>
     67 #endif
     68 
     69 /* No feature check desired due to internal checks */
     70 #include <bmiintrin.h>
     71 
     72 #if !defined(__SCE__) || __has_feature(modules) || defined(__BMI2__)
     73 #include <bmi2intrin.h>
     74 #endif
     75 
     76 #if !defined(__SCE__) || __has_feature(modules) || defined(__LZCNT__)
     77 #include <lzcntintrin.h>
     78 #endif
     79 
     80 #if !defined(__SCE__) || __has_feature(modules) || defined(__POPCNT__)
     81 #include <popcntintrin.h>
     82 #endif
     83 
     84 #if !defined(__SCE__) || __has_feature(modules) || defined(__FMA__)
     85 #include <fmaintrin.h>
     86 #endif
     87 
     88 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512F__)
     89 #include <avx512fintrin.h>
     90 #endif
     91 
     92 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VL__)
     93 #include <avx512vlintrin.h>
     94 #endif
     95 
     96 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BW__)
     97 #include <avx512bwintrin.h>
     98 #endif
     99 
    100 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BITALG__)
    101 #include <avx512bitalgintrin.h>
    102 #endif
    103 
    104 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512CD__)
    105 #include <avx512cdintrin.h>
    106 #endif
    107 
    108 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__)
    109 #include <avx512vpopcntdqintrin.h>
    110 #endif
    111 
    112 #if !defined(__SCE__) || __has_feature(modules) ||                             \
    113     (defined(__AVX512VL__) && defined(__AVX512VPOPCNTDQ__))
    114 #include <avx512vpopcntdqvlintrin.h>
    115 #endif
    116 
    117 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VNNI__)
    118 #include <avx512vnniintrin.h>
    119 #endif
    120 
    121 #if !defined(__SCE__) || __has_feature(modules) ||                             \
    122     (defined(__AVX512VL__) && defined(__AVX512VNNI__))
    123 #include <avx512vlvnniintrin.h>
    124 #endif
    125 
    126 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNI__)
    127 #include <avxvnniintrin.h>
    128 #endif
    129 
    130 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512DQ__)
    131 #include <avx512dqintrin.h>
    132 #endif
    133 
    134 #if !defined(__SCE__) || __has_feature(modules) ||                             \
    135     (defined(__AVX512VL__) && defined(__AVX512BITALG__))
    136 #include <avx512vlbitalgintrin.h>
    137 #endif
    138 
    139 #if !defined(__SCE__) || __has_feature(modules) ||                             \
    140     (defined(__AVX512VL__) && defined(__AVX512BW__))
    141 #include <avx512vlbwintrin.h>
    142 #endif
    143 
    144 #if !defined(__SCE__) || __has_feature(modules) ||                             \
    145     (defined(__AVX512VL__) && defined(__AVX512CD__))
    146 #include <avx512vlcdintrin.h>
    147 #endif
    148 
    149 #if !defined(__SCE__) || __has_feature(modules) ||                             \
    150     (defined(__AVX512VL__) && defined(__AVX512DQ__))
    151 #include <avx512vldqintrin.h>
    152 #endif
    153 
    154 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512IFMA__)
    155 #include <avx512ifmaintrin.h>
    156 #endif
    157 
    158 #if !defined(__SCE__) || __has_feature(modules) ||                             \
    159     (defined(__AVX512IFMA__) && defined(__AVX512VL__))
    160 #include <avx512ifmavlintrin.h>
    161 #endif
    162 
    163 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVXIFMA__)
    164 #include <avxifmaintrin.h>
    165 #endif
    166 
    167 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI__)
    168 #include <avx512vbmiintrin.h>
    169 #endif
    170 
    171 #if !defined(__SCE__) || __has_feature(modules) ||                             \
    172     (defined(__AVX512VBMI__) && defined(__AVX512VL__))
    173 #include <avx512vbmivlintrin.h>
    174 #endif
    175 
    176 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512VBMI2__)
    177 #include <avx512vbmi2intrin.h>
    178 #endif
    179 
    180 #if !defined(__SCE__) || __has_feature(modules) ||                             \
    181     (defined(__AVX512VBMI2__) && defined(__AVX512VL__))
    182 #include <avx512vlvbmi2intrin.h>
    183 #endif
    184 
    185 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512FP16__)
    186 #include <avx512fp16intrin.h>
    187 #endif
    188 
    189 #if !defined(__SCE__) || __has_feature(modules) ||                             \
    190     (defined(__AVX512VL__) && defined(__AVX512FP16__))
    191 #include <avx512vlfp16intrin.h>
    192 #endif
    193 
    194 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX512BF16__)
    195 #include <avx512bf16intrin.h>
    196 #endif
    197 
    198 #if !defined(__SCE__) || __has_feature(modules) ||                             \
    199     (defined(__AVX512VL__) && defined(__AVX512BF16__))
    200 #include <avx512vlbf16intrin.h>
    201 #endif
    202 
    203 #if !defined(__SCE__) || __has_feature(modules) || defined(__PKU__)
    204 #include <pkuintrin.h>
    205 #endif
    206 
    207 #if !defined(__SCE__) || __has_feature(modules) || defined(__VPCLMULQDQ__)
    208 #include <vpclmulqdqintrin.h>
    209 #endif
    210 
    211 #if !defined(__SCE__) || __has_feature(modules) || defined(__VAES__)
    212 #include <vaesintrin.h>
    213 #endif
    214 
    215 #if !defined(__SCE__) || __has_feature(modules) || defined(__GFNI__)
    216 #include <gfniintrin.h>
    217 #endif
    218 
    219 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT8__)
    220 #include <avxvnniint8intrin.h>
    221 #endif
    222 
    223 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVXNECONVERT__)
    224 #include <avxneconvertintrin.h>
    225 #endif
    226 
    227 #if !defined(__SCE__) || __has_feature(modules) || defined(__SHA512__)
    228 #include <sha512intrin.h>
    229 #endif
    230 
    231 #if !defined(__SCE__) || __has_feature(modules) || defined(__SM3__)
    232 #include <sm3intrin.h>
    233 #endif
    234 
    235 #if !defined(__SCE__) || __has_feature(modules) || defined(__SM4__)
    236 #include <sm4intrin.h>
    237 #endif
    238 
    239 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVXVNNIINT16__)
    240 #include <avxvnniint16intrin.h>
    241 #endif
    242 
    243 #if !defined(__SCE__) || __has_feature(modules) || defined(__RDPID__)
    244 /// Reads the value of the IA32_TSC_AUX MSR (0xc0000103).
    245 ///
    246 /// \headerfile <immintrin.h>
    247 ///
    248 /// This intrinsic corresponds to the <c> RDPID </c> instruction.
    249 ///
    250 /// \returns The 32-bit contents of the MSR.
    251 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("rdpid")))
    252 _rdpid_u32(void) {
    253   return __builtin_ia32_rdpid();
    254 }
    255 #endif // __RDPID__
    256 
    257 #if !defined(__SCE__) || __has_feature(modules) || defined(__RDRND__)
    258 /// Returns a 16-bit hardware-generated random value.
    259 ///
    260 /// \headerfile <immintrin.h>
    261 ///
    262 /// This intrinsic corresponds to the <c> RDRAND </c> instruction.
    263 ///
    264 /// \param __p
    265 ///    A pointer to a 16-bit memory location to place the random value.
    266 /// \returns 1 if the value was successfully generated, 0 otherwise.
    267 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
    268 _rdrand16_step(unsigned short *__p)
    269 {
    270   return (int)__builtin_ia32_rdrand16_step(__p);
    271 }
    272 
    273 /// Returns a 32-bit hardware-generated random value.
    274 ///
    275 /// \headerfile <immintrin.h>
    276 ///
    277 /// This intrinsic corresponds to the <c> RDRAND </c> instruction.
    278 ///
    279 /// \param __p
    280 ///    A pointer to a 32-bit memory location to place the random value.
    281 /// \returns 1 if the value was successfully generated, 0 otherwise.
    282 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
    283 _rdrand32_step(unsigned int *__p)
    284 {
    285   return (int)__builtin_ia32_rdrand32_step(__p);
    286 }
    287 
    288 /// Returns a 64-bit hardware-generated random value.
    289 ///
    290 /// \headerfile <immintrin.h>
    291 ///
    292 /// This intrinsic corresponds to the <c> RDRAND </c> instruction.
    293 ///
    294 /// \param __p
    295 ///    A pointer to a 64-bit memory location to place the random value.
    296 /// \returns 1 if the value was successfully generated, 0 otherwise.
    297 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd")))
    298 _rdrand64_step(unsigned long long *__p)
    299 {
    300 #ifdef __x86_64__
    301   return (int)__builtin_ia32_rdrand64_step(__p);
    302 #else
    303   // We need to emulate the functionality of 64-bit rdrand with 2 32-bit
    304   // rdrand instructions.
    305   unsigned int __lo, __hi;
    306   unsigned int __res_lo = __builtin_ia32_rdrand32_step(&__lo);
    307   unsigned int __res_hi = __builtin_ia32_rdrand32_step(&__hi);
    308   if (__res_lo && __res_hi) {
    309     *__p = ((unsigned long long)__hi << 32) | (unsigned long long)__lo;
    310     return 1;
    311   } else {
    312     *__p = 0;
    313     return 0;
    314   }
    315 #endif
    316 }
    317 #endif /* __RDRND__ */
    318 
    319 #if !defined(__SCE__) || __has_feature(modules) || defined(__FSGSBASE__)
    320 #ifdef __x86_64__
    321 /// Reads the FS base register.
    322 ///
    323 /// \headerfile <immintrin.h>
    324 ///
    325 /// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
    326 ///
    327 /// \returns The lower 32 bits of the FS base register.
    328 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
    329 _readfsbase_u32(void)
    330 {
    331   return __builtin_ia32_rdfsbase32();
    332 }
    333 
    334 /// Reads the FS base register.
    335 ///
    336 /// \headerfile <immintrin.h>
    337 ///
    338 /// This intrinsic corresponds to the <c> RDFSBASE </c> instruction.
    339 ///
    340 /// \returns The contents of the FS base register.
    341 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
    342 _readfsbase_u64(void)
    343 {
    344   return __builtin_ia32_rdfsbase64();
    345 }
    346 
    347 /// Reads the GS base register.
    348 ///
    349 /// \headerfile <immintrin.h>
    350 ///
    351 /// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
    352 ///
    353 /// \returns The lower 32 bits of the GS base register.
    354 static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
    355 _readgsbase_u32(void)
    356 {
    357   return __builtin_ia32_rdgsbase32();
    358 }
    359 
    360 /// Reads the GS base register.
    361 ///
    362 /// \headerfile <immintrin.h>
    363 ///
    364 /// This intrinsic corresponds to the <c> RDGSBASE </c> instruction.
    365 ///
    366 /// \returns The contents of the GS base register.
    367 static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
    368 _readgsbase_u64(void)
    369 {
    370   return __builtin_ia32_rdgsbase64();
    371 }
    372 
    373 /// Modifies the FS base register.
    374 ///
    375 /// \headerfile <immintrin.h>
    376 ///
    377 /// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
    378 ///
    379 /// \param __V
    380 ///    Value to use for the lower 32 bits of the FS base register.
    381 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
    382 _writefsbase_u32(unsigned int __V)
    383 {
    384   __builtin_ia32_wrfsbase32(__V);
    385 }
    386 
    387 /// Modifies the FS base register.
    388 ///
    389 /// \headerfile <immintrin.h>
    390 ///
    391 /// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
    392 ///
    393 /// \param __V
    394 ///    Value to use for the FS base register.
    395 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
    396 _writefsbase_u64(unsigned long long __V)
    397 {
    398   __builtin_ia32_wrfsbase64(__V);
    399 }
    400 
    401 /// Modifies the GS base register.
    402 ///
    403 /// \headerfile <immintrin.h>
    404 ///
    405 /// This intrinsic corresponds to the <c> WRGSBASE </c> instruction.
    406 ///
    407 /// \param __V
    408 ///    Value to use for the lower 32 bits of the GS base register.
    409 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
    410 _writegsbase_u32(unsigned int __V)
    411 {
    412   __builtin_ia32_wrgsbase32(__V);
    413 }
    414 
    415 /// Modifies the GS base register.
    416 ///
    417 /// \headerfile <immintrin.h>
    418 ///
    419 /// This intrinsic corresponds to the <c> WRFSBASE </c> instruction.
    420 ///
    421 /// \param __V
    422 ///    Value to use for GS base register.
    423 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase")))
    424 _writegsbase_u64(unsigned long long __V)
    425 {
    426   __builtin_ia32_wrgsbase64(__V);
    427 }
    428 
    429 #endif
    430 #endif /* __FSGSBASE__ */
    431 
    432 #if !defined(__SCE__) || __has_feature(modules) || defined(__MOVBE__)
    433 
    434 /* The structs used below are to force the load/store to be unaligned. This
    435  * is accomplished with the __packed__ attribute. The __may_alias__ prevents
    436  * tbaa metadata from being generated based on the struct and the type of the
    437  * field inside of it.
    438  */
    439 
    440 /// Load a 16-bit value from memory and swap its bytes.
    441 ///
    442 /// \headerfile <x86intrin.h>
    443 ///
    444 /// This intrinsic corresponds to the MOVBE instruction.
    445 ///
    446 /// \param __P
    447 ///    A pointer to the 16-bit value to load.
    448 /// \returns The byte-swapped value.
    449 static __inline__ short __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
    450 _loadbe_i16(void const * __P) {
    451   struct __loadu_i16 {
    452     unsigned short __v;
    453   } __attribute__((__packed__, __may_alias__));
    454   return (short)__builtin_bswap16(((const struct __loadu_i16*)__P)->__v);
    455 }
    456 
    457 /// Swap the bytes of a 16-bit value and store it to memory.
    458 ///
    459 /// \headerfile <x86intrin.h>
    460 ///
    461 /// This intrinsic corresponds to the MOVBE instruction.
    462 ///
    463 /// \param __P
    464 ///    A pointer to the memory for storing the swapped value.
    465 /// \param __D
    466 ///    The 16-bit value to be byte-swapped.
    467 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
    468 _storebe_i16(void * __P, short __D) {
    469   struct __storeu_i16 {
    470     unsigned short __v;
    471   } __attribute__((__packed__, __may_alias__));
    472   ((struct __storeu_i16*)__P)->__v = __builtin_bswap16((unsigned short)__D);
    473 }
    474 
    475 /// Load a 32-bit value from memory and swap its bytes.
    476 ///
    477 /// \headerfile <x86intrin.h>
    478 ///
    479 /// This intrinsic corresponds to the MOVBE instruction.
    480 ///
    481 /// \param __P
    482 ///    A pointer to the 32-bit value to load.
    483 /// \returns The byte-swapped value.
    484 static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
    485 _loadbe_i32(void const * __P) {
    486   struct __loadu_i32 {
    487     unsigned int __v;
    488   } __attribute__((__packed__, __may_alias__));
    489   return (int)__builtin_bswap32(((const struct __loadu_i32*)__P)->__v);
    490 }
    491 
    492 /// Swap the bytes of a 32-bit value and store it to memory.
    493 ///
    494 /// \headerfile <x86intrin.h>
    495 ///
    496 /// This intrinsic corresponds to the MOVBE instruction.
    497 ///
    498 /// \param __P
    499 ///    A pointer to the memory for storing the swapped value.
    500 /// \param __D
    501 ///    The 32-bit value to be byte-swapped.
    502 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
    503 _storebe_i32(void * __P, int __D) {
    504   struct __storeu_i32 {
    505     unsigned int __v;
    506   } __attribute__((__packed__, __may_alias__));
    507   ((struct __storeu_i32*)__P)->__v = __builtin_bswap32((unsigned int)__D);
    508 }
    509 
    510 #ifdef __x86_64__
    511 /// Load a 64-bit value from memory and swap its bytes.
    512 ///
    513 /// \headerfile <x86intrin.h>
    514 ///
    515 /// This intrinsic corresponds to the MOVBE instruction.
    516 ///
    517 /// \param __P
    518 ///    A pointer to the 64-bit value to load.
    519 /// \returns The byte-swapped value.
    520 static __inline__ long long __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
    521 _loadbe_i64(void const * __P) {
    522   struct __loadu_i64 {
    523     unsigned long long __v;
    524   } __attribute__((__packed__, __may_alias__));
    525   return (long long)__builtin_bswap64(((const struct __loadu_i64*)__P)->__v);
    526 }
    527 
    528 /// Swap the bytes of a 64-bit value and store it to memory.
    529 ///
    530 /// \headerfile <x86intrin.h>
    531 ///
    532 /// This intrinsic corresponds to the MOVBE instruction.
    533 ///
    534 /// \param __P
    535 ///    A pointer to the memory for storing the swapped value.
    536 /// \param __D
    537 ///    The 64-bit value to be byte-swapped.
    538 static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("movbe")))
    539 _storebe_i64(void * __P, long long __D) {
    540   struct __storeu_i64 {
    541     unsigned long long __v;
    542   } __attribute__((__packed__, __may_alias__));
    543   ((struct __storeu_i64*)__P)->__v = __builtin_bswap64((unsigned long long)__D);
    544 }
    545 #endif
     546 #endif /* __MOVBE__ */
    547 
    548 #if !defined(__SCE__) || __has_feature(modules) || defined(__RTM__)
    549 #include <rtmintrin.h>
    550 #include <xtestintrin.h>
    551 #endif
    552 
    553 #if !defined(__SCE__) || __has_feature(modules) || defined(__SHA__)
    554 #include <shaintrin.h>
    555 #endif
    556 
    557 #if !defined(__SCE__) || __has_feature(modules) || defined(__FXSR__)
    558 #include <fxsrintrin.h>
    559 #endif
    560 
    561 /* No feature check desired due to internal MSC_VER checks */
    562 #include <xsaveintrin.h>
    563 
    564 #if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEOPT__)
    565 #include <xsaveoptintrin.h>
    566 #endif
    567 
    568 #if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVEC__)
    569 #include <xsavecintrin.h>
    570 #endif
    571 
    572 #if !defined(__SCE__) || __has_feature(modules) || defined(__XSAVES__)
    573 #include <xsavesintrin.h>
    574 #endif
    575 
    576 #if !defined(__SCE__) || __has_feature(modules) || defined(__SHSTK__)
    577 #include <cetintrin.h>
    578 #endif
    579 
    580 /* Intrinsics inside adcintrin.h are available at all times. */
    581 #include <adcintrin.h>
    582 
    583 #if !defined(__SCE__) || __has_feature(modules) || defined(__ADX__)
    584 #include <adxintrin.h>
    585 #endif
    586 
    587 #if !defined(__SCE__) || __has_feature(modules) || defined(__RDSEED__)
    588 #include <rdseedintrin.h>
    589 #endif
    590 
    591 #if !defined(__SCE__) || __has_feature(modules) || defined(__WBNOINVD__)
    592 #include <wbnoinvdintrin.h>
    593 #endif
    594 
    595 #if !defined(__SCE__) || __has_feature(modules) || defined(__CLDEMOTE__)
    596 #include <cldemoteintrin.h>
    597 #endif
    598 
    599 #if !defined(__SCE__) || __has_feature(modules) || defined(__WAITPKG__)
    600 #include <waitpkgintrin.h>
    601 #endif
    602 
    603 #if !defined(__SCE__) || __has_feature(modules) || defined(__MOVDIRI__) ||     \
    604     defined(__MOVDIR64B__)
    605 #include <movdirintrin.h>
    606 #endif
    607 
    608 #if !defined(__SCE__) || __has_feature(modules) || defined(__MOVRS__)
    609 #include <movrsintrin.h>
    610 #endif
    611 
    612 #if !defined(__SCE__) || __has_feature(modules) ||                             \
    613     (defined(__AVX10_2__) && defined(__MOVRS__))
    614 #include <movrs_avx10_2intrin.h>
    615 #endif
    616 
    617 #if !defined(__SCE__) || __has_feature(modules) ||                             \
    618     (defined(__AVX10_2_512__) && defined(__MOVRS__))
    619 #include <movrs_avx10_2_512intrin.h>
    620 #endif
    621 
    622 #if !defined(__SCE__) || __has_feature(modules) || defined(__PCONFIG__)
    623 #include <pconfigintrin.h>
    624 #endif
    625 
    626 #if !defined(__SCE__) || __has_feature(modules) || defined(__SGX__)
    627 #include <sgxintrin.h>
    628 #endif
    629 
    630 #if !defined(__SCE__) || __has_feature(modules) || defined(__PTWRITE__)
    631 #include <ptwriteintrin.h>
    632 #endif
    633 
    634 #if !defined(__SCE__) || __has_feature(modules) || defined(__INVPCID__)
    635 #include <invpcidintrin.h>
    636 #endif
    637 
    638 #if !defined(__SCE__) || __has_feature(modules) || defined(__KL__) ||          \
    639     defined(__WIDEKL__)
    640 #include <keylockerintrin.h>
    641 #endif
    642 
    643 #if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_TILE__) ||    \
    644     defined(__AMX_INT8__) || defined(__AMX_BF16__)
    645 #include <amxintrin.h>
    646 #endif
    647 
    648 #if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_FP16__)
    649 #include <amxfp16intrin.h>
    650 #endif
    651 
    652 #if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_COMPLEX__)
    653 #include <amxcomplexintrin.h>
    654 #endif
    655 
    656 #if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_FP8__)
    657 #include <amxfp8intrin.h>
    658 #endif
    659 
    660 #if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_TRANSPOSE__)
    661 #include <amxtransposeintrin.h>
    662 #endif
    663 
    664 #if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_MOVRS__)
    665 #include <amxmovrsintrin.h>
    666 #endif
    667 
    668 #if !defined(__SCE__) || __has_feature(modules) ||                             \
    669     (defined(__AMX_MOVRS__) && defined(__AMX_TRANSPOSE__))
    670 #include <amxmovrstransposeintrin.h>
    671 #endif
    672 
    673 #if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_AVX512__)
    674 #include <amxavx512intrin.h>
    675 #endif
    676 
    677 #if !defined(__SCE__) || __has_feature(modules) || defined(__AMX_TF32__)
    678 #include <amxtf32intrin.h>
    679 #endif
    680 
    681 #if !defined(__SCE__) || __has_feature(modules) ||                             \
    682     (defined(__AMX_TF32__) && defined(__AMX_TRANSPOSE__))
    683 #include <amxtf32transposeintrin.h>
    684 #endif
    685 
    686 #if !defined(__SCE__) || __has_feature(modules) ||                             \
    687     (defined(__AMX_BF16__) && defined(__AMX_TRANSPOSE__))
    688 #include <amxbf16transposeintrin.h>
    689 #endif
    690 
    691 #if !defined(__SCE__) || __has_feature(modules) ||                             \
    692     (defined(__AMX_FP16__) && defined(__AMX_TRANSPOSE__))
    693 #include <amxfp16transposeintrin.h>
    694 #endif
    695 
    696 #if !defined(__SCE__) || __has_feature(modules) ||                             \
    697     (defined(__AMX_COMPLEX__) && defined(__AMX_TRANSPOSE__))
    698 #include <amxcomplextransposeintrin.h>
    699 #endif
    700 
    701 #if !defined(__SCE__) || __has_feature(modules) ||                             \
    702     defined(__AVX512VP2INTERSECT__)
    703 #include <avx512vp2intersectintrin.h>
    704 #endif
    705 
    706 #if !defined(__SCE__) || __has_feature(modules) ||                             \
    707     (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__))
    708 #include <avx512vlvp2intersectintrin.h>
    709 #endif
    710 
    711 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2__)
    712 #include <avx10_2bf16intrin.h>
    713 #include <avx10_2convertintrin.h>
    714 #include <avx10_2copyintrin.h>
    715 #include <avx10_2minmaxintrin.h>
    716 #include <avx10_2niintrin.h>
    717 #include <avx10_2satcvtdsintrin.h>
    718 #include <avx10_2satcvtintrin.h>
    719 #endif
    720 
    721 #if !defined(__SCE__) || __has_feature(modules) || defined(__AVX10_2_512__)
    722 #include <avx10_2_512bf16intrin.h>
    723 #include <avx10_2_512convertintrin.h>
    724 #include <avx10_2_512minmaxintrin.h>
    725 #include <avx10_2_512niintrin.h>
    726 #include <avx10_2_512satcvtdsintrin.h>
    727 #include <avx10_2_512satcvtintrin.h>
    728 #endif
    729 
    730 #if !defined(__SCE__) || __has_feature(modules) ||                             \
    731     (defined(__AVX10_2_512__) && defined(__SM4__))
    732 #include <sm4evexintrin.h>
    733 #endif
    734 
    735 #if !defined(__SCE__) || __has_feature(modules) || defined(__ENQCMD__)
    736 #include <enqcmdintrin.h>
    737 #endif
    738 
    739 #if !defined(__SCE__) || __has_feature(modules) || defined(__SERIALIZE__)
    740 #include <serializeintrin.h>
    741 #endif
    742 
    743 #if !defined(__SCE__) || __has_feature(modules) || defined(__TSXLDTRK__)
    744 #include <tsxldtrkintrin.h>
    745 #endif
    746 
    747 #if defined(_MSC_VER) && __has_extension(gnu_asm)
    748 /* Define the default attributes for these intrinsics */
    749 #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
    750 #ifdef __cplusplus
    751 extern "C" {
    752 #endif
    753 /*----------------------------------------------------------------------------*\
    754 |* Interlocked Exchange HLE
    755 \*----------------------------------------------------------------------------*/
    756 #if defined(__i386__) || defined(__x86_64__)
    757 static __inline__ long __DEFAULT_FN_ATTRS
    758 _InterlockedExchange_HLEAcquire(long volatile *_Target, long _Value) {
    759   __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
    760                        : "+r" (_Value), "+m" (*_Target) :: "memory");
    761   return _Value;
    762 }
    763 static __inline__ long __DEFAULT_FN_ATTRS
    764 _InterlockedExchange_HLERelease(long volatile *_Target, long _Value) {
    765   __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
    766                        : "+r" (_Value), "+m" (*_Target) :: "memory");
    767   return _Value;
    768 }
    769 #endif
    770 #if defined(__x86_64__)
    771 static __inline__ __int64 __DEFAULT_FN_ATTRS
    772 _InterlockedExchange64_HLEAcquire(__int64 volatile *_Target, __int64 _Value) {
    773   __asm__ __volatile__(".byte 0xf2 ; lock ; xchg {%0, %1|%1, %0}"
    774                        : "+r" (_Value), "+m" (*_Target) :: "memory");
    775   return _Value;
    776 }
    777 static __inline__ __int64 __DEFAULT_FN_ATTRS
    778 _InterlockedExchange64_HLERelease(__int64 volatile *_Target, __int64 _Value) {
    779   __asm__ __volatile__(".byte 0xf3 ; lock ; xchg {%0, %1|%1, %0}"
    780                        : "+r" (_Value), "+m" (*_Target) :: "memory");
    781   return _Value;
    782 }
    783 #endif
    784 /*----------------------------------------------------------------------------*\
    785 |* Interlocked Compare Exchange HLE
    786 \*----------------------------------------------------------------------------*/
    787 #if defined(__i386__) || defined(__x86_64__)
    788 static __inline__ long __DEFAULT_FN_ATTRS
    789 _InterlockedCompareExchange_HLEAcquire(long volatile *_Destination,
    790                               long _Exchange, long _Comparand) {
    791   __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
    792                        : "+a" (_Comparand), "+m" (*_Destination)
    793                        : "r" (_Exchange) : "memory");
    794   return _Comparand;
    795 }
    796 static __inline__ long __DEFAULT_FN_ATTRS
    797 _InterlockedCompareExchange_HLERelease(long volatile *_Destination,
    798                               long _Exchange, long _Comparand) {
    799   __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
    800                        : "+a" (_Comparand), "+m" (*_Destination)
    801                        : "r" (_Exchange) : "memory");
    802   return _Comparand;
    803 }
    804 #endif
    805 #if defined(__x86_64__)
    806 static __inline__ __int64 __DEFAULT_FN_ATTRS
    807 _InterlockedCompareExchange64_HLEAcquire(__int64 volatile *_Destination,
    808                               __int64 _Exchange, __int64 _Comparand) {
    809   __asm__ __volatile__(".byte 0xf2 ; lock ; cmpxchg {%2, %1|%1, %2}"
    810                        : "+a" (_Comparand), "+m" (*_Destination)
    811                        : "r" (_Exchange) : "memory");
    812   return _Comparand;
    813 }
    814 static __inline__ __int64 __DEFAULT_FN_ATTRS
    815 _InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination,
    816                               __int64 _Exchange, __int64 _Comparand) {
    817   __asm__ __volatile__(".byte 0xf3 ; lock ; cmpxchg {%2, %1|%1, %2}"
    818                        : "+a" (_Comparand), "+m" (*_Destination)
    819                        : "r" (_Exchange) : "memory");
    820   return _Comparand;
    821 }
    822 #endif
    823 #ifdef __cplusplus
    824 }
    825 #endif
    826 
    827 #undef __DEFAULT_FN_ATTRS
    828 
    829 #endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */
    830 
    831 #endif /* __IMMINTRIN_H */