zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

bmi2intrin.h (7546B) - Raw


      1 /*===---- bmi2intrin.h - BMI2 intrinsics -----------------------------------===
      2  *
      3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4  * See https://llvm.org/LICENSE.txt for license information.
      5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6  *
      7  *===-----------------------------------------------------------------------===
      8  */
      9 
     10 #ifndef __IMMINTRIN_H
     11 #error "Never use <bmi2intrin.h> directly; include <immintrin.h> instead."
     12 #endif
     13 
     14 #ifndef __BMI2INTRIN_H
     15 #define __BMI2INTRIN_H
     16 
     17 /* Define the default attributes for the functions in this file:
         * __always_inline__, __nodebug__ and __target__("bmi2"). When compiled as
         * C++11 or newer, the functions are additionally declared constexpr. */
     18 #if defined(__cplusplus) && (__cplusplus >= 201103L)
     19 #define __DEFAULT_FN_ATTRS                                                     \
     20   __attribute__((__always_inline__, __nodebug__, __target__("bmi2"))) constexpr
     21 #else
     22 #define __DEFAULT_FN_ATTRS                                                     \
     23   __attribute__((__always_inline__, __nodebug__, __target__("bmi2")))
     24 #endif
     25 
     26 /// Copies the unsigned 32-bit integer \a __X and zeroes the upper bits
     27 ///    starting at bit number \a __Y.
     28 ///
     29 /// \code{.operation}
     30 /// i := __Y[7:0]
     31 /// result := __X
     32 /// IF i < 32
     33 ///   result[31:i] := 0
     34 /// ENDIF
     35 /// \endcode
     36 ///
     37 /// \headerfile <immintrin.h>
     38 ///
     39 /// This intrinsic corresponds to the \c BZHI instruction.
     40 ///
     41 /// \param __X
     42 ///    The 32-bit source value to copy.
     43 /// \param __Y
     44 ///    The lower 8 bits specify the bit number of the lowest bit to zero.
        ///    Per the operation above, an index of 32 or more leaves \a __X intact.
     45 /// \returns The partially zeroed 32-bit value.
     46 static __inline__ unsigned int __DEFAULT_FN_ATTRS
     47 _bzhi_u32(unsigned int __X, unsigned int __Y) {
     48   return __builtin_ia32_bzhi_si(__X, __Y);
     49 }
     50 
     51 /// Deposit (scatter) low-order bits from the unsigned 32-bit integer \a __X
     52 ///    into the 32-bit result, according to the mask in the unsigned 32-bit
     53 ///    integer \a __Y. All other bits of the result are zero.
     54 ///
     55 /// \code{.operation}
     56 /// i := 0
     57 /// result := 0
     58 /// FOR m := 0 TO 31
     59 ///   IF __Y[m] == 1
     60 ///     result[m] := __X[i]
     61 ///     i := i + 1
     62 ///   ENDIF
     63 /// ENDFOR
     64 /// \endcode
     65 ///
     66 /// \headerfile <immintrin.h>
     67 ///
     68 /// This intrinsic corresponds to the \c PDEP instruction.
     69 ///
     70 /// \param __X
     71 ///    The 32-bit source value whose low-order bits are deposited.
     72 /// \param __Y
     73 ///    The 32-bit mask specifying where to deposit source bits.
        ///    One source bit is consumed, in order, for each set bit of the mask.
     74 /// \returns The 32-bit result.
     75 static __inline__ unsigned int __DEFAULT_FN_ATTRS
     76 _pdep_u32(unsigned int __X, unsigned int __Y) {
     77   return __builtin_ia32_pdep_si(__X, __Y);
     78 }
     79 
     80 /// Extract (gather) bits from the unsigned 32-bit integer \a __X into the
     81 ///    low-order bits of the 32-bit result, according to the mask in the
     82 ///    unsigned 32-bit integer \a __Y. All other bits of the result are zero.
     83 ///
     84 /// \code{.operation}
     85 /// i := 0
     86 /// result := 0
     87 /// FOR m := 0 TO 31
     88 ///   IF __Y[m] == 1
     89 ///     result[i] := __X[m]
     90 ///     i := i + 1
     91 ///   ENDIF
     92 /// ENDFOR
     93 /// \endcode
     94 ///
     95 /// \headerfile <immintrin.h>
     96 ///
     97 /// This intrinsic corresponds to the \c PEXT instruction.
     98 ///
     99 /// \param __X
    100 ///    The 32-bit source value from which bits are extracted.
    101 /// \param __Y
    102 ///    The 32-bit mask specifying which source bits to extract.
        ///    Selected bits are packed, in order, starting at bit 0 of the result.
    103 /// \returns The 32-bit result.
    104 static __inline__ unsigned int __DEFAULT_FN_ATTRS
    105 _pext_u32(unsigned int __X, unsigned int __Y) {
    106   return __builtin_ia32_pext_si(__X, __Y);
    107 }
    108 
    109 /// Multiplies the unsigned 32-bit integers \a __X and \a __Y to form a
    110 ///    64-bit product. Stores the upper 32 bits of the product in the
    111 ///    memory at \a __P and returns the lower 32 bits.
    112 ///
    113 /// \code{.operation}
    114 /// Store32(__P, (__X * __Y)[63:32])
    115 /// result := (__X * __Y)[31:0]
    116 /// \endcode
    117 ///
    118 /// \headerfile <immintrin.h>
    119 ///
    120 /// This intrinsic corresponds to the \c MULX instruction.
    121 ///
    122 /// \param __X
    123 ///    An unsigned 32-bit multiplicand.
    124 /// \param __Y
    125 ///    An unsigned 32-bit multiplicand.
    126 /// \param __P
    127 ///    A pointer to memory for storing the upper half of the product.
        ///    It is written unconditionally, so it must point to writable storage.
    128 /// \returns The lower half of the product.
    129 static __inline__ unsigned int __DEFAULT_FN_ATTRS
    130 _mulx_u32(unsigned int __X, unsigned int __Y, unsigned int *__P) {
          // Widen one operand first so the multiply happens in unsigned long long
          // and the upper half of the product is not discarded.
    131   unsigned long long __res = (unsigned long long) __X * __Y;
          // Upper 32 bits go out through __P; the truncating cast keeps the rest.
    132   *__P = (unsigned int)(__res >> 32);
    133   return (unsigned int)__res;
    134 }
    135 
    136 #ifdef  __x86_64__
    137 
    138 /// Copies the unsigned 64-bit integer \a __X and zeroes the upper bits
    139 ///    starting at bit number \a __Y.
    140 ///
    141 /// \code{.operation}
    142 /// i := __Y[7:0]
    143 /// result := __X
    144 /// IF i < 64
    145 ///   result[63:i] := 0
    146 /// ENDIF
    147 /// \endcode
    148 ///
    149 /// \headerfile <immintrin.h>
    150 ///
    151 /// This intrinsic corresponds to the \c BZHI instruction.
    152 ///
    153 /// \param __X
    154 ///    The 64-bit source value to copy.
    155 /// \param __Y
    156 ///    The lower 8 bits specify the bit number of the lowest bit to zero.
        ///    Per the operation above, an index of 64 or more leaves \a __X intact.
    157 /// \returns The partially zeroed 64-bit value.
    158 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
    159 _bzhi_u64(unsigned long long __X, unsigned long long __Y) {
    160   return __builtin_ia32_bzhi_di(__X, __Y);
    161 }
    162 
    163 /// Deposit (scatter) low-order bits from the unsigned 64-bit integer \a __X
    164 ///    into the 64-bit result, according to the mask in the unsigned 64-bit
    165 ///    integer \a __Y. All other bits of the result are zero.
    166 ///
    167 /// \code{.operation}
    168 /// i := 0
    169 /// result := 0
    170 /// FOR m := 0 TO 63
    171 ///   IF __Y[m] == 1
    172 ///     result[m] := __X[i]
    173 ///     i := i + 1
    174 ///   ENDIF
    175 /// ENDFOR
    176 /// \endcode
    177 ///
    178 /// \headerfile <immintrin.h>
    179 ///
    180 /// This intrinsic corresponds to the \c PDEP instruction.
    181 ///
    182 /// \param __X
    183 ///    The 64-bit source value whose low-order bits are deposited.
    184 /// \param __Y
    185 ///    The 64-bit mask specifying where to deposit source bits.
        ///    One source bit is consumed, in order, for each set bit of the mask.
    186 /// \returns The 64-bit result.
    187 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
    188 _pdep_u64(unsigned long long __X, unsigned long long __Y) {
    189   return __builtin_ia32_pdep_di(__X, __Y);
    190 }
    191 
    192 /// Extract (gather) bits from the unsigned 64-bit integer \a __X into the
    193 ///    low-order bits of the 64-bit result, according to the mask in the
    194 ///    unsigned 64-bit integer \a __Y. All other bits of the result are zero.
    195 ///
    196 /// \code{.operation}
    197 /// i := 0
    198 /// result := 0
    199 /// FOR m := 0 TO 63
    200 ///   IF __Y[m] == 1
    201 ///     result[i] := __X[m]
    202 ///     i := i + 1
    203 ///   ENDIF
    204 /// ENDFOR
    205 /// \endcode
    206 ///
    207 /// \headerfile <immintrin.h>
    208 ///
    209 /// This intrinsic corresponds to the \c PEXT instruction.
    210 ///
    211 /// \param __X
    212 ///    The 64-bit source value from which bits are extracted.
    213 /// \param __Y
    214 ///    The 64-bit mask specifying which source bits to extract.
        ///    Selected bits are packed, in order, starting at bit 0 of the result.
    215 /// \returns The 64-bit result.
    216 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
    217 _pext_u64(unsigned long long __X, unsigned long long __Y) {
    218   return __builtin_ia32_pext_di(__X, __Y);
    219 }
    220 
    221 /// Multiplies the unsigned 64-bit integers \a __X and \a __Y to form a
    222 ///    128-bit product. Stores the upper 64 bits of the product to the
    223 ///    memory addressed by \a __P and returns the lower 64 bits.
    224 ///
    225 /// \code{.operation}
    226 /// Store64(__P, (__X * __Y)[127:64])
    227 /// result := (__X * __Y)[63:0]
    228 /// \endcode
    229 ///
    230 /// \headerfile <immintrin.h>
    231 ///
    232 /// This intrinsic corresponds to the \c MULX instruction.
    233 ///
    234 /// \param __X
    235 ///    An unsigned 64-bit multiplicand.
    236 /// \param __Y
    237 ///    An unsigned 64-bit multiplicand.
    238 /// \param __P
    239 ///    A pointer to memory for storing the upper half of the product.
        ///    It is written unconditionally, so it must point to writable storage.
    240 /// \returns The lower half of the product.
    241 static __inline__ unsigned long long __DEFAULT_FN_ATTRS
    242 _mulx_u64 (unsigned long long __X, unsigned long long __Y,
    243            unsigned long long *__P) {
          // Widen one operand first so the multiply happens in unsigned __int128
          // and the upper half of the product is not discarded.
    244   unsigned __int128 __res = (unsigned __int128) __X * __Y;
          // Upper 64 bits go out through __P; the truncating cast keeps the rest.
    245   *__P = (unsigned long long) (__res >> 64);
    246   return (unsigned long long) __res;
    247 }
    248 
    249 #endif /* __x86_64__  */
    250 
    251 #undef __DEFAULT_FN_ATTRS
    252 
    253 #endif /* __BMI2INTRIN_H */