zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

sha512intrin.h (6088B) - Raw


      1 /*===--------------- sha512intrin.h - SHA512 intrinsics -----------------===
      2  *
      3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4  * See https://llvm.org/LICENSE.txt for license information.
      5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6  *
      7  *===-----------------------------------------------------------------------===
      8  */
      9 
     10 #ifndef __IMMINTRIN_H
     11 #error "Never use <sha512intrin.h> directly; include <immintrin.h> instead."
     12 #endif // __IMMINTRIN_H
     13 
     14 #ifndef __SHA512INTRIN_H
     15 #define __SHA512INTRIN_H
     16 
     /* Attribute set applied to every intrinsic defined in this header:
      * __always_inline__, __nodebug__, the "sha512" target feature, and a
      * __min_vector_width__ of 256. The macro is #undef'd again at the end of
      * the header. */
     17 #define __DEFAULT_FN_ATTRS256                                                  \
     18   __attribute__((__always_inline__, __nodebug__, __target__("sha512"),         \
     19                  __min_vector_width__(256)))
     20 
     21 /// This intrinsic is one of the two SHA512 message scheduling instructions.
     22 ///    The intrinsic performs an intermediate calculation for the next four
     23 ///    SHA512 message qwords. The calculated results are stored in \a dst.
     24 ///
     25 /// \headerfile <immintrin.h>
     26 ///
     27 /// \code
     28 /// __m256i _mm256_sha512msg1_epi64(__m256i __A, __m128i __B)
     29 /// \endcode
     30 ///
     31 /// This intrinsic corresponds to the \c VSHA512MSG1 instruction.
     32 ///
     33 /// \param __A
     34 ///    A 256-bit vector of [4 x long long].
     35 /// \param __B
     36 ///    A 128-bit vector of [2 x long long].
     37 /// \returns
     38 ///    A 256-bit vector of [4 x long long].
     39 ///
     40 /// \code{.operation}
     41 /// DEFINE ROR64(qword, n) {
     42 /// 	count := n % 64
     43 /// 	dest := (qword >> count) | (qword << (64 - count))
     44 /// 	RETURN dest
     45 /// }
     46 /// DEFINE SHR64(qword, n) {
     47 /// 	RETURN qword >> n
     48 /// }
     49 /// DEFINE s0(qword) {
     50 /// 	RETURN ROR64(qword,1) ^ ROR64(qword, 8) ^ SHR64(qword, 7)
     51 /// }
     52 /// W[4] := __B.qword[0]
     53 /// W[3] := __A.qword[3]
     54 /// W[2] := __A.qword[2]
     55 /// W[1] := __A.qword[1]
     56 /// W[0] := __A.qword[0]
     57 /// dst.qword[3] := W[3] + s0(W[4])
     58 /// dst.qword[2] := W[2] + s0(W[3])
     59 /// dst.qword[1] := W[1] + s0(W[2])
     60 /// dst.qword[0] := W[0] + s0(W[1])
     61 /// dst[MAX:256] := 0
     62 /// \endcode
     63 static __inline__ __m256i __DEFAULT_FN_ATTRS256
     64 _mm256_sha512msg1_epi64(__m256i __A, __m128i __B) {
          // Thin wrapper over the compiler builtin: __A is cast to __v4du and __B
          // to __v2du for the call, and the builtin's result is cast back to
          // __m256i for the caller.
     65   return (__m256i)__builtin_ia32_vsha512msg1((__v4du)__A, (__v2du)__B);
     66 }
     67 
     68 /// This intrinsic is one of the two SHA512 message scheduling instructions.
     69 ///    The intrinsic performs the final calculation for the next four SHA512
     70 ///    message qwords. The calculated results are stored in \a dst.
     71 ///
     72 /// \headerfile <immintrin.h>
     73 ///
     74 /// \code
     75 /// __m256i _mm256_sha512msg2_epi64(__m256i __A, __m256i __B)
     76 /// \endcode
     77 ///
     78 /// This intrinsic corresponds to the \c VSHA512MSG2 instruction.
     79 ///
     80 /// \param __A
     81 ///    A 256-bit vector of [4 x long long].
     82 /// \param __B
     83 ///    A 256-bit vector of [4 x long long].
     84 /// \returns
     85 ///    A 256-bit vector of [4 x long long].
     86 ///
     87 /// \code{.operation}
     88 /// DEFINE ROR64(qword, n) {
     89 /// 	count := n % 64
     90 /// 	dest := (qword >> count) | (qword << (64 - count))
     91 /// 	RETURN dest
     92 /// }
     93 /// DEFINE SHR64(qword, n) {
     94 /// 	RETURN qword >> n
     95 /// }
     96 /// DEFINE s1(qword) {
     97 /// 	RETURN ROR64(qword,19) ^ ROR64(qword, 61) ^ SHR64(qword, 6)
     98 /// }
     99 /// W[14] := __B.qword[2]
    100 /// W[15] := __B.qword[3]
    101 /// W[16] := __A.qword[0] + s1(W[14])
    102 /// W[17] := __A.qword[1] + s1(W[15])
    103 /// W[18] := __A.qword[2] + s1(W[16])
    104 /// W[19] := __A.qword[3] + s1(W[17])
    105 /// dst.qword[3] := W[19]
    106 /// dst.qword[2] := W[18]
    107 /// dst.qword[1] := W[17]
    108 /// dst.qword[0] := W[16]
    109 /// dst[MAX:256] := 0
    110 /// \endcode
    111 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    112 _mm256_sha512msg2_epi64(__m256i __A, __m256i __B) {
          // Thin wrapper over the compiler builtin: both 256-bit operands are cast
          // to __v4du for the call, and the builtin's result is cast back to
          // __m256i for the caller.
    113   return (__m256i)__builtin_ia32_vsha512msg2((__v4du)__A, (__v4du)__B);
    114 }
    115 
    116 /// This intrinsic performs two rounds of SHA512 operation using initial SHA512
    117 ///    state (C,D,G,H) from \a __A, an initial SHA512 state (A,B,E,F) from
    118 ///    \a __B, and a pre-computed sum of the next two round message qwords and
    119 ///    the corresponding round constants from \a __C (only the two lower qwords
    120 ///    of the third operand). The updated SHA512 state (A,B,E,F) is stored in
    121 ///    \a dst, and \a __B can be used as the updated state (C,D,G,H) in later
    122 ///    rounds.
    123 ///
    124 /// \headerfile <immintrin.h>
    125 ///
    126 /// \code
    127 /// __m256i _mm256_sha512rnds2_epi64(__m256i __A, __m256i __B, __m128i __C)
    128 /// \endcode
    129 ///
    130 /// This intrinsic corresponds to the \c VSHA512RNDS2 instruction.
    131 ///
    132 /// \param __A
    133 ///    A 256-bit vector of [4 x long long].
    134 /// \param __B
    135 ///    A 256-bit vector of [4 x long long].
    136 /// \param __C
    137 ///    A 128-bit vector of [2 x long long].
    138 /// \returns
    139 ///    A 256-bit vector of [4 x long long].
    140 ///
    141 /// \code{.operation}
    142 /// DEFINE ROR64(qword, n) {
    143 /// 	count := n % 64
    144 /// 	dest := (qword >> count) | (qword << (64 - count))
    145 /// 	RETURN dest
    146 /// }
    147 /// DEFINE SHR64(qword, n) {
    148 /// 	RETURN qword >> n
    149 /// }
    150 /// DEFINE cap_sigma0(qword) {
    151 /// 	RETURN ROR64(qword,28) ^ ROR64(qword, 34) ^ ROR64(qword, 39)
    152 /// }
    153 /// DEFINE cap_sigma1(qword) {
    154 /// 	RETURN ROR64(qword,14) ^ ROR64(qword, 18) ^ ROR64(qword, 41)
    155 /// }
    156 /// DEFINE MAJ(a,b,c) {
    157 /// 	RETURN (a & b) ^ (a & c) ^ (b & c)
    158 /// }
    159 /// DEFINE CH(e,f,g) {
    160 /// 	RETURN (e & f) ^ (g & ~e)
    161 /// }
    162 /// A[0] := __B.qword[3]
    163 /// B[0] := __B.qword[2]
    164 /// C[0] := __A.qword[3]
    165 /// D[0] := __A.qword[2]
    166 /// E[0] := __B.qword[1]
    167 /// F[0] := __B.qword[0]
    168 /// G[0] := __A.qword[1]
    169 /// H[0] := __A.qword[0]
    170 /// WK[0]:= __C.qword[0]
    171 /// WK[1]:= __C.qword[1]
    172 /// FOR i := 0 to 1:
    173 /// 	A[i+1] := CH(E[i], F[i], G[i]) +
    174 /// 	cap_sigma1(E[i]) + WK[i] + H[i] +
    175 /// 	MAJ(A[i], B[i], C[i]) +
    176 /// 	cap_sigma0(A[i])
    177 /// 	B[i+1] := A[i]
    178 /// 	C[i+1] := B[i]
    179 /// 	D[i+1] := C[i]
    180 /// 	E[i+1] := CH(E[i], F[i], G[i]) +
    181 /// 	cap_sigma1(E[i]) + WK[i] + H[i] + D[i]
    182 /// 	F[i+1] := E[i]
    183 /// 	G[i+1] := F[i]
    184 /// 	H[i+1] := G[i]
    185 /// ENDFOR
    186 /// dst.qword[3] := A[2]
    187 /// dst.qword[2] := B[2]
    188 /// dst.qword[1] := E[2]
    189 /// dst.qword[0] := F[2]
    190 /// dst[MAX:256] := 0
    191 /// \endcode
    192 static __inline__ __m256i __DEFAULT_FN_ATTRS256
    193 _mm256_sha512rnds2_epi64(__m256i __A, __m256i __B, __m128i __C) {
          // Thin wrapper over the compiler builtin: the operands are forwarded in
          // the order received, with __A and __B cast to __v4du and the 128-bit
          // __C cast to __v2du; the builtin's result is cast back to __m256i.
    194   return (__m256i)__builtin_ia32_vsha512rnds2((__v4du)__A, (__v4du)__B,
    195                                               (__v2du)__C);
    196 }
    197 
    198 #undef __DEFAULT_FN_ATTRS256
    199 
    200 #endif // __SHA512INTRIN_H