zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

sm3intrin.h (7470B) - Raw


      1 /*===-------------------- sm3intrin.h - SM3 intrinsics ---------------------===
      2  *
      3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4  * See https://llvm.org/LICENSE.txt for license information.
      5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6  *
      7  *===-----------------------------------------------------------------------===
      8  */
      9 
     10 #ifndef __IMMINTRIN_H
     11 #error "Never use <sm3intrin.h> directly; include <immintrin.h> instead."
     12 #endif // __IMMINTRIN_H
     13 
     14 #ifndef __SM3INTRIN_H
     15 #define __SM3INTRIN_H
     16 // Function attributes for the inline SM3 intrinsics (undefined again below).
     17 #define __DEFAULT_FN_ATTRS128                                                  \
     18   __attribute__((__always_inline__, __nodebug__, __target__("sm3"),            \
     19                  __min_vector_width__(128)))
     20 
     21 /// This intrinsic is one of the two SM3 message scheduling intrinsics. The
     22 ///    intrinsic performs an initial calculation for the next four SM3 message
     23 ///    words. The calculated results are stored in \a dst.
     24 ///
     25 /// \headerfile <immintrin.h>
     26 ///
     27 /// \code
     28 /// __m128i _mm_sm3msg1_epi32(__m128i __A, __m128i __B, __m128i __C)
     29 /// \endcode
     30 ///
     31 /// This intrinsic corresponds to the \c VSM3MSG1 instruction.
     32 ///
     33 /// \param __A
     34 ///    A 128-bit vector of [4 x int] holding message words W[7] to W[10].
     35 /// \param __B
     36 ///    A 128-bit vector of [4 x int]; elements 0 to 2 hold W[13] to W[15].
     37 /// \param __C
     38 ///    A 128-bit vector of [4 x int] holding message words W[0] to W[3].
     39 /// \returns
     40 ///    A 128-bit vector of [4 x int] holding P1(TMP0) to P1(TMP3).
     41 ///
     42 /// \code{.operation}
     43 /// DEFINE ROL32(dword, n) {
     44 /// 	count := n % 32
     45 /// 	dest := (dword << count) | (dword >> (32 - count))
     46 /// 	RETURN dest
     47 /// }
     48 /// DEFINE P1(x) {
     49 /// 	RETURN x ^ ROL32(x, 15) ^ ROL32(x, 23)
     50 /// }
     51 /// W[0] := __C.dword[0]
     52 /// W[1] := __C.dword[1]
     53 /// W[2] := __C.dword[2]
     54 /// W[3] := __C.dword[3]
     55 /// W[7] := __A.dword[0]
     56 /// W[8] := __A.dword[1]
     57 /// W[9] := __A.dword[2]
     58 /// W[10] := __A.dword[3]
     59 /// W[13] := __B.dword[0]
     60 /// W[14] := __B.dword[1]
     61 /// W[15] := __B.dword[2]
     62 /// TMP0 := W[7] ^ W[0] ^ ROL32(W[13], 15)
     63 /// TMP1 := W[8] ^ W[1] ^ ROL32(W[14], 15)
     64 /// TMP2 := W[9] ^ W[2] ^ ROL32(W[15], 15)
     65 /// TMP3 := W[10] ^ W[3]
     66 /// dst.dword[0] := P1(TMP0)
     67 /// dst.dword[1] := P1(TMP1)
     68 /// dst.dword[2] := P1(TMP2)
     69 /// dst.dword[3] := P1(TMP3)
     70 /// dst[MAX:128] := 0
     71 /// \endcode
     72 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sm3msg1_epi32(__m128i __A,
     73                                                                   __m128i __B,
     74                                                                   __m128i __C) {
     75   return (__m128i)__builtin_ia32_vsm3msg1((__v4su)__A, (__v4su)__B,
     76                                           (__v4su)__C);
     77 }
     78 
     79 /// This intrinsic is one of the two SM3 message scheduling intrinsics. The
     80 ///    intrinsic performs the final calculation for the next four SM3 message
     81 ///    words. The calculated results are stored in \a dst.
     82 ///
     83 /// \headerfile <immintrin.h>
     84 ///
     85 /// \code
     86 /// __m128i _mm_sm3msg2_epi32(__m128i __A, __m128i __B, __m128i __C)
     87 /// \endcode
     88 ///
     89 /// This intrinsic corresponds to the \c VSM3MSG2 instruction.
     90 ///
     91 /// \param __A
     92 ///    A 128-bit vector of [4 x int] holding WTMP[0] to WTMP[3].
     93 /// \param __B
     94 ///    A 128-bit vector of [4 x int] holding message words W[3] to W[6].
     95 /// \param __C
     96 ///    A 128-bit vector of [4 x int] holding message words W[10] to W[13].
     97 /// \returns
     98 ///    A 128-bit vector of [4 x int] holding message words W[16] to W[19].
     99 ///
    100 /// \code{.operation}
    101 /// DEFINE ROL32(dword, n) {
    102 /// 	count := n % 32
    103 /// 	dest := (dword << count) | (dword >> (32 - count))
    104 /// 	RETURN dest
    105 /// }
    106 /// WTMP[0] := __A.dword[0]
    107 /// WTMP[1] := __A.dword[1]
    108 /// WTMP[2] := __A.dword[2]
    109 /// WTMP[3] := __A.dword[3]
    110 /// W[3] := __B.dword[0]
    111 /// W[4] := __B.dword[1]
    112 /// W[5] := __B.dword[2]
    113 /// W[6] := __B.dword[3]
    114 /// W[10] := __C.dword[0]
    115 /// W[11] := __C.dword[1]
    116 /// W[12] := __C.dword[2]
    117 /// W[13] := __C.dword[3]
    118 /// W[16] := ROL32(W[3], 7) ^ W[10] ^ WTMP[0]
    119 /// W[17] := ROL32(W[4], 7) ^ W[11] ^ WTMP[1]
    120 /// W[18] := ROL32(W[5], 7) ^ W[12] ^ WTMP[2]
    121 /// W[19] := ROL32(W[6], 7) ^ W[13] ^ WTMP[3]
    122 /// W[19] := W[19] ^ ROL32(W[16], 6) ^ ROL32(W[16], 15) ^ ROL32(W[16], 30)
    123 /// dst.dword[0] := W[16]
    124 /// dst.dword[1] := W[17]
    125 /// dst.dword[2] := W[18]
    126 /// dst.dword[3] := W[19]
    127 /// dst[MAX:128] := 0
    128 /// \endcode
    129 static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sm3msg2_epi32(__m128i __A,
    130                                                                   __m128i __B,
    131                                                                   __m128i __C) {
    132   return (__m128i)__builtin_ia32_vsm3msg2((__v4su)__A, (__v4su)__B,
    133                                           (__v4su)__C);
    134 }
    135 
    136 /// This intrinsic performs two rounds of the SM3 operation using the initial
    137 ///    SM3 state (C, D, G, H) from \a __A, the initial SM3 state (A, B, E, F)
    138 ///    from \a __B and the pre-computed words from \a __C. The (C, D, G, H)
    139 ///    values in \a __A are taken non-rotated from the previous state; the
    140 ///    left rotations are applied by this operation. \a imm8 should contain
    141 ///    the even round number of the first of the two rounds; it is masked by
    142 ///    AND'ing it with 0x3E so that only even round numbers from 0 through 62
    143 ///    are used. The updated SM3 state (A, B, E, F) is stored in \a dst.
    144 ///
    145 /// \headerfile <immintrin.h>
    146 ///
    147 /// \code
    148 /// __m128i _mm_sm3rnds2_epi32(__m128i __A, __m128i __B, __m128i __C,
    149 ///                            const int imm8)
    150 /// \endcode
    151 ///
    152 /// This intrinsic corresponds to the \c VSM3RNDS2 instruction.
    153 ///
    154 /// \param __A
    155 ///    A 128-bit vector of [4 x int] holding the SM3 state (C, D, G, H).
    156 /// \param __B
    157 ///    A 128-bit vector of [4 x int] holding the SM3 state (A, B, E, F).
    158 /// \param __C
    159 ///    A 128-bit vector of [4 x int] holding W[0], W[1], W[4] and W[5].
    160 /// \param imm8
    161 ///    An 8-bit constant integer giving the round number (masked with 0x3E).
    162 /// \returns
    163 ///    A 128-bit vector of [4 x int] holding the updated state (A, B, E, F).
    164 ///
    165 /// \code{.operation}
    166 /// DEFINE ROL32(dword, n) {
    167 /// 	count := n % 32
    168 /// 	dest := (dword << count) | (dword >> (32 - count))
    169 /// 	RETURN dest
    170 /// }
    171 /// DEFINE P0(dword) {
    172 /// 	RETURN dword ^ ROL32(dword, 9) ^ ROL32(dword, 17)
    173 /// }
    174 /// DEFINE FF(x, y, z, round) {
    175 /// 	IF round < 16
    176 /// 		RETURN (x ^ y ^ z)
    177 /// 	ELSE
    178 /// 		RETURN (x & y) | (x & z) | (y & z)
    179 /// 	FI
    180 /// }
    181 /// DEFINE GG(x, y, z, round) {
    182 /// 	IF round < 16
    183 /// 		RETURN (x ^ y ^ z)
    184 /// 	ELSE
    185 /// 		RETURN (x & y) | (~x & z)
    186 /// 	FI
    187 /// }
    188 /// A[0] := __B.dword[3]
    189 /// B[0] := __B.dword[2]
    190 /// C[0] := __A.dword[3]
    191 /// D[0] := __A.dword[2]
    192 /// E[0] := __B.dword[1]
    193 /// F[0] := __B.dword[0]
    194 /// G[0] := __A.dword[1]
    195 /// H[0] := __A.dword[0]
    196 /// W[0] := __C.dword[0]
    197 /// W[1] := __C.dword[1]
    198 /// W[4] := __C.dword[2]
    199 /// W[5] := __C.dword[3]
    200 /// C[0] := ROL32(C[0], 9)
    201 /// D[0] := ROL32(D[0], 9)
    202 /// G[0] := ROL32(G[0], 19)
    203 /// H[0] := ROL32(H[0], 19)
    204 /// ROUND := imm8 & 0x3E
    205 /// IF ROUND < 16
    206 /// 	CONST := 0x79CC4519
    207 /// ELSE
    208 /// 	CONST := 0x7A879D8A
    209 /// FI
    210 /// CONST := ROL32(CONST, ROUND)
    211 /// FOR i := 0 to 1
    212 /// 	S1 := ROL32((ROL32(A[i], 12) + E[i] + CONST), 7)
    213 /// 	S2 := S1 ^ ROL32(A[i], 12)
    214 /// 	T1 := FF(A[i], B[i], C[i], ROUND) + D[i] + S2 + (W[i] ^ W[i+4])
    215 /// 	T2 := GG(E[i], F[i], G[i], ROUND) + H[i] + S1 + W[i]
    216 /// 	D[i+1] := C[i]
    217 /// 	C[i+1] := ROL32(B[i], 9)
    218 /// 	B[i+1] := A[i]
    219 /// 	A[i+1] := T1
    220 /// 	H[i+1] := G[i]
    221 /// 	G[i+1] := ROL32(F[i], 19)
    222 /// 	F[i+1] := E[i]
    223 /// 	E[i+1] := P0(T2)
    224 /// 	CONST := ROL32(CONST, 1)
    225 /// ENDFOR
    226 /// dst.dword[3] := A[2]
    227 /// dst.dword[2] := B[2]
    228 /// dst.dword[1] := E[2]
    229 /// dst.dword[0] := F[2]
    230 /// dst[MAX:128] := 0
    231 /// \endcode
    232 #define _mm_sm3rnds2_epi32(A, B, C, D)                                         \
    233   ((__m128i)__builtin_ia32_vsm3rnds2((__v4su)(A), (__v4su)(B), (__v4su)(C),    \
    234                                      (int)(D)))
    235 
    236 #undef __DEFAULT_FN_ATTRS128
    237 
    238 #endif // __SM3INTRIN_H