zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

fma4intrin.h (6986B) - Raw


      1 /*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------===
      2  *
      3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4  * See https://llvm.org/LICENSE.txt for license information.
      5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6  *
      7  *===-----------------------------------------------------------------------===
      8  */
      9 
     10 #ifndef __X86INTRIN_H
     11 #error "Never use <fma4intrin.h> directly; include <x86intrin.h> instead."
     12 #endif
     13 
     14 #ifndef __FMA4INTRIN_H
     15 #define __FMA4INTRIN_H
     16 
     17 #include <pmmintrin.h>
     18 
     /* Default attribute sets applied to every function in this file: always
      * inline, no debug info, built for the "fma4" target feature, and tagged
      * with the value (128 or 256) given to __min_vector_width__. */
     #define __DEFAULT_FN_ATTRS128 \
       __attribute__((__always_inline__, __nodebug__, __target__("fma4"), \
                      __min_vector_width__(128)))
     #define __DEFAULT_FN_ATTRS256 \
       __attribute__((__always_inline__, __nodebug__, __target__("fma4"), \
                      __min_vector_width__(256)))
     22 
     23 static __inline__ __m128 __DEFAULT_FN_ATTRS128
     24 _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
     25 {
     26   return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
     27 }
     28 
     29 static __inline__ __m128d __DEFAULT_FN_ATTRS128
     30 _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)
     31 {
     32   return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
     33 }
     34 
     35 static __inline__ __m128 __DEFAULT_FN_ATTRS128
     36 _mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)
     37 {
     38   return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
     39 }
     40 
     41 static __inline__ __m128d __DEFAULT_FN_ATTRS128
     42 _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
     43 {
     44   return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
     45 }
     46 
     47 static __inline__ __m128 __DEFAULT_FN_ATTRS128
     48 _mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
     49 {
     50   return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
     51 }
     52 
     53 static __inline__ __m128d __DEFAULT_FN_ATTRS128
     54 _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
     55 {
     56   return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
     57 }
     58 
     59 static __inline__ __m128 __DEFAULT_FN_ATTRS128
     60 _mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
     61 {
     62   return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
     63 }
     64 
     65 static __inline__ __m128d __DEFAULT_FN_ATTRS128
     66 _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
     67 {
     68   return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
     69 }
     70 
     71 static __inline__ __m128 __DEFAULT_FN_ATTRS128
     72 _mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
     73 {
     74   return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
     75 }
     76 
     77 static __inline__ __m128d __DEFAULT_FN_ATTRS128
     78 _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
     79 {
     80   return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
     81 }
     82 
     83 static __inline__ __m128 __DEFAULT_FN_ATTRS128
     84 _mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
     85 {
     86   return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
     87 }
     88 
     89 static __inline__ __m128d __DEFAULT_FN_ATTRS128
     90 _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
     91 {
     92   return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
     93 }
     94 
     95 static __inline__ __m128 __DEFAULT_FN_ATTRS128
     96 _mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
     97 {
     98   return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
     99 }
    100 
    101 static __inline__ __m128d __DEFAULT_FN_ATTRS128
    102 _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
    103 {
    104   return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
    105 }
    106 
    107 static __inline__ __m128 __DEFAULT_FN_ATTRS128
    108 _mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
    109 {
    110   return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
    111 }
    112 
    113 static __inline__ __m128d __DEFAULT_FN_ATTRS128
    114 _mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
    115 {
    116   return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
    117 }
    118 
    119 static __inline__ __m128 __DEFAULT_FN_ATTRS128
    120 _mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C)
    121 {
    122   return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
    123 }
    124 
    125 static __inline__ __m128d __DEFAULT_FN_ATTRS128
    126 _mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C)
    127 {
    128   return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
    129 }
    130 
    131 static __inline__ __m128 __DEFAULT_FN_ATTRS128
    132 _mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C)
    133 {
    134   return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
    135 }
    136 
    137 static __inline__ __m128d __DEFAULT_FN_ATTRS128
    138 _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)
    139 {
    140   return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
    141 }
    142 
    143 static __inline__ __m256 __DEFAULT_FN_ATTRS256
    144 _mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C)
    145 {
    146   return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
    147 }
    148 
    149 static __inline__ __m256d __DEFAULT_FN_ATTRS256
    150 _mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C)
    151 {
    152   return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
    153 }
    154 
    155 static __inline__ __m256 __DEFAULT_FN_ATTRS256
    156 _mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
    157 {
    158   return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
    159 }
    160 
    161 static __inline__ __m256d __DEFAULT_FN_ATTRS256
    162 _mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)
    163 {
    164   return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
    165 }
    166 
    167 static __inline__ __m256 __DEFAULT_FN_ATTRS256
    168 _mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)
    169 {
    170   return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
    171 }
    172 
    173 static __inline__ __m256d __DEFAULT_FN_ATTRS256
    174 _mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)
    175 {
    176   return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
    177 }
    178 
    179 static __inline__ __m256 __DEFAULT_FN_ATTRS256
    180 _mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)
    181 {
    182   return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
    183 }
    184 
    185 static __inline__ __m256d __DEFAULT_FN_ATTRS256
    186 _mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)
    187 {
    188   return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
    189 }
    190 
    191 static __inline__ __m256 __DEFAULT_FN_ATTRS256
    192 _mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C)
    193 {
    194   return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
    195 }
    196 
    197 static __inline__ __m256d __DEFAULT_FN_ATTRS256
    198 _mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C)
    199 {
    200   return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
    201 }
    202 
    203 static __inline__ __m256 __DEFAULT_FN_ATTRS256
    204 _mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C)
    205 {
    206   return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
    207 }
    208 
    209 static __inline__ __m256d __DEFAULT_FN_ATTRS256
    210 _mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)
    211 {
    212   return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
    213 }
    214 
    215 #undef __DEFAULT_FN_ATTRS128
    216 #undef __DEFAULT_FN_ATTRS256
    217 
    218 #endif /* __FMA4INTRIN_H */