common.h (252084B) - Raw
1 /*! @header 2 * The interfaces declared in this header provide "common" elementwise 3 * operations that are neither math nor logic functions. These are available 4 * only for floating-point vectors and scalars, except for min, max, abs, 5 * clamp, and the reduce operations, which also support integer vectors. 6 * 7 * simd_abs(x) Absolute value of x. Also available as fabs 8 * for floating-point vectors. If x is the 9 * smallest signed integer, x is returned. 10 * 11 * simd_max(x,y) Returns the maximum of x and y. Also available 12 * as fmax for floating-point vectors. 13 * 14 * simd_min(x,y) Returns the minimum of x and y. Also available 15 * as fmin for floating-point vectors. 16 * 17 * simd_clamp(x,min,max) x clamped to the range [min, max]. 18 * 19 * simd_sign(x) -1 if x is less than zero, 0 if x is zero or 20 * NaN, and +1 if x is greater than zero. 21 * 22 * simd_mix(x,y,t) If t is not in the range [0,1], the result is 23 * simd_lerp(x,y,t) undefined. Otherwise the result is x+(y-x)*t, 24 * which linearly interpolates between x and y. 25 * 26 * simd_recip(x) An approximation to 1/x. If x is very near the 27 * limits of representable values, or is infinity 28 * or NaN, the result is undefined. There are 29 * two variants of this function: 30 * 31 * simd_precise_recip(x) 32 * 33 * and 34 * 35 * simd_fast_recip(x). 36 * 37 * The "precise" variant is accurate to a few ULPs, 38 * whereas the "fast" variant may have as little 39 * as 11 bits of accuracy in float and about 22 40 * bits in double. 41 * 42 * The function simd_recip(x) resolves to 43 * simd_precise_recip(x) ordinarily, but to 44 * simd_fast_recip(x) when used in a translation 45 * unit compiled with -ffast-math (when 46 * -ffast-math is in effect, you may still use the 47 * precise version of this function by calling it 48 * explicitly by name). 49 * 50 * simd_rsqrt(x) An approximation to 1/sqrt(x). If x is 51 * infinity or NaN, the result is undefined. 
52 * There are two variants of this function: 53 * 54 * simd_precise_rsqrt(x) 55 * 56 * and 57 * 58 * simd_fast_rsqrt(x). 59 * 60 * The "precise" variant is accurate to a few ULPs, 61 * whereas the "fast" variant may have as little 62 * as 11 bits of accuracy in float and about 22 63 * bits in double. 64 * 65 * The function simd_rsqrt(x) resolves to 66 * simd_precise_rsqrt(x) ordinarily, but to 67 * simd_fast_rsqrt(x) when used in a translation 68 * unit compiled with -ffast-math (when 69 * -ffast-math is in effect, you may still use the 70 * precise version of this function by calling it 71 * explicitly by name). 72 * 73 * simd_fract(x) The "fractional part" of x, which lies strictly 74 * in the range [0, 0x1.fffffep-1]. 75 * 76 * simd_step(edge,x) 0 if x < edge, and 1 otherwise. 77 * 78 * simd_smoothstep(edge0,edge1,x) 0 if x <= edge0, 1 if x >= edge1, and 79 * a Hermite interpolation between 0 and 1 if 80 * edge0 < x < edge1. 81 * 82 * simd_reduce_add(x) Sum of the elements of x. 83 * 84 * simd_reduce_min(x) Minimum of the elements of x. 85 * 86 * simd_reduce_max(x) Maximum of the elements of x. 87 * 88 * simd_equal(x,y) True if and only if every lane of x is equal 89 * to the corresponding lane of y. 
90 * 91 * The following common functions are available in the simd:: namespace: 92 * 93 * C++ Function Equivalent C Function 94 * -------------------------------------------------------------------- 95 * simd::abs(x) simd_abs(x) 96 * simd::max(x,y) simd_max(x,y) 97 * simd::min(x,y) simd_min(x,y) 98 * simd::clamp(x,min,max) simd_clamp(x,min,max) 99 * simd::sign(x) simd_sign(x) 100 * simd::mix(x,y,t) simd_mix(x,y,t) 101 * simd::lerp(x,y,t) simd_lerp(x,y,t) 102 * simd::recip(x) simd_recip(x) 103 * simd::rsqrt(x) simd_rsqrt(x) 104 * simd::fract(x) simd_fract(x) 105 * simd::step(edge,x) simd_step(edge,x) 106 * simd::smoothstep(e0,e1,x) simd_smoothstep(e0,e1,x) 107 * simd::reduce_add(x) simd_reduce_add(x) 108 * simd::reduce_max(x) simd_reduce_max(x) 109 * simd::reduce_min(x) simd_reduce_min(x) 110 * simd::equal(x,y) simd_equal(x,y) 111 * 112 * simd::precise::recip(x) simd_precise_recip(x) 113 * simd::precise::rsqrt(x) simd_precise_rsqrt(x) 114 * 115 * simd::fast::recip(x) simd_fast_recip(x) 116 * simd::fast::rsqrt(x) simd_fast_rsqrt(x) 117 * 118 * @copyright 2014-2017 Apple, Inc. All rights reserved. 119 * @unsorted */ 120 121 #ifndef SIMD_COMMON_HEADER 122 #define SIMD_COMMON_HEADER 123 124 #include <simd/base.h> 125 #if SIMD_COMPILER_HAS_REQUIRED_FEATURES 126 #include <simd/vector_make.h> 127 #include <simd/logic.h> 128 #include <simd/math.h> 129 130 #ifdef __cplusplus 131 extern "C" { 132 #endif 133 134 /*! @abstract The elementwise absolute value of x. */ 135 static inline SIMD_CFUNC simd_char2 simd_abs(simd_char2 x); 136 /*! @abstract The elementwise absolute value of x. */ 137 static inline SIMD_CFUNC simd_char3 simd_abs(simd_char3 x); 138 /*! @abstract The elementwise absolute value of x. */ 139 static inline SIMD_CFUNC simd_char4 simd_abs(simd_char4 x); 140 /*! @abstract The elementwise absolute value of x. */ 141 static inline SIMD_CFUNC simd_char8 simd_abs(simd_char8 x); 142 /*! @abstract The elementwise absolute value of x. 
*/ 143 static inline SIMD_CFUNC simd_char16 simd_abs(simd_char16 x); 144 /*! @abstract The elementwise absolute value of x. */ 145 static inline SIMD_CFUNC simd_char32 simd_abs(simd_char32 x); 146 /*! @abstract The elementwise absolute value of x. */ 147 static inline SIMD_CFUNC simd_char64 simd_abs(simd_char64 x); 148 /*! @abstract The elementwise absolute value of x. */ 149 static inline SIMD_CFUNC simd_short2 simd_abs(simd_short2 x); 150 /*! @abstract The elementwise absolute value of x. */ 151 static inline SIMD_CFUNC simd_short3 simd_abs(simd_short3 x); 152 /*! @abstract The elementwise absolute value of x. */ 153 static inline SIMD_CFUNC simd_short4 simd_abs(simd_short4 x); 154 /*! @abstract The elementwise absolute value of x. */ 155 static inline SIMD_CFUNC simd_short8 simd_abs(simd_short8 x); 156 /*! @abstract The elementwise absolute value of x. */ 157 static inline SIMD_CFUNC simd_short16 simd_abs(simd_short16 x); 158 /*! @abstract The elementwise absolute value of x. */ 159 static inline SIMD_CFUNC simd_short32 simd_abs(simd_short32 x); 160 /*! @abstract The elementwise absolute value of x. */ 161 static inline SIMD_CFUNC simd_half2 simd_abs(simd_half2 x); 162 /*! @abstract The elementwise absolute value of x. */ 163 static inline SIMD_CFUNC simd_half3 simd_abs(simd_half3 x); 164 /*! @abstract The elementwise absolute value of x. */ 165 static inline SIMD_CFUNC simd_half4 simd_abs(simd_half4 x); 166 /*! @abstract The elementwise absolute value of x. */ 167 static inline SIMD_CFUNC simd_half8 simd_abs(simd_half8 x); 168 /*! @abstract The elementwise absolute value of x. */ 169 static inline SIMD_CFUNC simd_half16 simd_abs(simd_half16 x); 170 /*! @abstract The elementwise absolute value of x. */ 171 static inline SIMD_CFUNC simd_half32 simd_abs(simd_half32 x); 172 /*! @abstract The elementwise absolute value of x. */ 173 static inline SIMD_CFUNC simd_int2 simd_abs(simd_int2 x); 174 /*! @abstract The elementwise absolute value of x. 
*/ 175 static inline SIMD_CFUNC simd_int3 simd_abs(simd_int3 x); 176 /*! @abstract The elementwise absolute value of x. */ 177 static inline SIMD_CFUNC simd_int4 simd_abs(simd_int4 x); 178 /*! @abstract The elementwise absolute value of x. */ 179 static inline SIMD_CFUNC simd_int8 simd_abs(simd_int8 x); 180 /*! @abstract The elementwise absolute value of x. */ 181 static inline SIMD_CFUNC simd_int16 simd_abs(simd_int16 x); 182 /*! @abstract The elementwise absolute value of x. */ 183 static inline SIMD_CFUNC simd_float2 simd_abs(simd_float2 x); 184 /*! @abstract The elementwise absolute value of x. */ 185 static inline SIMD_CFUNC simd_float3 simd_abs(simd_float3 x); 186 /*! @abstract The elementwise absolute value of x. */ 187 static inline SIMD_CFUNC simd_float4 simd_abs(simd_float4 x); 188 /*! @abstract The elementwise absolute value of x. */ 189 static inline SIMD_CFUNC simd_float8 simd_abs(simd_float8 x); 190 /*! @abstract The elementwise absolute value of x. */ 191 static inline SIMD_CFUNC simd_float16 simd_abs(simd_float16 x); 192 /*! @abstract The elementwise absolute value of x. */ 193 static inline SIMD_CFUNC simd_long2 simd_abs(simd_long2 x); 194 /*! @abstract The elementwise absolute value of x. */ 195 static inline SIMD_CFUNC simd_long3 simd_abs(simd_long3 x); 196 /*! @abstract The elementwise absolute value of x. */ 197 static inline SIMD_CFUNC simd_long4 simd_abs(simd_long4 x); 198 /*! @abstract The elementwise absolute value of x. */ 199 static inline SIMD_CFUNC simd_long8 simd_abs(simd_long8 x); 200 /*! @abstract The elementwise absolute value of x. */ 201 static inline SIMD_CFUNC simd_double2 simd_abs(simd_double2 x); 202 /*! @abstract The elementwise absolute value of x. */ 203 static inline SIMD_CFUNC simd_double3 simd_abs(simd_double3 x); 204 /*! @abstract The elementwise absolute value of x. */ 205 static inline SIMD_CFUNC simd_double4 simd_abs(simd_double4 x); 206 /*! @abstract The elementwise absolute value of x. 
*/ 207 static inline SIMD_CFUNC simd_double8 simd_abs(simd_double8 x); 208 /*! @abstract The elementwise absolute value of x. 209 * @discussion Deprecated. Use simd_abs(x) instead. */ 210 #define vector_abs simd_abs 211 212 /*! @abstract The elementwise maximum of x and y. */ 213 static inline SIMD_CFUNC simd_char2 simd_max(simd_char2 x, simd_char2 y); 214 /*! @abstract The elementwise maximum of x and y. */ 215 static inline SIMD_CFUNC simd_char3 simd_max(simd_char3 x, simd_char3 y); 216 /*! @abstract The elementwise maximum of x and y. */ 217 static inline SIMD_CFUNC simd_char4 simd_max(simd_char4 x, simd_char4 y); 218 /*! @abstract The elementwise maximum of x and y. */ 219 static inline SIMD_CFUNC simd_char8 simd_max(simd_char8 x, simd_char8 y); 220 /*! @abstract The elementwise maximum of x and y. */ 221 static inline SIMD_CFUNC simd_char16 simd_max(simd_char16 x, simd_char16 y); 222 /*! @abstract The elementwise maximum of x and y. */ 223 static inline SIMD_CFUNC simd_char32 simd_max(simd_char32 x, simd_char32 y); 224 /*! @abstract The elementwise maximum of x and y. */ 225 static inline SIMD_CFUNC simd_char64 simd_max(simd_char64 x, simd_char64 y); 226 /*! @abstract The elementwise maximum of x and y. */ 227 static inline SIMD_CFUNC simd_uchar2 simd_max(simd_uchar2 x, simd_uchar2 y); 228 /*! @abstract The elementwise maximum of x and y. */ 229 static inline SIMD_CFUNC simd_uchar3 simd_max(simd_uchar3 x, simd_uchar3 y); 230 /*! @abstract The elementwise maximum of x and y. */ 231 static inline SIMD_CFUNC simd_uchar4 simd_max(simd_uchar4 x, simd_uchar4 y); 232 /*! @abstract The elementwise maximum of x and y. */ 233 static inline SIMD_CFUNC simd_uchar8 simd_max(simd_uchar8 x, simd_uchar8 y); 234 /*! @abstract The elementwise maximum of x and y. */ 235 static inline SIMD_CFUNC simd_uchar16 simd_max(simd_uchar16 x, simd_uchar16 y); 236 /*! @abstract The elementwise maximum of x and y. 
*/ 237 static inline SIMD_CFUNC simd_uchar32 simd_max(simd_uchar32 x, simd_uchar32 y); 238 /*! @abstract The elementwise maximum of x and y. */ 239 static inline SIMD_CFUNC simd_uchar64 simd_max(simd_uchar64 x, simd_uchar64 y); 240 /*! @abstract The elementwise maximum of x and y. */ 241 static inline SIMD_CFUNC simd_short2 simd_max(simd_short2 x, simd_short2 y); 242 /*! @abstract The elementwise maximum of x and y. */ 243 static inline SIMD_CFUNC simd_short3 simd_max(simd_short3 x, simd_short3 y); 244 /*! @abstract The elementwise maximum of x and y. */ 245 static inline SIMD_CFUNC simd_short4 simd_max(simd_short4 x, simd_short4 y); 246 /*! @abstract The elementwise maximum of x and y. */ 247 static inline SIMD_CFUNC simd_short8 simd_max(simd_short8 x, simd_short8 y); 248 /*! @abstract The elementwise maximum of x and y. */ 249 static inline SIMD_CFUNC simd_short16 simd_max(simd_short16 x, simd_short16 y); 250 /*! @abstract The elementwise maximum of x and y. */ 251 static inline SIMD_CFUNC simd_short32 simd_max(simd_short32 x, simd_short32 y); 252 /*! @abstract The elementwise maximum of x and y. */ 253 static inline SIMD_CFUNC simd_ushort2 simd_max(simd_ushort2 x, simd_ushort2 y); 254 /*! @abstract The elementwise maximum of x and y. */ 255 static inline SIMD_CFUNC simd_ushort3 simd_max(simd_ushort3 x, simd_ushort3 y); 256 /*! @abstract The elementwise maximum of x and y. */ 257 static inline SIMD_CFUNC simd_ushort4 simd_max(simd_ushort4 x, simd_ushort4 y); 258 /*! @abstract The elementwise maximum of x and y. */ 259 static inline SIMD_CFUNC simd_ushort8 simd_max(simd_ushort8 x, simd_ushort8 y); 260 /*! @abstract The elementwise maximum of x and y. */ 261 static inline SIMD_CFUNC simd_ushort16 simd_max(simd_ushort16 x, simd_ushort16 y); 262 /*! @abstract The elementwise maximum of x and y. */ 263 static inline SIMD_CFUNC simd_ushort32 simd_max(simd_ushort32 x, simd_ushort32 y); 264 /*! @abstract The elementwise maximum of x and y. 
*/ 265 static inline SIMD_CFUNC _Float16 simd_max(_Float16 x, _Float16 y); 266 /*! @abstract The elementwise maximum of x and y. */ 267 static inline SIMD_CFUNC simd_half2 simd_max(simd_half2 x, simd_half2 y); 268 /*! @abstract The elementwise maximum of x and y. */ 269 static inline SIMD_CFUNC simd_half3 simd_max(simd_half3 x, simd_half3 y); 270 /*! @abstract The elementwise maximum of x and y. */ 271 static inline SIMD_CFUNC simd_half4 simd_max(simd_half4 x, simd_half4 y); 272 /*! @abstract The elementwise maximum of x and y. */ 273 static inline SIMD_CFUNC simd_half8 simd_max(simd_half8 x, simd_half8 y); 274 /*! @abstract The elementwise maximum of x and y. */ 275 static inline SIMD_CFUNC simd_half16 simd_max(simd_half16 x, simd_half16 y); 276 /*! @abstract The elementwise maximum of x and y. */ 277 static inline SIMD_CFUNC simd_half32 simd_max(simd_half32 x, simd_half32 y); 278 /*! @abstract The elementwise maximum of x and y. */ 279 static inline SIMD_CFUNC simd_int2 simd_max(simd_int2 x, simd_int2 y); 280 /*! @abstract The elementwise maximum of x and y. */ 281 static inline SIMD_CFUNC simd_int3 simd_max(simd_int3 x, simd_int3 y); 282 /*! @abstract The elementwise maximum of x and y. */ 283 static inline SIMD_CFUNC simd_int4 simd_max(simd_int4 x, simd_int4 y); 284 /*! @abstract The elementwise maximum of x and y. */ 285 static inline SIMD_CFUNC simd_int8 simd_max(simd_int8 x, simd_int8 y); 286 /*! @abstract The elementwise maximum of x and y. */ 287 static inline SIMD_CFUNC simd_int16 simd_max(simd_int16 x, simd_int16 y); 288 /*! @abstract The elementwise maximum of x and y. */ 289 static inline SIMD_CFUNC simd_uint2 simd_max(simd_uint2 x, simd_uint2 y); 290 /*! @abstract The elementwise maximum of x and y. */ 291 static inline SIMD_CFUNC simd_uint3 simd_max(simd_uint3 x, simd_uint3 y); 292 /*! @abstract The elementwise maximum of x and y. */ 293 static inline SIMD_CFUNC simd_uint4 simd_max(simd_uint4 x, simd_uint4 y); 294 /*! 
@abstract The elementwise maximum of x and y. */ 295 static inline SIMD_CFUNC simd_uint8 simd_max(simd_uint8 x, simd_uint8 y); 296 /*! @abstract The elementwise maximum of x and y. */ 297 static inline SIMD_CFUNC simd_uint16 simd_max(simd_uint16 x, simd_uint16 y); 298 /*! @abstract The elementwise maximum of x and y. */ 299 static inline SIMD_CFUNC float simd_max(float x, float y); 300 /*! @abstract The elementwise maximum of x and y. */ 301 static inline SIMD_CFUNC simd_float2 simd_max(simd_float2 x, simd_float2 y); 302 /*! @abstract The elementwise maximum of x and y. */ 303 static inline SIMD_CFUNC simd_float3 simd_max(simd_float3 x, simd_float3 y); 304 /*! @abstract The elementwise maximum of x and y. */ 305 static inline SIMD_CFUNC simd_float4 simd_max(simd_float4 x, simd_float4 y); 306 /*! @abstract The elementwise maximum of x and y. */ 307 static inline SIMD_CFUNC simd_float8 simd_max(simd_float8 x, simd_float8 y); 308 /*! @abstract The elementwise maximum of x and y. */ 309 static inline SIMD_CFUNC simd_float16 simd_max(simd_float16 x, simd_float16 y); 310 /*! @abstract The elementwise maximum of x and y. */ 311 static inline SIMD_CFUNC simd_long2 simd_max(simd_long2 x, simd_long2 y); 312 /*! @abstract The elementwise maximum of x and y. */ 313 static inline SIMD_CFUNC simd_long3 simd_max(simd_long3 x, simd_long3 y); 314 /*! @abstract The elementwise maximum of x and y. */ 315 static inline SIMD_CFUNC simd_long4 simd_max(simd_long4 x, simd_long4 y); 316 /*! @abstract The elementwise maximum of x and y. */ 317 static inline SIMD_CFUNC simd_long8 simd_max(simd_long8 x, simd_long8 y); 318 /*! @abstract The elementwise maximum of x and y. */ 319 static inline SIMD_CFUNC simd_ulong2 simd_max(simd_ulong2 x, simd_ulong2 y); 320 /*! @abstract The elementwise maximum of x and y. */ 321 static inline SIMD_CFUNC simd_ulong3 simd_max(simd_ulong3 x, simd_ulong3 y); 322 /*! @abstract The elementwise maximum of x and y. 
*/ 323 static inline SIMD_CFUNC simd_ulong4 simd_max(simd_ulong4 x, simd_ulong4 y); 324 /*! @abstract The elementwise maximum of x and y. */ 325 static inline SIMD_CFUNC simd_ulong8 simd_max(simd_ulong8 x, simd_ulong8 y); 326 /*! @abstract The elementwise maximum of x and y. */ 327 static inline SIMD_CFUNC double simd_max(double x, double y); 328 /*! @abstract The elementwise maximum of x and y. */ 329 static inline SIMD_CFUNC simd_double2 simd_max(simd_double2 x, simd_double2 y); 330 /*! @abstract The elementwise maximum of x and y. */ 331 static inline SIMD_CFUNC simd_double3 simd_max(simd_double3 x, simd_double3 y); 332 /*! @abstract The elementwise maximum of x and y. */ 333 static inline SIMD_CFUNC simd_double4 simd_max(simd_double4 x, simd_double4 y); 334 /*! @abstract The elementwise maximum of x and y. */ 335 static inline SIMD_CFUNC simd_double8 simd_max(simd_double8 x, simd_double8 y); 336 /*! @abstract The elementwise maximum of x and y. 337 * @discussion Deprecated. Use simd_max(x,y) instead. */ 338 #define vector_max simd_max 339 340 /*! @abstract The elementwise minimum of x and y. */ 341 static inline SIMD_CFUNC simd_char2 simd_min(simd_char2 x, simd_char2 y); 342 /*! @abstract The elementwise minimum of x and y. */ 343 static inline SIMD_CFUNC simd_char3 simd_min(simd_char3 x, simd_char3 y); 344 /*! @abstract The elementwise minimum of x and y. */ 345 static inline SIMD_CFUNC simd_char4 simd_min(simd_char4 x, simd_char4 y); 346 /*! @abstract The elementwise minimum of x and y. */ 347 static inline SIMD_CFUNC simd_char8 simd_min(simd_char8 x, simd_char8 y); 348 /*! @abstract The elementwise minimum of x and y. */ 349 static inline SIMD_CFUNC simd_char16 simd_min(simd_char16 x, simd_char16 y); 350 /*! @abstract The elementwise minimum of x and y. */ 351 static inline SIMD_CFUNC simd_char32 simd_min(simd_char32 x, simd_char32 y); 352 /*! @abstract The elementwise minimum of x and y. 
*/ 353 static inline SIMD_CFUNC simd_char64 simd_min(simd_char64 x, simd_char64 y); 354 /*! @abstract The elementwise minimum of x and y. */ 355 static inline SIMD_CFUNC simd_uchar2 simd_min(simd_uchar2 x, simd_uchar2 y); 356 /*! @abstract The elementwise minimum of x and y. */ 357 static inline SIMD_CFUNC simd_uchar3 simd_min(simd_uchar3 x, simd_uchar3 y); 358 /*! @abstract The elementwise minimum of x and y. */ 359 static inline SIMD_CFUNC simd_uchar4 simd_min(simd_uchar4 x, simd_uchar4 y); 360 /*! @abstract The elementwise minimum of x and y. */ 361 static inline SIMD_CFUNC simd_uchar8 simd_min(simd_uchar8 x, simd_uchar8 y); 362 /*! @abstract The elementwise minimum of x and y. */ 363 static inline SIMD_CFUNC simd_uchar16 simd_min(simd_uchar16 x, simd_uchar16 y); 364 /*! @abstract The elementwise minimum of x and y. */ 365 static inline SIMD_CFUNC simd_uchar32 simd_min(simd_uchar32 x, simd_uchar32 y); 366 /*! @abstract The elementwise minimum of x and y. */ 367 static inline SIMD_CFUNC simd_uchar64 simd_min(simd_uchar64 x, simd_uchar64 y); 368 /*! @abstract The elementwise minimum of x and y. */ 369 static inline SIMD_CFUNC simd_short2 simd_min(simd_short2 x, simd_short2 y); 370 /*! @abstract The elementwise minimum of x and y. */ 371 static inline SIMD_CFUNC simd_short3 simd_min(simd_short3 x, simd_short3 y); 372 /*! @abstract The elementwise minimum of x and y. */ 373 static inline SIMD_CFUNC simd_short4 simd_min(simd_short4 x, simd_short4 y); 374 /*! @abstract The elementwise minimum of x and y. */ 375 static inline SIMD_CFUNC simd_short8 simd_min(simd_short8 x, simd_short8 y); 376 /*! @abstract The elementwise minimum of x and y. */ 377 static inline SIMD_CFUNC simd_short16 simd_min(simd_short16 x, simd_short16 y); 378 /*! @abstract The elementwise minimum of x and y. */ 379 static inline SIMD_CFUNC simd_short32 simd_min(simd_short32 x, simd_short32 y); 380 /*! @abstract The elementwise minimum of x and y. 
*/ 381 static inline SIMD_CFUNC simd_ushort2 simd_min(simd_ushort2 x, simd_ushort2 y); 382 /*! @abstract The elementwise minimum of x and y. */ 383 static inline SIMD_CFUNC simd_ushort3 simd_min(simd_ushort3 x, simd_ushort3 y); 384 /*! @abstract The elementwise minimum of x and y. */ 385 static inline SIMD_CFUNC simd_ushort4 simd_min(simd_ushort4 x, simd_ushort4 y); 386 /*! @abstract The elementwise minimum of x and y. */ 387 static inline SIMD_CFUNC simd_ushort8 simd_min(simd_ushort8 x, simd_ushort8 y); 388 /*! @abstract The elementwise minimum of x and y. */ 389 static inline SIMD_CFUNC simd_ushort16 simd_min(simd_ushort16 x, simd_ushort16 y); 390 /*! @abstract The elementwise minimum of x and y. */ 391 static inline SIMD_CFUNC simd_ushort32 simd_min(simd_ushort32 x, simd_ushort32 y); 392 /*! @abstract The elementwise minimum of x and y. */ 393 static inline SIMD_CFUNC _Float16 simd_min(_Float16 x, _Float16 y); 394 /*! @abstract The elementwise minimum of x and y. */ 395 static inline SIMD_CFUNC simd_half2 simd_min(simd_half2 x, simd_half2 y); 396 /*! @abstract The elementwise minimum of x and y. */ 397 static inline SIMD_CFUNC simd_half3 simd_min(simd_half3 x, simd_half3 y); 398 /*! @abstract The elementwise minimum of x and y. */ 399 static inline SIMD_CFUNC simd_half4 simd_min(simd_half4 x, simd_half4 y); 400 /*! @abstract The elementwise minimum of x and y. */ 401 static inline SIMD_CFUNC simd_half8 simd_min(simd_half8 x, simd_half8 y); 402 /*! @abstract The elementwise minimum of x and y. */ 403 static inline SIMD_CFUNC simd_half16 simd_min(simd_half16 x, simd_half16 y); 404 /*! @abstract The elementwise minimum of x and y. */ 405 static inline SIMD_CFUNC simd_half32 simd_min(simd_half32 x, simd_half32 y); 406 /*! @abstract The elementwise minimum of x and y. */ 407 static inline SIMD_CFUNC simd_int2 simd_min(simd_int2 x, simd_int2 y); 408 /*! @abstract The elementwise minimum of x and y. 
*/ 409 static inline SIMD_CFUNC simd_int3 simd_min(simd_int3 x, simd_int3 y); 410 /*! @abstract The elementwise minimum of x and y. */ 411 static inline SIMD_CFUNC simd_int4 simd_min(simd_int4 x, simd_int4 y); 412 /*! @abstract The elementwise minimum of x and y. */ 413 static inline SIMD_CFUNC simd_int8 simd_min(simd_int8 x, simd_int8 y); 414 /*! @abstract The elementwise minimum of x and y. */ 415 static inline SIMD_CFUNC simd_int16 simd_min(simd_int16 x, simd_int16 y); 416 /*! @abstract The elementwise minimum of x and y. */ 417 static inline SIMD_CFUNC simd_uint2 simd_min(simd_uint2 x, simd_uint2 y); 418 /*! @abstract The elementwise minimum of x and y. */ 419 static inline SIMD_CFUNC simd_uint3 simd_min(simd_uint3 x, simd_uint3 y); 420 /*! @abstract The elementwise minimum of x and y. */ 421 static inline SIMD_CFUNC simd_uint4 simd_min(simd_uint4 x, simd_uint4 y); 422 /*! @abstract The elementwise minimum of x and y. */ 423 static inline SIMD_CFUNC simd_uint8 simd_min(simd_uint8 x, simd_uint8 y); 424 /*! @abstract The elementwise minimum of x and y. */ 425 static inline SIMD_CFUNC simd_uint16 simd_min(simd_uint16 x, simd_uint16 y); 426 /*! @abstract The elementwise minimum of x and y. */ 427 static inline SIMD_CFUNC float simd_min(float x, float y); 428 /*! @abstract The elementwise minimum of x and y. */ 429 static inline SIMD_CFUNC simd_float2 simd_min(simd_float2 x, simd_float2 y); 430 /*! @abstract The elementwise minimum of x and y. */ 431 static inline SIMD_CFUNC simd_float3 simd_min(simd_float3 x, simd_float3 y); 432 /*! @abstract The elementwise minimum of x and y. */ 433 static inline SIMD_CFUNC simd_float4 simd_min(simd_float4 x, simd_float4 y); 434 /*! @abstract The elementwise minimum of x and y. */ 435 static inline SIMD_CFUNC simd_float8 simd_min(simd_float8 x, simd_float8 y); 436 /*! @abstract The elementwise minimum of x and y. */ 437 static inline SIMD_CFUNC simd_float16 simd_min(simd_float16 x, simd_float16 y); 438 /*! 
@abstract The elementwise minimum of x and y. */ 439 static inline SIMD_CFUNC simd_long2 simd_min(simd_long2 x, simd_long2 y); 440 /*! @abstract The elementwise minimum of x and y. */ 441 static inline SIMD_CFUNC simd_long3 simd_min(simd_long3 x, simd_long3 y); 442 /*! @abstract The elementwise minimum of x and y. */ 443 static inline SIMD_CFUNC simd_long4 simd_min(simd_long4 x, simd_long4 y); 444 /*! @abstract The elementwise minimum of x and y. */ 445 static inline SIMD_CFUNC simd_long8 simd_min(simd_long8 x, simd_long8 y); 446 /*! @abstract The elementwise minimum of x and y. */ 447 static inline SIMD_CFUNC simd_ulong2 simd_min(simd_ulong2 x, simd_ulong2 y); 448 /*! @abstract The elementwise minimum of x and y. */ 449 static inline SIMD_CFUNC simd_ulong3 simd_min(simd_ulong3 x, simd_ulong3 y); 450 /*! @abstract The elementwise minimum of x and y. */ 451 static inline SIMD_CFUNC simd_ulong4 simd_min(simd_ulong4 x, simd_ulong4 y); 452 /*! @abstract The elementwise minimum of x and y. */ 453 static inline SIMD_CFUNC simd_ulong8 simd_min(simd_ulong8 x, simd_ulong8 y); 454 /*! @abstract The elementwise minimum of x and y. */ 455 static inline SIMD_CFUNC double simd_min(double x, double y); 456 /*! @abstract The elementwise minimum of x and y. */ 457 static inline SIMD_CFUNC simd_double2 simd_min(simd_double2 x, simd_double2 y); 458 /*! @abstract The elementwise minimum of x and y. */ 459 static inline SIMD_CFUNC simd_double3 simd_min(simd_double3 x, simd_double3 y); 460 /*! @abstract The elementwise minimum of x and y. */ 461 static inline SIMD_CFUNC simd_double4 simd_min(simd_double4 x, simd_double4 y); 462 /*! @abstract The elementwise minimum of x and y. */ 463 static inline SIMD_CFUNC simd_double8 simd_min(simd_double8 x, simd_double8 y); 464 /*! @abstract The elementwise minimum of x and y. 465 * @discussion Deprecated. Use simd_min(x,y) instead. */ 466 #define vector_min simd_min 467 468 469 /*! @abstract x clamped to the range [min, max]. 
470 * @discussion Note that if you want to clamp all lanes to the same range, 471 * you can use a scalar value for min and max. */ 472 static inline SIMD_CFUNC simd_char2 simd_clamp(simd_char2 x, simd_char2 min, simd_char2 max); 473 /*! @abstract x clamped to the range [min, max]. 474 * @discussion Note that if you want to clamp all lanes to the same range, 475 * you can use a scalar value for min and max. */ 476 static inline SIMD_CFUNC simd_char3 simd_clamp(simd_char3 x, simd_char3 min, simd_char3 max); 477 /*! @abstract x clamped to the range [min, max]. 478 * @discussion Note that if you want to clamp all lanes to the same range, 479 * you can use a scalar value for min and max. */ 480 static inline SIMD_CFUNC simd_char4 simd_clamp(simd_char4 x, simd_char4 min, simd_char4 max); 481 /*! @abstract x clamped to the range [min, max]. 482 * @discussion Note that if you want to clamp all lanes to the same range, 483 * you can use a scalar value for min and max. */ 484 static inline SIMD_CFUNC simd_char8 simd_clamp(simd_char8 x, simd_char8 min, simd_char8 max); 485 /*! @abstract x clamped to the range [min, max]. 486 * @discussion Note that if you want to clamp all lanes to the same range, 487 * you can use a scalar value for min and max. */ 488 static inline SIMD_CFUNC simd_char16 simd_clamp(simd_char16 x, simd_char16 min, simd_char16 max); 489 /*! @abstract x clamped to the range [min, max]. 490 * @discussion Note that if you want to clamp all lanes to the same range, 491 * you can use a scalar value for min and max. */ 492 static inline SIMD_CFUNC simd_char32 simd_clamp(simd_char32 x, simd_char32 min, simd_char32 max); 493 /*! @abstract x clamped to the range [min, max]. 494 * @discussion Note that if you want to clamp all lanes to the same range, 495 * you can use a scalar value for min and max. */ 496 static inline SIMD_CFUNC simd_char64 simd_clamp(simd_char64 x, simd_char64 min, simd_char64 max); 497 /*! @abstract x clamped to the range [min, max]. 
498 * @discussion Note that if you want to clamp all lanes to the same range, 499 * you can use a scalar value for min and max. */ 500 static inline SIMD_CFUNC simd_uchar2 simd_clamp(simd_uchar2 x, simd_uchar2 min, simd_uchar2 max); 501 /*! @abstract x clamped to the range [min, max]. 502 * @discussion Note that if you want to clamp all lanes to the same range, 503 * you can use a scalar value for min and max. */ 504 static inline SIMD_CFUNC simd_uchar3 simd_clamp(simd_uchar3 x, simd_uchar3 min, simd_uchar3 max); 505 /*! @abstract x clamped to the range [min, max]. 506 * @discussion Note that if you want to clamp all lanes to the same range, 507 * you can use a scalar value for min and max. */ 508 static inline SIMD_CFUNC simd_uchar4 simd_clamp(simd_uchar4 x, simd_uchar4 min, simd_uchar4 max); 509 /*! @abstract x clamped to the range [min, max]. 510 * @discussion Note that if you want to clamp all lanes to the same range, 511 * you can use a scalar value for min and max. */ 512 static inline SIMD_CFUNC simd_uchar8 simd_clamp(simd_uchar8 x, simd_uchar8 min, simd_uchar8 max); 513 /*! @abstract x clamped to the range [min, max]. 514 * @discussion Note that if you want to clamp all lanes to the same range, 515 * you can use a scalar value for min and max. */ 516 static inline SIMD_CFUNC simd_uchar16 simd_clamp(simd_uchar16 x, simd_uchar16 min, simd_uchar16 max); 517 /*! @abstract x clamped to the range [min, max]. 518 * @discussion Note that if you want to clamp all lanes to the same range, 519 * you can use a scalar value for min and max. */ 520 static inline SIMD_CFUNC simd_uchar32 simd_clamp(simd_uchar32 x, simd_uchar32 min, simd_uchar32 max); 521 /*! @abstract x clamped to the range [min, max]. 522 * @discussion Note that if you want to clamp all lanes to the same range, 523 * you can use a scalar value for min and max. */ 524 static inline SIMD_CFUNC simd_uchar64 simd_clamp(simd_uchar64 x, simd_uchar64 min, simd_uchar64 max); 525 /*! 
@abstract x clamped to the range [min, max]. 526 * @discussion Note that if you want to clamp all lanes to the same range, 527 * you can use a scalar value for min and max. */ 528 static inline SIMD_CFUNC simd_short2 simd_clamp(simd_short2 x, simd_short2 min, simd_short2 max); 529 /*! @abstract x clamped to the range [min, max]. 530 * @discussion Note that if you want to clamp all lanes to the same range, 531 * you can use a scalar value for min and max. */ 532 static inline SIMD_CFUNC simd_short3 simd_clamp(simd_short3 x, simd_short3 min, simd_short3 max); 533 /*! @abstract x clamped to the range [min, max]. 534 * @discussion Note that if you want to clamp all lanes to the same range, 535 * you can use a scalar value for min and max. */ 536 static inline SIMD_CFUNC simd_short4 simd_clamp(simd_short4 x, simd_short4 min, simd_short4 max); 537 /*! @abstract x clamped to the range [min, max]. 538 * @discussion Note that if you want to clamp all lanes to the same range, 539 * you can use a scalar value for min and max. */ 540 static inline SIMD_CFUNC simd_short8 simd_clamp(simd_short8 x, simd_short8 min, simd_short8 max); 541 /*! @abstract x clamped to the range [min, max]. 542 * @discussion Note that if you want to clamp all lanes to the same range, 543 * you can use a scalar value for min and max. */ 544 static inline SIMD_CFUNC simd_short16 simd_clamp(simd_short16 x, simd_short16 min, simd_short16 max); 545 /*! @abstract x clamped to the range [min, max]. 546 * @discussion Note that if you want to clamp all lanes to the same range, 547 * you can use a scalar value for min and max. */ 548 static inline SIMD_CFUNC simd_short32 simd_clamp(simd_short32 x, simd_short32 min, simd_short32 max); 549 /*! @abstract x clamped to the range [min, max]. 550 * @discussion Note that if you want to clamp all lanes to the same range, 551 * you can use a scalar value for min and max. 
*/ 552 static inline SIMD_CFUNC simd_ushort2 simd_clamp(simd_ushort2 x, simd_ushort2 min, simd_ushort2 max); 553 /*! @abstract x clamped to the range [min, max]. 554 * @discussion Note that if you want to clamp all lanes to the same range, 555 * you can use a scalar value for min and max. */ 556 static inline SIMD_CFUNC simd_ushort3 simd_clamp(simd_ushort3 x, simd_ushort3 min, simd_ushort3 max); 557 /*! @abstract x clamped to the range [min, max]. 558 * @discussion Note that if you want to clamp all lanes to the same range, 559 * you can use a scalar value for min and max. */ 560 static inline SIMD_CFUNC simd_ushort4 simd_clamp(simd_ushort4 x, simd_ushort4 min, simd_ushort4 max); 561 /*! @abstract x clamped to the range [min, max]. 562 * @discussion Note that if you want to clamp all lanes to the same range, 563 * you can use a scalar value for min and max. */ 564 static inline SIMD_CFUNC simd_ushort8 simd_clamp(simd_ushort8 x, simd_ushort8 min, simd_ushort8 max); 565 /*! @abstract x clamped to the range [min, max]. 566 * @discussion Note that if you want to clamp all lanes to the same range, 567 * you can use a scalar value for min and max. */ 568 static inline SIMD_CFUNC simd_ushort16 simd_clamp(simd_ushort16 x, simd_ushort16 min, simd_ushort16 max); 569 /*! @abstract x clamped to the range [min, max]. 570 * @discussion Note that if you want to clamp all lanes to the same range, 571 * you can use a scalar value for min and max. */ 572 static inline SIMD_CFUNC simd_ushort32 simd_clamp(simd_ushort32 x, simd_ushort32 min, simd_ushort32 max); 573 /*! @abstract x clamped to the range [min, max]. 574 * @discussion Note that if you want to clamp all lanes to the same range, 575 * you can use a scalar value for min and max. */ 576 static inline SIMD_CFUNC _Float16 simd_clamp(_Float16 x, _Float16 min, _Float16 max); 577 /*! @abstract x clamped to the range [min, max]. 
578 * @discussion Note that if you want to clamp all lanes to the same range, 579 * you can use a scalar value for min and max. */ 580 static inline SIMD_CFUNC simd_half2 simd_clamp(simd_half2 x, simd_half2 min, simd_half2 max); 581 /*! @abstract x clamped to the range [min, max]. 582 * @discussion Note that if you want to clamp all lanes to the same range, 583 * you can use a scalar value for min and max. */ 584 static inline SIMD_CFUNC simd_half3 simd_clamp(simd_half3 x, simd_half3 min, simd_half3 max); 585 /*! @abstract x clamped to the range [min, max]. 586 * @discussion Note that if you want to clamp all lanes to the same range, 587 * you can use a scalar value for min and max. */ 588 static inline SIMD_CFUNC simd_half4 simd_clamp(simd_half4 x, simd_half4 min, simd_half4 max); 589 /*! @abstract x clamped to the range [min, max]. 590 * @discussion Note that if you want to clamp all lanes to the same range, 591 * you can use a scalar value for min and max. */ 592 static inline SIMD_CFUNC simd_half8 simd_clamp(simd_half8 x, simd_half8 min, simd_half8 max); 593 /*! @abstract x clamped to the range [min, max]. 594 * @discussion Note that if you want to clamp all lanes to the same range, 595 * you can use a scalar value for min and max. */ 596 static inline SIMD_CFUNC simd_half16 simd_clamp(simd_half16 x, simd_half16 min, simd_half16 max); 597 /*! @abstract x clamped to the range [min, max]. 598 * @discussion Note that if you want to clamp all lanes to the same range, 599 * you can use a scalar value for min and max. */ 600 static inline SIMD_CFUNC simd_half32 simd_clamp(simd_half32 x, simd_half32 min, simd_half32 max); 601 /*! @abstract x clamped to the range [min, max]. 602 * @discussion Note that if you want to clamp all lanes to the same range, 603 * you can use a scalar value for min and max. */ 604 static inline SIMD_CFUNC simd_int2 simd_clamp(simd_int2 x, simd_int2 min, simd_int2 max); 605 /*! @abstract x clamped to the range [min, max]. 
606 * @discussion Note that if you want to clamp all lanes to the same range, 607 * you can use a scalar value for min and max. */ 608 static inline SIMD_CFUNC simd_int3 simd_clamp(simd_int3 x, simd_int3 min, simd_int3 max); 609 /*! @abstract x clamped to the range [min, max]. 610 * @discussion Note that if you want to clamp all lanes to the same range, 611 * you can use a scalar value for min and max. */ 612 static inline SIMD_CFUNC simd_int4 simd_clamp(simd_int4 x, simd_int4 min, simd_int4 max); 613 /*! @abstract x clamped to the range [min, max]. 614 * @discussion Note that if you want to clamp all lanes to the same range, 615 * you can use a scalar value for min and max. */ 616 static inline SIMD_CFUNC simd_int8 simd_clamp(simd_int8 x, simd_int8 min, simd_int8 max); 617 /*! @abstract x clamped to the range [min, max]. 618 * @discussion Note that if you want to clamp all lanes to the same range, 619 * you can use a scalar value for min and max. */ 620 static inline SIMD_CFUNC simd_int16 simd_clamp(simd_int16 x, simd_int16 min, simd_int16 max); 621 /*! @abstract x clamped to the range [min, max]. 622 * @discussion Note that if you want to clamp all lanes to the same range, 623 * you can use a scalar value for min and max. */ 624 static inline SIMD_CFUNC simd_uint2 simd_clamp(simd_uint2 x, simd_uint2 min, simd_uint2 max); 625 /*! @abstract x clamped to the range [min, max]. 626 * @discussion Note that if you want to clamp all lanes to the same range, 627 * you can use a scalar value for min and max. */ 628 static inline SIMD_CFUNC simd_uint3 simd_clamp(simd_uint3 x, simd_uint3 min, simd_uint3 max); 629 /*! @abstract x clamped to the range [min, max]. 630 * @discussion Note that if you want to clamp all lanes to the same range, 631 * you can use a scalar value for min and max. */ 632 static inline SIMD_CFUNC simd_uint4 simd_clamp(simd_uint4 x, simd_uint4 min, simd_uint4 max); 633 /*! @abstract x clamped to the range [min, max]. 
634 * @discussion Note that if you want to clamp all lanes to the same range, 635 * you can use a scalar value for min and max. */ 636 static inline SIMD_CFUNC simd_uint8 simd_clamp(simd_uint8 x, simd_uint8 min, simd_uint8 max); 637 /*! @abstract x clamped to the range [min, max]. 638 * @discussion Note that if you want to clamp all lanes to the same range, 639 * you can use a scalar value for min and max. */ 640 static inline SIMD_CFUNC simd_uint16 simd_clamp(simd_uint16 x, simd_uint16 min, simd_uint16 max); 641 /*! @abstract x clamped to the range [min, max]. 642 * @discussion Note that if you want to clamp all lanes to the same range, 643 * you can use a scalar value for min and max. */ 644 static inline SIMD_CFUNC float simd_clamp(float x, float min, float max); 645 /*! @abstract x clamped to the range [min, max]. 646 * @discussion Note that if you want to clamp all lanes to the same range, 647 * you can use a scalar value for min and max. */ 648 static inline SIMD_CFUNC simd_float2 simd_clamp(simd_float2 x, simd_float2 min, simd_float2 max); 649 /*! @abstract x clamped to the range [min, max]. 650 * @discussion Note that if you want to clamp all lanes to the same range, 651 * you can use a scalar value for min and max. */ 652 static inline SIMD_CFUNC simd_float3 simd_clamp(simd_float3 x, simd_float3 min, simd_float3 max); 653 /*! @abstract x clamped to the range [min, max]. 654 * @discussion Note that if you want to clamp all lanes to the same range, 655 * you can use a scalar value for min and max. */ 656 static inline SIMD_CFUNC simd_float4 simd_clamp(simd_float4 x, simd_float4 min, simd_float4 max); 657 /*! @abstract x clamped to the range [min, max]. 658 * @discussion Note that if you want to clamp all lanes to the same range, 659 * you can use a scalar value for min and max. */ 660 static inline SIMD_CFUNC simd_float8 simd_clamp(simd_float8 x, simd_float8 min, simd_float8 max); 661 /*! @abstract x clamped to the range [min, max]. 
662 * @discussion Note that if you want to clamp all lanes to the same range, 663 * you can use a scalar value for min and max. */ 664 static inline SIMD_CFUNC simd_float16 simd_clamp(simd_float16 x, simd_float16 min, simd_float16 max); 665 /*! @abstract x clamped to the range [min, max]. 666 * @discussion Note that if you want to clamp all lanes to the same range, 667 * you can use a scalar value for min and max. */ 668 static inline SIMD_CFUNC simd_long2 simd_clamp(simd_long2 x, simd_long2 min, simd_long2 max); 669 /*! @abstract x clamped to the range [min, max]. 670 * @discussion Note that if you want to clamp all lanes to the same range, 671 * you can use a scalar value for min and max. */ 672 static inline SIMD_CFUNC simd_long3 simd_clamp(simd_long3 x, simd_long3 min, simd_long3 max); 673 /*! @abstract x clamped to the range [min, max]. 674 * @discussion Note that if you want to clamp all lanes to the same range, 675 * you can use a scalar value for min and max. */ 676 static inline SIMD_CFUNC simd_long4 simd_clamp(simd_long4 x, simd_long4 min, simd_long4 max); 677 /*! @abstract x clamped to the range [min, max]. 678 * @discussion Note that if you want to clamp all lanes to the same range, 679 * you can use a scalar value for min and max. */ 680 static inline SIMD_CFUNC simd_long8 simd_clamp(simd_long8 x, simd_long8 min, simd_long8 max); 681 /*! @abstract x clamped to the range [min, max]. 682 * @discussion Note that if you want to clamp all lanes to the same range, 683 * you can use a scalar value for min and max. */ 684 static inline SIMD_CFUNC simd_ulong2 simd_clamp(simd_ulong2 x, simd_ulong2 min, simd_ulong2 max); 685 /*! @abstract x clamped to the range [min, max]. 686 * @discussion Note that if you want to clamp all lanes to the same range, 687 * you can use a scalar value for min and max. */ 688 static inline SIMD_CFUNC simd_ulong3 simd_clamp(simd_ulong3 x, simd_ulong3 min, simd_ulong3 max); 689 /*! @abstract x clamped to the range [min, max]. 
690 * @discussion Note that if you want to clamp all lanes to the same range, 691 * you can use a scalar value for min and max. */ 692 static inline SIMD_CFUNC simd_ulong4 simd_clamp(simd_ulong4 x, simd_ulong4 min, simd_ulong4 max); 693 /*! @abstract x clamped to the range [min, max]. 694 * @discussion Note that if you want to clamp all lanes to the same range, 695 * you can use a scalar value for min and max. */ 696 static inline SIMD_CFUNC simd_ulong8 simd_clamp(simd_ulong8 x, simd_ulong8 min, simd_ulong8 max); 697 /*! @abstract x clamped to the range [min, max]. 698 * @discussion Note that if you want to clamp all lanes to the same range, 699 * you can use a scalar value for min and max. */ 700 static inline SIMD_CFUNC double simd_clamp(double x, double min, double max); 701 /*! @abstract x clamped to the range [min, max]. 702 * @discussion Note that if you want to clamp all lanes to the same range, 703 * you can use a scalar value for min and max. */ 704 static inline SIMD_CFUNC simd_double2 simd_clamp(simd_double2 x, simd_double2 min, simd_double2 max); 705 /*! @abstract x clamped to the range [min, max]. 706 * @discussion Note that if you want to clamp all lanes to the same range, 707 * you can use a scalar value for min and max. */ 708 static inline SIMD_CFUNC simd_double3 simd_clamp(simd_double3 x, simd_double3 min, simd_double3 max); 709 /*! @abstract x clamped to the range [min, max]. 710 * @discussion Note that if you want to clamp all lanes to the same range, 711 * you can use a scalar value for min and max. */ 712 static inline SIMD_CFUNC simd_double4 simd_clamp(simd_double4 x, simd_double4 min, simd_double4 max); 713 /*! @abstract x clamped to the range [min, max]. 714 * @discussion Note that if you want to clamp all lanes to the same range, 715 * you can use a scalar value for min and max. */ 716 static inline SIMD_CFUNC simd_double8 simd_clamp(simd_double8 x, simd_double8 min, simd_double8 max); 717 /*! @abstract x clamped to the range [min, max]. 
718 * @discussion Deprecated. Use simd_clamp(x,min,max) instead. */ 719 #define vector_clamp simd_clamp 720 721 /*! @abstract -1 if x is less than zero, +1 if x is greater than zero, and 0 * if x is zero or NaN. */ 722 static inline SIMD_CFUNC _Float16 simd_sign(_Float16 x); 723 /*! @abstract -1 if x is less than zero, +1 if x is greater than zero, and 0 * if x is zero or NaN. */ 724 static inline SIMD_CFUNC simd_half2 simd_sign(simd_half2 x); 725 /*! @abstract -1 if x is less than zero, +1 if x is greater than zero, and 0 * if x is zero or NaN. */ 726 static inline SIMD_CFUNC simd_half3 simd_sign(simd_half3 x); 727 /*! @abstract -1 if x is less than zero, +1 if x is greater than zero, and 0 * if x is zero or NaN. */ 728 static inline SIMD_CFUNC simd_half4 simd_sign(simd_half4 x); 729 /*! @abstract -1 if x is less than zero, +1 if x is greater than zero, and 0 * if x is zero or NaN. */ 730 static inline SIMD_CFUNC simd_half8 simd_sign(simd_half8 x); 731 /*! @abstract -1 if x is less than zero, +1 if x is greater than zero, and 0 * if x is zero or NaN. */ 732 static inline SIMD_CFUNC simd_half16 simd_sign(simd_half16 x); 733 /*! @abstract -1 if x is less than zero, +1 if x is greater than zero, and 0 * if x is zero or NaN. */ 734 static inline SIMD_CFUNC simd_half32 simd_sign(simd_half32 x); 735 /*! @abstract -1 if x is less than zero, +1 if x is greater than zero, and 0 * if x is zero or NaN. */ 736 static inline SIMD_CFUNC float simd_sign(float x); 737 /*! @abstract -1 if x is less than zero, +1 if x is greater than zero, and 0 * if x is zero or NaN. */ 738 static inline SIMD_CFUNC simd_float2 simd_sign(simd_float2 x); 739 /*! @abstract -1 if x is less than zero, +1 if x is greater than zero, and 0 * if x is zero or NaN. */ 740 static inline SIMD_CFUNC simd_float3 simd_sign(simd_float3 x); 741 /*! @abstract -1 if x is less than zero, +1 if x is greater than zero, and 0 * if x is zero or NaN. */ 742 static inline SIMD_CFUNC simd_float4 simd_sign(simd_float4 x); 743 /*! @abstract -1 if x is less than zero, +1 if x is greater than zero, and 0 * if x is zero or NaN. */ 744 static inline SIMD_CFUNC simd_float8 simd_sign(simd_float8 x); 745 /*! @abstract -1 if x is less than zero, +1 if x is greater than zero, and 0 * if x is zero or NaN. 
*/ 746 static inline SIMD_CFUNC simd_float16 simd_sign(simd_float16 x); 747 /*! @abstract -1 if x is less than zero, +1 if x is greater than zero, and 0 * if x is zero or NaN. */ 748 static inline SIMD_CFUNC double simd_sign(double x); 749 /*! @abstract -1 if x is less than zero, +1 if x is greater than zero, and 0 * if x is zero or NaN. */ 750 static inline SIMD_CFUNC simd_double2 simd_sign(simd_double2 x); 751 /*! @abstract -1 if x is less than zero, +1 if x is greater than zero, and 0 * if x is zero or NaN. */ 752 static inline SIMD_CFUNC simd_double3 simd_sign(simd_double3 x); 753 /*! @abstract -1 if x is less than zero, +1 if x is greater than zero, and 0 * if x is zero or NaN. */ 754 static inline SIMD_CFUNC simd_double4 simd_sign(simd_double4 x); 755 /*! @abstract -1 if x is less than zero, +1 if x is greater than zero, and 0 * if x is zero or NaN. */ 756 static inline SIMD_CFUNC simd_double8 simd_sign(simd_double8 x); 757 /*! @abstract -1 if x is less than zero, +1 if x is greater than zero, and 0 * if x is zero or NaN. 758 * @discussion Deprecated. Use simd_sign(x) instead. */ 759 #define vector_sign simd_sign 760 761 /*! @abstract Linearly interpolates between x and y, taking the value x when 762 * t=0 and y when t=1. * @discussion The result is x+(y-x)*t. If t is not in the range [0,1], the * result is undefined. */ 763 static inline SIMD_CFUNC _Float16 simd_mix(_Float16 x, _Float16 y, _Float16 t); 764 /*! @abstract Linearly interpolates between x and y, taking the value x when 765 * t=0 and y when t=1. * @discussion The result is x+(y-x)*t. If t is not in the range [0,1], the * result is undefined. */ 766 static inline SIMD_CFUNC simd_half2 simd_mix(simd_half2 x, simd_half2 y, simd_half2 t); 767 /*! @abstract Linearly interpolates between x and y, taking the value x when 768 * t=0 and y when t=1. * @discussion The result is x+(y-x)*t. If t is not in the range [0,1], the * result is undefined. */ 769 static inline SIMD_CFUNC simd_half3 simd_mix(simd_half3 x, simd_half3 y, simd_half3 t); 770 /*! @abstract Linearly interpolates between x and y, taking the value x when 771 * t=0 and y when t=1. * @discussion The result is x+(y-x)*t. If t is not in the range [0,1], the * result is undefined. */ 772 static inline SIMD_CFUNC simd_half4 simd_mix(simd_half4 x, simd_half4 y, simd_half4 t); 773 /*! @abstract Linearly interpolates between x and y, taking the value x when 774 * t=0 and y when t=1. * @discussion The result is x+(y-x)*t. If t is not in the range [0,1], the * result is undefined. */ 775 static inline SIMD_CFUNC simd_half8 simd_mix(simd_half8 x, simd_half8 y, simd_half8 t); 776 /*! 
@abstract Linearly interpolates between x and y, taking the value x when 777 * t=0 and y when t=1. * @discussion The result is x+(y-x)*t. If t is not in the range [0,1], the * result is undefined. */ 778 static inline SIMD_CFUNC simd_half16 simd_mix(simd_half16 x, simd_half16 y, simd_half16 t); 779 /*! @abstract Linearly interpolates between x and y, taking the value x when 780 * t=0 and y when t=1. * @discussion The result is x+(y-x)*t. If t is not in the range [0,1], the * result is undefined. */ 781 static inline SIMD_CFUNC simd_half32 simd_mix(simd_half32 x, simd_half32 y, simd_half32 t); 782 /*! @abstract Linearly interpolates between x and y, taking the value x when 783 * t=0 and y when t=1. * @discussion The result is x+(y-x)*t. If t is not in the range [0,1], the * result is undefined. */ 784 static inline SIMD_CFUNC float simd_mix(float x, float y, float t); 785 /*! @abstract Linearly interpolates between x and y, taking the value x when 786 * t=0 and y when t=1. * @discussion The result is x+(y-x)*t. If t is not in the range [0,1], the * result is undefined. */ 787 static inline SIMD_CFUNC simd_float2 simd_mix(simd_float2 x, simd_float2 y, simd_float2 t); 788 /*! @abstract Linearly interpolates between x and y, taking the value x when 789 * t=0 and y when t=1. * @discussion The result is x+(y-x)*t. If t is not in the range [0,1], the * result is undefined. */ 790 static inline SIMD_CFUNC simd_float3 simd_mix(simd_float3 x, simd_float3 y, simd_float3 t); 791 /*! @abstract Linearly interpolates between x and y, taking the value x when 792 * t=0 and y when t=1. * @discussion The result is x+(y-x)*t. If t is not in the range [0,1], the * result is undefined. */ 793 static inline SIMD_CFUNC simd_float4 simd_mix(simd_float4 x, simd_float4 y, simd_float4 t); 794 /*! @abstract Linearly interpolates between x and y, taking the value x when 795 * t=0 and y when t=1. * @discussion The result is x+(y-x)*t. If t is not in the range [0,1], the * result is undefined. */ 796 static inline SIMD_CFUNC simd_float8 simd_mix(simd_float8 x, simd_float8 y, simd_float8 t); 797 /*! @abstract Linearly interpolates between x and y, taking the value x when 798 * t=0 and y when t=1. * @discussion The result is x+(y-x)*t. If t is not in the range [0,1], the * result is undefined. */ 799 static inline SIMD_CFUNC simd_float16 simd_mix(simd_float16 x, simd_float16 y, simd_float16 t); 800 /*! @abstract Linearly interpolates between x and y, taking the value x when 801 * t=0 and y when t=1. * @discussion The result is x+(y-x)*t. If t is not in the range [0,1], the * result is undefined. */ 802 static inline SIMD_CFUNC double simd_mix(double x, double y, double t); 803 /*! 
@abstract Linearly interpolates between x and y, taking the value x when 804 * t=0 and y when t=1. * @discussion The result is x+(y-x)*t. If t is not in the range [0,1], the * result is undefined. */ 805 static inline SIMD_CFUNC simd_double2 simd_mix(simd_double2 x, simd_double2 y, simd_double2 t); 806 /*! @abstract Linearly interpolates between x and y, taking the value x when 807 * t=0 and y when t=1. * @discussion The result is x+(y-x)*t. If t is not in the range [0,1], the * result is undefined. */ 808 static inline SIMD_CFUNC simd_double3 simd_mix(simd_double3 x, simd_double3 y, simd_double3 t); 809 /*! @abstract Linearly interpolates between x and y, taking the value x when 810 * t=0 and y when t=1. * @discussion The result is x+(y-x)*t. If t is not in the range [0,1], the * result is undefined. */ 811 static inline SIMD_CFUNC simd_double4 simd_mix(simd_double4 x, simd_double4 y, simd_double4 t); 812 /*! @abstract Linearly interpolates between x and y, taking the value x when 813 * t=0 and y when t=1. * @discussion The result is x+(y-x)*t. If t is not in the range [0,1], the * result is undefined. */ 814 static inline SIMD_CFUNC simd_double8 simd_mix(simd_double8 x, simd_double8 y, simd_double8 t); 815 /*! @abstract Linearly interpolates between x and y, taking the value x when 816 * t=0 and y when t=1. 817 * @discussion Deprecated. Use simd_mix(x, y, t) instead. */ 818 #define vector_mix simd_mix 819 /*! @abstract simd_lerp(x, y, t) is another name for simd_mix(x, y, t). */ #define simd_lerp simd_mix 820 821 /*! @abstract A good approximation to 1/x. 822 * @discussion If x is very close to the limits of representation, the 823 * result may overflow or underflow; otherwise this function is accurate to 824 * a few units in the last place (ULPs). * If x is infinity or NaN, the result is undefined. */ 825 static inline SIMD_CFUNC _Float16 simd_precise_recip(_Float16 x); 826 /*! @abstract A good approximation to 1/x. 827 * @discussion If x is very close to the limits of representation, the 828 * result may overflow or underflow; otherwise this function is accurate to 829 * a few units in the last place (ULPs). * If x is infinity or NaN, the result is undefined. */ 830 static inline SIMD_CFUNC simd_half2 simd_precise_recip(simd_half2 x); 831 /*! @abstract A good approximation to 1/x. 832 * @discussion If x is very close to the limits of representation, the 833 * result may overflow or underflow; otherwise this function is accurate to 834 * a few units in the last place (ULPs). * If x is infinity or NaN, the result is undefined. 
*/ 835 static inline SIMD_CFUNC simd_half3 simd_precise_recip(simd_half3 x); 836 /*! @abstract A good approximation to 1/x. 837 * @discussion If x is very close to the limits of representation, the 838 * result may overflow or underflow; otherwise this function is accurate to 839 * a few units in the last place (ULPs). * If x is infinity or NaN, the result is undefined. */ 840 static inline SIMD_CFUNC simd_half4 simd_precise_recip(simd_half4 x); 841 /*! @abstract A good approximation to 1/x. 842 * @discussion If x is very close to the limits of representation, the 843 * result may overflow or underflow; otherwise this function is accurate to 844 * a few units in the last place (ULPs). * If x is infinity or NaN, the result is undefined. */ 845 static inline SIMD_CFUNC simd_half8 simd_precise_recip(simd_half8 x); 846 /*! @abstract A good approximation to 1/x. 847 * @discussion If x is very close to the limits of representation, the 848 * result may overflow or underflow; otherwise this function is accurate to 849 * a few units in the last place (ULPs). * If x is infinity or NaN, the result is undefined. */ 850 static inline SIMD_CFUNC simd_half16 simd_precise_recip(simd_half16 x); 851 /*! @abstract A good approximation to 1/x. 852 * @discussion If x is very close to the limits of representation, the 853 * result may overflow or underflow; otherwise this function is accurate to 854 * a few units in the last place (ULPs). * If x is infinity or NaN, the result is undefined. */ 855 static inline SIMD_CFUNC simd_half32 simd_precise_recip(simd_half32 x); 856 /*! @abstract A good approximation to 1/x. 857 * @discussion If x is very close to the limits of representation, the 858 * result may overflow or underflow; otherwise this function is accurate to 859 * a few units in the last place (ULPs). * If x is infinity or NaN, the result is undefined. */ 860 static inline SIMD_CFUNC float simd_precise_recip(float x); 861 /*! @abstract A good approximation to 1/x. 862 * @discussion If x is very close to the limits of representation, the 863 * result may overflow or underflow; otherwise this function is accurate to 864 * a few units in the last place (ULPs). * If x is infinity or NaN, the result is undefined. 
*/ 865 static inline SIMD_CFUNC simd_float2 simd_precise_recip(simd_float2 x); 866 /*! @abstract A good approximation to 1/x. 867 * @discussion If x is very close to the limits of representation, the 868 * result may overflow or underflow; otherwise this function is accurate to 869 * a few units in the last place (ULPs). * If x is infinity or NaN, the result is undefined. */ 870 static inline SIMD_CFUNC simd_float3 simd_precise_recip(simd_float3 x); 871 /*! @abstract A good approximation to 1/x. 872 * @discussion If x is very close to the limits of representation, the 873 * result may overflow or underflow; otherwise this function is accurate to 874 * a few units in the last place (ULPs). * If x is infinity or NaN, the result is undefined. */ 875 static inline SIMD_CFUNC simd_float4 simd_precise_recip(simd_float4 x); 876 /*! @abstract A good approximation to 1/x. 877 * @discussion If x is very close to the limits of representation, the 878 * result may overflow or underflow; otherwise this function is accurate to 879 * a few units in the last place (ULPs). * If x is infinity or NaN, the result is undefined. */ 880 static inline SIMD_CFUNC simd_float8 simd_precise_recip(simd_float8 x); 881 /*! @abstract A good approximation to 1/x. 882 * @discussion If x is very close to the limits of representation, the 883 * result may overflow or underflow; otherwise this function is accurate to 884 * a few units in the last place (ULPs). * If x is infinity or NaN, the result is undefined. */ 885 static inline SIMD_CFUNC simd_float16 simd_precise_recip(simd_float16 x); 886 /*! @abstract A good approximation to 1/x. 887 * @discussion If x is very close to the limits of representation, the 888 * result may overflow or underflow; otherwise this function is accurate to 889 * a few units in the last place (ULPs). * If x is infinity or NaN, the result is undefined. */ 890 static inline SIMD_CFUNC double simd_precise_recip(double x); 891 /*! @abstract A good approximation to 1/x. 892 * @discussion If x is very close to the limits of representation, the 893 * result may overflow or underflow; otherwise this function is accurate to 894 * a few units in the last place (ULPs). * If x is infinity or NaN, the result is undefined. 
*/ 895 static inline SIMD_CFUNC simd_double2 simd_precise_recip(simd_double2 x); 896 /*! @abstract A good approximation to 1/x. 897 * @discussion If x is very close to the limits of representation, the 898 * result may overflow or underflow; otherwise this function is accurate to 899 * a few units in the last place (ULPs). * If x is infinity or NaN, the result is undefined. */ 900 static inline SIMD_CFUNC simd_double3 simd_precise_recip(simd_double3 x); 901 /*! @abstract A good approximation to 1/x. 902 * @discussion If x is very close to the limits of representation, the 903 * result may overflow or underflow; otherwise this function is accurate to 904 * a few units in the last place (ULPs). * If x is infinity or NaN, the result is undefined. */ 905 static inline SIMD_CFUNC simd_double4 simd_precise_recip(simd_double4 x); 906 /*! @abstract A good approximation to 1/x. 907 * @discussion If x is very close to the limits of representation, the 908 * result may overflow or underflow; otherwise this function is accurate to 909 * a few units in the last place (ULPs). * If x is infinity or NaN, the result is undefined. */ 910 static inline SIMD_CFUNC simd_double8 simd_precise_recip(simd_double8 x); 911 /*! @abstract A good approximation to 1/x. 912 * @discussion Deprecated. Use simd_precise_recip(x) instead. */ 913 #define vector_precise_recip simd_precise_recip 914 915 /*! @abstract A fast approximation to 1/x. 916 * @discussion If x is very close to the limits of representation, the 917 * result may overflow or underflow; otherwise this function is accurate to 918 * at least 11 bits for float and 22 bits for double. * If x is infinity or NaN, the result is undefined. */ 919 static inline SIMD_CFUNC _Float16 simd_fast_recip(_Float16 x); 920 /*! @abstract A fast approximation to 1/x. 921 * @discussion If x is very close to the limits of representation, the 922 * result may overflow or underflow; otherwise this function is accurate to 923 * at least 11 bits for float and 22 bits for double. * If x is infinity or NaN, the result is undefined. */ 924 static inline SIMD_CFUNC simd_half2 simd_fast_recip(simd_half2 x); 925 /*! @abstract A fast approximation to 1/x. 
926 * @discussion If x is very close to the limits of representation, the 927 * result may overflow or underflow; otherwise this function is accurate to 928 * at least 11 bits for float and 22 bits for double. * If x is infinity or NaN, the result is undefined. */ 929 static inline SIMD_CFUNC simd_half3 simd_fast_recip(simd_half3 x); 930 /*! @abstract A fast approximation to 1/x. 931 * @discussion If x is very close to the limits of representation, the 932 * result may overflow or underflow; otherwise this function is accurate to 933 * at least 11 bits for float and 22 bits for double. * If x is infinity or NaN, the result is undefined. */ 934 static inline SIMD_CFUNC simd_half4 simd_fast_recip(simd_half4 x); 935 /*! @abstract A fast approximation to 1/x. 936 * @discussion If x is very close to the limits of representation, the 937 * result may overflow or underflow; otherwise this function is accurate to 938 * at least 11 bits for float and 22 bits for double. * If x is infinity or NaN, the result is undefined. */ 939 static inline SIMD_CFUNC simd_half8 simd_fast_recip(simd_half8 x); 940 /*! @abstract A fast approximation to 1/x. 941 * @discussion If x is very close to the limits of representation, the 942 * result may overflow or underflow; otherwise this function is accurate to 943 * at least 11 bits for float and 22 bits for double. * If x is infinity or NaN, the result is undefined. */ 944 static inline SIMD_CFUNC simd_half16 simd_fast_recip(simd_half16 x); 945 /*! @abstract A fast approximation to 1/x. 946 * @discussion If x is very close to the limits of representation, the 947 * result may overflow or underflow; otherwise this function is accurate to 948 * at least 11 bits for float and 22 bits for double. * If x is infinity or NaN, the result is undefined. */ 949 static inline SIMD_CFUNC simd_half32 simd_fast_recip(simd_half32 x); 950 /*! @abstract A fast approximation to 1/x. 951 * @discussion If x is very close to the limits of representation, the 952 * result may overflow or underflow; otherwise this function is accurate to 953 * at least 11 bits for float and 22 bits for double. * If x is infinity or NaN, the result is undefined. */ 954 static inline SIMD_CFUNC float simd_fast_recip(float x); 955 /*! @abstract A fast approximation to 1/x. 
956 * @discussion If x is very close to the limits of representation, the 957 * result may overflow or underflow; otherwise this function is accurate to 958 * at least 11 bits for float and 22 bits for double. * If x is infinity or NaN, the result is undefined. */ 959 static inline SIMD_CFUNC simd_float2 simd_fast_recip(simd_float2 x); 960 /*! @abstract A fast approximation to 1/x. 961 * @discussion If x is very close to the limits of representation, the 962 * result may overflow or underflow; otherwise this function is accurate to 963 * at least 11 bits for float and 22 bits for double. * If x is infinity or NaN, the result is undefined. */ 964 static inline SIMD_CFUNC simd_float3 simd_fast_recip(simd_float3 x); 965 /*! @abstract A fast approximation to 1/x. 966 * @discussion If x is very close to the limits of representation, the 967 * result may overflow or underflow; otherwise this function is accurate to 968 * at least 11 bits for float and 22 bits for double. * If x is infinity or NaN, the result is undefined. */ 969 static inline SIMD_CFUNC simd_float4 simd_fast_recip(simd_float4 x); 970 /*! @abstract A fast approximation to 1/x. 971 * @discussion If x is very close to the limits of representation, the 972 * result may overflow or underflow; otherwise this function is accurate to 973 * at least 11 bits for float and 22 bits for double. * If x is infinity or NaN, the result is undefined. */ 974 static inline SIMD_CFUNC simd_float8 simd_fast_recip(simd_float8 x); 975 /*! @abstract A fast approximation to 1/x. 976 * @discussion If x is very close to the limits of representation, the 977 * result may overflow or underflow; otherwise this function is accurate to 978 * at least 11 bits for float and 22 bits for double. * If x is infinity or NaN, the result is undefined. */ 979 static inline SIMD_CFUNC simd_float16 simd_fast_recip(simd_float16 x); 980 /*! @abstract A fast approximation to 1/x. 981 * @discussion If x is very close to the limits of representation, the 982 * result may overflow or underflow; otherwise this function is accurate to 983 * at least 11 bits for float and 22 bits for double. * If x is infinity or NaN, the result is undefined. */ 984 static inline SIMD_CFUNC double simd_fast_recip(double x); 985 /*! @abstract A fast approximation to 1/x. 
986 * @discussion If x is very close to the limits of representation, the 987 * result may overflow or underflow; otherwise this function is accurate to 988 * at least 11 bits for float and 22 bits for double. * If x is infinity or NaN, the result is undefined. */ 989 static inline SIMD_CFUNC simd_double2 simd_fast_recip(simd_double2 x); 990 /*! @abstract A fast approximation to 1/x. 991 * @discussion If x is very close to the limits of representation, the 992 * result may overflow or underflow; otherwise this function is accurate to 993 * at least 11 bits for float and 22 bits for double. * If x is infinity or NaN, the result is undefined. */ 994 static inline SIMD_CFUNC simd_double3 simd_fast_recip(simd_double3 x); 995 /*! @abstract A fast approximation to 1/x. 996 * @discussion If x is very close to the limits of representation, the 997 * result may overflow or underflow; otherwise this function is accurate to 998 * at least 11 bits for float and 22 bits for double. * If x is infinity or NaN, the result is undefined. */ 999 static inline SIMD_CFUNC simd_double4 simd_fast_recip(simd_double4 x); 1000 /*! @abstract A fast approximation to 1/x. 1001 * @discussion If x is very close to the limits of representation, the 1002 * result may overflow or underflow; otherwise this function is accurate to 1003 * at least 11 bits for float and 22 bits for double. * If x is infinity or NaN, the result is undefined. */ 1004 static inline SIMD_CFUNC simd_double8 simd_fast_recip(simd_double8 x); 1005 /*! @abstract A fast approximation to 1/x. 1006 * @discussion Deprecated. Use simd_fast_recip(x) instead. */ 1007 #define vector_fast_recip simd_fast_recip 1008 1009 /*! @abstract An approximation to 1/x. 1010 * @discussion If x is very close to the limits of representation, the 1011 * result may overflow or underflow. This function maps to 1012 * simd_fast_recip(x) if -ffast-math is specified, and to 1013 * simd_precise_recip(x) otherwise. * If x is infinity or NaN, the result is undefined. */ 1014 static inline SIMD_CFUNC _Float16 simd_recip(_Float16 x); 1015 /*! @abstract An approximation to 1/x. 1016 * @discussion If x is very close to the limits of representation, the 1017 * result may overflow or underflow. 
This function maps to 1018 * simd_fast_recip(x) if -ffast-math is specified, and to 1019 * simd_precise_recip(x) otherwise. * If x is infinity or NaN, the result is undefined. */ 1020 static inline SIMD_CFUNC simd_half2 simd_recip(simd_half2 x); 1021 /*! @abstract An approximation to 1/x. 1022 * @discussion If x is very close to the limits of representation, the 1023 * result may overflow or underflow. This function maps to 1024 * simd_fast_recip(x) if -ffast-math is specified, and to 1025 * simd_precise_recip(x) otherwise. * If x is infinity or NaN, the result is undefined. */ 1026 static inline SIMD_CFUNC simd_half3 simd_recip(simd_half3 x); 1027 /*! @abstract An approximation to 1/x. 1028 * @discussion If x is very close to the limits of representation, the 1029 * result may overflow or underflow. This function maps to 1030 * simd_fast_recip(x) if -ffast-math is specified, and to 1031 * simd_precise_recip(x) otherwise. * If x is infinity or NaN, the result is undefined. */ 1032 static inline SIMD_CFUNC simd_half4 simd_recip(simd_half4 x); 1033 /*! @abstract An approximation to 1/x. 1034 * @discussion If x is very close to the limits of representation, the 1035 * result may overflow or underflow. This function maps to 1036 * simd_fast_recip(x) if -ffast-math is specified, and to 1037 * simd_precise_recip(x) otherwise. * If x is infinity or NaN, the result is undefined. */ 1038 static inline SIMD_CFUNC simd_half8 simd_recip(simd_half8 x); 1039 /*! @abstract An approximation to 1/x. 1040 * @discussion If x is very close to the limits of representation, the 1041 * result may overflow or underflow. This function maps to 1042 * simd_fast_recip(x) if -ffast-math is specified, and to 1043 * simd_precise_recip(x) otherwise. * If x is infinity or NaN, the result is undefined. */ 1044 static inline SIMD_CFUNC simd_half16 simd_recip(simd_half16 x); 1045 /*! @abstract An approximation to 1/x. 1046 * @discussion If x is very close to the limits of representation, the 1047 * result may overflow or underflow. This function maps to 1048 * simd_fast_recip(x) if -ffast-math is specified, and to 1049 * simd_precise_recip(x) otherwise. * If x is infinity or NaN, the result is undefined. */ 1050 static inline SIMD_CFUNC simd_half32 simd_recip(simd_half32 x); 1051 /*! 
@abstract An approximation to 1/x. 1052 * @discussion If x is very close to the limits of representation, the 1053 * result may overflow or underflow. This function maps to 1054 * simd_fast_recip(x) if -ffast-math is specified, and to 1055 * simd_precise_recip(x) otherwise. * If x is infinity or NaN, the result is undefined. */ 1056 static inline SIMD_CFUNC float simd_recip(float x); 1057 /*! @abstract An approximation to 1/x. 1058 * @discussion If x is very close to the limits of representation, the 1059 * result may overflow or underflow. This function maps to 1060 * simd_fast_recip(x) if -ffast-math is specified, and to 1061 * simd_precise_recip(x) otherwise. * If x is infinity or NaN, the result is undefined. */ 1062 static inline SIMD_CFUNC simd_float2 simd_recip(simd_float2 x); 1063 /*! @abstract An approximation to 1/x. 1064 * @discussion If x is very close to the limits of representation, the 1065 * result may overflow or underflow. This function maps to 1066 * simd_fast_recip(x) if -ffast-math is specified, and to 1067 * simd_precise_recip(x) otherwise. * If x is infinity or NaN, the result is undefined. */ 1068 static inline SIMD_CFUNC simd_float3 simd_recip(simd_float3 x); 1069 /*! @abstract An approximation to 1/x. 1070 * @discussion If x is very close to the limits of representation, the 1071 * result may overflow or underflow. This function maps to 1072 * simd_fast_recip(x) if -ffast-math is specified, and to 1073 * simd_precise_recip(x) otherwise. * If x is infinity or NaN, the result is undefined. */ 1074 static inline SIMD_CFUNC simd_float4 simd_recip(simd_float4 x); 1075 /*! @abstract An approximation to 1/x. 1076 * @discussion If x is very close to the limits of representation, the 1077 * result may overflow or underflow. This function maps to 1078 * simd_fast_recip(x) if -ffast-math is specified, and to 1079 * simd_precise_recip(x) otherwise. * If x is infinity or NaN, the result is undefined. */ 1080 static inline SIMD_CFUNC simd_float8 simd_recip(simd_float8 x); 1081 /*! @abstract An approximation to 1/x. 1082 * @discussion If x is very close to the limits of representation, the 1083 * result may overflow or underflow. 
This function maps to 1084 * simd_fast_recip(x) if -ffast-math is specified, and to 1085 * simd_precise_recip(x) otherwise. * If x is infinity or NaN, the result is undefined. */ 1086 static inline SIMD_CFUNC simd_float16 simd_recip(simd_float16 x); 1087 /*! @abstract An approximation to 1/x. 1088 * @discussion If x is very close to the limits of representation, the 1089 * result may overflow or underflow. This function maps to 1090 * simd_fast_recip(x) if -ffast-math is specified, and to 1091 * simd_precise_recip(x) otherwise. * If x is infinity or NaN, the result is undefined. */ 1092 static inline SIMD_CFUNC double simd_recip(double x); 1093 /*! @abstract An approximation to 1/x. 1094 * @discussion If x is very close to the limits of representation, the 1095 * result may overflow or underflow. This function maps to 1096 * simd_fast_recip(x) if -ffast-math is specified, and to 1097 * simd_precise_recip(x) otherwise. * If x is infinity or NaN, the result is undefined. */ 1098 static inline SIMD_CFUNC simd_double2 simd_recip(simd_double2 x); 1099 /*! @abstract An approximation to 1/x. 1100 * @discussion If x is very close to the limits of representation, the 1101 * result may overflow or underflow. This function maps to 1102 * simd_fast_recip(x) if -ffast-math is specified, and to 1103 * simd_precise_recip(x) otherwise. * If x is infinity or NaN, the result is undefined. */ 1104 static inline SIMD_CFUNC simd_double3 simd_recip(simd_double3 x); 1105 /*! @abstract An approximation to 1/x. 1106 * @discussion If x is very close to the limits of representation, the 1107 * result may overflow or underflow. This function maps to 1108 * simd_fast_recip(x) if -ffast-math is specified, and to 1109 * simd_precise_recip(x) otherwise. * If x is infinity or NaN, the result is undefined. */ 1110 static inline SIMD_CFUNC simd_double4 simd_recip(simd_double4 x); 1111 /*! @abstract An approximation to 1/x. 1112 * @discussion If x is very close to the limits of representation, the 1113 * result may overflow or underflow. This function maps to 1114 * simd_fast_recip(x) if -ffast-math is specified, and to 1115 * simd_precise_recip(x) otherwise. * If x is infinity or NaN, the result is undefined. */ 1116 static inline SIMD_CFUNC simd_double8 simd_recip(simd_double8 x); 1117 /*! 
@abstract An approximation to 1/x. 1118 * @discussion Deprecated. Use simd_recip(x) instead. */ 1119 #define vector_recip simd_recip 1120 1121 /*! @abstract A good approximation to 1/sqrt(x). 1122 * @discussion This function is accurate to a few units in the last place 1123 * (ULPs). */ 1124 static inline SIMD_CFUNC _Float16 simd_precise_rsqrt(_Float16 x); 1125 /*! @abstract A good approximation to 1/sqrt(x). 1126 * @discussion This function is accurate to a few units in the last place 1127 * (ULPs). */ 1128 static inline SIMD_CFUNC simd_half2 simd_precise_rsqrt(simd_half2 x); 1129 /*! @abstract A good approximation to 1/sqrt(x). 1130 * @discussion This function is accurate to a few units in the last place 1131 * (ULPs). */ 1132 static inline SIMD_CFUNC simd_half3 simd_precise_rsqrt(simd_half3 x); 1133 /*! @abstract A good approximation to 1/sqrt(x). 1134 * @discussion This function is accurate to a few units in the last place 1135 * (ULPs). */ 1136 static inline SIMD_CFUNC simd_half4 simd_precise_rsqrt(simd_half4 x); 1137 /*! @abstract A good approximation to 1/sqrt(x). 1138 * @discussion This function is accurate to a few units in the last place 1139 * (ULPs). */ 1140 static inline SIMD_CFUNC simd_half8 simd_precise_rsqrt(simd_half8 x); 1141 /*! @abstract A good approximation to 1/sqrt(x). 1142 * @discussion This function is accurate to a few units in the last place 1143 * (ULPs). */ 1144 static inline SIMD_CFUNC simd_half16 simd_precise_rsqrt(simd_half16 x); 1145 /*! @abstract A good approximation to 1/sqrt(x). 1146 * @discussion This function is accurate to a few units in the last place 1147 * (ULPs). */ 1148 static inline SIMD_CFUNC simd_half32 simd_precise_rsqrt(simd_half32 x); 1149 /*! @abstract A good approximation to 1/sqrt(x). 1150 * @discussion This function is accurate to a few units in the last place 1151 * (ULPs). */ 1152 static inline SIMD_CFUNC float simd_precise_rsqrt(float x); 1153 /*! @abstract A good approximation to 1/sqrt(x). 
1154 * @discussion This function is accurate to a few units in the last place 1155 * (ULPs). */ 1156 static inline SIMD_CFUNC simd_float2 simd_precise_rsqrt(simd_float2 x); 1157 /*! @abstract A good approximation to 1/sqrt(x). 1158 * @discussion This function is accurate to a few units in the last place 1159 * (ULPs). */ 1160 static inline SIMD_CFUNC simd_float3 simd_precise_rsqrt(simd_float3 x); 1161 /*! @abstract A good approximation to 1/sqrt(x). 1162 * @discussion This function is accurate to a few units in the last place 1163 * (ULPs). */ 1164 static inline SIMD_CFUNC simd_float4 simd_precise_rsqrt(simd_float4 x); 1165 /*! @abstract A good approximation to 1/sqrt(x). 1166 * @discussion This function is accurate to a few units in the last place 1167 * (ULPs). */ 1168 static inline SIMD_CFUNC simd_float8 simd_precise_rsqrt(simd_float8 x); 1169 /*! @abstract A good approximation to 1/sqrt(x). 1170 * @discussion This function is accurate to a few units in the last place 1171 * (ULPs). */ 1172 static inline SIMD_CFUNC simd_float16 simd_precise_rsqrt(simd_float16 x); 1173 /*! @abstract A good approximation to 1/sqrt(x). 1174 * @discussion This function is accurate to a few units in the last place 1175 * (ULPs). */ 1176 static inline SIMD_CFUNC double simd_precise_rsqrt(double x); 1177 /*! @abstract A good approximation to 1/sqrt(x). 1178 * @discussion This function is accurate to a few units in the last place 1179 * (ULPs). */ 1180 static inline SIMD_CFUNC simd_double2 simd_precise_rsqrt(simd_double2 x); 1181 /*! @abstract A good approximation to 1/sqrt(x). 1182 * @discussion This function is accurate to a few units in the last place 1183 * (ULPs). */ 1184 static inline SIMD_CFUNC simd_double3 simd_precise_rsqrt(simd_double3 x); 1185 /*! @abstract A good approximation to 1/sqrt(x). 1186 * @discussion This function is accurate to a few units in the last place 1187 * (ULPs). */ 1188 static inline SIMD_CFUNC simd_double4 simd_precise_rsqrt(simd_double4 x); 1189 /*! 
@abstract A good approximation to 1/sqrt(x). 1190 * @discussion This function is accurate to a few units in the last place 1191 * (ULPs). */ 1192 static inline SIMD_CFUNC simd_double8 simd_precise_rsqrt(simd_double8 x); 1193 /*! @abstract A good approximation to 1/sqrt(x). 1194 * @discussion Deprecated. Use simd_precise_rsqrt(x) instead. */ 1195 #define vector_precise_rsqrt simd_precise_rsqrt 1196 1197 /*! @abstract A fast approximation to 1/sqrt(x). 1198 * @discussion This function is accurate to at least 11 bits for float and 1199 * 22 bits for double. */ 1200 static inline SIMD_CFUNC _Float16 simd_fast_rsqrt(_Float16 x); 1201 /*! @abstract A fast approximation to 1/sqrt(x). 1202 * @discussion This function is accurate to at least 11 bits for float and 1203 * 22 bits for double. */ 1204 static inline SIMD_CFUNC simd_half2 simd_fast_rsqrt(simd_half2 x); 1205 /*! @abstract A fast approximation to 1/sqrt(x). 1206 * @discussion This function is accurate to at least 11 bits for float and 1207 * 22 bits for double. */ 1208 static inline SIMD_CFUNC simd_half3 simd_fast_rsqrt(simd_half3 x); 1209 /*! @abstract A fast approximation to 1/sqrt(x). 1210 * @discussion This function is accurate to at least 11 bits for float and 1211 * 22 bits for double. */ 1212 static inline SIMD_CFUNC simd_half4 simd_fast_rsqrt(simd_half4 x); 1213 /*! @abstract A fast approximation to 1/sqrt(x). 1214 * @discussion This function is accurate to at least 11 bits for float and 1215 * 22 bits for double. */ 1216 static inline SIMD_CFUNC simd_half8 simd_fast_rsqrt(simd_half8 x); 1217 /*! @abstract A fast approximation to 1/sqrt(x). 1218 * @discussion This function is accurate to at least 11 bits for float and 1219 * 22 bits for double. */ 1220 static inline SIMD_CFUNC simd_half16 simd_fast_rsqrt(simd_half16 x); 1221 /*! @abstract A fast approximation to 1/sqrt(x). 1222 * @discussion This function is accurate to at least 11 bits for float and 1223 * 22 bits for double. 
*/ 1224 static inline SIMD_CFUNC simd_half32 simd_fast_rsqrt(simd_half32 x); 1225 /*! @abstract A fast approximation to 1/sqrt(x). 1226 * @discussion This function is accurate to at least 11 bits for float and 1227 * 22 bits for double. */ 1228 static inline SIMD_CFUNC float simd_fast_rsqrt(float x); 1229 /*! @abstract A fast approximation to 1/sqrt(x). 1230 * @discussion This function is accurate to at least 11 bits for float and 1231 * 22 bits for double. */ 1232 static inline SIMD_CFUNC simd_float2 simd_fast_rsqrt(simd_float2 x); 1233 /*! @abstract A fast approximation to 1/sqrt(x). 1234 * @discussion This function is accurate to at least 11 bits for float and 1235 * 22 bits for double. */ 1236 static inline SIMD_CFUNC simd_float3 simd_fast_rsqrt(simd_float3 x); 1237 /*! @abstract A fast approximation to 1/sqrt(x). 1238 * @discussion This function is accurate to at least 11 bits for float and 1239 * 22 bits for double. */ 1240 static inline SIMD_CFUNC simd_float4 simd_fast_rsqrt(simd_float4 x); 1241 /*! @abstract A fast approximation to 1/sqrt(x). 1242 * @discussion This function is accurate to at least 11 bits for float and 1243 * 22 bits for double. */ 1244 static inline SIMD_CFUNC simd_float8 simd_fast_rsqrt(simd_float8 x); 1245 /*! @abstract A fast approximation to 1/sqrt(x). 1246 * @discussion This function is accurate to at least 11 bits for float and 1247 * 22 bits for double. */ 1248 static inline SIMD_CFUNC simd_float16 simd_fast_rsqrt(simd_float16 x); 1249 /*! @abstract A fast approximation to 1/sqrt(x). 1250 * @discussion This function is accurate to at least 11 bits for float and 1251 * 22 bits for double. */ 1252 static inline SIMD_CFUNC double simd_fast_rsqrt(double x); 1253 /*! @abstract A fast approximation to 1/sqrt(x). 1254 * @discussion This function is accurate to at least 11 bits for float and 1255 * 22 bits for double. */ 1256 static inline SIMD_CFUNC simd_double2 simd_fast_rsqrt(simd_double2 x); 1257 /*! 
@abstract A fast approximation to 1/sqrt(x). 1258 * @discussion This function is accurate to at least 11 bits for float and 1259 * 22 bits for double. */ 1260 static inline SIMD_CFUNC simd_double3 simd_fast_rsqrt(simd_double3 x); 1261 /*! @abstract A fast approximation to 1/sqrt(x). 1262 * @discussion This function is accurate to at least 11 bits for float and 1263 * 22 bits for double. */ 1264 static inline SIMD_CFUNC simd_double4 simd_fast_rsqrt(simd_double4 x); 1265 /*! @abstract A fast approximation to 1/sqrt(x). 1266 * @discussion This function is accurate to at least 11 bits for float and 1267 * 22 bits for double. */ 1268 static inline SIMD_CFUNC simd_double8 simd_fast_rsqrt(simd_double8 x); 1269 /*! @abstract A fast approximation to 1/sqrt(x). 1270 * @discussion Deprecated. Use simd_fast_rsqrt(x) instead. */ 1271 #define vector_fast_rsqrt simd_fast_rsqrt 1272 1273 /*! @abstract An approximation to 1/sqrt(x). 1274 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is 1275 * specified, and to simd_precise_rsqrt(x) otherwise. */ 1276 static inline SIMD_CFUNC _Float16 simd_rsqrt(_Float16 x); 1277 /*! @abstract An approximation to 1/sqrt(x). 1278 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is 1279 * specified, and to simd_precise_rsqrt(x) otherwise. */ 1280 static inline SIMD_CFUNC simd_half2 simd_rsqrt(simd_half2 x); 1281 /*! @abstract An approximation to 1/sqrt(x). 1282 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is 1283 * specified, and to simd_precise_rsqrt(x) otherwise. */ 1284 static inline SIMD_CFUNC simd_half3 simd_rsqrt(simd_half3 x); 1285 /*! @abstract An approximation to 1/sqrt(x). 1286 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is 1287 * specified, and to simd_precise_rsqrt(x) otherwise. */ 1288 static inline SIMD_CFUNC simd_half4 simd_rsqrt(simd_half4 x); 1289 /*! @abstract An approximation to 1/sqrt(x).
1290 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is 1291 * specified, and to simd_precise_rsqrt(x) otherwise. */ 1292 static inline SIMD_CFUNC simd_half8 simd_rsqrt(simd_half8 x); 1293 /*! @abstract An approximation to 1/sqrt(x). 1294 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is 1295 * specified, and to simd_precise_rsqrt(x) otherwise. */ 1296 static inline SIMD_CFUNC simd_half16 simd_rsqrt(simd_half16 x); 1297 /*! @abstract An approximation to 1/sqrt(x). 1298 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is 1299 * specified, and to simd_precise_rsqrt(x) otherwise. */ 1300 static inline SIMD_CFUNC simd_half32 simd_rsqrt(simd_half32 x); 1301 /*! @abstract An approximation to 1/sqrt(x). 1302 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is 1303 * specified, and to simd_precise_rsqrt(x) otherwise. */ 1304 static inline SIMD_CFUNC float simd_rsqrt(float x); 1305 /*! @abstract An approximation to 1/sqrt(x). 1306 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is 1307 * specified, and to simd_precise_rsqrt(x) otherwise. */ 1308 static inline SIMD_CFUNC simd_float2 simd_rsqrt(simd_float2 x); 1309 /*! @abstract An approximation to 1/sqrt(x). 1310 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is 1311 * specified, and to simd_precise_rsqrt(x) otherwise. */ 1312 static inline SIMD_CFUNC simd_float3 simd_rsqrt(simd_float3 x); 1313 /*! @abstract An approximation to 1/sqrt(x). 1314 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is 1315 * specified, and to simd_precise_rsqrt(x) otherwise. */ 1316 static inline SIMD_CFUNC simd_float4 simd_rsqrt(simd_float4 x); 1317 /*! @abstract An approximation to 1/sqrt(x). 1318 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is 1319 * specified, and to simd_precise_rsqrt(x) otherwise.
*/ 1320 static inline SIMD_CFUNC simd_float8 simd_rsqrt(simd_float8 x); 1321 /*! @abstract An approximation to 1/sqrt(x). 1322 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is 1323 * specified, and to simd_precise_rsqrt(x) otherwise. */ 1324 static inline SIMD_CFUNC simd_float16 simd_rsqrt(simd_float16 x); 1325 /*! @abstract An approximation to 1/sqrt(x). 1326 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is 1327 * specified, and to simd_precise_rsqrt(x) otherwise. */ 1328 static inline SIMD_CFUNC double simd_rsqrt(double x); 1329 /*! @abstract An approximation to 1/sqrt(x). 1330 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is 1331 * specified, and to simd_precise_rsqrt(x) otherwise. */ 1332 static inline SIMD_CFUNC simd_double2 simd_rsqrt(simd_double2 x); 1333 /*! @abstract An approximation to 1/sqrt(x). 1334 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is 1335 * specified, and to simd_precise_rsqrt(x) otherwise. */ 1336 static inline SIMD_CFUNC simd_double3 simd_rsqrt(simd_double3 x); 1337 /*! @abstract An approximation to 1/sqrt(x). 1338 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is 1339 * specified, and to simd_precise_rsqrt(x) otherwise. */ 1340 static inline SIMD_CFUNC simd_double4 simd_rsqrt(simd_double4 x); 1341 /*! @abstract An approximation to 1/sqrt(x). 1342 * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is 1343 * specified, and to simd_precise_rsqrt(x) otherwise. */ 1344 static inline SIMD_CFUNC simd_double8 simd_rsqrt(simd_double8 x); 1345 /*! @abstract An approximation to 1/sqrt(x). 1346 * @discussion Deprecated. Use simd_rsqrt(x) instead. */ 1347 #define vector_rsqrt simd_rsqrt 1348 1349 /*! @abstract The "fractional part" of x, lying in the range [0, 1). 1350 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is 1351 * positive and finite, then the two values are exactly equal.
*/ 1352 static inline SIMD_CFUNC _Float16 simd_fract(_Float16 x); 1353 /*! @abstract The "fractional part" of x, lying in the range [0, 1). 1354 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is 1355 * positive and finite, then the two values are exactly equal. */ 1356 static inline SIMD_CFUNC simd_half2 simd_fract(simd_half2 x); 1357 /*! @abstract The "fractional part" of x, lying in the range [0, 1). 1358 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is 1359 * positive and finite, then the two values are exactly equal. */ 1360 static inline SIMD_CFUNC simd_half3 simd_fract(simd_half3 x); 1361 /*! @abstract The "fractional part" of x, lying in the range [0, 1). 1362 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is 1363 * positive and finite, then the two values are exactly equal. */ 1364 static inline SIMD_CFUNC simd_half4 simd_fract(simd_half4 x); 1365 /*! @abstract The "fractional part" of x, lying in the range [0, 1). 1366 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is 1367 * positive and finite, then the two values are exactly equal. */ 1368 static inline SIMD_CFUNC simd_half8 simd_fract(simd_half8 x); 1369 /*! @abstract The "fractional part" of x, lying in the range [0, 1). 1370 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is 1371 * positive and finite, then the two values are exactly equal. */ 1372 static inline SIMD_CFUNC simd_half16 simd_fract(simd_half16 x); 1373 /*! @abstract The "fractional part" of x, lying in the range [0, 1). 1374 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is 1375 * positive and finite, then the two values are exactly equal. */ 1376 static inline SIMD_CFUNC simd_half32 simd_fract(simd_half32 x); 1377 /*! @abstract The "fractional part" of x, lying in the range [0, 1). 1378 * @discussion floor(x) + fract(x) is *approximately* equal to x. 
If x is 1379 * positive and finite, then the two values are exactly equal. */ 1380 static inline SIMD_CFUNC float simd_fract(float x); 1381 /*! @abstract The "fractional part" of x, lying in the range [0, 1). 1382 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is 1383 * positive and finite, then the two values are exactly equal. */ 1384 static inline SIMD_CFUNC simd_float2 simd_fract(simd_float2 x); 1385 /*! @abstract The "fractional part" of x, lying in the range [0, 1). 1386 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is 1387 * positive and finite, then the two values are exactly equal. */ 1388 static inline SIMD_CFUNC simd_float3 simd_fract(simd_float3 x); 1389 /*! @abstract The "fractional part" of x, lying in the range [0, 1). 1390 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is 1391 * positive and finite, then the two values are exactly equal. */ 1392 static inline SIMD_CFUNC simd_float4 simd_fract(simd_float4 x); 1393 /*! @abstract The "fractional part" of x, lying in the range [0, 1). 1394 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is 1395 * positive and finite, then the two values are exactly equal. */ 1396 static inline SIMD_CFUNC simd_float8 simd_fract(simd_float8 x); 1397 /*! @abstract The "fractional part" of x, lying in the range [0, 1). 1398 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is 1399 * positive and finite, then the two values are exactly equal. */ 1400 static inline SIMD_CFUNC simd_float16 simd_fract(simd_float16 x); 1401 /*! @abstract The "fractional part" of x, lying in the range [0, 1). 1402 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is 1403 * positive and finite, then the two values are exactly equal. */ 1404 static inline SIMD_CFUNC double simd_fract(double x); 1405 /*! @abstract The "fractional part" of x, lying in the range [0, 1). 
1406 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is 1407 * positive and finite, then the two values are exactly equal. */ 1408 static inline SIMD_CFUNC simd_double2 simd_fract(simd_double2 x); 1409 /*! @abstract The "fractional part" of x, lying in the range [0, 1). 1410 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is 1411 * positive and finite, then the two values are exactly equal. */ 1412 static inline SIMD_CFUNC simd_double3 simd_fract(simd_double3 x); 1413 /*! @abstract The "fractional part" of x, lying in the range [0, 1). 1414 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is 1415 * positive and finite, then the two values are exactly equal. */ 1416 static inline SIMD_CFUNC simd_double4 simd_fract(simd_double4 x); 1417 /*! @abstract The "fractional part" of x, lying in the range [0, 1). 1418 * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is 1419 * positive and finite, then the two values are exactly equal. */ 1420 static inline SIMD_CFUNC simd_double8 simd_fract(simd_double8 x); 1421 /*! @abstract The "fractional part" of x, lying in the range [0, 1). 1422 * @discussion Deprecated. Use simd_fract(x) instead. */ 1423 #define vector_fract simd_fract 1424 1425 /*! @abstract 0 if x < edge, and 1 otherwise. 1426 * @discussion Use a scalar value for edge if you want to apply the same 1427 * threshold to all lanes. */ 1428 static inline SIMD_CFUNC _Float16 simd_step(_Float16 edge, _Float16 x); 1429 /*! @abstract 0 if x < edge, and 1 otherwise. 1430 * @discussion Use a scalar value for edge if you want to apply the same 1431 * threshold to all lanes. */ 1432 static inline SIMD_CFUNC simd_half2 simd_step(simd_half2 edge, simd_half2 x); 1433 /*! @abstract 0 if x < edge, and 1 otherwise. 1434 * @discussion Use a scalar value for edge if you want to apply the same 1435 * threshold to all lanes. 
*/ 1436 static inline SIMD_CFUNC simd_half3 simd_step(simd_half3 edge, simd_half3 x); 1437 /*! @abstract 0 if x < edge, and 1 otherwise. 1438 * @discussion Use a scalar value for edge if you want to apply the same 1439 * threshold to all lanes. */ 1440 static inline SIMD_CFUNC simd_half4 simd_step(simd_half4 edge, simd_half4 x); 1441 /*! @abstract 0 if x < edge, and 1 otherwise. 1442 * @discussion Use a scalar value for edge if you want to apply the same 1443 * threshold to all lanes. */ 1444 static inline SIMD_CFUNC simd_half8 simd_step(simd_half8 edge, simd_half8 x); 1445 /*! @abstract 0 if x < edge, and 1 otherwise. 1446 * @discussion Use a scalar value for edge if you want to apply the same 1447 * threshold to all lanes. */ 1448 static inline SIMD_CFUNC simd_half16 simd_step(simd_half16 edge, simd_half16 x); 1449 /*! @abstract 0 if x < edge, and 1 otherwise. 1450 * @discussion Use a scalar value for edge if you want to apply the same 1451 * threshold to all lanes. */ 1452 static inline SIMD_CFUNC simd_half32 simd_step(simd_half32 edge, simd_half32 x); 1453 /*! @abstract 0 if x < edge, and 1 otherwise. 1454 * @discussion Use a scalar value for edge if you want to apply the same 1455 * threshold to all lanes. */ 1456 static inline SIMD_CFUNC float simd_step(float edge, float x); 1457 /*! @abstract 0 if x < edge, and 1 otherwise. 1458 * @discussion Use a scalar value for edge if you want to apply the same 1459 * threshold to all lanes. */ 1460 static inline SIMD_CFUNC simd_float2 simd_step(simd_float2 edge, simd_float2 x); 1461 /*! @abstract 0 if x < edge, and 1 otherwise. 1462 * @discussion Use a scalar value for edge if you want to apply the same 1463 * threshold to all lanes. */ 1464 static inline SIMD_CFUNC simd_float3 simd_step(simd_float3 edge, simd_float3 x); 1465 /*! @abstract 0 if x < edge, and 1 otherwise. 1466 * @discussion Use a scalar value for edge if you want to apply the same 1467 * threshold to all lanes. 
*/ 1468 static inline SIMD_CFUNC simd_float4 simd_step(simd_float4 edge, simd_float4 x); 1469 /*! @abstract 0 if x < edge, and 1 otherwise. 1470 * @discussion Use a scalar value for edge if you want to apply the same 1471 * threshold to all lanes. */ 1472 static inline SIMD_CFUNC simd_float8 simd_step(simd_float8 edge, simd_float8 x); 1473 /*! @abstract 0 if x < edge, and 1 otherwise. 1474 * @discussion Use a scalar value for edge if you want to apply the same 1475 * threshold to all lanes. */ 1476 static inline SIMD_CFUNC simd_float16 simd_step(simd_float16 edge, simd_float16 x); 1477 /*! @abstract 0 if x < edge, and 1 otherwise. 1478 * @discussion Use a scalar value for edge if you want to apply the same 1479 * threshold to all lanes. */ 1480 static inline SIMD_CFUNC double simd_step(double edge, double x); 1481 /*! @abstract 0 if x < edge, and 1 otherwise. 1482 * @discussion Use a scalar value for edge if you want to apply the same 1483 * threshold to all lanes. */ 1484 static inline SIMD_CFUNC simd_double2 simd_step(simd_double2 edge, simd_double2 x); 1485 /*! @abstract 0 if x < edge, and 1 otherwise. 1486 * @discussion Use a scalar value for edge if you want to apply the same 1487 * threshold to all lanes. */ 1488 static inline SIMD_CFUNC simd_double3 simd_step(simd_double3 edge, simd_double3 x); 1489 /*! @abstract 0 if x < edge, and 1 otherwise. 1490 * @discussion Use a scalar value for edge if you want to apply the same 1491 * threshold to all lanes. */ 1492 static inline SIMD_CFUNC simd_double4 simd_step(simd_double4 edge, simd_double4 x); 1493 /*! @abstract 0 if x < edge, and 1 otherwise. 1494 * @discussion Use a scalar value for edge if you want to apply the same 1495 * threshold to all lanes. */ 1496 static inline SIMD_CFUNC simd_double8 simd_step(simd_double8 edge, simd_double8 x); 1497 /*! @abstract 0 if x < edge, and 1 otherwise. 1498 * @discussion Deprecated. Use simd_step(edge, x) instead. */ 1499 #define vector_step simd_step 1500 1501 /*! 
@abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 1502 * @discussion You can use a scalar value for edge0 and edge1 if you want 1503 * to clamp all lanes at the same points. */ 1504 static inline SIMD_CFUNC _Float16 simd_smoothstep(_Float16 edge0, _Float16 edge1, _Float16 x); 1505 /*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 1506 * @discussion You can use a scalar value for edge0 and edge1 if you want 1507 * to clamp all lanes at the same points. */ 1508 static inline SIMD_CFUNC simd_half2 simd_smoothstep(simd_half2 edge0, simd_half2 edge1, simd_half2 x); 1509 /*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 1510 * @discussion You can use a scalar value for edge0 and edge1 if you want 1511 * to clamp all lanes at the same points. */ 1512 static inline SIMD_CFUNC simd_half3 simd_smoothstep(simd_half3 edge0, simd_half3 edge1, simd_half3 x); 1513 /*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 1514 * @discussion You can use a scalar value for edge0 and edge1 if you want 1515 * to clamp all lanes at the same points. */ 1516 static inline SIMD_CFUNC simd_half4 simd_smoothstep(simd_half4 edge0, simd_half4 edge1, simd_half4 x); 1517 /*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 1518 * @discussion You can use a scalar value for edge0 and edge1 if you want 1519 * to clamp all lanes at the same points. */ 1520 static inline SIMD_CFUNC simd_half8 simd_smoothstep(simd_half8 edge0, simd_half8 edge1, simd_half8 x); 1521 /*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 1522 * @discussion You can use a scalar value for edge0 and edge1 if you want 1523 * to clamp all lanes at the same points. */ 1524 static inline SIMD_CFUNC simd_half16 simd_smoothstep(simd_half16 edge0, simd_half16 edge1, simd_half16 x); 1525 /*! 
@abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 1526 * @discussion You can use a scalar value for edge0 and edge1 if you want 1527 * to clamp all lanes at the same points. */ 1528 static inline SIMD_CFUNC simd_half32 simd_smoothstep(simd_half32 edge0, simd_half32 edge1, simd_half32 x); 1529 /*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 1530 * @discussion You can use a scalar value for edge0 and edge1 if you want 1531 * to clamp all lanes at the same points. */ 1532 static inline SIMD_CFUNC float simd_smoothstep(float edge0, float edge1, float x); 1533 /*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 1534 * @discussion You can use a scalar value for edge0 and edge1 if you want 1535 * to clamp all lanes at the same points. */ 1536 static inline SIMD_CFUNC simd_float2 simd_smoothstep(simd_float2 edge0, simd_float2 edge1, simd_float2 x); 1537 /*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 1538 * @discussion You can use a scalar value for edge0 and edge1 if you want 1539 * to clamp all lanes at the same points. */ 1540 static inline SIMD_CFUNC simd_float3 simd_smoothstep(simd_float3 edge0, simd_float3 edge1, simd_float3 x); 1541 /*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 1542 * @discussion You can use a scalar value for edge0 and edge1 if you want 1543 * to clamp all lanes at the same points. */ 1544 static inline SIMD_CFUNC simd_float4 simd_smoothstep(simd_float4 edge0, simd_float4 edge1, simd_float4 x); 1545 /*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 1546 * @discussion You can use a scalar value for edge0 and edge1 if you want 1547 * to clamp all lanes at the same points. */ 1548 static inline SIMD_CFUNC simd_float8 simd_smoothstep(simd_float8 edge0, simd_float8 edge1, simd_float8 x); 1549 /*! 
@abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 1550 * @discussion You can use a scalar value for edge0 and edge1 if you want 1551 * to clamp all lanes at the same points. */ 1552 static inline SIMD_CFUNC simd_float16 simd_smoothstep(simd_float16 edge0, simd_float16 edge1, simd_float16 x); 1553 /*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 1554 * @discussion You can use a scalar value for edge0 and edge1 if you want 1555 * to clamp all lanes at the same points. */ 1556 static inline SIMD_CFUNC double simd_smoothstep(double edge0, double edge1, double x); 1557 /*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 1558 * @discussion You can use a scalar value for edge0 and edge1 if you want 1559 * to clamp all lanes at the same points. */ 1560 static inline SIMD_CFUNC simd_double2 simd_smoothstep(simd_double2 edge0, simd_double2 edge1, simd_double2 x); 1561 /*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 1562 * @discussion You can use a scalar value for edge0 and edge1 if you want 1563 * to clamp all lanes at the same points. */ 1564 static inline SIMD_CFUNC simd_double3 simd_smoothstep(simd_double3 edge0, simd_double3 edge1, simd_double3 x); 1565 /*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 1566 * @discussion You can use a scalar value for edge0 and edge1 if you want 1567 * to clamp all lanes at the same points. */ 1568 static inline SIMD_CFUNC simd_double4 simd_smoothstep(simd_double4 edge0, simd_double4 edge1, simd_double4 x); 1569 /*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 1570 * @discussion You can use a scalar value for edge0 and edge1 if you want 1571 * to clamp all lanes at the same points. */ 1572 static inline SIMD_CFUNC simd_double8 simd_smoothstep(simd_double8 edge0, simd_double8 edge1, simd_double8 x); 1573 /*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 1574 * @discussion Deprecated. 
Use simd_smoothstep(edge0, edge1, x) instead. */ 1575 #define vector_smoothstep simd_smoothstep 1576 1577 /*! @abstract Sum of elements in x. 1578 * @discussion This computation may overflow; especially for 8-bit types you 1579 * may need to convert to a wider type before reducing. */ 1580 static inline SIMD_CFUNC char simd_reduce_add(simd_char2 x); 1581 /*! @abstract Sum of elements in x. 1582 * @discussion This computation may overflow; especially for 8-bit types you 1583 * may need to convert to a wider type before reducing. */ 1584 static inline SIMD_CFUNC char simd_reduce_add(simd_char3 x); 1585 /*! @abstract Sum of elements in x. 1586 * @discussion This computation may overflow; especially for 8-bit types you 1587 * may need to convert to a wider type before reducing. */ 1588 static inline SIMD_CFUNC char simd_reduce_add(simd_char4 x); 1589 /*! @abstract Sum of elements in x. 1590 * @discussion This computation may overflow; especially for 8-bit types you 1591 * may need to convert to a wider type before reducing. */ 1592 static inline SIMD_CFUNC char simd_reduce_add(simd_char8 x); 1593 /*! @abstract Sum of elements in x. 1594 * @discussion This computation may overflow; especially for 8-bit types you 1595 * may need to convert to a wider type before reducing. */ 1596 static inline SIMD_CFUNC char simd_reduce_add(simd_char16 x); 1597 /*! @abstract Sum of elements in x. 1598 * @discussion This computation may overflow; especially for 8-bit types you 1599 * may need to convert to a wider type before reducing. */ 1600 static inline SIMD_CFUNC char simd_reduce_add(simd_char32 x); 1601 /*! @abstract Sum of elements in x. 1602 * @discussion This computation may overflow; especially for 8-bit types you 1603 * may need to convert to a wider type before reducing. */ 1604 static inline SIMD_CFUNC char simd_reduce_add(simd_char64 x); 1605 /*! @abstract Sum of elements in x. 
1606 * @discussion This computation may overflow; especially for 8-bit types you 1607 * may need to convert to a wider type before reducing. */ 1608 static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar2 x); 1609 /*! @abstract Sum of elements in x. 1610 * @discussion This computation may overflow; especially for 8-bit types you 1611 * may need to convert to a wider type before reducing. */ 1612 static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar3 x); 1613 /*! @abstract Sum of elements in x. 1614 * @discussion This computation may overflow; especially for 8-bit types you 1615 * may need to convert to a wider type before reducing. */ 1616 static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar4 x); 1617 /*! @abstract Sum of elements in x. 1618 * @discussion This computation may overflow; especially for 8-bit types you 1619 * may need to convert to a wider type before reducing. */ 1620 static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar8 x); 1621 /*! @abstract Sum of elements in x. 1622 * @discussion This computation may overflow; especially for 8-bit types you 1623 * may need to convert to a wider type before reducing. */ 1624 static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar16 x); 1625 /*! @abstract Sum of elements in x. 1626 * @discussion This computation may overflow; especially for 8-bit types you 1627 * may need to convert to a wider type before reducing. */ 1628 static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar32 x); 1629 /*! @abstract Sum of elements in x. 1630 * @discussion This computation may overflow; especially for 8-bit types you 1631 * may need to convert to a wider type before reducing. */ 1632 static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar64 x); 1633 /*! @abstract Sum of elements in x. 1634 * @discussion This computation may overflow; especially for 8-bit types you 1635 * may need to convert to a wider type before reducing. 
*/ 1636 static inline SIMD_CFUNC short simd_reduce_add(simd_short2 x); 1637 /*! @abstract Sum of elements in x. 1638 * @discussion This computation may overflow; especially for 8-bit types you 1639 * may need to convert to a wider type before reducing. */ 1640 static inline SIMD_CFUNC short simd_reduce_add(simd_short3 x); 1641 /*! @abstract Sum of elements in x. 1642 * @discussion This computation may overflow; especially for 8-bit types you 1643 * may need to convert to a wider type before reducing. */ 1644 static inline SIMD_CFUNC short simd_reduce_add(simd_short4 x); 1645 /*! @abstract Sum of elements in x. 1646 * @discussion This computation may overflow; especially for 8-bit types you 1647 * may need to convert to a wider type before reducing. */ 1648 static inline SIMD_CFUNC short simd_reduce_add(simd_short8 x); 1649 /*! @abstract Sum of elements in x. 1650 * @discussion This computation may overflow; especially for 8-bit types you 1651 * may need to convert to a wider type before reducing. */ 1652 static inline SIMD_CFUNC short simd_reduce_add(simd_short16 x); 1653 /*! @abstract Sum of elements in x. 1654 * @discussion This computation may overflow; especially for 8-bit types you 1655 * may need to convert to a wider type before reducing. */ 1656 static inline SIMD_CFUNC short simd_reduce_add(simd_short32 x); 1657 /*! @abstract Sum of elements in x. 1658 * @discussion This computation may overflow; especially for 8-bit types you 1659 * may need to convert to a wider type before reducing. */ 1660 static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort2 x); 1661 /*! @abstract Sum of elements in x. 1662 * @discussion This computation may overflow; especially for 8-bit types you 1663 * may need to convert to a wider type before reducing. */ 1664 static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort3 x); 1665 /*! @abstract Sum of elements in x. 
1666 * @discussion This computation may overflow; especially for 8-bit types you 1667 * may need to convert to a wider type before reducing. */ 1668 static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort4 x); 1669 /*! @abstract Sum of elements in x. 1670 * @discussion This computation may overflow; especially for 8-bit types you 1671 * may need to convert to a wider type before reducing. */ 1672 static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort8 x); 1673 /*! @abstract Sum of elements in x. 1674 * @discussion This computation may overflow; especially for 8-bit types you 1675 * may need to convert to a wider type before reducing. */ 1676 static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort16 x); 1677 /*! @abstract Sum of elements in x. 1678 * @discussion This computation may overflow; especially for 8-bit types you 1679 * may need to convert to a wider type before reducing. */ 1680 static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort32 x); 1681 /*! @abstract Sum of elements in x. 1682 * @discussion This computation may overflow; especially for 8-bit types you 1683 * may need to convert to a wider type before reducing. */ 1684 static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half2 x); 1685 /*! @abstract Sum of elements in x. 1686 * @discussion This computation may overflow; especially for 8-bit types you 1687 * may need to convert to a wider type before reducing. */ 1688 static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half3 x); 1689 /*! @abstract Sum of elements in x. 1690 * @discussion This computation may overflow; especially for 8-bit types you 1691 * may need to convert to a wider type before reducing. */ 1692 static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half4 x); 1693 /*! @abstract Sum of elements in x. 1694 * @discussion This computation may overflow; especially for 8-bit types you 1695 * may need to convert to a wider type before reducing. 
*/ 1696 static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half8 x); 1697 /*! @abstract Sum of elements in x. 1698 * @discussion This computation may overflow; especially for 8-bit types you 1699 * may need to convert to a wider type before reducing. */ 1700 static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half16 x); 1701 /*! @abstract Sum of elements in x. 1702 * @discussion This computation may overflow; especially for 8-bit types you 1703 * may need to convert to a wider type before reducing. */ 1704 static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half32 x); 1705 /*! @abstract Sum of elements in x. 1706 * @discussion This computation may overflow; especially for 8-bit types you 1707 * may need to convert to a wider type before reducing. */ 1708 static inline SIMD_CFUNC int simd_reduce_add(simd_int2 x); 1709 /*! @abstract Sum of elements in x. 1710 * @discussion This computation may overflow; especially for 8-bit types you 1711 * may need to convert to a wider type before reducing. */ 1712 static inline SIMD_CFUNC int simd_reduce_add(simd_int3 x); 1713 /*! @abstract Sum of elements in x. 1714 * @discussion This computation may overflow; especially for 8-bit types you 1715 * may need to convert to a wider type before reducing. */ 1716 static inline SIMD_CFUNC int simd_reduce_add(simd_int4 x); 1717 /*! @abstract Sum of elements in x. 1718 * @discussion This computation may overflow; especially for 8-bit types you 1719 * may need to convert to a wider type before reducing. */ 1720 static inline SIMD_CFUNC int simd_reduce_add(simd_int8 x); 1721 /*! @abstract Sum of elements in x. 1722 * @discussion This computation may overflow; especially for 8-bit types you 1723 * may need to convert to a wider type before reducing. */ 1724 static inline SIMD_CFUNC int simd_reduce_add(simd_int16 x); 1725 /*! @abstract Sum of elements in x. 1726 * @discussion This computation may overflow; especially for 8-bit types you 1727 * may need to convert to a wider type before reducing. 
*/ 1728 static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint2 x); 1729 /*! @abstract Sum of elements in x. 1730 * @discussion This computation may overflow; especially for 8-bit types you 1731 * may need to convert to a wider type before reducing. */ 1732 static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint3 x); 1733 /*! @abstract Sum of elements in x. 1734 * @discussion This computation may overflow; especially for 8-bit types you 1735 * may need to convert to a wider type before reducing. */ 1736 static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint4 x); 1737 /*! @abstract Sum of elements in x. 1738 * @discussion This computation may overflow; especially for 8-bit types you 1739 * may need to convert to a wider type before reducing. */ 1740 static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint8 x); 1741 /*! @abstract Sum of elements in x. 1742 * @discussion This computation may overflow; especially for 8-bit types you 1743 * may need to convert to a wider type before reducing. */ 1744 static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint16 x); 1745 /*! @abstract Sum of elements in x. 1746 * @discussion This computation may overflow; especially for 8-bit types you 1747 * may need to convert to a wider type before reducing. */ 1748 static inline SIMD_CFUNC float simd_reduce_add(simd_float2 x); 1749 /*! @abstract Sum of elements in x. 1750 * @discussion This computation may overflow; especially for 8-bit types you 1751 * may need to convert to a wider type before reducing. */ 1752 static inline SIMD_CFUNC float simd_reduce_add(simd_float3 x); 1753 /*! @abstract Sum of elements in x. 1754 * @discussion This computation may overflow; especially for 8-bit types you 1755 * may need to convert to a wider type before reducing. */ 1756 static inline SIMD_CFUNC float simd_reduce_add(simd_float4 x); 1757 /*! @abstract Sum of elements in x. 
1758 * @discussion This computation may overflow; especially for 8-bit types you 1759 * may need to convert to a wider type before reducing. */ 1760 static inline SIMD_CFUNC float simd_reduce_add(simd_float8 x); 1761 /*! @abstract Sum of elements in x. 1762 * @discussion This computation may overflow; especially for 8-bit types you 1763 * may need to convert to a wider type before reducing. */ 1764 static inline SIMD_CFUNC float simd_reduce_add(simd_float16 x); 1765 /*! @abstract Sum of elements in x. 1766 * @discussion This computation may overflow; especially for 8-bit types you 1767 * may need to convert to a wider type before reducing. */ 1768 static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long2 x); 1769 /*! @abstract Sum of elements in x. 1770 * @discussion This computation may overflow; especially for 8-bit types you 1771 * may need to convert to a wider type before reducing. */ 1772 static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long3 x); 1773 /*! @abstract Sum of elements in x. 1774 * @discussion This computation may overflow; especially for 8-bit types you 1775 * may need to convert to a wider type before reducing. */ 1776 static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long4 x); 1777 /*! @abstract Sum of elements in x. 1778 * @discussion This computation may overflow; especially for 8-bit types you 1779 * may need to convert to a wider type before reducing. */ 1780 static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long8 x); 1781 /*! @abstract Sum of elements in x. 1782 * @discussion This computation may overflow; especially for 8-bit types you 1783 * may need to convert to a wider type before reducing. */ 1784 static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong2 x); 1785 /*! @abstract Sum of elements in x. 1786 * @discussion This computation may overflow; especially for 8-bit types you 1787 * may need to convert to a wider type before reducing. 
*/ 1788 static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong3 x); 1789 /*! @abstract Sum of elements in x. 1790 * @discussion This computation may overflow; especially for 8-bit types you 1791 * may need to convert to a wider type before reducing. */ 1792 static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong4 x); 1793 /*! @abstract Sum of elements in x. 1794 * @discussion This computation may overflow; especially for 8-bit types you 1795 * may need to convert to a wider type before reducing. */ 1796 static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong8 x); 1797 /*! @abstract Sum of elements in x. 1798 * @discussion This computation may overflow; especially for 8-bit types you 1799 * may need to convert to a wider type before reducing. */ 1800 static inline SIMD_CFUNC double simd_reduce_add(simd_double2 x); 1801 /*! @abstract Sum of elements in x. 1802 * @discussion This computation may overflow; especially for 8-bit types you 1803 * may need to convert to a wider type before reducing. */ 1804 static inline SIMD_CFUNC double simd_reduce_add(simd_double3 x); 1805 /*! @abstract Sum of elements in x. 1806 * @discussion This computation may overflow; especially for 8-bit types you 1807 * may need to convert to a wider type before reducing. */ 1808 static inline SIMD_CFUNC double simd_reduce_add(simd_double4 x); 1809 /*! @abstract Sum of elements in x. 1810 * @discussion This computation may overflow; especially for 8-bit types you 1811 * may need to convert to a wider type before reducing. */ 1812 static inline SIMD_CFUNC double simd_reduce_add(simd_double8 x); 1813 /*! @abstract Sum of elements in x. 1814 * @discussion Deprecated. Use simd_reduce_add(x) instead. */ 1815 #define vector_reduce_add simd_reduce_add 1816 1817 /*! @abstract Minimum of elements in x. */ 1818 static inline SIMD_CFUNC char simd_reduce_min(simd_char2 x); 1819 /*! @abstract Minimum of elements in x. */ 1820 static inline SIMD_CFUNC char simd_reduce_min(simd_char3 x); 1821 /*! 
@abstract Minimum of elements in x. */ 1822 static inline SIMD_CFUNC char simd_reduce_min(simd_char4 x); 1823 /*! @abstract Minimum of elements in x. */ 1824 static inline SIMD_CFUNC char simd_reduce_min(simd_char8 x); 1825 /*! @abstract Minimum of elements in x. */ 1826 static inline SIMD_CFUNC char simd_reduce_min(simd_char16 x); 1827 /*! @abstract Minimum of elements in x. */ 1828 static inline SIMD_CFUNC char simd_reduce_min(simd_char32 x); 1829 /*! @abstract Minimum of elements in x. */ 1830 static inline SIMD_CFUNC char simd_reduce_min(simd_char64 x); 1831 /*! @abstract Minimum of elements in x. */ 1832 static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar2 x); 1833 /*! @abstract Minimum of elements in x. */ 1834 static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar3 x); 1835 /*! @abstract Minimum of elements in x. */ 1836 static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar4 x); 1837 /*! @abstract Minimum of elements in x. */ 1838 static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar8 x); 1839 /*! @abstract Minimum of elements in x. */ 1840 static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar16 x); 1841 /*! @abstract Minimum of elements in x. */ 1842 static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar32 x); 1843 /*! @abstract Minimum of elements in x. */ 1844 static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar64 x); 1845 /*! @abstract Minimum of elements in x. */ 1846 static inline SIMD_CFUNC short simd_reduce_min(simd_short2 x); 1847 /*! @abstract Minimum of elements in x. */ 1848 static inline SIMD_CFUNC short simd_reduce_min(simd_short3 x); 1849 /*! @abstract Minimum of elements in x. */ 1850 static inline SIMD_CFUNC short simd_reduce_min(simd_short4 x); 1851 /*! @abstract Minimum of elements in x. */ 1852 static inline SIMD_CFUNC short simd_reduce_min(simd_short8 x); 1853 /*! @abstract Minimum of elements in x. 
*/ 1854 static inline SIMD_CFUNC short simd_reduce_min(simd_short16 x); 1855 /*! @abstract Minimum of elements in x. */ 1856 static inline SIMD_CFUNC short simd_reduce_min(simd_short32 x); 1857 /*! @abstract Minimum of elements in x. */ 1858 static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort2 x); 1859 /*! @abstract Minimum of elements in x. */ 1860 static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort3 x); 1861 /*! @abstract Minimum of elements in x. */ 1862 static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort4 x); 1863 /*! @abstract Minimum of elements in x. */ 1864 static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort8 x); 1865 /*! @abstract Minimum of elements in x. */ 1866 static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort16 x); 1867 /*! @abstract Minimum of elements in x. */ 1868 static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort32 x); 1869 /*! @abstract Minimum of elements in x. */ 1870 static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half2 x); 1871 /*! @abstract Minimum of elements in x. */ 1872 static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half3 x); 1873 /*! @abstract Minimum of elements in x. */ 1874 static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half4 x); 1875 /*! @abstract Minimum of elements in x. */ 1876 static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half8 x); 1877 /*! @abstract Minimum of elements in x. */ 1878 static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half16 x); 1879 /*! @abstract Minimum of elements in x. */ 1880 static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half32 x); 1881 /*! @abstract Minimum of elements in x. */ 1882 static inline SIMD_CFUNC int simd_reduce_min(simd_int2 x); 1883 /*! @abstract Minimum of elements in x. */ 1884 static inline SIMD_CFUNC int simd_reduce_min(simd_int3 x); 1885 /*! @abstract Minimum of elements in x. 
*/ 1886 static inline SIMD_CFUNC int simd_reduce_min(simd_int4 x); 1887 /*! @abstract Minimum of elements in x. */ 1888 static inline SIMD_CFUNC int simd_reduce_min(simd_int8 x); 1889 /*! @abstract Minimum of elements in x. */ 1890 static inline SIMD_CFUNC int simd_reduce_min(simd_int16 x); 1891 /*! @abstract Minimum of elements in x. */ 1892 static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint2 x); 1893 /*! @abstract Minimum of elements in x. */ 1894 static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint3 x); 1895 /*! @abstract Minimum of elements in x. */ 1896 static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint4 x); 1897 /*! @abstract Minimum of elements in x. */ 1898 static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint8 x); 1899 /*! @abstract Minimum of elements in x. */ 1900 static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint16 x); 1901 /*! @abstract Minimum of elements in x. */ 1902 static inline SIMD_CFUNC float simd_reduce_min(simd_float2 x); 1903 /*! @abstract Minimum of elements in x. */ 1904 static inline SIMD_CFUNC float simd_reduce_min(simd_float3 x); 1905 /*! @abstract Minimum of elements in x. */ 1906 static inline SIMD_CFUNC float simd_reduce_min(simd_float4 x); 1907 /*! @abstract Minimum of elements in x. */ 1908 static inline SIMD_CFUNC float simd_reduce_min(simd_float8 x); 1909 /*! @abstract Minimum of elements in x. */ 1910 static inline SIMD_CFUNC float simd_reduce_min(simd_float16 x); 1911 /*! @abstract Minimum of elements in x. */ 1912 static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long2 x); 1913 /*! @abstract Minimum of elements in x. */ 1914 static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long3 x); 1915 /*! @abstract Minimum of elements in x. */ 1916 static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long4 x); 1917 /*! @abstract Minimum of elements in x. */ 1918 static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long8 x); 1919 /*! 
@abstract Minimum of elements in x. */ 1920 static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong2 x); 1921 /*! @abstract Minimum of elements in x. */ 1922 static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong3 x); 1923 /*! @abstract Minimum of elements in x. */ 1924 static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong4 x); 1925 /*! @abstract Minimum of elements in x. */ 1926 static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong8 x); 1927 /*! @abstract Minimum of elements in x. */ 1928 static inline SIMD_CFUNC double simd_reduce_min(simd_double2 x); 1929 /*! @abstract Minimum of elements in x. */ 1930 static inline SIMD_CFUNC double simd_reduce_min(simd_double3 x); 1931 /*! @abstract Minimum of elements in x. */ 1932 static inline SIMD_CFUNC double simd_reduce_min(simd_double4 x); 1933 /*! @abstract Minimum of elements in x. */ 1934 static inline SIMD_CFUNC double simd_reduce_min(simd_double8 x); 1935 /*! @abstract Minimum of elements in x. 1936 * @discussion Deprecated. Use simd_reduce_min(x) instead. */ 1937 #define vector_reduce_min simd_reduce_min 1938 1939 /*! @abstract Maximum of elements in x. */ 1940 static inline SIMD_CFUNC char simd_reduce_max(simd_char2 x); 1941 /*! @abstract Maximum of elements in x. */ 1942 static inline SIMD_CFUNC char simd_reduce_max(simd_char3 x); 1943 /*! @abstract Maximum of elements in x. */ 1944 static inline SIMD_CFUNC char simd_reduce_max(simd_char4 x); 1945 /*! @abstract Maximum of elements in x. */ 1946 static inline SIMD_CFUNC char simd_reduce_max(simd_char8 x); 1947 /*! @abstract Maximum of elements in x. */ 1948 static inline SIMD_CFUNC char simd_reduce_max(simd_char16 x); 1949 /*! @abstract Maximum of elements in x. */ 1950 static inline SIMD_CFUNC char simd_reduce_max(simd_char32 x); 1951 /*! @abstract Maximum of elements in x. */ 1952 static inline SIMD_CFUNC char simd_reduce_max(simd_char64 x); 1953 /*! @abstract Maximum of elements in x. 
*/ 1954 static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar2 x); 1955 /*! @abstract Maximum of elements in x. */ 1956 static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar3 x); 1957 /*! @abstract Maximum of elements in x. */ 1958 static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar4 x); 1959 /*! @abstract Maximum of elements in x. */ 1960 static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar8 x); 1961 /*! @abstract Maximum of elements in x. */ 1962 static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar16 x); 1963 /*! @abstract Maximum of elements in x. */ 1964 static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar32 x); 1965 /*! @abstract Maximum of elements in x. */ 1966 static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar64 x); 1967 /*! @abstract Maximum of elements in x. */ 1968 static inline SIMD_CFUNC short simd_reduce_max(simd_short2 x); 1969 /*! @abstract Maximum of elements in x. */ 1970 static inline SIMD_CFUNC short simd_reduce_max(simd_short3 x); 1971 /*! @abstract Maximum of elements in x. */ 1972 static inline SIMD_CFUNC short simd_reduce_max(simd_short4 x); 1973 /*! @abstract Maximum of elements in x. */ 1974 static inline SIMD_CFUNC short simd_reduce_max(simd_short8 x); 1975 /*! @abstract Maximum of elements in x. */ 1976 static inline SIMD_CFUNC short simd_reduce_max(simd_short16 x); 1977 /*! @abstract Maximum of elements in x. */ 1978 static inline SIMD_CFUNC short simd_reduce_max(simd_short32 x); 1979 /*! @abstract Maximum of elements in x. */ 1980 static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort2 x); 1981 /*! @abstract Maximum of elements in x. */ 1982 static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort3 x); 1983 /*! @abstract Maximum of elements in x. */ 1984 static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort4 x); 1985 /*! @abstract Maximum of elements in x. 
*/ 1986 static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort8 x); 1987 /*! @abstract Maximum of elements in x. */ 1988 static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort16 x); 1989 /*! @abstract Maximum of elements in x. */ 1990 static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort32 x); 1991 /*! @abstract Maximum of elements in x. */ 1992 static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half2 x); 1993 /*! @abstract Maximum of elements in x. */ 1994 static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half3 x); 1995 /*! @abstract Maximum of elements in x. */ 1996 static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half4 x); 1997 /*! @abstract Maximum of elements in x. */ 1998 static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half8 x); 1999 /*! @abstract Maximum of elements in x. */ 2000 static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half16 x); 2001 /*! @abstract Maximum of elements in x. */ 2002 static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half32 x); 2003 /*! @abstract Maximum of elements in x. */ 2004 static inline SIMD_CFUNC int simd_reduce_max(simd_int2 x); 2005 /*! @abstract Maximum of elements in x. */ 2006 static inline SIMD_CFUNC int simd_reduce_max(simd_int3 x); 2007 /*! @abstract Maximum of elements in x. */ 2008 static inline SIMD_CFUNC int simd_reduce_max(simd_int4 x); 2009 /*! @abstract Maximum of elements in x. */ 2010 static inline SIMD_CFUNC int simd_reduce_max(simd_int8 x); 2011 /*! @abstract Maximum of elements in x. */ 2012 static inline SIMD_CFUNC int simd_reduce_max(simd_int16 x); 2013 /*! @abstract Maximum of elements in x. */ 2014 static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint2 x); 2015 /*! @abstract Maximum of elements in x. */ 2016 static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint3 x); 2017 /*! @abstract Maximum of elements in x. */ 2018 static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint4 x); 2019 /*! 
@abstract Maximum of elements in x. */ 2020 static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint8 x); 2021 /*! @abstract Maximum of elements in x. */ 2022 static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint16 x); 2023 /*! @abstract Maximum of elements in x. */ 2024 static inline SIMD_CFUNC float simd_reduce_max(simd_float2 x); 2025 /*! @abstract Maximum of elements in x. */ 2026 static inline SIMD_CFUNC float simd_reduce_max(simd_float3 x); 2027 /*! @abstract Maximum of elements in x. */ 2028 static inline SIMD_CFUNC float simd_reduce_max(simd_float4 x); 2029 /*! @abstract Maximum of elements in x. */ 2030 static inline SIMD_CFUNC float simd_reduce_max(simd_float8 x); 2031 /*! @abstract Maximum of elements in x. */ 2032 static inline SIMD_CFUNC float simd_reduce_max(simd_float16 x); 2033 /*! @abstract Maximum of elements in x. */ 2034 static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long2 x); 2035 /*! @abstract Maximum of elements in x. */ 2036 static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long3 x); 2037 /*! @abstract Maximum of elements in x. */ 2038 static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long4 x); 2039 /*! @abstract Maximum of elements in x. */ 2040 static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long8 x); 2041 /*! @abstract Maximum of elements in x. */ 2042 static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong2 x); 2043 /*! @abstract Maximum of elements in x. */ 2044 static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong3 x); 2045 /*! @abstract Maximum of elements in x. */ 2046 static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong4 x); 2047 /*! @abstract Maximum of elements in x. */ 2048 static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong8 x); 2049 /*! @abstract Maximum of elements in x. */ 2050 static inline SIMD_CFUNC double simd_reduce_max(simd_double2 x); 2051 /*! @abstract Maximum of elements in x. 
*/ 2052 static inline SIMD_CFUNC double simd_reduce_max(simd_double3 x); 2053 /*! @abstract Maximum of elements in x. */ 2054 static inline SIMD_CFUNC double simd_reduce_max(simd_double4 x); 2055 /*! @abstract Maximum of elements in x. */ 2056 static inline SIMD_CFUNC double simd_reduce_max(simd_double8 x); 2057 /*! @abstract Maximum of elements in x. 2058 * @discussion Deprecated. Use simd_reduce_max(x) instead. */ 2059 #define vector_reduce_max simd_reduce_max 2060 2061 /*! @abstract True if and only if each lane of x is equal to the 2062 * corresponding lane of y. * @discussion Returns simd_all applied to the lane-wise comparison x == y. */ 2063 static inline SIMD_CFUNC simd_bool simd_equal(simd_char2 x, simd_char2 y) { 2064 return simd_all(x == y); 2065 } 2066 /*! @abstract True if and only if each lane of x is equal to the 2067 * corresponding lane of y. * @discussion Returns simd_all applied to the lane-wise comparison x == y. */ 2068 static inline SIMD_CFUNC simd_bool simd_equal(simd_char3 x, simd_char3 y) { 2069 return simd_all(x == y); 2070 } 2071 /*! @abstract True if and only if each lane of x is equal to the 2072 * corresponding lane of y. * @discussion Returns simd_all applied to the lane-wise comparison x == y. */ 2073 static inline SIMD_CFUNC simd_bool simd_equal(simd_char4 x, simd_char4 y) { 2074 return simd_all(x == y); 2075 } 2076 /*! @abstract True if and only if each lane of x is equal to the 2077 * corresponding lane of y. * @discussion Returns simd_all applied to the lane-wise comparison x == y. */ 2078 static inline SIMD_CFUNC simd_bool simd_equal(simd_char8 x, simd_char8 y) { 2079 return simd_all(x == y); 2080 } 2081 /*! @abstract True if and only if each lane of x is equal to the 2082 * corresponding lane of y. * @discussion Returns simd_all applied to the lane-wise comparison x == y. */ 2083 static inline SIMD_CFUNC simd_bool simd_equal(simd_char16 x, simd_char16 y) { 2084 return simd_all(x == y); 2085 } 2086 /*! @abstract True if and only if each lane of x is equal to the 2087 * corresponding lane of y. * @discussion Returns simd_all applied to the lane-wise comparison x == y. */ 2088 static inline SIMD_CFUNC simd_bool simd_equal(simd_char32 x, simd_char32 y) { 2089 return simd_all(x == y); 2090 } 2091 /*! @abstract True if and only if each lane of x is equal to the 2092 * corresponding lane of y. 
*/ 2093 static inline SIMD_CFUNC simd_bool simd_equal(simd_char64 x, simd_char64 y) { 2094 return simd_all(x == y); 2095 } 2096 /*! @abstract True if and only if each lane of x is equal to the 2097 * corresponding lane of y. * @discussion Returns simd_all applied to the lane-wise comparison x == y. */ 2098 static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar2 x, simd_uchar2 y) { 2099 return simd_all(x == y); 2100 } 2101 /*! @abstract True if and only if each lane of x is equal to the 2102 * corresponding lane of y. * @discussion Returns simd_all applied to the lane-wise comparison x == y. */ 2103 static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar3 x, simd_uchar3 y) { 2104 return simd_all(x == y); 2105 } 2106 /*! @abstract True if and only if each lane of x is equal to the 2107 * corresponding lane of y. * @discussion Returns simd_all applied to the lane-wise comparison x == y. */ 2108 static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar4 x, simd_uchar4 y) { 2109 return simd_all(x == y); 2110 } 2111 /*! @abstract True if and only if each lane of x is equal to the 2112 * corresponding lane of y. * @discussion Returns simd_all applied to the lane-wise comparison x == y. */ 2113 static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar8 x, simd_uchar8 y) { 2114 return simd_all(x == y); 2115 } 2116 /*! @abstract True if and only if each lane of x is equal to the 2117 * corresponding lane of y. * @discussion Returns simd_all applied to the lane-wise comparison x == y. */ 2118 static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar16 x, simd_uchar16 y) { 2119 return simd_all(x == y); 2120 } 2121 /*! @abstract True if and only if each lane of x is equal to the 2122 * corresponding lane of y. * @discussion Returns simd_all applied to the lane-wise comparison x == y. */ 2123 static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar32 x, simd_uchar32 y) { 2124 return simd_all(x == y); 2125 } 2126 /*! @abstract True if and only if each lane of x is equal to the 2127 * corresponding lane of y. * @discussion Returns simd_all applied to the lane-wise comparison x == y. */ 2128 static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar64 x, simd_uchar64 y) { 2129 return simd_all(x == y); 2130 } 2131 /*! @abstract True if and only if each lane of x is equal to the 2132 * corresponding lane of y. * @discussion Returns simd_all applied to the lane-wise comparison x == y. */ 2133 static inline SIMD_CFUNC simd_bool simd_equal(simd_short2 x, simd_short2 y) { 2134 return simd_all(x == y); 2135 } 2136 /*! 
@abstract True if and only if each lane of x is equal to the 2137 * corresponding lane of y. @discussion Returns a single simd_bool: the lanewise x == y result reduced with simd_all. */ 2138 static inline SIMD_CFUNC simd_bool simd_equal(simd_short3 x, simd_short3 y) { 2139 return simd_all(x == y); 2140 } 2141 /*! @abstract True if and only if each lane of x is equal to the 2142 * corresponding lane of y. */ 2143 static inline SIMD_CFUNC simd_bool simd_equal(simd_short4 x, simd_short4 y) { 2144 return simd_all(x == y); 2145 } 2146 /*! @abstract True if and only if each lane of x is equal to the 2147 * corresponding lane of y. */ 2148 static inline SIMD_CFUNC simd_bool simd_equal(simd_short8 x, simd_short8 y) { 2149 return simd_all(x == y); 2150 } 2151 /*! @abstract True if and only if each lane of x is equal to the 2152 * corresponding lane of y. */ 2153 static inline SIMD_CFUNC simd_bool simd_equal(simd_short16 x, simd_short16 y) { 2154 return simd_all(x == y); 2155 } 2156 /*! @abstract True if and only if each lane of x is equal to the 2157 * corresponding lane of y. */ 2158 static inline SIMD_CFUNC simd_bool simd_equal(simd_short32 x, simd_short32 y) { 2159 return simd_all(x == y); 2160 } 2161 /*! @abstract True if and only if each lane of x is equal to the 2162 * corresponding lane of y. */ 2163 static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort2 x, simd_ushort2 y) { 2164 return simd_all(x == y); 2165 } 2166 /*! @abstract True if and only if each lane of x is equal to the 2167 * corresponding lane of y. */ 2168 static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort3 x, simd_ushort3 y) { 2169 return simd_all(x == y); 2170 } 2171 /*! @abstract True if and only if each lane of x is equal to the 2172 * corresponding lane of y. */ 2173 static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort4 x, simd_ushort4 y) { 2174 return simd_all(x == y); 2175 } 2176 /*! @abstract True if and only if each lane of x is equal to the 2177 * corresponding lane of y. 
*/ 2178 static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort8 x, simd_ushort8 y) { 2179 return simd_all(x == y); 2180 } 2181 /*! @abstract True if and only if each lane of x is equal to the 2182 * corresponding lane of y. @discussion Returns a single simd_bool: the lanewise x == y result reduced with simd_all. */ 2183 static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort16 x, simd_ushort16 y) { 2184 return simd_all(x == y); 2185 } 2186 /*! @abstract True if and only if each lane of x is equal to the 2187 * corresponding lane of y. */ 2188 static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort32 x, simd_ushort32 y) { 2189 return simd_all(x == y); 2190 } 2191 /*! @abstract True if and only if each lane of x is equal to the 2192 * corresponding lane of y. NOTE(review): half lanes are compared with ==, so a NaN lane presumably makes the result false - confirm. */ 2193 static inline SIMD_CFUNC simd_bool simd_equal(simd_half2 x, simd_half2 y) { 2194 return simd_all(x == y); 2195 } 2196 /*! @abstract True if and only if each lane of x is equal to the 2197 * corresponding lane of y. */ 2198 static inline SIMD_CFUNC simd_bool simd_equal(simd_half3 x, simd_half3 y) { 2199 return simd_all(x == y); 2200 } 2201 /*! @abstract True if and only if each lane of x is equal to the 2202 * corresponding lane of y. */ 2203 static inline SIMD_CFUNC simd_bool simd_equal(simd_half4 x, simd_half4 y) { 2204 return simd_all(x == y); 2205 } 2206 /*! @abstract True if and only if each lane of x is equal to the 2207 * corresponding lane of y. */ 2208 static inline SIMD_CFUNC simd_bool simd_equal(simd_half8 x, simd_half8 y) { 2209 return simd_all(x == y); 2210 } 2211 /*! @abstract True if and only if each lane of x is equal to the 2212 * corresponding lane of y. */ 2213 static inline SIMD_CFUNC simd_bool simd_equal(simd_half16 x, simd_half16 y) { 2214 return simd_all(x == y); 2215 } 2216 /*! @abstract True if and only if each lane of x is equal to the 2217 * corresponding lane of y. */ 2218 static inline SIMD_CFUNC simd_bool simd_equal(simd_half32 x, simd_half32 y) { 2219 return simd_all(x == y); 2220 } 2221 /*! 
@abstract True if and only if each lane of x is equal to the 2222 * corresponding lane of y. @discussion Returns a single simd_bool: the lanewise x == y result reduced with simd_all. */ 2223 static inline SIMD_CFUNC simd_bool simd_equal(simd_int2 x, simd_int2 y) { 2224 return simd_all(x == y); 2225 } 2226 /*! @abstract True if and only if each lane of x is equal to the 2227 * corresponding lane of y. */ 2228 static inline SIMD_CFUNC simd_bool simd_equal(simd_int3 x, simd_int3 y) { 2229 return simd_all(x == y); 2230 } 2231 /*! @abstract True if and only if each lane of x is equal to the 2232 * corresponding lane of y. */ 2233 static inline SIMD_CFUNC simd_bool simd_equal(simd_int4 x, simd_int4 y) { 2234 return simd_all(x == y); 2235 } 2236 /*! @abstract True if and only if each lane of x is equal to the 2237 * corresponding lane of y. */ 2238 static inline SIMD_CFUNC simd_bool simd_equal(simd_int8 x, simd_int8 y) { 2239 return simd_all(x == y); 2240 } 2241 /*! @abstract True if and only if each lane of x is equal to the 2242 * corresponding lane of y. */ 2243 static inline SIMD_CFUNC simd_bool simd_equal(simd_int16 x, simd_int16 y) { 2244 return simd_all(x == y); 2245 } 2246 /*! @abstract True if and only if each lane of x is equal to the 2247 * corresponding lane of y. */ 2248 static inline SIMD_CFUNC simd_bool simd_equal(simd_uint2 x, simd_uint2 y) { 2249 return simd_all(x == y); 2250 } 2251 /*! @abstract True if and only if each lane of x is equal to the 2252 * corresponding lane of y. */ 2253 static inline SIMD_CFUNC simd_bool simd_equal(simd_uint3 x, simd_uint3 y) { 2254 return simd_all(x == y); 2255 } 2256 /*! @abstract True if and only if each lane of x is equal to the 2257 * corresponding lane of y. */ 2258 static inline SIMD_CFUNC simd_bool simd_equal(simd_uint4 x, simd_uint4 y) { 2259 return simd_all(x == y); 2260 } 2261 /*! @abstract True if and only if each lane of x is equal to the 2262 * corresponding lane of y. */ 2263 static inline SIMD_CFUNC simd_bool simd_equal(simd_uint8 x, simd_uint8 y) { 2264 return simd_all(x == y); 2265 } 2266 /*! 
@abstract True if and only if each lane of x is equal to the 2267 * corresponding lane of y. @discussion Returns a single simd_bool: the lanewise x == y result reduced with simd_all. */ 2268 static inline SIMD_CFUNC simd_bool simd_equal(simd_uint16 x, simd_uint16 y) { 2269 return simd_all(x == y); 2270 } 2271 /*! @abstract True if and only if each lane of x is equal to the 2272 * corresponding lane of y. NOTE(review): float lanes are compared with ==, so a NaN lane presumably makes the result false - confirm. */ 2273 static inline SIMD_CFUNC simd_bool simd_equal(simd_float2 x, simd_float2 y) { 2274 return simd_all(x == y); 2275 } 2276 /*! @abstract True if and only if each lane of x is equal to the 2277 * corresponding lane of y. */ 2278 static inline SIMD_CFUNC simd_bool simd_equal(simd_float3 x, simd_float3 y) { 2279 return simd_all(x == y); 2280 } 2281 /*! @abstract True if and only if each lane of x is equal to the 2282 * corresponding lane of y. */ 2283 static inline SIMD_CFUNC simd_bool simd_equal(simd_float4 x, simd_float4 y) { 2284 return simd_all(x == y); 2285 } 2286 /*! @abstract True if and only if each lane of x is equal to the 2287 * corresponding lane of y. */ 2288 static inline SIMD_CFUNC simd_bool simd_equal(simd_float8 x, simd_float8 y) { 2289 return simd_all(x == y); 2290 } 2291 /*! @abstract True if and only if each lane of x is equal to the 2292 * corresponding lane of y. */ 2293 static inline SIMD_CFUNC simd_bool simd_equal(simd_float16 x, simd_float16 y) { 2294 return simd_all(x == y); 2295 } 2296 /*! @abstract True if and only if each lane of x is equal to the 2297 * corresponding lane of y. */ 2298 static inline SIMD_CFUNC simd_bool simd_equal(simd_long2 x, simd_long2 y) { 2299 return simd_all(x == y); 2300 } 2301 /*! @abstract True if and only if each lane of x is equal to the 2302 * corresponding lane of y. */ 2303 static inline SIMD_CFUNC simd_bool simd_equal(simd_long3 x, simd_long3 y) { 2304 return simd_all(x == y); 2305 } 2306 /*! @abstract True if and only if each lane of x is equal to the 2307 * corresponding lane of y. 
*/ 2308 static inline SIMD_CFUNC simd_bool simd_equal(simd_long4 x, simd_long4 y) { 2309 return simd_all(x == y); 2310 } 2311 /*! @abstract True if and only if each lane of x is equal to the 2312 * corresponding lane of y. @discussion Returns a single simd_bool: the lanewise x == y result reduced with simd_all. */ 2313 static inline SIMD_CFUNC simd_bool simd_equal(simd_long8 x, simd_long8 y) { 2314 return simd_all(x == y); 2315 } 2316 /*! @abstract True if and only if each lane of x is equal to the 2317 * corresponding lane of y. */ 2318 static inline SIMD_CFUNC simd_bool simd_equal(simd_ulong2 x, simd_ulong2 y) { 2319 return simd_all(x == y); 2320 } 2321 /*! @abstract True if and only if each lane of x is equal to the 2322 * corresponding lane of y. */ 2323 static inline SIMD_CFUNC simd_bool simd_equal(simd_ulong3 x, simd_ulong3 y) { 2324 return simd_all(x == y); 2325 } 2326 /*! @abstract True if and only if each lane of x is equal to the 2327 * corresponding lane of y. */ 2328 static inline SIMD_CFUNC simd_bool simd_equal(simd_ulong4 x, simd_ulong4 y) { 2329 return simd_all(x == y); 2330 } 2331 /*! @abstract True if and only if each lane of x is equal to the 2332 * corresponding lane of y. */ 2333 static inline SIMD_CFUNC simd_bool simd_equal(simd_ulong8 x, simd_ulong8 y) { 2334 return simd_all(x == y); 2335 } 2336 /*! @abstract True if and only if each lane of x is equal to the 2337 * corresponding lane of y. NOTE(review): double lanes are compared with ==, so a NaN lane presumably makes the result false - confirm. */ 2338 static inline SIMD_CFUNC simd_bool simd_equal(simd_double2 x, simd_double2 y) { 2339 return simd_all(x == y); 2340 } 2341 /*! @abstract True if and only if each lane of x is equal to the 2342 * corresponding lane of y. */ 2343 static inline SIMD_CFUNC simd_bool simd_equal(simd_double3 x, simd_double3 y) { 2344 return simd_all(x == y); 2345 } 2346 /*! @abstract True if and only if each lane of x is equal to the 2347 * corresponding lane of y. */ 2348 static inline SIMD_CFUNC simd_bool simd_equal(simd_double4 x, simd_double4 y) { 2349 return simd_all(x == y); 2350 } 2351 /*! 
@abstract True if and only if each lane of x is equal to the 2352 * corresponding lane of y. */ 2353 static inline SIMD_CFUNC simd_bool simd_equal(simd_double8 x, simd_double8 y) { 2354 return simd_all(x == y); 2355 } 2356 2357 #ifdef __cplusplus 2358 } /* extern "C" */ 2359 2360 namespace simd { 2361 /*! @abstract The lanewise absolute value of x. */ 2362 template <typename typeN> static SIMD_CPPFUNC typeN abs(const typeN x) { return ::simd_abs(x); } 2363 /*! @abstract The lanewise maximum of x and y. */ 2364 template <typename typeN> static SIMD_CPPFUNC typeN max(const typeN x, const typeN y) { return ::simd_max(x,y); } 2365 /*! @abstract The lanewise minimum of x and y. */ 2366 template <typename typeN> static SIMD_CPPFUNC typeN min(const typeN x, const typeN y) { return ::simd_min(x,y); } 2367 /*! @abstract x clamped to the interval [min, max]. */ 2368 template <typename typeN> static SIMD_CPPFUNC typeN clamp(const typeN x, const typeN min, const typeN max) { return ::simd_clamp(x,min,max); } 2369 /*! @abstract -1 if x < 0, +1 if x > 0, and 0 otherwise. */ 2370 template <typename fptypeN> static SIMD_CPPFUNC fptypeN sign(const fptypeN x) { return ::simd_sign(x); } 2371 /*! @abstract Linearly interpolates between x and y, taking the value x when t=0 and y when t=1. The result is undefined if t is not in the range [0,1]. */ 2372 template <typename fptypeN> static SIMD_CPPFUNC fptypeN mix(const fptypeN x, const fptypeN y, const fptypeN t) { return ::simd_mix(x,y,t); } 2373 /*! @abstract Alternate name for mix: forwards to the same ::simd_mix(x,y,t) call. */ template <typename fptypeN> static SIMD_CPPFUNC fptypeN lerp(const fptypeN x, const fptypeN y, const fptypeN t) { return ::simd_mix(x,y,t); } 2374 /*! @abstract An approximation to 1/x. */ 2375 template <typename fptypeN> static SIMD_CPPFUNC fptypeN recip(const fptypeN x) { return simd_recip(x); } 2376 /*! @abstract An approximation to 1/sqrt(x). */ 2377 template <typename fptypeN> static SIMD_CPPFUNC fptypeN rsqrt(const fptypeN x) { return simd_rsqrt(x); } 2378 /*! @abstract The "fractional part" of x, in the range [0,1). 
*/ 2379 template <typename fptypeN> static SIMD_CPPFUNC fptypeN fract(const fptypeN x) { return ::simd_fract(x); } 2380 /*! @abstract 0 if x < edge, 1 otherwise. */ 2381 template <typename fptypeN> static SIMD_CPPFUNC fptypeN step(const fptypeN edge, const fptypeN x) { return ::simd_step(edge,x); } 2382 /*! @abstract Smoothly interpolates from 0 at edge0 to 1 at edge1: 0 if x <= edge0, 1 if x >= edge1, and a Hermite interpolation in between. */ 2383 template <typename fptypeN> static SIMD_CPPFUNC fptypeN smoothstep(const fptypeN edge0, const fptypeN edge1, const fptypeN x) { return ::simd_smoothstep(edge0,edge1,x); } 2384 /*! @abstract True if and only if each lane of x is equal to the 2385 * corresponding lane of y. 2386 * 2387 * @discussion This isn't operator== because that's already defined by 2388 * the compiler to return a lane mask. The template parameter is named fptypeN, but the call forwards to ::simd_equal, which this header also overloads for integer vectors. */ 2389 template <typename fptypeN> static SIMD_CPPFUNC simd_bool equal(const fptypeN x, const fptypeN y) { return ::simd_equal(x, y); } 2390 #if __cpp_decltype_auto 2391 /* If you are targeting an earlier version of the C++ standard that lacks 2392 decltype_auto support, you may use the C-style simd_reduce_* functions 2393 instead. The wrappers below are guarded because they declare a deduced (auto) return type. */ 2394 /*! @abstract The sum of the elements in x. May overflow. */ 2395 template <typename typeN> static SIMD_CPPFUNC auto reduce_add(typeN x) { return ::simd_reduce_add(x); } 2396 /*! @abstract The least element in x. */ 2397 template <typename typeN> static SIMD_CPPFUNC auto reduce_min(typeN x) { return ::simd_reduce_min(x); } 2398 /*! @abstract The greatest element in x. */ 2399 template <typename typeN> static SIMD_CPPFUNC auto reduce_max(typeN x) { return ::simd_reduce_max(x); } 2400 #endif 2401 namespace precise { 2402 /*! @abstract An approximation to 1/x. Always forwards to ::simd_precise_recip, the variant documented as accurate to a few ULPs. */ 2403 template <typename fptypeN> static SIMD_CPPFUNC fptypeN recip(const fptypeN x) { return ::simd_precise_recip(x); } 2404 /*! @abstract An approximation to 1/sqrt(x). 
*/ 2405 template <typename fptypeN> static SIMD_CPPFUNC fptypeN rsqrt(const fptypeN x) { return ::simd_precise_rsqrt(x); } 2406 } 2407 namespace fast { 2408 /*! @abstract An approximation to 1/x. Always forwards to ::simd_fast_recip, the variant documented as having as little as 11 bits of accuracy in float. */ 2409 template <typename fptypeN> static SIMD_CPPFUNC fptypeN recip(const fptypeN x) { return ::simd_fast_recip(x); } 2410 /*! @abstract An approximation to 1/sqrt(x). Always forwards to ::simd_fast_rsqrt. */ 2411 template <typename fptypeN> static SIMD_CPPFUNC fptypeN rsqrt(const fptypeN x) { return ::simd_fast_rsqrt(x); } 2412 } 2413 } 2414 2415 extern "C" { 2416 #endif /* __cplusplus */ 2417 2418 #pragma mark - Implementation 2419 2420 /* simd_abs, signed char lanes. The 2-, 3- and 4-lane overloads pass x through simd_make_char8_undef, reuse the 8-lane overload and convert the result back to the input width. */ static inline SIMD_CFUNC simd_char2 simd_abs(simd_char2 x) { 2421 return simd_make_char2(simd_abs(simd_make_char8_undef(x))); 2422 } 2423 2424 static inline SIMD_CFUNC simd_char3 simd_abs(simd_char3 x) { 2425 return simd_make_char3(simd_abs(simd_make_char8_undef(x))); 2426 } 2427 2428 static inline SIMD_CFUNC simd_char4 simd_abs(simd_char4 x) { 2429 return simd_make_char4(simd_abs(simd_make_char8_undef(x))); 2430 } 2431 2432 /* 8 lanes: NEON vabs_s8 on ARM targets, otherwise routed through the 16-lane overload. */ static inline SIMD_CFUNC simd_char8 simd_abs(simd_char8 x) { 2433 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 2434 return vabs_s8(x); 2435 #else 2436 return simd_make_char8(simd_abs(simd_make_char16_undef(x))); 2437 #endif 2438 } 2439 2440 /* 16 lanes: NEON or SSE4.1 intrinsic when available, otherwise a portable sign-mask computation. */ static inline SIMD_CFUNC simd_char16 simd_abs(simd_char16 x) { 2441 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 2442 return vabsq_s8(x); 2443 #elif defined __SSE4_1__ 2444 return (simd_char16) _mm_abs_epi8((__m128i)x); 2445 #else 2446 /* mask = x >> 7 (all ones in negative lanes, assuming an arithmetic shift); (x ^ mask) - mask then negates exactly those lanes. */ simd_char16 mask = x >> 7; return (x ^ mask) - mask; 2447 #endif 2448 } 2449 2450 /* 32 lanes: AVX2 intrinsic when available, otherwise simd_abs is applied to x.lo and x.hi separately. */ static inline SIMD_CFUNC simd_char32 simd_abs(simd_char32 x) { 2451 #if defined __AVX2__ 2452 return _mm256_abs_epi8(x); 2453 #else 2454 return simd_make_char32(simd_abs(x.lo), simd_abs(x.hi)); 2455 #endif 2456 } 2457 2458 /* 64 lanes: AVX-512BW intrinsic when available, otherwise the two halves are processed separately. */ static inline SIMD_CFUNC simd_char64 simd_abs(simd_char64 x) { 2459 #if defined __AVX512BW__ 2460 return _mm512_abs_epi8(x); 2461 #else 2462 return 
simd_make_char64(simd_abs(x.lo), simd_abs(x.hi)); 2463 #endif 2464 } 2465 2466 /* simd_abs, signed 16-bit lanes. The 2- and 3-lane overloads go through the 4-lane overload via simd_make_short4_undef. */ static inline SIMD_CFUNC simd_short2 simd_abs(simd_short2 x) { 2467 return simd_make_short2(simd_abs(simd_make_short4_undef(x))); 2468 } 2469 2470 static inline SIMD_CFUNC simd_short3 simd_abs(simd_short3 x) { 2471 return simd_make_short3(simd_abs(simd_make_short4_undef(x))); 2472 } 2473 2474 /* 4 lanes: NEON vabs_s16 on ARM targets, otherwise routed through the 8-lane overload. */ static inline SIMD_CFUNC simd_short4 simd_abs(simd_short4 x) { 2475 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 2476 return vabs_s16(x); 2477 #else 2478 return simd_make_short4(simd_abs(simd_make_short8_undef(x))); 2479 #endif 2480 } 2481 2482 static inline SIMD_CFUNC simd_short8 simd_abs(simd_short8 x) { 2483 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 2484 return vabsq_s16(x); 2485 #elif defined __SSE4_1__ 2486 return (simd_short8) _mm_abs_epi16((__m128i)x); 2487 #else 2488 /* Portable fallback: mask = x >> 15 (all ones in negative lanes, assuming an arithmetic shift); (x ^ mask) - mask negates those lanes. */ simd_short8 mask = x >> 15; return (x ^ mask) - mask; 2489 #endif 2490 } 2491 2492 /* 16 and 32 lanes: AVX2 / AVX-512BW intrinsic when available, otherwise simd_abs on x.lo and x.hi. */ static inline SIMD_CFUNC simd_short16 simd_abs(simd_short16 x) { 2493 #if defined __AVX2__ 2494 return _mm256_abs_epi16(x); 2495 #else 2496 return simd_make_short16(simd_abs(x.lo), simd_abs(x.hi)); 2497 #endif 2498 } 2499 2500 static inline SIMD_CFUNC simd_short32 simd_abs(simd_short32 x) { 2501 #if defined __AVX512BW__ 2502 return _mm512_abs_epi16(x); 2503 #else 2504 return simd_make_short32(simd_abs(x.lo), simd_abs(x.hi)); 2505 #endif 2506 } 2507 2508 /* simd_abs, half-precision lanes: every width forwards directly to __tg_fabs. */ static inline SIMD_CFUNC simd_half2 simd_abs(simd_half2 x) { 2509 return __tg_fabs(x); 2510 } 2511 2512 static inline SIMD_CFUNC simd_half3 simd_abs(simd_half3 x) { 2513 return __tg_fabs(x); 2514 } 2515 2516 static inline SIMD_CFUNC simd_half4 simd_abs(simd_half4 x) { 2517 return __tg_fabs(x); 2518 } 2519 2520 static inline SIMD_CFUNC simd_half8 simd_abs(simd_half8 x) { 2521 return __tg_fabs(x); 2522 } 2523 2524 static inline SIMD_CFUNC simd_half16 simd_abs(simd_half16 x) { 2525 return __tg_fabs(x); 2526 } 2527 2528 static inline SIMD_CFUNC simd_half32 
simd_abs(simd_half32 x) { 2529 return __tg_fabs(x); 2530 } 2531 2532 /* simd_abs, signed 32-bit lanes. 2 lanes: NEON vabs_s32 on ARM targets, otherwise routed through the 4-lane overload. */ static inline SIMD_CFUNC simd_int2 simd_abs(simd_int2 x) { 2533 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 2534 return vabs_s32(x); 2535 #else 2536 return simd_make_int2(simd_abs(simd_make_int4_undef(x))); 2537 #endif 2538 } 2539 2540 static inline SIMD_CFUNC simd_int3 simd_abs(simd_int3 x) { 2541 return simd_make_int3(simd_abs(simd_make_int4_undef(x))); 2542 } 2543 2544 static inline SIMD_CFUNC simd_int4 simd_abs(simd_int4 x) { 2545 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 2546 return vabsq_s32(x); 2547 #elif defined __SSE4_1__ 2548 return (simd_int4) _mm_abs_epi32((__m128i)x); 2549 #else 2550 /* Portable fallback: mask = x >> 31 (all ones in negative lanes, assuming an arithmetic shift); (x ^ mask) - mask negates those lanes. */ simd_int4 mask = x >> 31; return (x ^ mask) - mask; 2551 #endif 2552 } 2553 2554 /* 8 and 16 lanes: AVX2 / AVX-512F intrinsic when available, otherwise simd_abs on x.lo and x.hi. */ static inline SIMD_CFUNC simd_int8 simd_abs(simd_int8 x) { 2555 #if defined __AVX2__ 2556 return _mm256_abs_epi32(x); 2557 #else 2558 return simd_make_int8(simd_abs(x.lo), simd_abs(x.hi)); 2559 #endif 2560 } 2561 2562 static inline SIMD_CFUNC simd_int16 simd_abs(simd_int16 x) { 2563 #if defined __AVX512F__ 2564 return _mm512_abs_epi32(x); 2565 #else 2566 return simd_make_int16(simd_abs(x.lo), simd_abs(x.hi)); 2567 #endif 2568 } 2569 2570 /* simd_abs, single-precision lanes: every width forwards directly to __tg_fabs. */ static inline SIMD_CFUNC simd_float2 simd_abs(simd_float2 x) { 2571 return __tg_fabs(x); 2572 } 2573 2574 static inline SIMD_CFUNC simd_float3 simd_abs(simd_float3 x) { 2575 return __tg_fabs(x); 2576 } 2577 2578 static inline SIMD_CFUNC simd_float4 simd_abs(simd_float4 x) { 2579 return __tg_fabs(x); 2580 } 2581 2582 static inline SIMD_CFUNC simd_float8 simd_abs(simd_float8 x) { 2583 return __tg_fabs(x); 2584 } 2585 2586 static inline SIMD_CFUNC simd_float16 simd_abs(simd_float16 x) { 2587 return __tg_fabs(x); 2588 } 2589 2590 /* simd_abs, signed 64-bit lanes. The NEON branch is only selected for arm64/aarch64 (there is no __arm__ test here); x86 needs AVX-512VL for the intrinsic. */ static inline SIMD_CFUNC simd_long2 simd_abs(simd_long2 x) { 2591 #if defined __arm64__ || defined __aarch64__ 2592 return vabsq_s64(x); 2593 #elif defined __AVX512VL__ 2594 return (simd_long2) _mm_abs_epi64((__m128i)x); 2595 
#else 2596 /* Portable fallback: mask = x >> 63 (all ones in negative lanes, assuming an arithmetic shift); (x ^ mask) - mask negates those lanes. */ simd_long2 mask = x >> 63; return (x ^ mask) - mask; 2597 #endif 2598 } 2599 2600 /* 3 lanes: routed through the 4-lane overload via simd_make_long4_undef. */ static inline SIMD_CFUNC simd_long3 simd_abs(simd_long3 x) { 2601 return simd_make_long3(simd_abs(simd_make_long4_undef(x))); 2602 } 2603 2604 /* 4 and 8 lanes: AVX-512VL / AVX-512F intrinsic when available, otherwise simd_abs on x.lo and x.hi. */ static inline SIMD_CFUNC simd_long4 simd_abs(simd_long4 x) { 2605 #if defined __AVX512VL__ 2606 return _mm256_abs_epi64(x); 2607 #else 2608 return simd_make_long4(simd_abs(x.lo), simd_abs(x.hi)); 2609 #endif 2610 } 2611 2612 static inline SIMD_CFUNC simd_long8 simd_abs(simd_long8 x) { 2613 #if defined __AVX512F__ 2614 return _mm512_abs_epi64(x); 2615 #else 2616 return simd_make_long8(simd_abs(x.lo), simd_abs(x.hi)); 2617 #endif 2618 } 2619 2620 /* simd_abs, double-precision lanes: every width forwards directly to __tg_fabs. */ static inline SIMD_CFUNC simd_double2 simd_abs(simd_double2 x) { 2621 return __tg_fabs(x); 2622 } 2623 2624 static inline SIMD_CFUNC simd_double3 simd_abs(simd_double3 x) { 2625 return __tg_fabs(x); 2626 } 2627 2628 static inline SIMD_CFUNC simd_double4 simd_abs(simd_double4 x) { 2629 return __tg_fabs(x); 2630 } 2631 2632 static inline SIMD_CFUNC simd_double8 simd_abs(simd_double8 x) { 2633 return __tg_fabs(x); 2634 } 2635 2636 /* simd_min, signed char lanes. The 2-, 3- and 4-lane overloads pass both operands through simd_make_char8_undef and reuse the 8-lane overload. */ static inline SIMD_CFUNC simd_char2 simd_min(simd_char2 x, simd_char2 y) { 2637 return simd_make_char2(simd_min(simd_make_char8_undef(x), simd_make_char8_undef(y))); 2638 } 2639 2640 static inline SIMD_CFUNC simd_char3 simd_min(simd_char3 x, simd_char3 y) { 2641 return simd_make_char3(simd_min(simd_make_char8_undef(x), simd_make_char8_undef(y))); 2642 } 2643 2644 static inline SIMD_CFUNC simd_char4 simd_min(simd_char4 x, simd_char4 y) { 2645 return simd_make_char4(simd_min(simd_make_char8_undef(x), simd_make_char8_undef(y))); 2646 } 2647 2648 /* 8 lanes: NEON vmin_s8 on ARM targets, otherwise routed through the 16-lane overload. */ static inline SIMD_CFUNC simd_char8 simd_min(simd_char8 x, simd_char8 y) { 2649 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 2650 return vmin_s8(x, y); 2651 #else 2652 return simd_make_char8(simd_min(simd_make_char16_undef(x), simd_make_char16_undef(y))); 2653 #endif 2654 2655 } 2656 2657 static inline SIMD_CFUNC 
simd_char16 simd_min(simd_char16 x, simd_char16 y) { 2658 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 2659 return vminq_s8(x, y); 2660 #elif defined __SSE4_1__ 2661 return (simd_char16) _mm_min_epi8((__m128i)x, (__m128i)y); 2662 #else 2663 /* Generic fallback built on the lanewise y < x mask. NOTE(review): simd_bitselect is defined elsewhere; presumably it yields y where the mask is set and x elsewhere - confirm. */ return simd_bitselect(x, y, y < x); 2664 #endif 2665 } 2666 2667 /* 32 and 64 lanes: AVX2 / AVX-512BW intrinsic when available, otherwise the same simd_bitselect fallback. */ static inline SIMD_CFUNC simd_char32 simd_min(simd_char32 x, simd_char32 y) { 2668 #if defined __AVX2__ 2669 return _mm256_min_epi8(x, y); 2670 #else 2671 return simd_bitselect(x, y, y < x); 2672 #endif 2673 } 2674 2675 static inline SIMD_CFUNC simd_char64 simd_min(simd_char64 x, simd_char64 y) { 2676 #if defined __AVX512BW__ 2677 return _mm512_min_epi8(x, y); 2678 #else 2679 return simd_bitselect(x, y, y < x); 2680 #endif 2681 } 2682 2683 /* simd_min, unsigned char lanes: same structure as the signed overloads, using the unsigned intrinsics (vmin_u8, vminq_u8, _mm_min_epu8). */ static inline SIMD_CFUNC simd_uchar2 simd_min(simd_uchar2 x, simd_uchar2 y) { 2684 return simd_make_uchar2(simd_min(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y))); 2685 } 2686 2687 static inline SIMD_CFUNC simd_uchar3 simd_min(simd_uchar3 x, simd_uchar3 y) { 2688 return simd_make_uchar3(simd_min(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y))); 2689 } 2690 2691 static inline SIMD_CFUNC simd_uchar4 simd_min(simd_uchar4 x, simd_uchar4 y) { 2692 return simd_make_uchar4(simd_min(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y))); 2693 } 2694 2695 static inline SIMD_CFUNC simd_uchar8 simd_min(simd_uchar8 x, simd_uchar8 y) { 2696 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 2697 return vmin_u8(x, y); 2698 #else 2699 return simd_make_uchar8(simd_min(simd_make_uchar16_undef(x), simd_make_uchar16_undef(y))); 2700 #endif 2701 2702 } 2703 2704 static inline SIMD_CFUNC simd_uchar16 simd_min(simd_uchar16 x, simd_uchar16 y) { 2705 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 2706 return vminq_u8(x, y); 2707 #elif defined __SSE4_1__ 2708 return (simd_uchar16) _mm_min_epu8((__m128i)x, (__m128i)y); 2709 #else 2710 return simd_bitselect(x, y, y < x); 2711 #endif 2712 } 2713 
2714 /* simd_min, unsigned char, 32 and 64 lanes: AVX2 / AVX-512BW intrinsic when available, otherwise a fallback on the lanewise y < x mask via simd_bitselect (defined elsewhere). */ static inline SIMD_CFUNC simd_uchar32 simd_min(simd_uchar32 x, simd_uchar32 y) { 2715 #if defined __AVX2__ 2716 return _mm256_min_epu8(x, y); 2717 #else 2718 return simd_bitselect(x, y, y < x); 2719 #endif 2720 } 2721 2722 static inline SIMD_CFUNC simd_uchar64 simd_min(simd_uchar64 x, simd_uchar64 y) { 2723 #if defined __AVX512BW__ 2724 return _mm512_min_epu8(x, y); 2725 #else 2726 return simd_bitselect(x, y, y < x); 2727 #endif 2728 } 2729 2730 /* simd_min, signed 16-bit lanes. The 2- and 3-lane overloads go through the 4-lane overload via simd_make_short4_undef. */ static inline SIMD_CFUNC simd_short2 simd_min(simd_short2 x, simd_short2 y) { 2731 return simd_make_short2(simd_min(simd_make_short4_undef(x), simd_make_short4_undef(y))); 2732 } 2733 2734 static inline SIMD_CFUNC simd_short3 simd_min(simd_short3 x, simd_short3 y) { 2735 return simd_make_short3(simd_min(simd_make_short4_undef(x), simd_make_short4_undef(y))); 2736 } 2737 2738 /* 4 lanes: NEON vmin_s16 on ARM targets, otherwise routed through the 8-lane overload. */ static inline SIMD_CFUNC simd_short4 simd_min(simd_short4 x, simd_short4 y) { 2739 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 2740 return vmin_s16(x, y); 2741 #else 2742 return simd_make_short4(simd_min(simd_make_short8_undef(x), simd_make_short8_undef(y))); 2743 #endif 2744 2745 } 2746 2747 /* 8 lanes: NEON or SSE4.1 intrinsic when available, otherwise the simd_bitselect fallback. */ static inline SIMD_CFUNC simd_short8 simd_min(simd_short8 x, simd_short8 y) { 2748 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 2749 return vminq_s16(x, y); 2750 #elif defined __SSE4_1__ 2751 return (simd_short8) _mm_min_epi16((__m128i)x, (__m128i)y); 2752 #else 2753 return simd_bitselect(x, y, y < x); 2754 #endif 2755 } 2756 2757 static inline SIMD_CFUNC simd_short16 simd_min(simd_short16 x, simd_short16 y) { 2758 #if defined __AVX2__ 2759 return _mm256_min_epi16(x, y); 2760 #else 2761 return simd_bitselect(x, y, y < x); 2762 #endif 2763 } 2764 2765 static inline SIMD_CFUNC simd_short32 simd_min(simd_short32 x, simd_short32 y) { 2766 #if defined __AVX512BW__ 2767 return _mm512_min_epi16(x, y); 2768 #else 2769 return simd_bitselect(x, y, y < x); 2770 #endif 2771 } 2772 2773 static inline SIMD_CFUNC simd_ushort2 
simd_min(simd_ushort2 x, simd_ushort2 y) { 2774 return simd_make_ushort2(simd_min(simd_make_ushort4_undef(x), simd_make_ushort4_undef(y))); 2775 } 2776 2777 static inline SIMD_CFUNC simd_ushort3 simd_min(simd_ushort3 x, simd_ushort3 y) { 2778 return simd_make_ushort3(simd_min(simd_make_ushort4_undef(x), simd_make_ushort4_undef(y))); 2779 } 2780 2781 /* simd_min, unsigned 16-bit, 4 lanes: NEON vmin_u16 on ARM targets, otherwise routed through the 8-lane overload. */ static inline SIMD_CFUNC simd_ushort4 simd_min(simd_ushort4 x, simd_ushort4 y) { 2782 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 2783 return vmin_u16(x, y); 2784 #else 2785 return simd_make_ushort4(simd_min(simd_make_ushort8_undef(x), simd_make_ushort8_undef(y))); 2786 #endif 2787 2788 } 2789 2790 /* 8 lanes: NEON or SSE4.1 intrinsic when available, otherwise a fallback on the lanewise y < x mask via simd_bitselect (defined elsewhere). */ static inline SIMD_CFUNC simd_ushort8 simd_min(simd_ushort8 x, simd_ushort8 y) { 2791 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 2792 return vminq_u16(x, y); 2793 #elif defined __SSE4_1__ 2794 return (simd_ushort8) _mm_min_epu16((__m128i)x, (__m128i)y); 2795 #else 2796 return simd_bitselect(x, y, y < x); 2797 #endif 2798 } 2799 2800 static inline SIMD_CFUNC simd_ushort16 simd_min(simd_ushort16 x, simd_ushort16 y) { 2801 #if defined __AVX2__ 2802 return _mm256_min_epu16(x, y); 2803 #else 2804 return simd_bitselect(x, y, y < x); 2805 #endif 2806 } 2807 2808 static inline SIMD_CFUNC simd_ushort32 simd_min(simd_ushort32 x, simd_ushort32 y) { 2809 #if defined __AVX512BW__ 2810 return _mm512_min_epu16(x, y); 2811 #else 2812 return simd_bitselect(x, y, y < x); 2813 #endif 2814 } 2815 2816 /* Scalar _Float16 overload: forwards to __fminf16. */ static inline SIMD_CFUNC _Float16 simd_min(_Float16 x, _Float16 y) { 2817 return __fminf16(x,y); 2818 } 2819 2820 /* simd_min, half-precision vectors: every width forwards directly to __tg_fmin. */ static inline SIMD_CFUNC simd_half2 simd_min(simd_half2 x, simd_half2 y) { 2821 return __tg_fmin(x,y); 2822 } 2823 2824 static inline SIMD_CFUNC simd_half3 simd_min(simd_half3 x, simd_half3 y) { 2825 return __tg_fmin(x,y); 2826 } 2827 2828 static inline SIMD_CFUNC simd_half4 simd_min(simd_half4 x, simd_half4 y) { 2829 return __tg_fmin(x,y); 2830 } 2831 2832 static inline SIMD_CFUNC simd_half8 
simd_min(simd_half8 x, simd_half8 y) { 2833 return __tg_fmin(x,y); 2834 } 2835 2836 static inline SIMD_CFUNC simd_half16 simd_min(simd_half16 x, simd_half16 y) { 2837 return __tg_fmin(x,y); 2838 } 2839 2840 static inline SIMD_CFUNC simd_half32 simd_min(simd_half32 x, simd_half32 y) { 2841 return __tg_fmin(x,y); 2842 } 2843 2844 /* simd_min, signed 32-bit lanes. 2 lanes: NEON vmin_s32 on ARM targets, otherwise routed through the 4-lane overload. */ static inline SIMD_CFUNC simd_int2 simd_min(simd_int2 x, simd_int2 y) { 2845 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 2846 return vmin_s32(x, y); 2847 #else 2848 return simd_make_int2(simd_min(simd_make_int4_undef(x), simd_make_int4_undef(y))); 2849 #endif 2850 2851 } 2852 2853 static inline SIMD_CFUNC simd_int3 simd_min(simd_int3 x, simd_int3 y) { 2854 return simd_make_int3(simd_min(simd_make_int4_undef(x), simd_make_int4_undef(y))); 2855 } 2856 2857 /* 4 lanes: NEON or SSE4.1 intrinsic when available, otherwise a fallback on the lanewise y < x mask via simd_bitselect (defined elsewhere). */ static inline SIMD_CFUNC simd_int4 simd_min(simd_int4 x, simd_int4 y) { 2858 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 2859 return vminq_s32(x, y); 2860 #elif defined __SSE4_1__ 2861 return (simd_int4) _mm_min_epi32((__m128i)x, (__m128i)y); 2862 #else 2863 return simd_bitselect(x, y, y < x); 2864 #endif 2865 } 2866 2867 /* 8 and 16 lanes: AVX2 / AVX-512F intrinsic when available, otherwise the same simd_bitselect fallback. */ static inline SIMD_CFUNC simd_int8 simd_min(simd_int8 x, simd_int8 y) { 2868 #if defined __AVX2__ 2869 return _mm256_min_epi32(x, y); 2870 #else 2871 return simd_bitselect(x, y, y < x); 2872 #endif 2873 } 2874 2875 static inline SIMD_CFUNC simd_int16 simd_min(simd_int16 x, simd_int16 y) { 2876 #if defined __AVX512F__ 2877 return _mm512_min_epi32(x, y); 2878 #else 2879 return simd_bitselect(x, y, y < x); 2880 #endif 2881 } 2882 2883 /* simd_min, unsigned 32-bit lanes: same structure as the signed overloads, using the unsigned intrinsics. */ static inline SIMD_CFUNC simd_uint2 simd_min(simd_uint2 x, simd_uint2 y) { 2884 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 2885 return vmin_u32(x, y); 2886 #else 2887 return simd_make_uint2(simd_min(simd_make_uint4_undef(x), simd_make_uint4_undef(y))); 2888 #endif 2889 2890 } 2891 2892 static inline SIMD_CFUNC simd_uint3 simd_min(simd_uint3 x, simd_uint3 y) { 2893 return 
simd_make_uint3(simd_min(simd_make_uint4_undef(x), simd_make_uint4_undef(y))); 2894 } 2895 2896 /* simd_min, unsigned 32-bit, 4 lanes: NEON or SSE4.1 intrinsic when available, otherwise a fallback on the lanewise y < x mask via simd_bitselect (defined elsewhere). */ static inline SIMD_CFUNC simd_uint4 simd_min(simd_uint4 x, simd_uint4 y) { 2897 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 2898 return vminq_u32(x, y); 2899 #elif defined __SSE4_1__ 2900 return (simd_uint4) _mm_min_epu32((__m128i)x, (__m128i)y); 2901 #else 2902 return simd_bitselect(x, y, y < x); 2903 #endif 2904 } 2905 2906 static inline SIMD_CFUNC simd_uint8 simd_min(simd_uint8 x, simd_uint8 y) { 2907 #if defined __AVX2__ 2908 return _mm256_min_epu32(x, y); 2909 #else 2910 return simd_bitselect(x, y, y < x); 2911 #endif 2912 } 2913 2914 static inline SIMD_CFUNC simd_uint16 simd_min(simd_uint16 x, simd_uint16 y) { 2915 #if defined __AVX512F__ 2916 return _mm512_min_epu32(x, y); 2917 #else 2918 return simd_bitselect(x, y, y < x); 2919 #endif 2920 } 2921 2922 /* simd_min, single precision: the scalar overload and every vector width forward directly to __tg_fmin. */ static inline SIMD_CFUNC float simd_min(float x, float y) { 2923 return __tg_fmin(x,y); 2924 } 2925 2926 static inline SIMD_CFUNC simd_float2 simd_min(simd_float2 x, simd_float2 y) { 2927 return __tg_fmin(x,y); 2928 } 2929 2930 static inline SIMD_CFUNC simd_float3 simd_min(simd_float3 x, simd_float3 y) { 2931 return __tg_fmin(x,y); 2932 } 2933 2934 static inline SIMD_CFUNC simd_float4 simd_min(simd_float4 x, simd_float4 y) { 2935 return __tg_fmin(x,y); 2936 } 2937 2938 static inline SIMD_CFUNC simd_float8 simd_min(simd_float8 x, simd_float8 y) { 2939 return __tg_fmin(x,y); 2940 } 2941 2942 static inline SIMD_CFUNC simd_float16 simd_min(simd_float16 x, simd_float16 y) { 2943 return __tg_fmin(x,y); 2944 } 2945 2946 /* simd_min, signed 64-bit lanes: there is no NEON branch here; the intrinsic path needs AVX-512VL and everything else uses the simd_bitselect fallback. */ static inline SIMD_CFUNC simd_long2 simd_min(simd_long2 x, simd_long2 y) { 2947 #if defined __AVX512VL__ 2948 return _mm_min_epi64(x, y); 2949 #else 2950 return simd_bitselect(x, y, y < x); 2951 #endif 2952 } 2953 2954 /* 3 lanes: routed through the 4-lane overload via simd_make_long4_undef. */ static inline SIMD_CFUNC simd_long3 simd_min(simd_long3 x, simd_long3 y) { 2955 return simd_make_long3(simd_min(simd_make_long4_undef(x), simd_make_long4_undef(y))); 2956 
} 2957 2958 /* simd_min, signed 64-bit, 4 and 8 lanes: AVX-512VL / AVX-512F intrinsic when available, otherwise a fallback on the lanewise y < x mask via simd_bitselect (defined elsewhere). */ static inline SIMD_CFUNC simd_long4 simd_min(simd_long4 x, simd_long4 y) { 2959 #if defined __AVX512VL__ 2960 return _mm256_min_epi64(x, y); 2961 #else 2962 return simd_bitselect(x, y, y < x); 2963 #endif 2964 } 2965 2966 static inline SIMD_CFUNC simd_long8 simd_min(simd_long8 x, simd_long8 y) { 2967 #if defined __AVX512F__ 2968 return _mm512_min_epi64(x, y); 2969 #else 2970 return simd_bitselect(x, y, y < x); 2971 #endif 2972 } 2973 2974 /* simd_min, unsigned 64-bit lanes: same structure as the signed overloads, using the epu64 intrinsics. */ static inline SIMD_CFUNC simd_ulong2 simd_min(simd_ulong2 x, simd_ulong2 y) { 2975 #if defined __AVX512VL__ 2976 return _mm_min_epu64(x, y); 2977 #else 2978 return simd_bitselect(x, y, y < x); 2979 #endif 2980 } 2981 2982 static inline SIMD_CFUNC simd_ulong3 simd_min(simd_ulong3 x, simd_ulong3 y) { 2983 return simd_make_ulong3(simd_min(simd_make_ulong4_undef(x), simd_make_ulong4_undef(y))); 2984 } 2985 2986 static inline SIMD_CFUNC simd_ulong4 simd_min(simd_ulong4 x, simd_ulong4 y) { 2987 #if defined __AVX512VL__ 2988 return _mm256_min_epu64(x, y); 2989 #else 2990 return simd_bitselect(x, y, y < x); 2991 #endif 2992 } 2993 2994 static inline SIMD_CFUNC simd_ulong8 simd_min(simd_ulong8 x, simd_ulong8 y) { 2995 #if defined __AVX512F__ 2996 return _mm512_min_epu64(x, y); 2997 #else 2998 return simd_bitselect(x, y, y < x); 2999 #endif 3000 } 3001 3002 /* simd_min, double precision: the scalar overload and every vector width forward directly to __tg_fmin. */ static inline SIMD_CFUNC double simd_min(double x, double y) { 3003 return __tg_fmin(x,y); 3004 } 3005 3006 static inline SIMD_CFUNC simd_double2 simd_min(simd_double2 x, simd_double2 y) { 3007 return __tg_fmin(x,y); 3008 } 3009 3010 static inline SIMD_CFUNC simd_double3 simd_min(simd_double3 x, simd_double3 y) { 3011 return __tg_fmin(x,y); 3012 } 3013 3014 static inline SIMD_CFUNC simd_double4 simd_min(simd_double4 x, simd_double4 y) { 3015 return __tg_fmin(x,y); 3016 } 3017 3018 static inline SIMD_CFUNC simd_double8 simd_min(simd_double8 x, simd_double8 y) { 3019 return __tg_fmin(x,y); 3020 } 3021 3022 /* simd_max, signed char lanes. The 2-, 3- and 4-lane overloads pass both operands through simd_make_char8_undef and reuse the 8-lane overload. */ static inline SIMD_CFUNC simd_char2 simd_max(simd_char2 x, simd_char2 y) 
{ 3023 return simd_make_char2(simd_max(simd_make_char8_undef(x), simd_make_char8_undef(y))); 3024 } 3025 3026 static inline SIMD_CFUNC simd_char3 simd_max(simd_char3 x, simd_char3 y) { 3027 return simd_make_char3(simd_max(simd_make_char8_undef(x), simd_make_char8_undef(y))); 3028 } 3029 3030 static inline SIMD_CFUNC simd_char4 simd_max(simd_char4 x, simd_char4 y) { 3031 return simd_make_char4(simd_max(simd_make_char8_undef(x), simd_make_char8_undef(y))); 3032 } 3033 3034 static inline SIMD_CFUNC simd_char8 simd_max(simd_char8 x, simd_char8 y) { 3035 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 3036 return vmax_s8(x, y); 3037 #else 3038 return simd_make_char8(simd_max(simd_make_char16_undef(x), simd_make_char16_undef(y))); 3039 #endif 3040 3041 } 3042 3043 static inline SIMD_CFUNC simd_char16 simd_max(simd_char16 x, simd_char16 y) { 3044 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 3045 return vmaxq_s8(x, y); 3046 #elif defined __SSE4_1__ 3047 return (simd_char16) _mm_max_epi8((__m128i)x, (__m128i)y); 3048 #else 3049 return simd_bitselect(x, y, x < y); 3050 #endif 3051 } 3052 3053 static inline SIMD_CFUNC simd_char32 simd_max(simd_char32 x, simd_char32 y) { 3054 #if defined __AVX2__ 3055 return _mm256_max_epi8(x, y); 3056 #else 3057 return simd_bitselect(x, y, x < y); 3058 #endif 3059 } 3060 3061 static inline SIMD_CFUNC simd_char64 simd_max(simd_char64 x, simd_char64 y) { 3062 #if defined __AVX512BW__ 3063 return _mm512_max_epi8(x, y); 3064 #else 3065 return simd_bitselect(x, y, x < y); 3066 #endif 3067 } 3068 3069 static inline SIMD_CFUNC simd_uchar2 simd_max(simd_uchar2 x, simd_uchar2 y) { 3070 return simd_make_uchar2(simd_max(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y))); 3071 } 3072 3073 static inline SIMD_CFUNC simd_uchar3 simd_max(simd_uchar3 x, simd_uchar3 y) { 3074 return simd_make_uchar3(simd_max(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y))); 3075 } 3076 3077 static inline SIMD_CFUNC simd_uchar4 
simd_max(simd_uchar4 x, simd_uchar4 y) { 3078 return simd_make_uchar4(simd_max(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y))); 3079 } 3080 3081 static inline SIMD_CFUNC simd_uchar8 simd_max(simd_uchar8 x, simd_uchar8 y) { 3082 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 3083 return vmax_u8(x, y); 3084 #else 3085 return simd_make_uchar8(simd_max(simd_make_uchar16_undef(x), simd_make_uchar16_undef(y))); 3086 #endif 3087 3088 } 3089 3090 static inline SIMD_CFUNC simd_uchar16 simd_max(simd_uchar16 x, simd_uchar16 y) { 3091 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 3092 return vmaxq_u8(x, y); 3093 #elif defined __SSE4_1__ 3094 return (simd_uchar16) _mm_max_epu8((__m128i)x, (__m128i)y); 3095 #else 3096 return simd_bitselect(x, y, x < y); 3097 #endif 3098 } 3099 3100 static inline SIMD_CFUNC simd_uchar32 simd_max(simd_uchar32 x, simd_uchar32 y) { 3101 #if defined __AVX2__ 3102 return _mm256_max_epu8(x, y); 3103 #else 3104 return simd_bitselect(x, y, x < y); 3105 #endif 3106 } 3107 3108 static inline SIMD_CFUNC simd_uchar64 simd_max(simd_uchar64 x, simd_uchar64 y) { 3109 #if defined __AVX512BW__ 3110 return _mm512_max_epu8(x, y); 3111 #else 3112 return simd_bitselect(x, y, x < y); 3113 #endif 3114 } 3115 3116 static inline SIMD_CFUNC simd_short2 simd_max(simd_short2 x, simd_short2 y) { 3117 return simd_make_short2(simd_max(simd_make_short4_undef(x), simd_make_short4_undef(y))); 3118 } 3119 3120 static inline SIMD_CFUNC simd_short3 simd_max(simd_short3 x, simd_short3 y) { 3121 return simd_make_short3(simd_max(simd_make_short4_undef(x), simd_make_short4_undef(y))); 3122 } 3123 3124 static inline SIMD_CFUNC simd_short4 simd_max(simd_short4 x, simd_short4 y) { 3125 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 3126 return vmax_s16(x, y); 3127 #else 3128 return simd_make_short4(simd_max(simd_make_short8_undef(x), simd_make_short8_undef(y))); 3129 #endif 3130 3131 } 3132 3133 static inline SIMD_CFUNC simd_short8 
simd_max(simd_short8 x, simd_short8 y) { 3134 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 3135 return vmaxq_s16(x, y); 3136 #elif defined __SSE4_1__ 3137 return (simd_short8) _mm_max_epi16((__m128i)x, (__m128i)y); 3138 #else 3139 return simd_bitselect(x, y, x < y); 3140 #endif 3141 } 3142 3143 static inline SIMD_CFUNC simd_short16 simd_max(simd_short16 x, simd_short16 y) { 3144 #if defined __AVX2__ 3145 return _mm256_max_epi16(x, y); 3146 #else 3147 return simd_bitselect(x, y, x < y); 3148 #endif 3149 } 3150 3151 static inline SIMD_CFUNC simd_short32 simd_max(simd_short32 x, simd_short32 y) { 3152 #if defined __AVX512BW__ 3153 return _mm512_max_epi16(x, y); 3154 #else 3155 return simd_bitselect(x, y, x < y); 3156 #endif 3157 } 3158 3159 static inline SIMD_CFUNC simd_ushort2 simd_max(simd_ushort2 x, simd_ushort2 y) { 3160 return simd_make_ushort2(simd_max(simd_make_ushort4_undef(x), simd_make_ushort4_undef(y))); 3161 } 3162 3163 static inline SIMD_CFUNC simd_ushort3 simd_max(simd_ushort3 x, simd_ushort3 y) { 3164 return simd_make_ushort3(simd_max(simd_make_ushort4_undef(x), simd_make_ushort4_undef(y))); 3165 } 3166 3167 static inline SIMD_CFUNC simd_ushort4 simd_max(simd_ushort4 x, simd_ushort4 y) { 3168 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 3169 return vmax_u16(x, y); 3170 #else 3171 return simd_make_ushort4(simd_max(simd_make_ushort8_undef(x), simd_make_ushort8_undef(y))); 3172 #endif 3173 3174 } 3175 3176 static inline SIMD_CFUNC simd_ushort8 simd_max(simd_ushort8 x, simd_ushort8 y) { 3177 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 3178 return vmaxq_u16(x, y); 3179 #elif defined __SSE4_1__ 3180 return (simd_ushort8) _mm_max_epu16((__m128i)x, (__m128i)y); 3181 #else 3182 return simd_bitselect(x, y, x < y); 3183 #endif 3184 } 3185 3186 static inline SIMD_CFUNC simd_ushort16 simd_max(simd_ushort16 x, simd_ushort16 y) { 3187 #if defined __AVX2__ 3188 return _mm256_max_epu16(x, y); 3189 #else 3190 
return simd_bitselect(x, y, x < y); 3191 #endif 3192 } 3193 3194 static inline SIMD_CFUNC simd_ushort32 simd_max(simd_ushort32 x, simd_ushort32 y) { 3195 #if defined __AVX512BW__ 3196 return _mm512_max_epu16(x, y); 3197 #else 3198 return simd_bitselect(x, y, x < y); 3199 #endif 3200 } 3201 3202 static inline SIMD_CFUNC _Float16 simd_max(_Float16 x, _Float16 y) { 3203 return __fmaxf16(x,y); 3204 } 3205 3206 static inline SIMD_CFUNC simd_half2 simd_max(simd_half2 x, simd_half2 y) { 3207 return __tg_fmax(x,y); 3208 } 3209 3210 static inline SIMD_CFUNC simd_half3 simd_max(simd_half3 x, simd_half3 y) { 3211 return __tg_fmax(x,y); 3212 } 3213 3214 static inline SIMD_CFUNC simd_half4 simd_max(simd_half4 x, simd_half4 y) { 3215 return __tg_fmax(x,y); 3216 } 3217 3218 static inline SIMD_CFUNC simd_half8 simd_max(simd_half8 x, simd_half8 y) { 3219 return __tg_fmax(x,y); 3220 } 3221 3222 static inline SIMD_CFUNC simd_half16 simd_max(simd_half16 x, simd_half16 y) { 3223 return __tg_fmax(x,y); 3224 } 3225 3226 static inline SIMD_CFUNC simd_half32 simd_max(simd_half32 x, simd_half32 y) { 3227 return __tg_fmax(x,y); 3228 } 3229 3230 static inline SIMD_CFUNC simd_int2 simd_max(simd_int2 x, simd_int2 y) { 3231 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 3232 return vmax_s32(x, y); 3233 #else 3234 return simd_make_int2(simd_max(simd_make_int4_undef(x), simd_make_int4_undef(y))); 3235 #endif 3236 3237 } 3238 3239 static inline SIMD_CFUNC simd_int3 simd_max(simd_int3 x, simd_int3 y) { 3240 return simd_make_int3(simd_max(simd_make_int4_undef(x), simd_make_int4_undef(y))); 3241 } 3242 3243 static inline SIMD_CFUNC simd_int4 simd_max(simd_int4 x, simd_int4 y) { 3244 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 3245 return vmaxq_s32(x, y); 3246 #elif defined __SSE4_1__ 3247 return (simd_int4) _mm_max_epi32((__m128i)x, (__m128i)y); 3248 #else 3249 return simd_bitselect(x, y, x < y); 3250 #endif 3251 } 3252 3253 static inline SIMD_CFUNC simd_int8 
simd_max(simd_int8 x, simd_int8 y) { 3254 #if defined __AVX2__ 3255 return _mm256_max_epi32(x, y); 3256 #else 3257 return simd_bitselect(x, y, x < y); 3258 #endif 3259 } 3260 3261 static inline SIMD_CFUNC simd_int16 simd_max(simd_int16 x, simd_int16 y) { 3262 #if defined __AVX512F__ 3263 return _mm512_max_epi32(x, y); 3264 #else 3265 return simd_bitselect(x, y, x < y); 3266 #endif 3267 } 3268 3269 static inline SIMD_CFUNC simd_uint2 simd_max(simd_uint2 x, simd_uint2 y) { 3270 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 3271 return vmax_u32(x, y); 3272 #else 3273 return simd_make_uint2(simd_max(simd_make_uint4_undef(x), simd_make_uint4_undef(y))); 3274 #endif 3275 3276 } 3277 3278 static inline SIMD_CFUNC simd_uint3 simd_max(simd_uint3 x, simd_uint3 y) { 3279 return simd_make_uint3(simd_max(simd_make_uint4_undef(x), simd_make_uint4_undef(y))); 3280 } 3281 3282 static inline SIMD_CFUNC simd_uint4 simd_max(simd_uint4 x, simd_uint4 y) { 3283 #if defined __arm__ || defined __arm64__ || defined __aarch64__ 3284 return vmaxq_u32(x, y); 3285 #elif defined __SSE4_1__ 3286 return (simd_uint4) _mm_max_epu32((__m128i)x, (__m128i)y); 3287 #else 3288 return simd_bitselect(x, y, x < y); 3289 #endif 3290 } 3291 3292 static inline SIMD_CFUNC simd_uint8 simd_max(simd_uint8 x, simd_uint8 y) { 3293 #if defined __AVX2__ 3294 return _mm256_max_epu32(x, y); 3295 #else 3296 return simd_bitselect(x, y, x < y); 3297 #endif 3298 } 3299 3300 static inline SIMD_CFUNC simd_uint16 simd_max(simd_uint16 x, simd_uint16 y) { 3301 #if defined __AVX512F__ 3302 return _mm512_max_epu32(x, y); 3303 #else 3304 return simd_bitselect(x, y, x < y); 3305 #endif 3306 } 3307 3308 static inline SIMD_CFUNC float simd_max(float x, float y) { 3309 return __tg_fmax(x,y); 3310 } 3311 3312 static inline SIMD_CFUNC simd_float2 simd_max(simd_float2 x, simd_float2 y) { 3313 return __tg_fmax(x,y); 3314 } 3315 3316 static inline SIMD_CFUNC simd_float3 simd_max(simd_float3 x, simd_float3 y) { 3317 return 
__tg_fmax(x,y); 3318 } 3319 3320 static inline SIMD_CFUNC simd_float4 simd_max(simd_float4 x, simd_float4 y) { 3321 return __tg_fmax(x,y); 3322 } 3323 3324 static inline SIMD_CFUNC simd_float8 simd_max(simd_float8 x, simd_float8 y) { 3325 return __tg_fmax(x,y); 3326 } 3327 3328 static inline SIMD_CFUNC simd_float16 simd_max(simd_float16 x, simd_float16 y) { 3329 return __tg_fmax(x,y); 3330 } 3331 3332 static inline SIMD_CFUNC simd_long2 simd_max(simd_long2 x, simd_long2 y) { 3333 #if defined __AVX512VL__ 3334 return _mm_max_epi64(x, y); 3335 #else 3336 return simd_bitselect(x, y, x < y); 3337 #endif 3338 } 3339 3340 static inline SIMD_CFUNC simd_long3 simd_max(simd_long3 x, simd_long3 y) { 3341 return simd_make_long3(simd_max(simd_make_long4_undef(x), simd_make_long4_undef(y))); 3342 } 3343 3344 static inline SIMD_CFUNC simd_long4 simd_max(simd_long4 x, simd_long4 y) { 3345 #if defined __AVX512VL__ 3346 return _mm256_max_epi64(x, y); 3347 #else 3348 return simd_bitselect(x, y, x < y); 3349 #endif 3350 } 3351 3352 static inline SIMD_CFUNC simd_long8 simd_max(simd_long8 x, simd_long8 y) { 3353 #if defined __AVX512F__ 3354 return _mm512_max_epi64(x, y); 3355 #else 3356 return simd_bitselect(x, y, x < y); 3357 #endif 3358 } 3359 3360 static inline SIMD_CFUNC simd_ulong2 simd_max(simd_ulong2 x, simd_ulong2 y) { 3361 #if defined __AVX512VL__ 3362 return _mm_max_epu64(x, y); 3363 #else 3364 return simd_bitselect(x, y, x < y); 3365 #endif 3366 } 3367 3368 static inline SIMD_CFUNC simd_ulong3 simd_max(simd_ulong3 x, simd_ulong3 y) { 3369 return simd_make_ulong3(simd_max(simd_make_ulong4_undef(x), simd_make_ulong4_undef(y))); 3370 } 3371 3372 static inline SIMD_CFUNC simd_ulong4 simd_max(simd_ulong4 x, simd_ulong4 y) { 3373 #if defined __AVX512VL__ 3374 return _mm256_max_epu64(x, y); 3375 #else 3376 return simd_bitselect(x, y, x < y); 3377 #endif 3378 } 3379 3380 static inline SIMD_CFUNC simd_ulong8 simd_max(simd_ulong8 x, simd_ulong8 y) { 3381 #if defined __AVX512F__ 3382 
return _mm512_max_epu64(x, y); 3383 #else 3384 return simd_bitselect(x, y, x < y); 3385 #endif 3386 } 3387 3388 static inline SIMD_CFUNC double simd_max(double x, double y) { 3389 return __tg_fmax(x,y); 3390 } 3391 3392 static inline SIMD_CFUNC simd_double2 simd_max(simd_double2 x, simd_double2 y) { 3393 return __tg_fmax(x,y); 3394 } 3395 3396 static inline SIMD_CFUNC simd_double3 simd_max(simd_double3 x, simd_double3 y) { 3397 return __tg_fmax(x,y); 3398 } 3399 3400 static inline SIMD_CFUNC simd_double4 simd_max(simd_double4 x, simd_double4 y) { 3401 return __tg_fmax(x,y); 3402 } 3403 3404 static inline SIMD_CFUNC simd_double8 simd_max(simd_double8 x, simd_double8 y) { 3405 return __tg_fmax(x,y); 3406 } 3407 3408 static inline SIMD_CFUNC simd_char2 simd_clamp(simd_char2 x, simd_char2 min, simd_char2 max) { 3409 return simd_min(simd_max(x, min), max); 3410 } 3411 3412 static inline SIMD_CFUNC simd_char3 simd_clamp(simd_char3 x, simd_char3 min, simd_char3 max) { 3413 return simd_min(simd_max(x, min), max); 3414 } 3415 3416 static inline SIMD_CFUNC simd_char4 simd_clamp(simd_char4 x, simd_char4 min, simd_char4 max) { 3417 return simd_min(simd_max(x, min), max); 3418 } 3419 3420 static inline SIMD_CFUNC simd_char8 simd_clamp(simd_char8 x, simd_char8 min, simd_char8 max) { 3421 return simd_min(simd_max(x, min), max); 3422 } 3423 3424 static inline SIMD_CFUNC simd_char16 simd_clamp(simd_char16 x, simd_char16 min, simd_char16 max) { 3425 return simd_min(simd_max(x, min), max); 3426 } 3427 3428 static inline SIMD_CFUNC simd_char32 simd_clamp(simd_char32 x, simd_char32 min, simd_char32 max) { 3429 return simd_min(simd_max(x, min), max); 3430 } 3431 3432 static inline SIMD_CFUNC simd_char64 simd_clamp(simd_char64 x, simd_char64 min, simd_char64 max) { 3433 return simd_min(simd_max(x, min), max); 3434 } 3435 3436 static inline SIMD_CFUNC simd_uchar2 simd_clamp(simd_uchar2 x, simd_uchar2 min, simd_uchar2 max) { 3437 return simd_min(simd_max(x, min), max); 3438 } 3439 3440 
static inline SIMD_CFUNC simd_uchar3 simd_clamp(simd_uchar3 x, simd_uchar3 min, simd_uchar3 max) { 3441 return simd_min(simd_max(x, min), max); 3442 } 3443 3444 static inline SIMD_CFUNC simd_uchar4 simd_clamp(simd_uchar4 x, simd_uchar4 min, simd_uchar4 max) { 3445 return simd_min(simd_max(x, min), max); 3446 } 3447 3448 static inline SIMD_CFUNC simd_uchar8 simd_clamp(simd_uchar8 x, simd_uchar8 min, simd_uchar8 max) { 3449 return simd_min(simd_max(x, min), max); 3450 } 3451 3452 static inline SIMD_CFUNC simd_uchar16 simd_clamp(simd_uchar16 x, simd_uchar16 min, simd_uchar16 max) { 3453 return simd_min(simd_max(x, min), max); 3454 } 3455 3456 static inline SIMD_CFUNC simd_uchar32 simd_clamp(simd_uchar32 x, simd_uchar32 min, simd_uchar32 max) { 3457 return simd_min(simd_max(x, min), max); 3458 } 3459 3460 static inline SIMD_CFUNC simd_uchar64 simd_clamp(simd_uchar64 x, simd_uchar64 min, simd_uchar64 max) { 3461 return simd_min(simd_max(x, min), max); 3462 } 3463 3464 static inline SIMD_CFUNC simd_short2 simd_clamp(simd_short2 x, simd_short2 min, simd_short2 max) { 3465 return simd_min(simd_max(x, min), max); 3466 } 3467 3468 static inline SIMD_CFUNC simd_short3 simd_clamp(simd_short3 x, simd_short3 min, simd_short3 max) { 3469 return simd_min(simd_max(x, min), max); 3470 } 3471 3472 static inline SIMD_CFUNC simd_short4 simd_clamp(simd_short4 x, simd_short4 min, simd_short4 max) { 3473 return simd_min(simd_max(x, min), max); 3474 } 3475 3476 static inline SIMD_CFUNC simd_short8 simd_clamp(simd_short8 x, simd_short8 min, simd_short8 max) { 3477 return simd_min(simd_max(x, min), max); 3478 } 3479 3480 static inline SIMD_CFUNC simd_short16 simd_clamp(simd_short16 x, simd_short16 min, simd_short16 max) { 3481 return simd_min(simd_max(x, min), max); 3482 } 3483 3484 static inline SIMD_CFUNC simd_short32 simd_clamp(simd_short32 x, simd_short32 min, simd_short32 max) { 3485 return simd_min(simd_max(x, min), max); 3486 } 3487 3488 static inline SIMD_CFUNC simd_ushort2 
simd_clamp(simd_ushort2 x, simd_ushort2 min, simd_ushort2 max) { 3489 return simd_min(simd_max(x, min), max); 3490 } 3491 3492 static inline SIMD_CFUNC simd_ushort3 simd_clamp(simd_ushort3 x, simd_ushort3 min, simd_ushort3 max) { 3493 return simd_min(simd_max(x, min), max); 3494 } 3495 3496 static inline SIMD_CFUNC simd_ushort4 simd_clamp(simd_ushort4 x, simd_ushort4 min, simd_ushort4 max) { 3497 return simd_min(simd_max(x, min), max); 3498 } 3499 3500 static inline SIMD_CFUNC simd_ushort8 simd_clamp(simd_ushort8 x, simd_ushort8 min, simd_ushort8 max) { 3501 return simd_min(simd_max(x, min), max); 3502 } 3503 3504 static inline SIMD_CFUNC simd_ushort16 simd_clamp(simd_ushort16 x, simd_ushort16 min, simd_ushort16 max) { 3505 return simd_min(simd_max(x, min), max); 3506 } 3507 3508 static inline SIMD_CFUNC simd_ushort32 simd_clamp(simd_ushort32 x, simd_ushort32 min, simd_ushort32 max) { 3509 return simd_min(simd_max(x, min), max); 3510 } 3511 3512 static inline SIMD_CFUNC _Float16 simd_clamp(_Float16 x, _Float16 min, _Float16 max) { 3513 return simd_min(simd_max(x, min), max); 3514 } 3515 3516 static inline SIMD_CFUNC simd_half2 simd_clamp(simd_half2 x, simd_half2 min, simd_half2 max) { 3517 return simd_min(simd_max(x, min), max); 3518 } 3519 3520 static inline SIMD_CFUNC simd_half3 simd_clamp(simd_half3 x, simd_half3 min, simd_half3 max) { 3521 return simd_min(simd_max(x, min), max); 3522 } 3523 3524 static inline SIMD_CFUNC simd_half4 simd_clamp(simd_half4 x, simd_half4 min, simd_half4 max) { 3525 return simd_min(simd_max(x, min), max); 3526 } 3527 3528 static inline SIMD_CFUNC simd_half8 simd_clamp(simd_half8 x, simd_half8 min, simd_half8 max) { 3529 return simd_min(simd_max(x, min), max); 3530 } 3531 3532 static inline SIMD_CFUNC simd_half16 simd_clamp(simd_half16 x, simd_half16 min, simd_half16 max) { 3533 return simd_min(simd_max(x, min), max); 3534 } 3535 3536 static inline SIMD_CFUNC simd_half32 simd_clamp(simd_half32 x, simd_half32 min, simd_half32 max) { 
3537 return simd_min(simd_max(x, min), max); 3538 } 3539 3540 static inline SIMD_CFUNC simd_int2 simd_clamp(simd_int2 x, simd_int2 min, simd_int2 max) { 3541 return simd_min(simd_max(x, min), max); 3542 } 3543 3544 static inline SIMD_CFUNC simd_int3 simd_clamp(simd_int3 x, simd_int3 min, simd_int3 max) { 3545 return simd_min(simd_max(x, min), max); 3546 } 3547 3548 static inline SIMD_CFUNC simd_int4 simd_clamp(simd_int4 x, simd_int4 min, simd_int4 max) { 3549 return simd_min(simd_max(x, min), max); 3550 } 3551 3552 static inline SIMD_CFUNC simd_int8 simd_clamp(simd_int8 x, simd_int8 min, simd_int8 max) { 3553 return simd_min(simd_max(x, min), max); 3554 } 3555 3556 static inline SIMD_CFUNC simd_int16 simd_clamp(simd_int16 x, simd_int16 min, simd_int16 max) { 3557 return simd_min(simd_max(x, min), max); 3558 } 3559 3560 static inline SIMD_CFUNC simd_uint2 simd_clamp(simd_uint2 x, simd_uint2 min, simd_uint2 max) { 3561 return simd_min(simd_max(x, min), max); 3562 } 3563 3564 static inline SIMD_CFUNC simd_uint3 simd_clamp(simd_uint3 x, simd_uint3 min, simd_uint3 max) { 3565 return simd_min(simd_max(x, min), max); 3566 } 3567 3568 static inline SIMD_CFUNC simd_uint4 simd_clamp(simd_uint4 x, simd_uint4 min, simd_uint4 max) { 3569 return simd_min(simd_max(x, min), max); 3570 } 3571 3572 static inline SIMD_CFUNC simd_uint8 simd_clamp(simd_uint8 x, simd_uint8 min, simd_uint8 max) { 3573 return simd_min(simd_max(x, min), max); 3574 } 3575 3576 static inline SIMD_CFUNC simd_uint16 simd_clamp(simd_uint16 x, simd_uint16 min, simd_uint16 max) { 3577 return simd_min(simd_max(x, min), max); 3578 } 3579 3580 static inline SIMD_CFUNC float simd_clamp(float x, float min, float max) { 3581 return simd_min(simd_max(x, min), max); 3582 } 3583 3584 static inline SIMD_CFUNC simd_float2 simd_clamp(simd_float2 x, simd_float2 min, simd_float2 max) { 3585 return simd_min(simd_max(x, min), max); 3586 } 3587 3588 static inline SIMD_CFUNC simd_float3 simd_clamp(simd_float3 x, simd_float3 min, 
simd_float3 max) { 3589 return simd_min(simd_max(x, min), max); 3590 } 3591 3592 static inline SIMD_CFUNC simd_float4 simd_clamp(simd_float4 x, simd_float4 min, simd_float4 max) { 3593 return simd_min(simd_max(x, min), max); 3594 } 3595 3596 static inline SIMD_CFUNC simd_float8 simd_clamp(simd_float8 x, simd_float8 min, simd_float8 max) { 3597 return simd_min(simd_max(x, min), max); 3598 } 3599 3600 static inline SIMD_CFUNC simd_float16 simd_clamp(simd_float16 x, simd_float16 min, simd_float16 max) { 3601 return simd_min(simd_max(x, min), max); 3602 } 3603 3604 static inline SIMD_CFUNC simd_long2 simd_clamp(simd_long2 x, simd_long2 min, simd_long2 max) { 3605 return simd_min(simd_max(x, min), max); 3606 } 3607 3608 static inline SIMD_CFUNC simd_long3 simd_clamp(simd_long3 x, simd_long3 min, simd_long3 max) { 3609 return simd_min(simd_max(x, min), max); 3610 } 3611 3612 static inline SIMD_CFUNC simd_long4 simd_clamp(simd_long4 x, simd_long4 min, simd_long4 max) { 3613 return simd_min(simd_max(x, min), max); 3614 } 3615 3616 static inline SIMD_CFUNC simd_long8 simd_clamp(simd_long8 x, simd_long8 min, simd_long8 max) { 3617 return simd_min(simd_max(x, min), max); 3618 } 3619 3620 static inline SIMD_CFUNC simd_ulong2 simd_clamp(simd_ulong2 x, simd_ulong2 min, simd_ulong2 max) { 3621 return simd_min(simd_max(x, min), max); 3622 } 3623 3624 static inline SIMD_CFUNC simd_ulong3 simd_clamp(simd_ulong3 x, simd_ulong3 min, simd_ulong3 max) { 3625 return simd_min(simd_max(x, min), max); 3626 } 3627 3628 static inline SIMD_CFUNC simd_ulong4 simd_clamp(simd_ulong4 x, simd_ulong4 min, simd_ulong4 max) { 3629 return simd_min(simd_max(x, min), max); 3630 } 3631 3632 static inline SIMD_CFUNC simd_ulong8 simd_clamp(simd_ulong8 x, simd_ulong8 min, simd_ulong8 max) { 3633 return simd_min(simd_max(x, min), max); 3634 } 3635 3636 static inline SIMD_CFUNC double simd_clamp(double x, double min, double max) { 3637 return simd_min(simd_max(x, min), max); 3638 } 3639 3640 static inline 
SIMD_CFUNC simd_double2 simd_clamp(simd_double2 x, simd_double2 min, simd_double2 max) { 3641 return simd_min(simd_max(x, min), max); 3642 } 3643 3644 static inline SIMD_CFUNC simd_double3 simd_clamp(simd_double3 x, simd_double3 min, simd_double3 max) { 3645 return simd_min(simd_max(x, min), max); 3646 } 3647 3648 static inline SIMD_CFUNC simd_double4 simd_clamp(simd_double4 x, simd_double4 min, simd_double4 max) { 3649 return simd_min(simd_max(x, min), max); 3650 } 3651 3652 static inline SIMD_CFUNC simd_double8 simd_clamp(simd_double8 x, simd_double8 min, simd_double8 max) { 3653 return simd_min(simd_max(x, min), max); 3654 } 3655 3656 3657 static inline SIMD_CFUNC _Float16 simd_sign(_Float16 x) { 3658 return (x == 0 | x != x) ? 0 : __copysignf16(1,x); 3659 } 3660 3661 static inline SIMD_CFUNC simd_half2 simd_sign(simd_half2 x) { 3662 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); 3663 } 3664 3665 static inline SIMD_CFUNC simd_half3 simd_sign(simd_half3 x) { 3666 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); 3667 } 3668 3669 static inline SIMD_CFUNC simd_half4 simd_sign(simd_half4 x) { 3670 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); 3671 } 3672 3673 static inline SIMD_CFUNC simd_half8 simd_sign(simd_half8 x) { 3674 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); 3675 } 3676 3677 static inline SIMD_CFUNC simd_half16 simd_sign(simd_half16 x) { 3678 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); 3679 } 3680 3681 static inline SIMD_CFUNC simd_half32 simd_sign(simd_half32 x) { 3682 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); 3683 } 3684 3685 static inline SIMD_CFUNC float simd_sign(float x) { 3686 return (x == 0 | x != x) ? 
0 : copysignf(1,x); 3687 } 3688 3689 static inline SIMD_CFUNC simd_float2 simd_sign(simd_float2 x) { 3690 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); 3691 } 3692 3693 static inline SIMD_CFUNC simd_float3 simd_sign(simd_float3 x) { 3694 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); 3695 } 3696 3697 static inline SIMD_CFUNC simd_float4 simd_sign(simd_float4 x) { 3698 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); 3699 } 3700 3701 static inline SIMD_CFUNC simd_float8 simd_sign(simd_float8 x) { 3702 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); 3703 } 3704 3705 static inline SIMD_CFUNC simd_float16 simd_sign(simd_float16 x) { 3706 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); 3707 } 3708 3709 static inline SIMD_CFUNC double simd_sign(double x) { 3710 return (x == 0 | x != x) ? 0 : copysign(1,x); 3711 } 3712 3713 static inline SIMD_CFUNC simd_double2 simd_sign(simd_double2 x) { 3714 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); 3715 } 3716 3717 static inline SIMD_CFUNC simd_double3 simd_sign(simd_double3 x) { 3718 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); 3719 } 3720 3721 static inline SIMD_CFUNC simd_double4 simd_sign(simd_double4 x) { 3722 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); 3723 } 3724 3725 static inline SIMD_CFUNC simd_double8 simd_sign(simd_double8 x) { 3726 return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); 3727 } 3728 3729 static inline SIMD_CFUNC _Float16 simd_mix(_Float16 x, _Float16 y, _Float16 t) { 3730 return x + t*(y - x); 3731 } 3732 3733 static inline SIMD_CFUNC simd_half2 simd_mix(simd_half2 x, simd_half2 y, simd_half2 t) { 3734 return x + t*(y - x); 3735 } 3736 3737 static inline SIMD_CFUNC simd_half3 simd_mix(simd_half3 x, simd_half3 y, simd_half3 t) { 3738 return x + t*(y - x); 3739 } 3740 3741 static inline SIMD_CFUNC simd_half4 simd_mix(simd_half4 x, simd_half4 y, simd_half4 t) { 3742 
return x + t*(y - x); 3743 } 3744 3745 static inline SIMD_CFUNC simd_half8 simd_mix(simd_half8 x, simd_half8 y, simd_half8 t) { 3746 return x + t*(y - x); 3747 } 3748 3749 static inline SIMD_CFUNC simd_half16 simd_mix(simd_half16 x, simd_half16 y, simd_half16 t) { 3750 return x + t*(y - x); 3751 } 3752 3753 static inline SIMD_CFUNC simd_half32 simd_mix(simd_half32 x, simd_half32 y, simd_half32 t) { 3754 return x + t*(y - x); 3755 } 3756 3757 static inline SIMD_CFUNC float simd_mix(float x, float y, float t) { 3758 return x + t*(y - x); 3759 } 3760 3761 static inline SIMD_CFUNC simd_float2 simd_mix(simd_float2 x, simd_float2 y, simd_float2 t) { 3762 return x + t*(y - x); 3763 } 3764 3765 static inline SIMD_CFUNC simd_float3 simd_mix(simd_float3 x, simd_float3 y, simd_float3 t) { 3766 return x + t*(y - x); 3767 } 3768 3769 static inline SIMD_CFUNC simd_float4 simd_mix(simd_float4 x, simd_float4 y, simd_float4 t) { 3770 return x + t*(y - x); 3771 } 3772 3773 static inline SIMD_CFUNC simd_float8 simd_mix(simd_float8 x, simd_float8 y, simd_float8 t) { 3774 return x + t*(y - x); 3775 } 3776 3777 static inline SIMD_CFUNC simd_float16 simd_mix(simd_float16 x, simd_float16 y, simd_float16 t) { 3778 return x + t*(y - x); 3779 } 3780 3781 static inline SIMD_CFUNC double simd_mix(double x, double y, double t) { 3782 return x + t*(y - x); 3783 } 3784 3785 static inline SIMD_CFUNC simd_double2 simd_mix(simd_double2 x, simd_double2 y, simd_double2 t) { 3786 return x + t*(y - x); 3787 } 3788 3789 static inline SIMD_CFUNC simd_double3 simd_mix(simd_double3 x, simd_double3 y, simd_double3 t) { 3790 return x + t*(y - x); 3791 } 3792 3793 static inline SIMD_CFUNC simd_double4 simd_mix(simd_double4 x, simd_double4 y, simd_double4 t) { 3794 return x + t*(y - x); 3795 } 3796 3797 static inline SIMD_CFUNC simd_double8 simd_mix(simd_double8 x, simd_double8 y, simd_double8 t) { 3798 return x + t*(y - x); 3799 } 3800 3801 static inline SIMD_CFUNC _Float16 simd_recip(_Float16 x) { 3802 #if 
__FAST_MATH__ 3803 return simd_fast_recip(x); 3804 #else 3805 return simd_precise_recip(x); 3806 #endif 3807 } 3808 3809 static inline SIMD_CFUNC simd_half2 simd_recip(simd_half2 x) { 3810 #if __FAST_MATH__ 3811 return simd_fast_recip(x); 3812 #else 3813 return simd_precise_recip(x); 3814 #endif 3815 } 3816 3817 static inline SIMD_CFUNC simd_half3 simd_recip(simd_half3 x) { 3818 #if __FAST_MATH__ 3819 return simd_fast_recip(x); 3820 #else 3821 return simd_precise_recip(x); 3822 #endif 3823 } 3824 3825 static inline SIMD_CFUNC simd_half4 simd_recip(simd_half4 x) { 3826 #if __FAST_MATH__ 3827 return simd_fast_recip(x); 3828 #else 3829 return simd_precise_recip(x); 3830 #endif 3831 } 3832 3833 static inline SIMD_CFUNC simd_half8 simd_recip(simd_half8 x) { 3834 #if __FAST_MATH__ 3835 return simd_fast_recip(x); 3836 #else 3837 return simd_precise_recip(x); 3838 #endif 3839 } 3840 3841 static inline SIMD_CFUNC simd_half16 simd_recip(simd_half16 x) { 3842 #if __FAST_MATH__ 3843 return simd_fast_recip(x); 3844 #else 3845 return simd_precise_recip(x); 3846 #endif 3847 } 3848 3849 static inline SIMD_CFUNC simd_half32 simd_recip(simd_half32 x) { 3850 #if __FAST_MATH__ 3851 return simd_fast_recip(x); 3852 #else 3853 return simd_precise_recip(x); 3854 #endif 3855 } 3856 3857 static inline SIMD_CFUNC float simd_recip(float x) { 3858 #if __FAST_MATH__ 3859 return simd_fast_recip(x); 3860 #else 3861 return simd_precise_recip(x); 3862 #endif 3863 } 3864 3865 static inline SIMD_CFUNC simd_float2 simd_recip(simd_float2 x) { 3866 #if __FAST_MATH__ 3867 return simd_fast_recip(x); 3868 #else 3869 return simd_precise_recip(x); 3870 #endif 3871 } 3872 3873 static inline SIMD_CFUNC simd_float3 simd_recip(simd_float3 x) { 3874 #if __FAST_MATH__ 3875 return simd_fast_recip(x); 3876 #else 3877 return simd_precise_recip(x); 3878 #endif 3879 } 3880 3881 static inline SIMD_CFUNC simd_float4 simd_recip(simd_float4 x) { 3882 #if __FAST_MATH__ 3883 return simd_fast_recip(x); 3884 #else 3885 return 
simd_precise_recip(x); 3886 #endif 3887 } 3888 3889 static inline SIMD_CFUNC simd_float8 simd_recip(simd_float8 x) { 3890 #if __FAST_MATH__ 3891 return simd_fast_recip(x); 3892 #else 3893 return simd_precise_recip(x); 3894 #endif 3895 } 3896 3897 static inline SIMD_CFUNC simd_float16 simd_recip(simd_float16 x) { 3898 #if __FAST_MATH__ 3899 return simd_fast_recip(x); 3900 #else 3901 return simd_precise_recip(x); 3902 #endif 3903 } 3904 3905 static inline SIMD_CFUNC double simd_recip(double x) { 3906 #if __FAST_MATH__ 3907 return simd_fast_recip(x); 3908 #else 3909 return simd_precise_recip(x); 3910 #endif 3911 } 3912 3913 static inline SIMD_CFUNC simd_double2 simd_recip(simd_double2 x) { 3914 #if __FAST_MATH__ 3915 return simd_fast_recip(x); 3916 #else 3917 return simd_precise_recip(x); 3918 #endif 3919 } 3920 3921 static inline SIMD_CFUNC simd_double3 simd_recip(simd_double3 x) { 3922 #if __FAST_MATH__ 3923 return simd_fast_recip(x); 3924 #else 3925 return simd_precise_recip(x); 3926 #endif 3927 } 3928 3929 static inline SIMD_CFUNC simd_double4 simd_recip(simd_double4 x) { 3930 #if __FAST_MATH__ 3931 return simd_fast_recip(x); 3932 #else 3933 return simd_precise_recip(x); 3934 #endif 3935 } 3936 3937 static inline SIMD_CFUNC simd_double8 simd_recip(simd_double8 x) { 3938 #if __FAST_MATH__ 3939 return simd_fast_recip(x); 3940 #else 3941 return simd_precise_recip(x); 3942 #endif 3943 } 3944 3945 static inline SIMD_CFUNC _Float16 simd_fast_recip(_Float16 x) { 3946 #if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 3947 return simd_fast_recip(simd_make_half4_undef(x)).x; 3948 #else 3949 return simd_precise_recip(x); 3950 #endif 3951 } 3952 3953 static inline SIMD_CFUNC simd_half2 simd_fast_recip(simd_half2 x) { 3954 return simd_fast_recip(simd_make_half4_undef(x)).lo; 3955 } 3956 3957 static inline SIMD_CFUNC simd_half3 simd_fast_recip(simd_half3 x) { 3958 return simd_make_half3(simd_fast_recip(simd_make_half4_undef(x))); 3959 } 3960 3961 static inline SIMD_CFUNC 
simd_half4 simd_fast_recip(simd_half4 x) { 3962 #if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 3963 simd_half4 r = vrecpe_f16(x); 3964 return r * vrecps_f16(x, r); 3965 #else 3966 return simd_precise_recip(x); 3967 #endif 3968 } 3969 3970 static inline SIMD_CFUNC simd_half8 simd_fast_recip(simd_half8 x) { 3971 #if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 3972 simd_half8 r = vrecpeq_f16(x); 3973 return r * vrecpsq_f16(x, r); 3974 #else 3975 return simd_precise_recip(x); 3976 #endif 3977 } 3978 3979 static inline SIMD_CFUNC simd_half16 simd_fast_recip(simd_half16 x) { 3980 return simd_make_half16(simd_fast_recip(x.lo), simd_fast_recip(x.hi)); 3981 } 3982 3983 static inline SIMD_CFUNC simd_half32 simd_fast_recip(simd_half32 x) { 3984 return simd_make_half32(simd_fast_recip(x.lo), simd_fast_recip(x.hi)); 3985 } 3986 3987 static inline SIMD_CFUNC float simd_fast_recip(float x) { 3988 #if defined __AVX512VL__ 3989 simd_float4 x4 = simd_make_float4(x); 3990 return ((simd_float4)_mm_rcp14_ss(x4, x4)).x; 3991 #elif defined __SSE__ 3992 return ((simd_float4)_mm_rcp_ss(simd_make_float4(x))).x; 3993 #elif defined __ARM_NEON 3994 return simd_fast_recip(simd_make_float2_undef(x)).x; 3995 #else 3996 return simd_precise_recip(x); 3997 #endif 3998 } 3999 4000 static inline SIMD_CFUNC simd_float2 simd_fast_recip(simd_float2 x) { 4001 #if defined __SSE__ 4002 return simd_make_float2(simd_fast_recip(simd_make_float4_undef(x))); 4003 #elif defined __ARM_NEON 4004 simd_float2 r = vrecpe_f32(x); 4005 return r * vrecps_f32(x, r); 4006 #else 4007 return simd_precise_recip(x); 4008 #endif 4009 } 4010 4011 static inline SIMD_CFUNC simd_float3 simd_fast_recip(simd_float3 x) { 4012 return simd_make_float3(simd_fast_recip(simd_make_float4_undef(x))); 4013 } 4014 4015 static inline SIMD_CFUNC simd_float4 simd_fast_recip(simd_float4 x) { 4016 #if defined __AVX512VL__ 4017 return _mm_rcp14_ps(x); 4018 #elif defined __SSE__ 4019 return _mm_rcp_ps(x); 4020 #elif defined __ARM_NEON 4021 
simd_float4 r = vrecpeq_f32(x); 4022 return r * vrecpsq_f32(x, r); 4023 #else 4024 return simd_precise_recip(x); 4025 #endif 4026 } 4027 4028 static inline SIMD_CFUNC simd_float8 simd_fast_recip(simd_float8 x) { 4029 #if defined __AVX512VL__ 4030 return _mm256_rcp14_ps(x); 4031 #elif defined __AVX__ 4032 return _mm256_rcp_ps(x); 4033 #else 4034 return simd_make_float8(simd_fast_recip(x.lo), simd_fast_recip(x.hi)); 4035 #endif 4036 } 4037 4038 static inline SIMD_CFUNC simd_float16 simd_fast_recip(simd_float16 x) { 4039 #if defined __AVX512F__ 4040 return _mm512_rcp14_ps(x); 4041 #else 4042 return simd_make_float16(simd_fast_recip(x.lo), simd_fast_recip(x.hi)); 4043 #endif 4044 } 4045 4046 static inline SIMD_CFUNC double simd_fast_recip(double x) { 4047 return simd_precise_recip(x); 4048 } 4049 4050 static inline SIMD_CFUNC simd_double2 simd_fast_recip(simd_double2 x) { 4051 return simd_precise_recip(x); 4052 } 4053 4054 static inline SIMD_CFUNC simd_double3 simd_fast_recip(simd_double3 x) { 4055 return simd_precise_recip(x); 4056 } 4057 4058 static inline SIMD_CFUNC simd_double4 simd_fast_recip(simd_double4 x) { 4059 return simd_precise_recip(x); 4060 } 4061 4062 static inline SIMD_CFUNC simd_double8 simd_fast_recip(simd_double8 x) { 4063 return simd_precise_recip(x); 4064 } 4065 4066 static inline SIMD_CFUNC _Float16 simd_precise_recip(_Float16 x) { 4067 #if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 4068 return simd_precise_recip(simd_make_half4_undef(x)).x; 4069 #else 4070 return 1/x; 4071 #endif 4072 } 4073 4074 static inline SIMD_CFUNC simd_half2 simd_precise_recip(simd_half2 x) { 4075 return simd_precise_recip(simd_make_half4_undef(x)).lo; 4076 } 4077 4078 static inline SIMD_CFUNC simd_half3 simd_precise_recip(simd_half3 x) { 4079 return simd_make_half3(simd_precise_recip(simd_make_half4_undef(x))); 4080 } 4081 4082 static inline SIMD_CFUNC simd_half4 simd_precise_recip(simd_half4 x) { 4083 #if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 4084 simd_half4 r = 
simd_fast_recip(x); 4085 return r*vrecps_f16(x, r); 4086 #else 4087 return 1/x; 4088 #endif 4089 } 4090 4091 static inline SIMD_CFUNC simd_half8 simd_precise_recip(simd_half8 x) { 4092 #if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 4093 simd_half8 r = simd_fast_recip(x); 4094 return r*vrecpsq_f16(x, r); 4095 #else 4096 return 1/x; 4097 #endif 4098 } 4099 4100 static inline SIMD_CFUNC simd_half16 simd_precise_recip(simd_half16 x) { 4101 return simd_make_half16(simd_precise_recip(x.lo), simd_precise_recip(x.hi)); 4102 } 4103 4104 static inline SIMD_CFUNC simd_half32 simd_precise_recip(simd_half32 x) { 4105 return simd_make_half32(simd_precise_recip(x.lo), simd_precise_recip(x.hi)); 4106 } 4107 4108 static inline SIMD_CFUNC float simd_precise_recip(float x) { 4109 #if defined __SSE__ 4110 float r = simd_fast_recip(x); 4111 return r*(2 - (x == 0 ? -INFINITY : x)*r); 4112 #elif defined __ARM_NEON 4113 return simd_precise_recip(simd_make_float2_undef(x)).x; 4114 #else 4115 return 1/x; 4116 #endif 4117 } 4118 4119 static inline SIMD_CFUNC simd_float2 simd_precise_recip(simd_float2 x) { 4120 #if defined __SSE__ 4121 return simd_make_float2(simd_precise_recip(simd_make_float4_undef(x))); 4122 #elif defined __ARM_NEON 4123 simd_float2 r = simd_fast_recip(x); 4124 return r*vrecps_f32(x, r); 4125 #else 4126 return 1/x; 4127 #endif 4128 } 4129 4130 static inline SIMD_CFUNC simd_float3 simd_precise_recip(simd_float3 x) { 4131 return simd_make_float3(simd_precise_recip(simd_make_float4_undef(x))); 4132 } 4133 4134 static inline SIMD_CFUNC simd_float4 simd_precise_recip(simd_float4 x) { 4135 #if defined __SSE__ 4136 simd_float4 r = simd_fast_recip(x); 4137 return r*(2 - simd_bitselect(x, -INFINITY, x == 0)*r); 4138 #elif defined __ARM_NEON 4139 simd_float4 r = simd_fast_recip(x); 4140 return r*vrecpsq_f32(x, r); 4141 #else 4142 return 1/x; 4143 #endif 4144 } 4145 4146 static inline SIMD_CFUNC simd_float8 simd_precise_recip(simd_float8 x) { 4147 #if defined __AVX__ 4148 
simd_float8 r = simd_fast_recip(x); 4149 return r*(2 - simd_bitselect(x, -INFINITY, x == 0)*r); 4150 #else 4151 return simd_make_float8(simd_precise_recip(x.lo), simd_precise_recip(x.hi)); 4152 #endif 4153 } 4154 4155 static inline SIMD_CFUNC simd_float16 simd_precise_recip(simd_float16 x) { 4156 #if defined __AVX512F__ 4157 simd_float16 r = simd_fast_recip(x); 4158 return r*(2 - simd_bitselect(x, -INFINITY, x == 0)*r); 4159 #else 4160 return simd_make_float16(simd_precise_recip(x.lo), simd_precise_recip(x.hi)); 4161 #endif 4162 } 4163 4164 static inline SIMD_CFUNC double simd_precise_recip(double x) { 4165 return 1/x; 4166 } 4167 4168 static inline SIMD_CFUNC simd_double2 simd_precise_recip(simd_double2 x) { 4169 return 1/x; 4170 } 4171 4172 static inline SIMD_CFUNC simd_double3 simd_precise_recip(simd_double3 x) { 4173 return 1/x; 4174 } 4175 4176 static inline SIMD_CFUNC simd_double4 simd_precise_recip(simd_double4 x) { 4177 return 1/x; 4178 } 4179 4180 static inline SIMD_CFUNC simd_double8 simd_precise_recip(simd_double8 x) { 4181 return 1/x; 4182 } 4183 4184 static inline SIMD_CFUNC _Float16 simd_rsqrt(_Float16 x) { 4185 #if __FAST_MATH__ 4186 return simd_fast_rsqrt(x); 4187 #else 4188 return simd_precise_rsqrt(x); 4189 #endif 4190 } 4191 4192 static inline SIMD_CFUNC simd_half2 simd_rsqrt(simd_half2 x) { 4193 #if __FAST_MATH__ 4194 return simd_fast_rsqrt(x); 4195 #else 4196 return simd_precise_rsqrt(x); 4197 #endif 4198 } 4199 4200 static inline SIMD_CFUNC simd_half3 simd_rsqrt(simd_half3 x) { 4201 #if __FAST_MATH__ 4202 return simd_fast_rsqrt(x); 4203 #else 4204 return simd_precise_rsqrt(x); 4205 #endif 4206 } 4207 4208 static inline SIMD_CFUNC simd_half4 simd_rsqrt(simd_half4 x) { 4209 #if __FAST_MATH__ 4210 return simd_fast_rsqrt(x); 4211 #else 4212 return simd_precise_rsqrt(x); 4213 #endif 4214 } 4215 4216 static inline SIMD_CFUNC simd_half8 simd_rsqrt(simd_half8 x) { 4217 #if __FAST_MATH__ 4218 return simd_fast_rsqrt(x); 4219 #else 4220 return 
simd_precise_rsqrt(x); 4221 #endif 4222 } 4223 4224 static inline SIMD_CFUNC simd_half16 simd_rsqrt(simd_half16 x) { 4225 #if __FAST_MATH__ 4226 return simd_fast_rsqrt(x); 4227 #else 4228 return simd_precise_rsqrt(x); 4229 #endif 4230 } 4231 4232 static inline SIMD_CFUNC simd_half32 simd_rsqrt(simd_half32 x) { 4233 #if __FAST_MATH__ 4234 return simd_fast_rsqrt(x); 4235 #else 4236 return simd_precise_rsqrt(x); 4237 #endif 4238 } 4239 4240 static inline SIMD_CFUNC float simd_rsqrt(float x) { 4241 #if __FAST_MATH__ 4242 return simd_fast_rsqrt(x); 4243 #else 4244 return simd_precise_rsqrt(x); 4245 #endif 4246 } 4247 4248 static inline SIMD_CFUNC simd_float2 simd_rsqrt(simd_float2 x) { 4249 #if __FAST_MATH__ 4250 return simd_fast_rsqrt(x); 4251 #else 4252 return simd_precise_rsqrt(x); 4253 #endif 4254 } 4255 4256 static inline SIMD_CFUNC simd_float3 simd_rsqrt(simd_float3 x) { 4257 #if __FAST_MATH__ 4258 return simd_fast_rsqrt(x); 4259 #else 4260 return simd_precise_rsqrt(x); 4261 #endif 4262 } 4263 4264 static inline SIMD_CFUNC simd_float4 simd_rsqrt(simd_float4 x) { 4265 #if __FAST_MATH__ 4266 return simd_fast_rsqrt(x); 4267 #else 4268 return simd_precise_rsqrt(x); 4269 #endif 4270 } 4271 4272 static inline SIMD_CFUNC simd_float8 simd_rsqrt(simd_float8 x) { 4273 #if __FAST_MATH__ 4274 return simd_fast_rsqrt(x); 4275 #else 4276 return simd_precise_rsqrt(x); 4277 #endif 4278 } 4279 4280 static inline SIMD_CFUNC simd_float16 simd_rsqrt(simd_float16 x) { 4281 #if __FAST_MATH__ 4282 return simd_fast_rsqrt(x); 4283 #else 4284 return simd_precise_rsqrt(x); 4285 #endif 4286 } 4287 4288 static inline SIMD_CFUNC double simd_rsqrt(double x) { 4289 #if __FAST_MATH__ 4290 return simd_fast_rsqrt(x); 4291 #else 4292 return simd_precise_rsqrt(x); 4293 #endif 4294 } 4295 4296 static inline SIMD_CFUNC simd_double2 simd_rsqrt(simd_double2 x) { 4297 #if __FAST_MATH__ 4298 return simd_fast_rsqrt(x); 4299 #else 4300 return simd_precise_rsqrt(x); 4301 #endif 4302 } 4303 4304 static inline 
SIMD_CFUNC simd_double3 simd_rsqrt(simd_double3 x) { 4305 #if __FAST_MATH__ 4306 return simd_fast_rsqrt(x); 4307 #else 4308 return simd_precise_rsqrt(x); 4309 #endif 4310 } 4311 4312 static inline SIMD_CFUNC simd_double4 simd_rsqrt(simd_double4 x) { 4313 #if __FAST_MATH__ 4314 return simd_fast_rsqrt(x); 4315 #else 4316 return simd_precise_rsqrt(x); 4317 #endif 4318 } 4319 4320 static inline SIMD_CFUNC simd_double8 simd_rsqrt(simd_double8 x) { 4321 #if __FAST_MATH__ 4322 return simd_fast_rsqrt(x); 4323 #else 4324 return simd_precise_rsqrt(x); 4325 #endif 4326 } 4327 4328 static inline SIMD_CFUNC _Float16 simd_fast_rsqrt(_Float16 x) { 4329 #if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 4330 return simd_fast_rsqrt(simd_make_half4_undef(x)).x; 4331 #else 4332 return simd_precise_rsqrt(x); 4333 #endif 4334 } 4335 4336 static inline SIMD_CFUNC simd_half2 simd_fast_rsqrt(simd_half2 x) { 4337 return simd_fast_rsqrt(simd_make_half4_undef(x)).lo; 4338 } 4339 4340 static inline SIMD_CFUNC simd_half3 simd_fast_rsqrt(simd_half3 x) { 4341 return simd_make_half3(simd_fast_rsqrt(simd_make_half4_undef(x))); 4342 } 4343 4344 static inline SIMD_CFUNC simd_half4 simd_fast_rsqrt(simd_half4 x) { 4345 #if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 4346 simd_half4 r = vrsqrte_f16(x); 4347 return r * vrsqrts_f16(x, r*r); 4348 #else 4349 return simd_precise_rsqrt(x); 4350 #endif 4351 } 4352 4353 static inline SIMD_CFUNC simd_half8 simd_fast_rsqrt(simd_half8 x) { 4354 #if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 4355 simd_half8 r = vrsqrteq_f16(x); 4356 return r * vrsqrtsq_f16(x, r*r); 4357 #else 4358 return simd_precise_rsqrt(x); 4359 #endif 4360 } 4361 4362 static inline SIMD_CFUNC simd_half16 simd_fast_rsqrt(simd_half16 x) { 4363 return simd_make_half16(simd_fast_rsqrt(x.lo), simd_fast_rsqrt(x.hi)); 4364 } 4365 4366 static inline SIMD_CFUNC simd_half32 simd_fast_rsqrt(simd_half32 x) { 4367 return simd_make_half32(simd_fast_rsqrt(x.lo), simd_fast_rsqrt(x.hi)); 4368 } 4369 4370 
static inline SIMD_CFUNC float simd_fast_rsqrt(float x) { 4371 #if defined __AVX512VL__ 4372 simd_float4 x4 = simd_make_float4(x); 4373 return ((simd_float4)_mm_rsqrt14_ss(x4, x4)).x; 4374 #elif defined __SSE__ 4375 return ((simd_float4)_mm_rsqrt_ss(simd_make_float4(x))).x; 4376 #elif defined __ARM_NEON 4377 return simd_fast_rsqrt(simd_make_float2_undef(x)).x; 4378 #else 4379 return simd_precise_rsqrt(x); 4380 #endif 4381 } 4382 4383 static inline SIMD_CFUNC simd_float2 simd_fast_rsqrt(simd_float2 x) { 4384 #if defined __SSE__ 4385 return simd_make_float2(simd_fast_rsqrt(simd_make_float4_undef(x))); 4386 #elif defined __ARM_NEON 4387 simd_float2 r = vrsqrte_f32(x); 4388 return r * vrsqrts_f32(x, r*r); 4389 #else 4390 return simd_precise_rsqrt(x); 4391 #endif 4392 } 4393 4394 static inline SIMD_CFUNC simd_float3 simd_fast_rsqrt(simd_float3 x) { 4395 return simd_make_float3(simd_fast_rsqrt(simd_make_float4_undef(x))); 4396 } 4397 4398 static inline SIMD_CFUNC simd_float4 simd_fast_rsqrt(simd_float4 x) { 4399 #if defined __AVX512VL__ 4400 return _mm_rsqrt14_ps(x); 4401 #elif defined __SSE__ 4402 return _mm_rsqrt_ps(x); 4403 #elif defined __ARM_NEON 4404 simd_float4 r = vrsqrteq_f32(x); 4405 return r * vrsqrtsq_f32(x, r*r); 4406 #else 4407 return simd_precise_rsqrt(x); 4408 #endif 4409 } 4410 4411 static inline SIMD_CFUNC simd_float8 simd_fast_rsqrt(simd_float8 x) { 4412 #if defined __AVX512VL__ 4413 return _mm256_rsqrt14_ps(x); 4414 #elif defined __AVX__ 4415 return _mm256_rsqrt_ps(x); 4416 #else 4417 return simd_make_float8(simd_fast_rsqrt(x.lo), simd_fast_rsqrt(x.hi)); 4418 #endif 4419 } 4420 4421 static inline SIMD_CFUNC simd_float16 simd_fast_rsqrt(simd_float16 x) { 4422 #if defined __AVX512F__ 4423 return _mm512_rsqrt14_ps(x); 4424 #else 4425 return simd_make_float16(simd_fast_rsqrt(x.lo), simd_fast_rsqrt(x.hi)); 4426 #endif 4427 } 4428 4429 static inline SIMD_CFUNC double simd_fast_rsqrt(double x) { 4430 return simd_precise_rsqrt(x); 4431 } 4432 4433 static 
inline SIMD_CFUNC simd_double2 simd_fast_rsqrt(simd_double2 x) { 4434 return simd_precise_rsqrt(x); 4435 } 4436 4437 static inline SIMD_CFUNC simd_double3 simd_fast_rsqrt(simd_double3 x) { 4438 return simd_precise_rsqrt(x); 4439 } 4440 4441 static inline SIMD_CFUNC simd_double4 simd_fast_rsqrt(simd_double4 x) { 4442 return simd_precise_rsqrt(x); 4443 } 4444 4445 static inline SIMD_CFUNC simd_double8 simd_fast_rsqrt(simd_double8 x) { 4446 return simd_precise_rsqrt(x); 4447 } 4448 4449 static inline SIMD_CFUNC _Float16 simd_precise_rsqrt(_Float16 x) { 4450 #if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 4451 return simd_precise_rsqrt(simd_make_half4_undef(x)).x; 4452 #else 4453 return 1/__sqrtf16(x); 4454 #endif 4455 } 4456 4457 static inline SIMD_CFUNC simd_half2 simd_precise_rsqrt(simd_half2 x) { 4458 return simd_precise_rsqrt(simd_make_half4_undef(x)).lo; 4459 } 4460 4461 static inline SIMD_CFUNC simd_half3 simd_precise_rsqrt(simd_half3 x) { 4462 return simd_make_half3(simd_precise_rsqrt(simd_make_half4_undef(x))); 4463 } 4464 4465 static inline SIMD_CFUNC simd_half4 simd_precise_rsqrt(simd_half4 x) { 4466 #if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 4467 simd_half4 r = simd_fast_rsqrt(x); 4468 return r*vrsqrts_f16(x, r*r); 4469 #else 4470 return 1/__tg_sqrt(x); 4471 #endif 4472 } 4473 4474 static inline SIMD_CFUNC simd_half8 simd_precise_rsqrt(simd_half8 x) { 4475 #if defined __ARM_FEATURE_FP16_VECTOR_ARITHMETIC 4476 simd_half8 r = simd_fast_rsqrt(x); 4477 return r*vrsqrtsq_f16(x, r*r); 4478 #else 4479 return 1/__tg_sqrt(x); 4480 #endif 4481 } 4482 4483 static inline SIMD_CFUNC simd_half16 simd_precise_rsqrt(simd_half16 x) { 4484 return simd_make_half16(simd_precise_rsqrt(x.lo), simd_precise_rsqrt(x.hi)); 4485 } 4486 4487 static inline SIMD_CFUNC simd_half32 simd_precise_rsqrt(simd_half32 x) { 4488 return simd_make_half32(simd_precise_rsqrt(x.lo), simd_precise_rsqrt(x.hi)); 4489 } 4490 4491 static inline SIMD_CFUNC float simd_precise_rsqrt(float x) { 4492 
#if defined __SSE__ 4493 float r = simd_fast_rsqrt(x); 4494 return r*(1.5f - 0.5f*(r == INFINITY ? -INFINITY : x)*r*r); 4495 #elif defined __ARM_NEON 4496 return simd_precise_rsqrt(simd_make_float2_undef(x)).x; 4497 #else 4498 return 1/sqrtf(x); 4499 #endif 4500 } 4501 4502 static inline SIMD_CFUNC simd_float2 simd_precise_rsqrt(simd_float2 x) { 4503 #if defined __SSE__ 4504 return simd_make_float2(simd_precise_rsqrt(simd_make_float4_undef(x))); 4505 #elif defined __ARM_NEON 4506 simd_float2 r = simd_fast_rsqrt(x); 4507 return r*vrsqrts_f32(x, r*r); 4508 #else 4509 return 1/__tg_sqrt(x); 4510 #endif 4511 } 4512 4513 static inline SIMD_CFUNC simd_float3 simd_precise_rsqrt(simd_float3 x) { 4514 return simd_make_float3(simd_precise_rsqrt(simd_make_float4_undef(x))); 4515 } 4516 4517 static inline SIMD_CFUNC simd_float4 simd_precise_rsqrt(simd_float4 x) { 4518 #if defined __SSE__ 4519 simd_float4 r = simd_fast_rsqrt(x); 4520 return r*(1.5 - 0.5*simd_bitselect(x, -INFINITY, r == INFINITY)*r*r); 4521 #elif defined __ARM_NEON 4522 simd_float4 r = simd_fast_rsqrt(x); 4523 return r*vrsqrtsq_f32(x, r*r); 4524 #else 4525 return 1/__tg_sqrt(x); 4526 #endif 4527 } 4528 4529 static inline SIMD_CFUNC simd_float8 simd_precise_rsqrt(simd_float8 x) { 4530 #if defined __AVX__ 4531 simd_float8 r = simd_fast_rsqrt(x); 4532 return r*(1.5 - 0.5*simd_bitselect(x, -INFINITY, r == INFINITY)*r*r); 4533 #else 4534 return simd_make_float8(simd_precise_rsqrt(x.lo), simd_precise_rsqrt(x.hi)); 4535 #endif 4536 } 4537 4538 static inline SIMD_CFUNC simd_float16 simd_precise_rsqrt(simd_float16 x) { 4539 #if defined __AVX512F__ 4540 simd_float16 r = simd_fast_rsqrt(x); 4541 return r*(1.5 - 0.5*simd_bitselect(x, -INFINITY, r == INFINITY)*r*r); 4542 #else 4543 return simd_make_float16(simd_precise_rsqrt(x.lo), simd_precise_rsqrt(x.hi)); 4544 #endif 4545 } 4546 4547 static inline SIMD_CFUNC double simd_precise_rsqrt(double x) { 4548 return 1/sqrt(x); 4549 } 4550 4551 static inline SIMD_CFUNC 
simd_double2 simd_precise_rsqrt(simd_double2 x) { 4552 return 1/__tg_sqrt(x); 4553 } 4554 4555 static inline SIMD_CFUNC simd_double3 simd_precise_rsqrt(simd_double3 x) { 4556 return 1/__tg_sqrt(x); 4557 } 4558 4559 static inline SIMD_CFUNC simd_double4 simd_precise_rsqrt(simd_double4 x) { 4560 return 1/__tg_sqrt(x); 4561 } 4562 4563 static inline SIMD_CFUNC simd_double8 simd_precise_rsqrt(simd_double8 x) { 4564 return 1/__tg_sqrt(x); 4565 } 4566 4567 static inline SIMD_CFUNC _Float16 simd_fract(_Float16 x) { 4568 return __fminf16(x - __floorf16(x), 0x1.ffcp-1f16); 4569 } 4570 4571 static inline SIMD_CFUNC simd_half2 simd_fract(simd_half2 x) { 4572 return __tg_fmin(x - __tg_floor(x), 0x1.ffcp-1f16); 4573 } 4574 4575 static inline SIMD_CFUNC simd_half3 simd_fract(simd_half3 x) { 4576 return __tg_fmin(x - __tg_floor(x), 0x1.ffcp-1f16); 4577 } 4578 4579 static inline SIMD_CFUNC simd_half4 simd_fract(simd_half4 x) { 4580 return __tg_fmin(x - __tg_floor(x), 0x1.ffcp-1f16); 4581 } 4582 4583 static inline SIMD_CFUNC simd_half8 simd_fract(simd_half8 x) { 4584 return __tg_fmin(x - __tg_floor(x), 0x1.ffcp-1f16); 4585 } 4586 4587 static inline SIMD_CFUNC simd_half16 simd_fract(simd_half16 x) { 4588 return __tg_fmin(x - __tg_floor(x), 0x1.ffcp-1f16); 4589 } 4590 4591 static inline SIMD_CFUNC simd_half32 simd_fract(simd_half32 x) { 4592 return __tg_fmin(x - __tg_floor(x), 0x1.ffcp-1f16); 4593 } 4594 4595 static inline SIMD_CFUNC float simd_fract(float x) { 4596 return fminf(x - floorf(x), 0x1.fffffep-1f); 4597 } 4598 4599 static inline SIMD_CFUNC simd_float2 simd_fract(simd_float2 x) { 4600 return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f); 4601 } 4602 4603 static inline SIMD_CFUNC simd_float3 simd_fract(simd_float3 x) { 4604 return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f); 4605 } 4606 4607 static inline SIMD_CFUNC simd_float4 simd_fract(simd_float4 x) { 4608 return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f); 4609 } 4610 4611 static inline SIMD_CFUNC simd_float8 
simd_fract(simd_float8 x) { 4612 return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f); 4613 } 4614 4615 static inline SIMD_CFUNC simd_float16 simd_fract(simd_float16 x) { 4616 return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f); 4617 } 4618 4619 static inline SIMD_CFUNC double simd_fract(double x) { 4620 return fmin(x - floor(x), 0x1.fffffffffffffp-1); 4621 } 4622 4623 static inline SIMD_CFUNC simd_double2 simd_fract(simd_double2 x) { 4624 return __tg_fmin(x - __tg_floor(x), 0x1.fffffffffffffp-1); 4625 } 4626 4627 static inline SIMD_CFUNC simd_double3 simd_fract(simd_double3 x) { 4628 return __tg_fmin(x - __tg_floor(x), 0x1.fffffffffffffp-1); 4629 } 4630 4631 static inline SIMD_CFUNC simd_double4 simd_fract(simd_double4 x) { 4632 return __tg_fmin(x - __tg_floor(x), 0x1.fffffffffffffp-1); 4633 } 4634 4635 static inline SIMD_CFUNC simd_double8 simd_fract(simd_double8 x) { 4636 return __tg_fmin(x - __tg_floor(x), 0x1.fffffffffffffp-1); 4637 } 4638 4639 static inline SIMD_CFUNC _Float16 simd_step(_Float16 edge, _Float16 x) { 4640 return !(x < edge); 4641 } 4642 4643 static inline SIMD_CFUNC simd_half2 simd_step(simd_half2 edge, simd_half2 x) { 4644 return simd_bitselect((simd_half2)1, 0, x < edge); 4645 } 4646 4647 static inline SIMD_CFUNC simd_half3 simd_step(simd_half3 edge, simd_half3 x) { 4648 return simd_bitselect((simd_half3)1, 0, x < edge); 4649 } 4650 4651 static inline SIMD_CFUNC simd_half4 simd_step(simd_half4 edge, simd_half4 x) { 4652 return simd_bitselect((simd_half4)1, 0, x < edge); 4653 } 4654 4655 static inline SIMD_CFUNC simd_half8 simd_step(simd_half8 edge, simd_half8 x) { 4656 return simd_bitselect((simd_half8)1, 0, x < edge); 4657 } 4658 4659 static inline SIMD_CFUNC simd_half16 simd_step(simd_half16 edge, simd_half16 x) { 4660 return simd_bitselect((simd_half16)1, 0, x < edge); 4661 } 4662 4663 static inline SIMD_CFUNC simd_half32 simd_step(simd_half32 edge, simd_half32 x) { 4664 return simd_bitselect((simd_half32)1, 0, x < edge); 4665 } 4666 4667 
static inline SIMD_CFUNC float simd_step(float edge, float x) { 4668 return !(x < edge); 4669 } 4670 4671 static inline SIMD_CFUNC simd_float2 simd_step(simd_float2 edge, simd_float2 x) { 4672 return simd_bitselect((simd_float2)1, 0, x < edge); 4673 } 4674 4675 static inline SIMD_CFUNC simd_float3 simd_step(simd_float3 edge, simd_float3 x) { 4676 return simd_bitselect((simd_float3)1, 0, x < edge); 4677 } 4678 4679 static inline SIMD_CFUNC simd_float4 simd_step(simd_float4 edge, simd_float4 x) { 4680 return simd_bitselect((simd_float4)1, 0, x < edge); 4681 } 4682 4683 static inline SIMD_CFUNC simd_float8 simd_step(simd_float8 edge, simd_float8 x) { 4684 return simd_bitselect((simd_float8)1, 0, x < edge); 4685 } 4686 4687 static inline SIMD_CFUNC simd_float16 simd_step(simd_float16 edge, simd_float16 x) { 4688 return simd_bitselect((simd_float16)1, 0, x < edge); 4689 } 4690 4691 static inline SIMD_CFUNC double simd_step(double edge, double x) { 4692 return !(x < edge); 4693 } 4694 4695 static inline SIMD_CFUNC simd_double2 simd_step(simd_double2 edge, simd_double2 x) { 4696 return simd_bitselect((simd_double2)1, 0, x < edge); 4697 } 4698 4699 static inline SIMD_CFUNC simd_double3 simd_step(simd_double3 edge, simd_double3 x) { 4700 return simd_bitselect((simd_double3)1, 0, x < edge); 4701 } 4702 4703 static inline SIMD_CFUNC simd_double4 simd_step(simd_double4 edge, simd_double4 x) { 4704 return simd_bitselect((simd_double4)1, 0, x < edge); 4705 } 4706 4707 static inline SIMD_CFUNC simd_double8 simd_step(simd_double8 edge, simd_double8 x) { 4708 return simd_bitselect((simd_double8)1, 0, x < edge); 4709 } 4710 4711 static inline SIMD_CFUNC _Float16 simd_smoothstep(_Float16 edge0, _Float16 edge1, _Float16 x) { 4712 _Float16 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); 4713 return t*t*(3 - 2*t); 4714 } 4715 4716 static inline SIMD_CFUNC simd_half2 simd_smoothstep(simd_half2 edge0, simd_half2 edge1, simd_half2 x) { 4717 simd_half2 t = simd_clamp((x - edge0)/(edge1 - 
edge0), 0, 1); 4718 return t*t*(3 - 2*t); 4719 } 4720 4721 static inline SIMD_CFUNC simd_half3 simd_smoothstep(simd_half3 edge0, simd_half3 edge1, simd_half3 x) { 4722 simd_half3 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); 4723 return t*t*(3 - 2*t); 4724 } 4725 4726 static inline SIMD_CFUNC simd_half4 simd_smoothstep(simd_half4 edge0, simd_half4 edge1, simd_half4 x) { 4727 simd_half4 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); 4728 return t*t*(3 - 2*t); 4729 } 4730 4731 static inline SIMD_CFUNC simd_half8 simd_smoothstep(simd_half8 edge0, simd_half8 edge1, simd_half8 x) { 4732 simd_half8 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); 4733 return t*t*(3 - 2*t); 4734 } 4735 4736 static inline SIMD_CFUNC simd_half16 simd_smoothstep(simd_half16 edge0, simd_half16 edge1, simd_half16 x) { 4737 simd_half16 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); 4738 return t*t*(3 - 2*t); 4739 } 4740 4741 static inline SIMD_CFUNC simd_half32 simd_smoothstep(simd_half32 edge0, simd_half32 edge1, simd_half32 x) { 4742 simd_half32 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); 4743 return t*t*(3 - 2*t); 4744 } 4745 4746 static inline SIMD_CFUNC float simd_smoothstep(float edge0, float edge1, float x) { 4747 float t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); 4748 return t*t*(3 - 2*t); 4749 } 4750 4751 static inline SIMD_CFUNC simd_float2 simd_smoothstep(simd_float2 edge0, simd_float2 edge1, simd_float2 x) { 4752 simd_float2 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); 4753 return t*t*(3 - 2*t); 4754 } 4755 4756 static inline SIMD_CFUNC simd_float3 simd_smoothstep(simd_float3 edge0, simd_float3 edge1, simd_float3 x) { 4757 simd_float3 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); 4758 return t*t*(3 - 2*t); 4759 } 4760 4761 static inline SIMD_CFUNC simd_float4 simd_smoothstep(simd_float4 edge0, simd_float4 edge1, simd_float4 x) { 4762 simd_float4 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); 4763 return t*t*(3 - 2*t); 4764 } 4765 4766 
static inline SIMD_CFUNC simd_float8 simd_smoothstep(simd_float8 edge0, simd_float8 edge1, simd_float8 x) { 4767 simd_float8 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); 4768 return t*t*(3 - 2*t); 4769 } 4770 4771 static inline SIMD_CFUNC simd_float16 simd_smoothstep(simd_float16 edge0, simd_float16 edge1, simd_float16 x) { 4772 simd_float16 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); 4773 return t*t*(3 - 2*t); 4774 } 4775 4776 static inline SIMD_CFUNC double simd_smoothstep(double edge0, double edge1, double x) { 4777 double t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); 4778 return t*t*(3 - 2*t); 4779 } 4780 4781 static inline SIMD_CFUNC simd_double2 simd_smoothstep(simd_double2 edge0, simd_double2 edge1, simd_double2 x) { 4782 simd_double2 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); 4783 return t*t*(3 - 2*t); 4784 } 4785 4786 static inline SIMD_CFUNC simd_double3 simd_smoothstep(simd_double3 edge0, simd_double3 edge1, simd_double3 x) { 4787 simd_double3 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); 4788 return t*t*(3 - 2*t); 4789 } 4790 4791 static inline SIMD_CFUNC simd_double4 simd_smoothstep(simd_double4 edge0, simd_double4 edge1, simd_double4 x) { 4792 simd_double4 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); 4793 return t*t*(3 - 2*t); 4794 } 4795 4796 static inline SIMD_CFUNC simd_double8 simd_smoothstep(simd_double8 edge0, simd_double8 edge1, simd_double8 x) { 4797 simd_double8 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); 4798 return t*t*(3 - 2*t); 4799 } 4800 4801 static inline SIMD_CFUNC char simd_reduce_add(simd_char2 x) { 4802 return x.x + x.y; 4803 } 4804 4805 static inline SIMD_CFUNC char simd_reduce_add(simd_char3 x) { 4806 return x.x + x.y + x.z; 4807 } 4808 4809 static inline SIMD_CFUNC char simd_reduce_add(simd_char4 x) { 4810 return simd_reduce_add(x.lo + x.hi); 4811 } 4812 4813 static inline SIMD_CFUNC char simd_reduce_add(simd_char8 x) { 4814 return simd_reduce_add(x.lo + x.hi); 4815 } 4816 4817 static 
inline SIMD_CFUNC char simd_reduce_add(simd_char16 x) { 4818 #if defined __arm64__ || defined __aarch64__ 4819 return vaddvq_s8(x); 4820 #else 4821 return simd_reduce_add(x.lo + x.hi); 4822 #endif 4823 } 4824 4825 static inline SIMD_CFUNC char simd_reduce_add(simd_char32 x) { 4826 return simd_reduce_add(x.lo + x.hi); 4827 } 4828 4829 static inline SIMD_CFUNC char simd_reduce_add(simd_char64 x) { 4830 return simd_reduce_add(x.lo + x.hi); 4831 } 4832 4833 static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar2 x) { 4834 return x.x + x.y; 4835 } 4836 4837 static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar3 x) { 4838 return x.x + x.y + x.z; 4839 } 4840 4841 static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar4 x) { 4842 return simd_reduce_add(x.lo + x.hi); 4843 } 4844 4845 static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar8 x) { 4846 return simd_reduce_add(x.lo + x.hi); 4847 } 4848 4849 static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar16 x) { 4850 #if defined __arm64__ || defined __aarch64__ 4851 return vaddvq_u8(x); 4852 #else 4853 return simd_reduce_add(x.lo + x.hi); 4854 #endif 4855 } 4856 4857 static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar32 x) { 4858 return simd_reduce_add(x.lo + x.hi); 4859 } 4860 4861 static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar64 x) { 4862 return simd_reduce_add(x.lo + x.hi); 4863 } 4864 4865 static inline SIMD_CFUNC short simd_reduce_add(simd_short2 x) { 4866 return x.x + x.y; 4867 } 4868 4869 static inline SIMD_CFUNC short simd_reduce_add(simd_short3 x) { 4870 return x.x + x.y + x.z; 4871 } 4872 4873 static inline SIMD_CFUNC short simd_reduce_add(simd_short4 x) { 4874 return simd_reduce_add(x.lo + x.hi); 4875 } 4876 4877 static inline SIMD_CFUNC short simd_reduce_add(simd_short8 x) { 4878 #if defined __arm64__ || defined __aarch64__ 4879 return vaddvq_s16(x); 4880 #else 4881 return simd_reduce_add(x.lo + x.hi); 4882 #endif 4883 } 4884 
4885 static inline SIMD_CFUNC short simd_reduce_add(simd_short16 x) { 4886 return simd_reduce_add(x.lo + x.hi); 4887 } 4888 4889 static inline SIMD_CFUNC short simd_reduce_add(simd_short32 x) { 4890 return simd_reduce_add(x.lo + x.hi); 4891 } 4892 4893 static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort2 x) { 4894 return x.x + x.y; 4895 } 4896 4897 static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort3 x) { 4898 return x.x + x.y + x.z; 4899 } 4900 4901 static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort4 x) { 4902 return simd_reduce_add(x.lo + x.hi); 4903 } 4904 4905 static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort8 x) { 4906 #if defined __arm64__ || defined __aarch64__ 4907 return vaddvq_u16(x); 4908 #else 4909 return simd_reduce_add(x.lo + x.hi); 4910 #endif 4911 } 4912 4913 static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort16 x) { 4914 return simd_reduce_add(x.lo + x.hi); 4915 } 4916 4917 static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort32 x) { 4918 return simd_reduce_add(x.lo + x.hi); 4919 } 4920 4921 static inline SIMD_CFUNC int simd_reduce_add(simd_int2 x) { 4922 return x.x + x.y; 4923 } 4924 4925 static inline SIMD_CFUNC int simd_reduce_add(simd_int3 x) { 4926 return x.x + x.y + x.z; 4927 } 4928 4929 static inline SIMD_CFUNC int simd_reduce_add(simd_int4 x) { 4930 #if defined __arm64__ || defined __aarch64__ 4931 return vaddvq_s32(x); 4932 #else 4933 return simd_reduce_add(x.lo + x.hi); 4934 #endif 4935 } 4936 4937 static inline SIMD_CFUNC int simd_reduce_add(simd_int8 x) { 4938 return simd_reduce_add(x.lo + x.hi); 4939 } 4940 4941 static inline SIMD_CFUNC int simd_reduce_add(simd_int16 x) { 4942 return simd_reduce_add(x.lo + x.hi); 4943 } 4944 4945 static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint2 x) { 4946 return x.x + x.y; 4947 } 4948 4949 static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint3 x) { 4950 return x.x + x.y + x.z; 4951 
} 4952 4953 static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint4 x) { 4954 #if defined __arm64__ || defined __aarch64__ 4955 return vaddvq_u32(x); 4956 #else 4957 return simd_reduce_add(x.lo + x.hi); 4958 #endif 4959 } 4960 4961 static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint8 x) { 4962 return simd_reduce_add(x.lo + x.hi); 4963 } 4964 4965 static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint16 x) { 4966 return simd_reduce_add(x.lo + x.hi); 4967 } 4968 4969 static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long2 x) { 4970 return x.x + x.y; 4971 } 4972 4973 static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long3 x) { 4974 return x.x + x.y + x.z; 4975 } 4976 4977 static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long4 x) { 4978 return simd_reduce_add(x.lo + x.hi); 4979 } 4980 4981 static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long8 x) { 4982 return simd_reduce_add(x.lo + x.hi); 4983 } 4984 4985 static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong2 x) { 4986 return x.x + x.y; 4987 } 4988 4989 static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong3 x) { 4990 return x.x + x.y + x.z; 4991 } 4992 4993 static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong4 x) { 4994 return simd_reduce_add(x.lo + x.hi); 4995 } 4996 4997 static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong8 x) { 4998 return simd_reduce_add(x.lo + x.hi); 4999 } 5000 5001 static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half2 x) { 5002 return x.x + x.y; 5003 } 5004 5005 static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half3 x) { 5006 return x.x + x.y + x.z; 5007 } 5008 5009 static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half4 x) { 5010 return simd_reduce_add(x.lo + x.hi); 5011 } 5012 5013 static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half8 x) { 5014 return simd_reduce_add(x.lo + x.hi); 5015 } 5016 5017 static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half16 x) { 5018 
return simd_reduce_add(x.lo + x.hi); 5019 } 5020 5021 static inline SIMD_CFUNC _Float16 simd_reduce_add(simd_half32 x) { 5022 return simd_reduce_add(x.lo + x.hi); 5023 } 5024 5025 static inline SIMD_CFUNC float simd_reduce_add(simd_float2 x) { 5026 return x.x + x.y; 5027 } 5028 5029 static inline SIMD_CFUNC float simd_reduce_add(simd_float3 x) { 5030 return x.x + x.y + x.z; 5031 } 5032 5033 static inline SIMD_CFUNC float simd_reduce_add(simd_float4 x) { 5034 return simd_reduce_add(x.lo + x.hi); 5035 } 5036 5037 static inline SIMD_CFUNC float simd_reduce_add(simd_float8 x) { 5038 return simd_reduce_add(x.lo + x.hi); 5039 } 5040 5041 static inline SIMD_CFUNC float simd_reduce_add(simd_float16 x) { 5042 return simd_reduce_add(x.lo + x.hi); 5043 } 5044 5045 static inline SIMD_CFUNC double simd_reduce_add(simd_double2 x) { 5046 return x.x + x.y; 5047 } 5048 5049 static inline SIMD_CFUNC double simd_reduce_add(simd_double3 x) { 5050 return x.x + x.y + x.z; 5051 } 5052 5053 static inline SIMD_CFUNC double simd_reduce_add(simd_double4 x) { 5054 return simd_reduce_add(x.lo + x.hi); 5055 } 5056 5057 static inline SIMD_CFUNC double simd_reduce_add(simd_double8 x) { 5058 return simd_reduce_add(x.lo + x.hi); 5059 } 5060 5061 static inline SIMD_CFUNC char simd_reduce_min(simd_char2 x) { 5062 return x.y < x.x ? x.y : x.x; 5063 } 5064 5065 static inline SIMD_CFUNC char simd_reduce_min(simd_char3 x) { 5066 char t = x.z < x.x ? x.z : x.x; 5067 return x.y < t ? 
x.y : t; 5068 } 5069 5070 static inline SIMD_CFUNC char simd_reduce_min(simd_char4 x) { 5071 return simd_reduce_min(simd_min(x.lo, x.hi)); 5072 } 5073 5074 static inline SIMD_CFUNC char simd_reduce_min(simd_char8 x) { 5075 return simd_reduce_min(simd_min(x.lo, x.hi)); 5076 } 5077 5078 static inline SIMD_CFUNC char simd_reduce_min(simd_char16 x) { 5079 #if defined __arm64__ || defined __aarch64__ 5080 return vminvq_s8(x); 5081 #else 5082 return simd_reduce_min(simd_min(x.lo, x.hi)); 5083 #endif 5084 } 5085 5086 static inline SIMD_CFUNC char simd_reduce_min(simd_char32 x) { 5087 return simd_reduce_min(simd_min(x.lo, x.hi)); 5088 } 5089 5090 static inline SIMD_CFUNC char simd_reduce_min(simd_char64 x) { 5091 return simd_reduce_min(simd_min(x.lo, x.hi)); 5092 } 5093 5094 static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar2 x) { 5095 return x.y < x.x ? x.y : x.x; 5096 } 5097 5098 static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar3 x) { 5099 unsigned char t = x.z < x.x ? x.z : x.x; 5100 return x.y < t ? x.y : t; 5101 } 5102 5103 static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar4 x) { 5104 return simd_reduce_min(simd_min(x.lo, x.hi)); 5105 } 5106 5107 static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar8 x) { 5108 return simd_reduce_min(simd_min(x.lo, x.hi)); 5109 } 5110 5111 static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar16 x) { 5112 #if defined __arm64__ || defined __aarch64__ 5113 return vminvq_u8(x); 5114 #else 5115 return simd_reduce_min(simd_min(x.lo, x.hi)); 5116 #endif 5117 } 5118 5119 static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar32 x) { 5120 return simd_reduce_min(simd_min(x.lo, x.hi)); 5121 } 5122 5123 static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar64 x) { 5124 return simd_reduce_min(simd_min(x.lo, x.hi)); 5125 } 5126 5127 static inline SIMD_CFUNC short simd_reduce_min(simd_short2 x) { 5128 return x.y < x.x ? 
x.y : x.x; 5129 } 5130 5131 static inline SIMD_CFUNC short simd_reduce_min(simd_short3 x) { 5132 short t = x.z < x.x ? x.z : x.x; 5133 return x.y < t ? x.y : t; 5134 } 5135 5136 static inline SIMD_CFUNC short simd_reduce_min(simd_short4 x) { 5137 return simd_reduce_min(simd_min(x.lo, x.hi)); 5138 } 5139 5140 static inline SIMD_CFUNC short simd_reduce_min(simd_short8 x) { 5141 #if defined __arm64__ || defined __aarch64__ 5142 return vminvq_s16(x); 5143 #else 5144 return simd_reduce_min(simd_min(x.lo, x.hi)); 5145 #endif 5146 } 5147 5148 static inline SIMD_CFUNC short simd_reduce_min(simd_short16 x) { 5149 return simd_reduce_min(simd_min(x.lo, x.hi)); 5150 } 5151 5152 static inline SIMD_CFUNC short simd_reduce_min(simd_short32 x) { 5153 return simd_reduce_min(simd_min(x.lo, x.hi)); 5154 } 5155 5156 static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort2 x) { 5157 return x.y < x.x ? x.y : x.x; 5158 } 5159 5160 static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort3 x) { 5161 unsigned short t = x.z < x.x ? x.z : x.x; 5162 return x.y < t ? x.y : t; 5163 } 5164 5165 static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort4 x) { 5166 return simd_reduce_min(simd_min(x.lo, x.hi)); 5167 } 5168 5169 static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort8 x) { 5170 #if defined __arm64__ || defined __aarch64__ 5171 return vminvq_u16(x); 5172 #else 5173 return simd_reduce_min(simd_min(x.lo, x.hi)); 5174 #endif 5175 } 5176 5177 static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort16 x) { 5178 return simd_reduce_min(simd_min(x.lo, x.hi)); 5179 } 5180 5181 static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort32 x) { 5182 return simd_reduce_min(simd_min(x.lo, x.hi)); 5183 } 5184 5185 static inline SIMD_CFUNC int simd_reduce_min(simd_int2 x) { 5186 return x.y < x.x ? x.y : x.x; 5187 } 5188 5189 static inline SIMD_CFUNC int simd_reduce_min(simd_int3 x) { 5190 int t = x.z < x.x ? 
x.z : x.x; 5191 return x.y < t ? x.y : t; 5192 } 5193 5194 static inline SIMD_CFUNC int simd_reduce_min(simd_int4 x) { 5195 #if defined __arm64__ || defined __aarch64__ 5196 return vminvq_s32(x); 5197 #else 5198 return simd_reduce_min(simd_min(x.lo, x.hi)); 5199 #endif 5200 } 5201 5202 static inline SIMD_CFUNC int simd_reduce_min(simd_int8 x) { 5203 return simd_reduce_min(simd_min(x.lo, x.hi)); 5204 } 5205 5206 static inline SIMD_CFUNC int simd_reduce_min(simd_int16 x) { 5207 return simd_reduce_min(simd_min(x.lo, x.hi)); 5208 } 5209 5210 static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint2 x) { 5211 return x.y < x.x ? x.y : x.x; 5212 } 5213 5214 static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint3 x) { 5215 unsigned int t = x.z < x.x ? x.z : x.x; 5216 return x.y < t ? x.y : t; 5217 } 5218 5219 static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint4 x) { 5220 #if defined __arm64__ || defined __aarch64__ 5221 return vminvq_u32(x); 5222 #else 5223 return simd_reduce_min(simd_min(x.lo, x.hi)); 5224 #endif 5225 } 5226 5227 static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint8 x) { 5228 return simd_reduce_min(simd_min(x.lo, x.hi)); 5229 } 5230 5231 static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint16 x) { 5232 return simd_reduce_min(simd_min(x.lo, x.hi)); 5233 } 5234 5235 static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long2 x) { 5236 return x.y < x.x ? x.y : x.x; 5237 } 5238 5239 static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long3 x) { 5240 simd_long1 t = x.z < x.x ? x.z : x.x; 5241 return x.y < t ? x.y : t; 5242 } 5243 5244 static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long4 x) { 5245 return simd_reduce_min(simd_min(x.lo, x.hi)); 5246 } 5247 5248 static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long8 x) { 5249 return simd_reduce_min(simd_min(x.lo, x.hi)); 5250 } 5251 5252 static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong2 x) { 5253 return x.y < x.x ? 
x.y : x.x; 5254 } 5255 5256 static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong3 x) { 5257 simd_ulong1 t = x.z < x.x ? x.z : x.x; 5258 return x.y < t ? x.y : t; 5259 } 5260 5261 static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong4 x) { 5262 return simd_reduce_min(simd_min(x.lo, x.hi)); 5263 } 5264 5265 static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong8 x) { 5266 return simd_reduce_min(simd_min(x.lo, x.hi)); 5267 } 5268 5269 static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half2 x) { 5270 return __fminf16(x.x, x.y); 5271 } 5272 5273 static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half3 x) { 5274 return __fminf16(__fminf16(x.x, x.z), x.y); 5275 } 5276 5277 static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half4 x) { 5278 return simd_reduce_min(simd_min(x.lo, x.hi)); 5279 } 5280 5281 static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half8 x) { 5282 return simd_reduce_min(simd_min(x.lo, x.hi)); 5283 } 5284 5285 static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half16 x) { 5286 return simd_reduce_min(simd_min(x.lo, x.hi)); 5287 } 5288 5289 static inline SIMD_CFUNC _Float16 simd_reduce_min(simd_half32 x) { 5290 return simd_reduce_min(simd_min(x.lo, x.hi)); 5291 } 5292 5293 static inline SIMD_CFUNC float simd_reduce_min(simd_float2 x) { 5294 return fmin(x.x, x.y); 5295 } 5296 5297 static inline SIMD_CFUNC float simd_reduce_min(simd_float3 x) { 5298 return fmin(fmin(x.x, x.z), x.y); 5299 } 5300 5301 static inline SIMD_CFUNC float simd_reduce_min(simd_float4 x) { 5302 #if defined __arm64__ || defined __aarch64__ 5303 return vminvq_f32(x); 5304 #else 5305 return simd_reduce_min(simd_min(x.lo, x.hi)); 5306 #endif 5307 } 5308 5309 static inline SIMD_CFUNC float simd_reduce_min(simd_float8 x) { 5310 return simd_reduce_min(simd_min(x.lo, x.hi)); 5311 } 5312 5313 static inline SIMD_CFUNC float simd_reduce_min(simd_float16 x) { 5314 return simd_reduce_min(simd_min(x.lo, x.hi)); 5315 } 5316 5317 static inline 
SIMD_CFUNC double simd_reduce_min(simd_double2 x) { 5318 #if defined __arm64__ || defined __aarch64__ 5319 return vminvq_f64(x); 5320 #else 5321 return fmin(x.x, x.y); 5322 #endif 5323 } 5324 5325 static inline SIMD_CFUNC double simd_reduce_min(simd_double3 x) { 5326 return fmin(fmin(x.x, x.z), x.y); 5327 } 5328 5329 static inline SIMD_CFUNC double simd_reduce_min(simd_double4 x) { 5330 return simd_reduce_min(simd_min(x.lo, x.hi)); 5331 } 5332 5333 static inline SIMD_CFUNC double simd_reduce_min(simd_double8 x) { 5334 return simd_reduce_min(simd_min(x.lo, x.hi)); 5335 } 5336 5337 static inline SIMD_CFUNC char simd_reduce_max(simd_char2 x) { 5338 return x.y > x.x ? x.y : x.x; 5339 } 5340 5341 static inline SIMD_CFUNC char simd_reduce_max(simd_char3 x) { 5342 char t = x.z > x.x ? x.z : x.x; 5343 return x.y > t ? x.y : t; 5344 } 5345 5346 static inline SIMD_CFUNC char simd_reduce_max(simd_char4 x) { 5347 return simd_reduce_max(simd_max(x.lo, x.hi)); 5348 } 5349 5350 static inline SIMD_CFUNC char simd_reduce_max(simd_char8 x) { 5351 return simd_reduce_max(simd_max(x.lo, x.hi)); 5352 } 5353 5354 static inline SIMD_CFUNC char simd_reduce_max(simd_char16 x) { 5355 #if defined __arm64__ || defined __aarch64__ 5356 return vmaxvq_s8(x); 5357 #else 5358 return simd_reduce_max(simd_max(x.lo, x.hi)); 5359 #endif 5360 } 5361 5362 static inline SIMD_CFUNC char simd_reduce_max(simd_char32 x) { 5363 return simd_reduce_max(simd_max(x.lo, x.hi)); 5364 } 5365 5366 static inline SIMD_CFUNC char simd_reduce_max(simd_char64 x) { 5367 return simd_reduce_max(simd_max(x.lo, x.hi)); 5368 } 5369 5370 static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar2 x) { 5371 return x.y > x.x ? x.y : x.x; 5372 } 5373 5374 static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar3 x) { 5375 unsigned char t = x.z > x.x ? x.z : x.x; 5376 return x.y > t ? 
x.y : t; 5377 } 5378 5379 static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar4 x) { 5380 return simd_reduce_max(simd_max(x.lo, x.hi)); 5381 } 5382 5383 static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar8 x) { 5384 return simd_reduce_max(simd_max(x.lo, x.hi)); 5385 } 5386 5387 static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar16 x) { 5388 #if defined __arm64__ || defined __aarch64__ 5389 return vmaxvq_u8(x); 5390 #else 5391 return simd_reduce_max(simd_max(x.lo, x.hi)); 5392 #endif 5393 } 5394 5395 static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar32 x) { 5396 return simd_reduce_max(simd_max(x.lo, x.hi)); 5397 } 5398 5399 static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar64 x) { 5400 return simd_reduce_max(simd_max(x.lo, x.hi)); 5401 } 5402 5403 static inline SIMD_CFUNC short simd_reduce_max(simd_short2 x) { 5404 return x.y > x.x ? x.y : x.x; 5405 } 5406 5407 static inline SIMD_CFUNC short simd_reduce_max(simd_short3 x) { 5408 short t = x.z > x.x ? x.z : x.x; 5409 return x.y > t ? x.y : t; 5410 } 5411 5412 static inline SIMD_CFUNC short simd_reduce_max(simd_short4 x) { 5413 return simd_reduce_max(simd_max(x.lo, x.hi)); 5414 } 5415 5416 static inline SIMD_CFUNC short simd_reduce_max(simd_short8 x) { 5417 #if defined __arm64__ || defined __aarch64__ 5418 return vmaxvq_s16(x); 5419 #else 5420 return simd_reduce_max(simd_max(x.lo, x.hi)); 5421 #endif 5422 } 5423 5424 static inline SIMD_CFUNC short simd_reduce_max(simd_short16 x) { 5425 return simd_reduce_max(simd_max(x.lo, x.hi)); 5426 } 5427 5428 static inline SIMD_CFUNC short simd_reduce_max(simd_short32 x) { 5429 return simd_reduce_max(simd_max(x.lo, x.hi)); 5430 } 5431 5432 static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort2 x) { 5433 return x.y > x.x ? x.y : x.x; 5434 } 5435 5436 static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort3 x) { 5437 unsigned short t = x.z > x.x ? x.z : x.x; 5438 return x.y > t ? 
x.y : t; 5439 } 5440 5441 static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort4 x) { 5442 return simd_reduce_max(simd_max(x.lo, x.hi)); 5443 } 5444 5445 static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort8 x) { 5446 #if defined __arm64__ || defined __aarch64__ 5447 return vmaxvq_u16(x); 5448 #else 5449 return simd_reduce_max(simd_max(x.lo, x.hi)); 5450 #endif 5451 } 5452 5453 static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort16 x) { 5454 return simd_reduce_max(simd_max(x.lo, x.hi)); 5455 } 5456 5457 static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort32 x) { 5458 return simd_reduce_max(simd_max(x.lo, x.hi)); 5459 } 5460 5461 static inline SIMD_CFUNC int simd_reduce_max(simd_int2 x) { 5462 return x.y > x.x ? x.y : x.x; 5463 } 5464 5465 static inline SIMD_CFUNC int simd_reduce_max(simd_int3 x) { 5466 int t = x.z > x.x ? x.z : x.x; 5467 return x.y > t ? x.y : t; 5468 } 5469 5470 static inline SIMD_CFUNC int simd_reduce_max(simd_int4 x) { 5471 #if defined __arm64__ || defined __aarch64__ 5472 return vmaxvq_s32(x); 5473 #else 5474 return simd_reduce_max(simd_max(x.lo, x.hi)); 5475 #endif 5476 } 5477 5478 static inline SIMD_CFUNC int simd_reduce_max(simd_int8 x) { 5479 return simd_reduce_max(simd_max(x.lo, x.hi)); 5480 } 5481 5482 static inline SIMD_CFUNC int simd_reduce_max(simd_int16 x) { 5483 return simd_reduce_max(simd_max(x.lo, x.hi)); 5484 } 5485 5486 static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint2 x) { 5487 return x.y > x.x ? x.y : x.x; 5488 } 5489 5490 static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint3 x) { 5491 unsigned int t = x.z > x.x ? x.z : x.x; 5492 return x.y > t ? 
x.y : t; 5493 } 5494 5495 static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint4 x) { 5496 #if defined __arm64__ || defined __aarch64__ 5497 return vmaxvq_u32(x); 5498 #else 5499 return simd_reduce_max(simd_max(x.lo, x.hi)); 5500 #endif 5501 } 5502 5503 static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint8 x) { 5504 return simd_reduce_max(simd_max(x.lo, x.hi)); 5505 } 5506 5507 static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint16 x) { 5508 return simd_reduce_max(simd_max(x.lo, x.hi)); 5509 } 5510 5511 static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long2 x) { 5512 return x.y > x.x ? x.y : x.x; 5513 } 5514 5515 static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long3 x) { 5516 simd_long1 t = x.z > x.x ? x.z : x.x; 5517 return x.y > t ? x.y : t; 5518 } 5519 5520 static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long4 x) { 5521 return simd_reduce_max(simd_max(x.lo, x.hi)); 5522 } 5523 5524 static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long8 x) { 5525 return simd_reduce_max(simd_max(x.lo, x.hi)); 5526 } 5527 5528 static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong2 x) { 5529 return x.y > x.x ? x.y : x.x; 5530 } 5531 5532 static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong3 x) { 5533 simd_ulong1 t = x.z > x.x ? x.z : x.x; 5534 return x.y > t ? 
x.y : t; 5535 } 5536 5537 static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong4 x) { 5538 return simd_reduce_max(simd_max(x.lo, x.hi)); 5539 } 5540 5541 static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong8 x) { 5542 return simd_reduce_max(simd_max(x.lo, x.hi)); 5543 } 5544 5545 static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half2 x) { 5546 return __fmaxf16(x.x, x.y); 5547 } 5548 5549 static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half3 x) { 5550 return __fmaxf16(__fmaxf16(x.x, x.z), x.y); 5551 } 5552 5553 static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half4 x) { 5554 return simd_reduce_max(simd_max(x.lo, x.hi)); 5555 } 5556 5557 static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half8 x) { 5558 return simd_reduce_max(simd_max(x.lo, x.hi)); 5559 } 5560 5561 static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half16 x) { 5562 return simd_reduce_max(simd_max(x.lo, x.hi)); 5563 } 5564 5565 static inline SIMD_CFUNC _Float16 simd_reduce_max(simd_half32 x) { 5566 return simd_reduce_max(simd_max(x.lo, x.hi)); 5567 } 5568 5569 static inline SIMD_CFUNC float simd_reduce_max(simd_float2 x) { 5570 return fmax(x.x, x.y); 5571 } 5572 5573 static inline SIMD_CFUNC float simd_reduce_max(simd_float3 x) { 5574 return fmax(fmax(x.x, x.z), x.y); 5575 } 5576 5577 static inline SIMD_CFUNC float simd_reduce_max(simd_float4 x) { 5578 #if defined __arm64__ || defined __aarch64__ 5579 return vmaxvq_f32(x); 5580 #else 5581 return simd_reduce_max(simd_max(x.lo, x.hi)); 5582 #endif 5583 } 5584 5585 static inline SIMD_CFUNC float simd_reduce_max(simd_float8 x) { 5586 return simd_reduce_max(simd_max(x.lo, x.hi)); 5587 } 5588 5589 static inline SIMD_CFUNC float simd_reduce_max(simd_float16 x) { 5590 return simd_reduce_max(simd_max(x.lo, x.hi)); 5591 } 5592 5593 static inline SIMD_CFUNC double simd_reduce_max(simd_double2 x) { 5594 #if defined __arm64__ || defined __aarch64__ 5595 return vmaxvq_f64(x); 5596 #else 5597 return fmax(x.x, x.y); 
5598 #endif 5599 } 5600 5601 static inline SIMD_CFUNC double simd_reduce_max(simd_double3 x) { 5602 return fmax(fmax(x.x, x.z), x.y); 5603 } 5604 5605 static inline SIMD_CFUNC double simd_reduce_max(simd_double4 x) { 5606 return simd_reduce_max(simd_max(x.lo, x.hi)); 5607 } 5608 5609 static inline SIMD_CFUNC double simd_reduce_max(simd_double8 x) { 5610 return simd_reduce_max(simd_max(x.lo, x.hi)); 5611 } 5612 5613 #ifdef __cplusplus 5614 } 5615 #endif 5616 #endif /* SIMD_COMPILER_HAS_REQUIRED_FEATURES */ 5617 #endif /* SIMD_COMMON_HEADER */