zig

fork of https://codeberg.org/ziglang/zig
Log | Files | Refs | README | LICENSE

matrix.h (242017B) - Raw


      1 /* Copyright (c) 2014-2017 Apple, Inc. All rights reserved.
      2  *
      3  *      Function                        Result
      4  *      ------------------------------------------------------------------
      5  *
      6  *      simd_diagonal_matrix(x)         A square matrix with the vector x
      7  *                                      as its diagonal.
      8  *
      9  *      simd_matrix(c0, c1, ... )       A matrix with the specified vectors
     10  *                                      as columns.
     11  *
     12  *      simd_matrix_from_rows(r0, r1, ... )  A matrix with the specified vectors
     13  *                                      as rows.
     14  *
     15  *      simd_mul(a,x)                   Scalar product a*x.
     16  *
     17  *      simd_linear_combination(a,x,b,y)  a*x + b*y.
     18  *
     19  *      simd_add(x,y)                   Macro wrapping linear_combination
     20  *                                      to compute x + y.
     21  *
     22  *      simd_sub(x,y)                   Macro wrapping linear_combination
     23  *                                      to compute x - y.
     24  *
     25  *      simd_transpose(x)               Transpose of the matrix x.
     26  *
     27  *      simd_trace(x)                   Trace of the matrix x.
     28  *
     29  *      simd_determinant(x)             Determinant of the matrix x.
     30  *
     31  *      simd_inverse(x)                 Inverse of x if x is non-singular.  If
     32  *                                      x is singular, the result is undefined.
     33  *
     34  *      simd_mul(x,y)                   If x is a matrix, returns the matrix
     35  *                                      product x*y, where y is either a matrix
     36  *                                      or a column vector.  If x is a vector,
     37  *                                      returns the product x*y where x is
     38  *                                      interpreted as a row vector.
     39  *
     40  *      simd_equal(x,y)                 Returns true if and only if every
     41  *                                      element of x is exactly equal to the
     42  *                                      corresponding element of y.
     43  *
     44  *      simd_almost_equal_elements(x,y,tol)
     45  *                                      Returns true if and only if for each
     46  *                                      entry xij in x, the corresponding
     47  *                                      element yij in y satisfies
     48  *                                      |xij - yij| <= tol.
     49  *
     50  *      simd_almost_equal_elements_relative(x,y,tol)
     51  *                                      Returns true if and only if for each
     52  *                                      entry xij in x, the corresponding
     53  *                                      element yij in y satisfies
     54  *                                      |xij - yij| <= tol*|xij|.
     55  *
     56  *  The header also defines a few useful global matrix objects:
     57  *  matrix_identity_halfNxN, matrix_identity_floatNxN and
     58  *  matrix_identity_doubleNxN (N = 2, 3, 4) give the identity matrix of that size.
     59  *
     60  *  In C++, we are able to use namespacing to make the functions more concise;
     61  *  we also overload some common arithmetic operators to work with the matrix
     62  *  types:
     63  *
     64  *      C++ Function                    Equivalent C Function
     65  *      --------------------------------------------------------------------
     66  *      simd::inverse                   simd_inverse
     67  *      simd::transpose                 simd_transpose
     68  *      operator+                       simd_add
     69  *      operator-                       simd_sub
     70  *      operator+=                      N/A
     71  *      operator-=                      N/A
     72  *      operator*                       simd_mul (scalar or matrix product)
     73  *      operator*=                      simd_mul (scalar or matrix product)
     74  *      operator==                      simd_equal
     75  *      operator!=                      !simd_equal
     76  *      simd::almost_equal_elements     simd_almost_equal_elements
     77  *      simd::almost_equal_elements_relative  simd_almost_equal_elements_relative
     78  *
     79  *  <simd/matrix_types.h> provides constructors for C++ matrix types.
     80  */
     81 
     82 #ifndef SIMD_MATRIX_HEADER
     83 #define SIMD_MATRIX_HEADER
     84 
     85 #include <simd/base.h>
     86 #if SIMD_COMPILER_HAS_REQUIRED_FEATURES
     87 #include <simd/matrix_types.h>
     88 #include <simd/geometry.h>
     89 #include <simd/extern.h>
     90 #include <simd/logic.h>
     91 
     92 #ifdef __cplusplus
     93     extern "C" {
     94 #endif
     95 
     /* Identity-matrix constants for the square 2x2, 3x3 and 4x4 matrix types in
      * half, float and double precision.  They are only declared here (extern),
      * so the objects themselves are defined outside this header.  The
      * availability annotations differ by element type: the half variants list
      * macos 15.0 / ios 18.0 / watchos 11.0 / tvos 18.0, while the float and
      * double variants list macos 10.10 / ios 8.0 / watchos 2.0 / tvos 9.0. */
     96 extern const simd_half2x2  matrix_identity_half2x2   __API_AVAILABLE(macos(15.0), ios(18.0), watchos(11.0), tvos(18.0));
     97 extern const simd_half3x3  matrix_identity_half3x3   __API_AVAILABLE(macos(15.0), ios(18.0), watchos(11.0), tvos(18.0));
     98 extern const simd_half4x4  matrix_identity_half4x4   __API_AVAILABLE(macos(15.0), ios(18.0), watchos(11.0), tvos(18.0));
     99 extern const simd_float2x2 matrix_identity_float2x2  __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0));
    100 extern const simd_float3x3 matrix_identity_float3x3  __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0));
    101 extern const simd_float4x4 matrix_identity_float4x4  __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0));
    102 extern const simd_double2x2 matrix_identity_double2x2 __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0));
    103 extern const simd_double3x3 matrix_identity_double3x3 __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0));
    104 extern const simd_double4x4 matrix_identity_double4x4 __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0));
    105 
    /* simd_diagonal_matrix(x): per the function table at the top of this header,
     * a square matrix with the vector x as its diagonal.  There is one
     * declaration per element type (half, float, double) and vector length
     * (2, 3, 4); an N-element argument yields the NxN matrix of the same
     * element type.
     * NOTE(review): all nine declarations share one name inside the extern "C"
     * region, so SIMD_CFUNC presumably makes them overloadable in C -- confirm
     * against <simd/base.h>. */
    106 static simd_half2x2 SIMD_CFUNC simd_diagonal_matrix(simd_half2 __x);
    107 static simd_half3x3 SIMD_CFUNC simd_diagonal_matrix(simd_half3 __x);
    108 static simd_half4x4 SIMD_CFUNC simd_diagonal_matrix(simd_half4 __x);
    109 static simd_float2x2 SIMD_CFUNC simd_diagonal_matrix(simd_float2 __x);
    110 static simd_float3x3 SIMD_CFUNC simd_diagonal_matrix(simd_float3 __x);
    111 static simd_float4x4 SIMD_CFUNC simd_diagonal_matrix(simd_float4 __x);
    112 static simd_double2x2 SIMD_CFUNC simd_diagonal_matrix(simd_double2 __x);
    113 static simd_double3x3 SIMD_CFUNC simd_diagonal_matrix(simd_double3 __x);
    114 static simd_double4x4 SIMD_CFUNC simd_diagonal_matrix(simd_double4 __x);
    /* Alias: matrix_from_diagonal expands to simd_diagonal_matrix. */
    115 #define matrix_from_diagonal simd_diagonal_matrix
    116 
    /* simd_matrix(c0, c1, ...): per the function table at the top of this header,
     * a matrix with the specified vectors as columns.  The declarations show the
     * type naming: the first number in a matrix type is the column count and
     * the second is the length of each column vector, e.g. three simd_half2
     * columns produce a simd_half3x2.  Declared for 2, 3 or 4 columns of
     * 2-, 3- or 4-element half, float and double vectors. */
    117 static simd_half2x2 SIMD_CFUNC simd_matrix(simd_half2  col0, simd_half2  col1);
    118 static simd_half3x2 SIMD_CFUNC simd_matrix(simd_half2  col0, simd_half2  col1, simd_half2  col2);
    119 static simd_half4x2 SIMD_CFUNC simd_matrix(simd_half2  col0, simd_half2  col1, simd_half2  col2, simd_half2  col3);
    120 static simd_half2x3 SIMD_CFUNC simd_matrix(simd_half3  col0, simd_half3  col1);
    121 static simd_half3x3 SIMD_CFUNC simd_matrix(simd_half3  col0, simd_half3  col1, simd_half3  col2);
    122 static simd_half4x3 SIMD_CFUNC simd_matrix(simd_half3  col0, simd_half3  col1, simd_half3  col2, simd_half3  col3);
    123 static simd_half2x4 SIMD_CFUNC simd_matrix(simd_half4  col0, simd_half4  col1);
    124 static simd_half3x4 SIMD_CFUNC simd_matrix(simd_half4  col0, simd_half4  col1, simd_half4  col2);
    125 static simd_half4x4 SIMD_CFUNC simd_matrix(simd_half4  col0, simd_half4  col1, simd_half4  col2, simd_half4  col3);
    126 static simd_float2x2 SIMD_CFUNC simd_matrix(simd_float2 col0, simd_float2 col1);
    127 static simd_float3x2 SIMD_CFUNC simd_matrix(simd_float2 col0, simd_float2 col1, simd_float2 col2);
    128 static simd_float4x2 SIMD_CFUNC simd_matrix(simd_float2 col0, simd_float2 col1, simd_float2 col2, simd_float2 col3);
    129 static simd_float2x3 SIMD_CFUNC simd_matrix(simd_float3 col0, simd_float3 col1);
    130 static simd_float3x3 SIMD_CFUNC simd_matrix(simd_float3 col0, simd_float3 col1, simd_float3 col2);
    131 static simd_float4x3 SIMD_CFUNC simd_matrix(simd_float3 col0, simd_float3 col1, simd_float3 col2, simd_float3 col3);
    132 static simd_float2x4 SIMD_CFUNC simd_matrix(simd_float4 col0, simd_float4 col1);
    133 static simd_float3x4 SIMD_CFUNC simd_matrix(simd_float4 col0, simd_float4 col1, simd_float4 col2);
    134 static simd_float4x4 SIMD_CFUNC simd_matrix(simd_float4 col0, simd_float4 col1, simd_float4 col2, simd_float4 col3);
    135 static simd_double2x2 SIMD_CFUNC simd_matrix(simd_double2 col0, simd_double2 col1);
    136 static simd_double3x2 SIMD_CFUNC simd_matrix(simd_double2 col0, simd_double2 col1, simd_double2 col2);
    137 static simd_double4x2 SIMD_CFUNC simd_matrix(simd_double2 col0, simd_double2 col1, simd_double2 col2, simd_double2 col3);
    138 static simd_double2x3 SIMD_CFUNC simd_matrix(simd_double3 col0, simd_double3 col1);
    139 static simd_double3x3 SIMD_CFUNC simd_matrix(simd_double3 col0, simd_double3 col1, simd_double3 col2);
    140 static simd_double4x3 SIMD_CFUNC simd_matrix(simd_double3 col0, simd_double3 col1, simd_double3 col2, simd_double3 col3);
    141 static simd_double2x4 SIMD_CFUNC simd_matrix(simd_double4 col0, simd_double4 col1);
    142 static simd_double3x4 SIMD_CFUNC simd_matrix(simd_double4 col0, simd_double4 col1, simd_double4 col2);
    143 static simd_double4x4 SIMD_CFUNC simd_matrix(simd_double4 col0, simd_double4 col1, simd_double4 col2, simd_double4 col3);
    /* Alias: matrix_from_columns expands to simd_matrix. */
    144 #define matrix_from_columns simd_matrix
    145 
    /* simd_matrix_from_rows(r0, r1, ...): per the function table at the top of
     * this header, a matrix with the specified vectors as rows.  The length of
     * each row vector becomes the first number of the result type and the number
     * of rows passed becomes the second, e.g. two simd_half3 rows produce a
     * simd_half3x2.  Declared for 2, 3 or 4 rows of 2-, 3- or 4-element half,
     * float and double vectors. */
    146 static simd_half2x2 SIMD_CFUNC simd_matrix_from_rows(simd_half2  row0, simd_half2  row1);
    147 static simd_half2x3 SIMD_CFUNC simd_matrix_from_rows(simd_half2  row0, simd_half2  row1, simd_half2  row2);
    148 static simd_half2x4 SIMD_CFUNC simd_matrix_from_rows(simd_half2  row0, simd_half2  row1, simd_half2  row2, simd_half2 row3);
    149 static simd_half3x2 SIMD_CFUNC simd_matrix_from_rows(simd_half3  row0, simd_half3  row1);
    150 static simd_half3x3 SIMD_CFUNC simd_matrix_from_rows(simd_half3  row0, simd_half3  row1, simd_half3  row2);
    151 static simd_half3x4 SIMD_CFUNC simd_matrix_from_rows(simd_half3  row0, simd_half3  row1, simd_half3  row2, simd_half3 row3);
    152 static simd_half4x2 SIMD_CFUNC simd_matrix_from_rows(simd_half4  row0, simd_half4  row1);
    153 static simd_half4x3 SIMD_CFUNC simd_matrix_from_rows(simd_half4  row0, simd_half4  row1, simd_half4  row2);
    154 static simd_half4x4 SIMD_CFUNC simd_matrix_from_rows(simd_half4  row0, simd_half4  row1, simd_half4  row2, simd_half4 row3);
    155 static simd_float2x2 SIMD_CFUNC simd_matrix_from_rows(simd_float2 row0, simd_float2 row1);
    156 static simd_float2x3 SIMD_CFUNC simd_matrix_from_rows(simd_float2 row0, simd_float2 row1, simd_float2 row2);
    157 static simd_float2x4 SIMD_CFUNC simd_matrix_from_rows(simd_float2 row0, simd_float2 row1, simd_float2 row2, simd_float2 row3);
    158 static simd_float3x2 SIMD_CFUNC simd_matrix_from_rows(simd_float3 row0, simd_float3 row1);
    159 static simd_float3x3 SIMD_CFUNC simd_matrix_from_rows(simd_float3 row0, simd_float3 row1, simd_float3 row2);
    160 static simd_float3x4 SIMD_CFUNC simd_matrix_from_rows(simd_float3 row0, simd_float3 row1, simd_float3 row2, simd_float3 row3);
    161 static simd_float4x2 SIMD_CFUNC simd_matrix_from_rows(simd_float4 row0, simd_float4 row1);
    162 static simd_float4x3 SIMD_CFUNC simd_matrix_from_rows(simd_float4 row0, simd_float4 row1, simd_float4 row2);
    163 static simd_float4x4 SIMD_CFUNC simd_matrix_from_rows(simd_float4 row0, simd_float4 row1, simd_float4 row2, simd_float4 row3);
    164 static simd_double2x2 SIMD_CFUNC simd_matrix_from_rows(simd_double2 row0, simd_double2 row1);
    165 static simd_double2x3 SIMD_CFUNC simd_matrix_from_rows(simd_double2 row0, simd_double2 row1, simd_double2 row2);
    166 static simd_double2x4 SIMD_CFUNC simd_matrix_from_rows(simd_double2 row0, simd_double2 row1, simd_double2 row2, simd_double2 row3);
    167 static simd_double3x2 SIMD_CFUNC simd_matrix_from_rows(simd_double3 row0, simd_double3 row1);
    168 static simd_double3x3 SIMD_CFUNC simd_matrix_from_rows(simd_double3 row0, simd_double3 row1, simd_double3 row2);
    169 static simd_double3x4 SIMD_CFUNC simd_matrix_from_rows(simd_double3 row0, simd_double3 row1, simd_double3 row2, simd_double3 row3);
    170 static simd_double4x2 SIMD_CFUNC simd_matrix_from_rows(simd_double4 row0, simd_double4 row1);
    171 static simd_double4x3 SIMD_CFUNC simd_matrix_from_rows(simd_double4 row0, simd_double4 row1, simd_double4 row2);
    172 static simd_double4x4 SIMD_CFUNC simd_matrix_from_rows(simd_double4 row0, simd_double4 row1, simd_double4 row2, simd_double4 row3);
    /* Alias: matrix_from_rows expands to simd_matrix_from_rows. */
    173 #define matrix_from_rows simd_matrix_from_rows
    174         
    /* simd_matrix3x3(q) / simd_matrix4x4(q): take a quaternion (simd_quath,
     * simd_quatf or simd_quatd) and return a 3x3 or 4x4 matrix of the matching
     * element type.  Unlike the neighbouring declarations these are marked
     * SIMD_NOINLINE rather than SIMD_CFUNC.
     * NOTE(review): the function table at the top of this header does not
     * describe these; presumably the result is the rotation matrix equivalent
     * to q -- confirm against the definitions. */
    175 static   simd_half3x3 SIMD_NOINLINE simd_matrix3x3(simd_quath q);
    176 static   simd_half4x4 SIMD_NOINLINE simd_matrix4x4(simd_quath q);
    177 static  simd_float3x3 SIMD_NOINLINE simd_matrix3x3(simd_quatf q);
    178 static  simd_float4x4 SIMD_NOINLINE simd_matrix4x4(simd_quatf q);
    179 static simd_double3x3 SIMD_NOINLINE simd_matrix3x3(simd_quatd q);
    180 static simd_double4x4 SIMD_NOINLINE simd_matrix4x4(simd_quatd q);
    181 
    /* simd_mul(a, x) with a scalar first argument: per the function table at the
     * top of this header, the scalar product a*x.  The scalar type matches the
     * matrix element type (_Float16, float or double) and the result has the
     * same matrix type as __x.  Declared for every 2..4 by 2..4 matrix shape. */
    182 static simd_half2x2  SIMD_CFUNC simd_mul(_Float16 __a, simd_half2x2 __x);
    183 static simd_half3x2  SIMD_CFUNC simd_mul(_Float16 __a, simd_half3x2 __x);
    184 static simd_half4x2  SIMD_CFUNC simd_mul(_Float16 __a, simd_half4x2 __x);
    185 static simd_half2x3  SIMD_CFUNC simd_mul(_Float16 __a, simd_half2x3 __x);
    186 static simd_half3x3  SIMD_CFUNC simd_mul(_Float16 __a, simd_half3x3 __x);
    187 static simd_half4x3  SIMD_CFUNC simd_mul(_Float16 __a, simd_half4x3 __x);
    188 static simd_half2x4  SIMD_CFUNC simd_mul(_Float16 __a, simd_half2x4 __x);
    189 static simd_half3x4  SIMD_CFUNC simd_mul(_Float16 __a, simd_half3x4 __x);
    190 static simd_half4x4  SIMD_CFUNC simd_mul(_Float16 __a, simd_half4x4 __x);
    191 static simd_float2x2 SIMD_CFUNC simd_mul(float __a, simd_float2x2 __x);
    192 static simd_float3x2 SIMD_CFUNC simd_mul(float __a, simd_float3x2 __x);
    193 static simd_float4x2 SIMD_CFUNC simd_mul(float __a, simd_float4x2 __x);
    194 static simd_float2x3 SIMD_CFUNC simd_mul(float __a, simd_float2x3 __x);
    195 static simd_float3x3 SIMD_CFUNC simd_mul(float __a, simd_float3x3 __x);
    196 static simd_float4x3 SIMD_CFUNC simd_mul(float __a, simd_float4x3 __x);
    197 static simd_float2x4 SIMD_CFUNC simd_mul(float __a, simd_float2x4 __x);
    198 static simd_float3x4 SIMD_CFUNC simd_mul(float __a, simd_float3x4 __x);
    199 static simd_float4x4 SIMD_CFUNC simd_mul(float __a, simd_float4x4 __x);
    200 static simd_double2x2 SIMD_CFUNC simd_mul(double __a, simd_double2x2 __x);
    201 static simd_double3x2 SIMD_CFUNC simd_mul(double __a, simd_double3x2 __x);
    202 static simd_double4x2 SIMD_CFUNC simd_mul(double __a, simd_double4x2 __x);
    203 static simd_double2x3 SIMD_CFUNC simd_mul(double __a, simd_double2x3 __x);
    204 static simd_double3x3 SIMD_CFUNC simd_mul(double __a, simd_double3x3 __x);
    205 static simd_double4x3 SIMD_CFUNC simd_mul(double __a, simd_double4x3 __x);
    206 static simd_double2x4 SIMD_CFUNC simd_mul(double __a, simd_double2x4 __x);
    207 static simd_double3x4 SIMD_CFUNC simd_mul(double __a, simd_double3x4 __x);
    208 static simd_double4x4 SIMD_CFUNC simd_mul(double __a, simd_double4x4 __x);
    209 
    /* simd_linear_combination(a, x, b, y): per the function table at the top of
     * this header, a*x + b*y.  __a and __b are scalars of the matrix element
     * type; __x, __y and the result all have the same matrix type.  Declared
     * for every 2..4 by 2..4 shape in half, float and double. */
    210 static simd_half2x2 SIMD_CFUNC simd_linear_combination(_Float16 __a, simd_half2x2 __x, _Float16 __b, simd_half2x2 __y);
    211 static simd_half3x2 SIMD_CFUNC simd_linear_combination(_Float16 __a, simd_half3x2 __x, _Float16 __b, simd_half3x2 __y);
    212 static simd_half4x2 SIMD_CFUNC simd_linear_combination(_Float16 __a, simd_half4x2 __x, _Float16 __b, simd_half4x2 __y);
    213 static simd_half2x3 SIMD_CFUNC simd_linear_combination(_Float16 __a, simd_half2x3 __x, _Float16 __b, simd_half2x3 __y);
    214 static simd_half3x3 SIMD_CFUNC simd_linear_combination(_Float16 __a, simd_half3x3 __x, _Float16 __b, simd_half3x3 __y);
    215 static simd_half4x3 SIMD_CFUNC simd_linear_combination(_Float16 __a, simd_half4x3 __x, _Float16 __b, simd_half4x3 __y);
    216 static simd_half2x4 SIMD_CFUNC simd_linear_combination(_Float16 __a, simd_half2x4 __x, _Float16 __b, simd_half2x4 __y);
    217 static simd_half3x4 SIMD_CFUNC simd_linear_combination(_Float16 __a, simd_half3x4 __x, _Float16 __b, simd_half3x4 __y);
    218 static simd_half4x4 SIMD_CFUNC simd_linear_combination(_Float16 __a, simd_half4x4 __x, _Float16 __b, simd_half4x4 __y);
    219 static simd_float2x2 SIMD_CFUNC simd_linear_combination(float __a, simd_float2x2 __x, float __b, simd_float2x2 __y);
    220 static simd_float3x2 SIMD_CFUNC simd_linear_combination(float __a, simd_float3x2 __x, float __b, simd_float3x2 __y);
    221 static simd_float4x2 SIMD_CFUNC simd_linear_combination(float __a, simd_float4x2 __x, float __b, simd_float4x2 __y);
    222 static simd_float2x3 SIMD_CFUNC simd_linear_combination(float __a, simd_float2x3 __x, float __b, simd_float2x3 __y);
    223 static simd_float3x3 SIMD_CFUNC simd_linear_combination(float __a, simd_float3x3 __x, float __b, simd_float3x3 __y);
    224 static simd_float4x3 SIMD_CFUNC simd_linear_combination(float __a, simd_float4x3 __x, float __b, simd_float4x3 __y);
    225 static simd_float2x4 SIMD_CFUNC simd_linear_combination(float __a, simd_float2x4 __x, float __b, simd_float2x4 __y);
    226 static simd_float3x4 SIMD_CFUNC simd_linear_combination(float __a, simd_float3x4 __x, float __b, simd_float3x4 __y);
    227 static simd_float4x4 SIMD_CFUNC simd_linear_combination(float __a, simd_float4x4 __x, float __b, simd_float4x4 __y);
    228 static simd_double2x2 SIMD_CFUNC simd_linear_combination(double __a, simd_double2x2 __x, double __b, simd_double2x2 __y);
    229 static simd_double3x2 SIMD_CFUNC simd_linear_combination(double __a, simd_double3x2 __x, double __b, simd_double3x2 __y);
    230 static simd_double4x2 SIMD_CFUNC simd_linear_combination(double __a, simd_double4x2 __x, double __b, simd_double4x2 __y);
    231 static simd_double2x3 SIMD_CFUNC simd_linear_combination(double __a, simd_double2x3 __x, double __b, simd_double2x3 __y);
    232 static simd_double3x3 SIMD_CFUNC simd_linear_combination(double __a, simd_double3x3 __x, double __b, simd_double3x3 __y);
    233 static simd_double4x3 SIMD_CFUNC simd_linear_combination(double __a, simd_double4x3 __x, double __b, simd_double4x3 __y);
    234 static simd_double2x4 SIMD_CFUNC simd_linear_combination(double __a, simd_double2x4 __x, double __b, simd_double2x4 __y);
    235 static simd_double3x4 SIMD_CFUNC simd_linear_combination(double __a, simd_double3x4 __x, double __b, simd_double3x4 __y);
    236 static simd_double4x4 SIMD_CFUNC simd_linear_combination(double __a, simd_double4x4 __x, double __b, simd_double4x4 __y);
    /* Alias: matrix_linear_combination expands to simd_linear_combination. */
    237 #define matrix_linear_combination simd_linear_combination
    238       
    /* simd_add(x, y): computes x + y for two matrices of the same type and
     * returns that type.  The function table at the top of this header calls
     * simd_add a macro wrapping linear_combination; in these declarations it is
     * a function for each matrix type, and only matrix_add is a macro. */
    239 static simd_half2x2 SIMD_CFUNC simd_add(simd_half2x2  __x, simd_half2x2  __y);
    240 static simd_half3x2 SIMD_CFUNC simd_add(simd_half3x2  __x, simd_half3x2  __y);
    241 static simd_half4x2 SIMD_CFUNC simd_add(simd_half4x2  __x, simd_half4x2  __y);
    242 static simd_half2x3 SIMD_CFUNC simd_add(simd_half2x3  __x, simd_half2x3  __y);
    243 static simd_half3x3 SIMD_CFUNC simd_add(simd_half3x3  __x, simd_half3x3  __y);
    244 static simd_half4x3 SIMD_CFUNC simd_add(simd_half4x3  __x, simd_half4x3  __y);
    245 static simd_half2x4 SIMD_CFUNC simd_add(simd_half2x4  __x, simd_half2x4  __y);
    246 static simd_half3x4 SIMD_CFUNC simd_add(simd_half3x4  __x, simd_half3x4  __y);
    247 static simd_half4x4 SIMD_CFUNC simd_add(simd_half4x4  __x, simd_half4x4  __y);
    248 static simd_float2x2 SIMD_CFUNC simd_add(simd_float2x2 __x, simd_float2x2 __y);
    249 static simd_float3x2 SIMD_CFUNC simd_add(simd_float3x2 __x, simd_float3x2 __y);
    250 static simd_float4x2 SIMD_CFUNC simd_add(simd_float4x2 __x, simd_float4x2 __y);
    251 static simd_float2x3 SIMD_CFUNC simd_add(simd_float2x3 __x, simd_float2x3 __y);
    252 static simd_float3x3 SIMD_CFUNC simd_add(simd_float3x3 __x, simd_float3x3 __y);
    253 static simd_float4x3 SIMD_CFUNC simd_add(simd_float4x3 __x, simd_float4x3 __y);
    254 static simd_float2x4 SIMD_CFUNC simd_add(simd_float2x4 __x, simd_float2x4 __y);
    255 static simd_float3x4 SIMD_CFUNC simd_add(simd_float3x4 __x, simd_float3x4 __y);
    256 static simd_float4x4 SIMD_CFUNC simd_add(simd_float4x4 __x, simd_float4x4 __y);
    257 static simd_double2x2 SIMD_CFUNC simd_add(simd_double2x2 __x, simd_double2x2 __y);
    258 static simd_double3x2 SIMD_CFUNC simd_add(simd_double3x2 __x, simd_double3x2 __y);
    259 static simd_double4x2 SIMD_CFUNC simd_add(simd_double4x2 __x, simd_double4x2 __y);
    260 static simd_double2x3 SIMD_CFUNC simd_add(simd_double2x3 __x, simd_double2x3 __y);
    261 static simd_double3x3 SIMD_CFUNC simd_add(simd_double3x3 __x, simd_double3x3 __y);
    262 static simd_double4x3 SIMD_CFUNC simd_add(simd_double4x3 __x, simd_double4x3 __y);
    263 static simd_double2x4 SIMD_CFUNC simd_add(simd_double2x4 __x, simd_double2x4 __y);
    264 static simd_double3x4 SIMD_CFUNC simd_add(simd_double3x4 __x, simd_double3x4 __y);
    265 static simd_double4x4 SIMD_CFUNC simd_add(simd_double4x4 __x, simd_double4x4 __y);
    /* Alias: matrix_add expands to simd_add. */
    266 #define matrix_add simd_add
    267       
    /* simd_sub(x, y): computes x - y for two matrices of the same type and
     * returns that type.  The function table at the top of this header calls
     * simd_sub a macro wrapping linear_combination; in these declarations it is
     * a function for each matrix type, and only matrix_sub is a macro. */
    268 static simd_half2x2 SIMD_CFUNC simd_sub(simd_half2x2  __x, simd_half2x2  __y);
    269 static simd_half3x2 SIMD_CFUNC simd_sub(simd_half3x2  __x, simd_half3x2  __y);
    270 static simd_half4x2 SIMD_CFUNC simd_sub(simd_half4x2  __x, simd_half4x2  __y);
    271 static simd_half2x3 SIMD_CFUNC simd_sub(simd_half2x3  __x, simd_half2x3  __y);
    272 static simd_half3x3 SIMD_CFUNC simd_sub(simd_half3x3  __x, simd_half3x3  __y);
    273 static simd_half4x3 SIMD_CFUNC simd_sub(simd_half4x3  __x, simd_half4x3  __y);
    274 static simd_half2x4 SIMD_CFUNC simd_sub(simd_half2x4  __x, simd_half2x4  __y);
    275 static simd_half3x4 SIMD_CFUNC simd_sub(simd_half3x4  __x, simd_half3x4  __y);
    276 static simd_half4x4 SIMD_CFUNC simd_sub(simd_half4x4  __x, simd_half4x4  __y);
    277 static simd_float2x2 SIMD_CFUNC simd_sub(simd_float2x2 __x, simd_float2x2 __y);
    278 static simd_float3x2 SIMD_CFUNC simd_sub(simd_float3x2 __x, simd_float3x2 __y);
    279 static simd_float4x2 SIMD_CFUNC simd_sub(simd_float4x2 __x, simd_float4x2 __y);
    280 static simd_float2x3 SIMD_CFUNC simd_sub(simd_float2x3 __x, simd_float2x3 __y);
    281 static simd_float3x3 SIMD_CFUNC simd_sub(simd_float3x3 __x, simd_float3x3 __y);
    282 static simd_float4x3 SIMD_CFUNC simd_sub(simd_float4x3 __x, simd_float4x3 __y);
    283 static simd_float2x4 SIMD_CFUNC simd_sub(simd_float2x4 __x, simd_float2x4 __y);
    284 static simd_float3x4 SIMD_CFUNC simd_sub(simd_float3x4 __x, simd_float3x4 __y);
    285 static simd_float4x4 SIMD_CFUNC simd_sub(simd_float4x4 __x, simd_float4x4 __y);
    286 static simd_double2x2 SIMD_CFUNC simd_sub(simd_double2x2 __x, simd_double2x2 __y);
    287 static simd_double3x2 SIMD_CFUNC simd_sub(simd_double3x2 __x, simd_double3x2 __y);
    288 static simd_double4x2 SIMD_CFUNC simd_sub(simd_double4x2 __x, simd_double4x2 __y);
    289 static simd_double2x3 SIMD_CFUNC simd_sub(simd_double2x3 __x, simd_double2x3 __y);
    290 static simd_double3x3 SIMD_CFUNC simd_sub(simd_double3x3 __x, simd_double3x3 __y);
    291 static simd_double4x3 SIMD_CFUNC simd_sub(simd_double4x3 __x, simd_double4x3 __y);
    292 static simd_double2x4 SIMD_CFUNC simd_sub(simd_double2x4 __x, simd_double2x4 __y);
    293 static simd_double3x4 SIMD_CFUNC simd_sub(simd_double3x4 __x, simd_double3x4 __y);
    294 static simd_double4x4 SIMD_CFUNC simd_sub(simd_double4x4 __x, simd_double4x4 __y);
    /* Alias: matrix_sub expands to simd_sub. */
    295 #define matrix_sub simd_sub
    296 
    /* simd_transpose(x): per the function table at the top of this header, the
     * transpose of the matrix x.  The result type has the two dimensions of
     * the argument swapped (e.g. a simd_half3x2 argument returns simd_half2x3);
     * square types map to themselves. */
    297 static simd_half2x2 SIMD_CFUNC simd_transpose(simd_half2x2  __x);
    298 static simd_half2x3 SIMD_CFUNC simd_transpose(simd_half3x2  __x);
    299 static simd_half2x4 SIMD_CFUNC simd_transpose(simd_half4x2  __x);
    300 static simd_half3x2 SIMD_CFUNC simd_transpose(simd_half2x3  __x);
    301 static simd_half3x3 SIMD_CFUNC simd_transpose(simd_half3x3  __x);
    302 static simd_half3x4 SIMD_CFUNC simd_transpose(simd_half4x3  __x);
    303 static simd_half4x2 SIMD_CFUNC simd_transpose(simd_half2x4  __x);
    304 static simd_half4x3 SIMD_CFUNC simd_transpose(simd_half3x4  __x);
    305 static simd_half4x4 SIMD_CFUNC simd_transpose(simd_half4x4  __x);
    306 static simd_float2x2 SIMD_CFUNC simd_transpose(simd_float2x2 __x);
    307 static simd_float2x3 SIMD_CFUNC simd_transpose(simd_float3x2 __x);
    308 static simd_float2x4 SIMD_CFUNC simd_transpose(simd_float4x2 __x);
    309 static simd_float3x2 SIMD_CFUNC simd_transpose(simd_float2x3 __x);
    310 static simd_float3x3 SIMD_CFUNC simd_transpose(simd_float3x3 __x);
    311 static simd_float3x4 SIMD_CFUNC simd_transpose(simd_float4x3 __x);
    312 static simd_float4x2 SIMD_CFUNC simd_transpose(simd_float2x4 __x);
    313 static simd_float4x3 SIMD_CFUNC simd_transpose(simd_float3x4 __x);
    314 static simd_float4x4 SIMD_CFUNC simd_transpose(simd_float4x4 __x);
    315 static simd_double2x2 SIMD_CFUNC simd_transpose(simd_double2x2 __x);
    316 static simd_double2x3 SIMD_CFUNC simd_transpose(simd_double3x2 __x);
    317 static simd_double2x4 SIMD_CFUNC simd_transpose(simd_double4x2 __x);
    318 static simd_double3x2 SIMD_CFUNC simd_transpose(simd_double2x3 __x);
    319 static simd_double3x3 SIMD_CFUNC simd_transpose(simd_double3x3 __x);
    320 static simd_double3x4 SIMD_CFUNC simd_transpose(simd_double4x3 __x);
    321 static simd_double4x2 SIMD_CFUNC simd_transpose(simd_double2x4 __x);
    322 static simd_double4x3 SIMD_CFUNC simd_transpose(simd_double3x4 __x);
    323 static simd_double4x4 SIMD_CFUNC simd_transpose(simd_double4x4 __x);
    /* Alias: matrix_transpose expands to simd_transpose. */
    324 #define matrix_transpose simd_transpose
    325 
    /* simd_trace(x): per the function table at the top of this header, the trace
     * of the matrix x.  Declared only for the square 2x2, 3x3 and 4x4 types;
     * the result is a scalar of the matrix element type. */
    326 static _Float16 SIMD_CFUNC simd_trace(simd_half2x2 __x);
    327 static _Float16 SIMD_CFUNC simd_trace(simd_half3x3 __x);
    328 static _Float16 SIMD_CFUNC simd_trace(simd_half4x4 __x);
    329 static float SIMD_CFUNC simd_trace(simd_float2x2 __x);
    330 static float SIMD_CFUNC simd_trace(simd_float3x3 __x);
    331 static float SIMD_CFUNC simd_trace(simd_float4x4 __x);
    332 static double SIMD_CFUNC simd_trace(simd_double2x2 __x);
    333 static double SIMD_CFUNC simd_trace(simd_double3x3 __x);
    334 static double SIMD_CFUNC simd_trace(simd_double4x4 __x);
    /* Alias: matrix_trace expands to simd_trace. */
    335 #define matrix_trace simd_trace
    336 
    /* simd_determinant(x): per the function table at the top of this header, the
     * determinant of the matrix x.  Declared only for the square 2x2, 3x3 and
     * 4x4 types; the result is a scalar of the matrix element type. */
    337 static _Float16 SIMD_CFUNC simd_determinant(simd_half2x2 __x);
    338 static _Float16 SIMD_CFUNC simd_determinant(simd_half3x3 __x);
    339 static _Float16 SIMD_CFUNC simd_determinant(simd_half4x4 __x);
    340 static float SIMD_CFUNC simd_determinant(simd_float2x2 __x);
    341 static float SIMD_CFUNC simd_determinant(simd_float3x3 __x);
    342 static float SIMD_CFUNC simd_determinant(simd_float4x4 __x);
    343 static double SIMD_CFUNC simd_determinant(simd_double2x2 __x);
    344 static double SIMD_CFUNC simd_determinant(simd_double3x3 __x);
    345 static double SIMD_CFUNC simd_determinant(simd_double4x4 __x);
    /* Alias: matrix_determinant expands to simd_determinant. */
    346 #define matrix_determinant simd_determinant
    347 
    /* simd_inverse(x): per the function table at the top of this header, the
     * inverse of x if x is non-singular; if x is singular the result is
     * undefined.  Declared only for the square 2x2, 3x3 and 4x4 types.  Each
     * declaration carries an availability annotation: the half variants list
     * macos 15.0 / ios 18.0 / watchos 11.0 / tvos 18.0, the float and double
     * variants list macos 10.10 / ios 8.0 / watchos 2.0 / tvos 9.0. */
    348 static simd_half2x2 SIMD_CFUNC simd_inverse(simd_half2x2 __x) __API_AVAILABLE(macos(15.0), ios(18.0), watchos(11.0), tvos(18.0));
    349 static simd_half3x3 SIMD_CFUNC simd_inverse(simd_half3x3 __x) __API_AVAILABLE(macos(15.0), ios(18.0), watchos(11.0), tvos(18.0));
    350 static simd_half4x4 SIMD_CFUNC simd_inverse(simd_half4x4 __x) __API_AVAILABLE(macos(15.0), ios(18.0), watchos(11.0), tvos(18.0));
    351 static simd_float2x2 SIMD_CFUNC simd_inverse(simd_float2x2 __x) __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0));
    352 static simd_float3x3 SIMD_CFUNC simd_inverse(simd_float3x3 __x) __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0));
    353 static simd_float4x4 SIMD_CFUNC simd_inverse(simd_float4x4 __x) __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0));
    354 static simd_double2x2 SIMD_CFUNC simd_inverse(simd_double2x2 __x) __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0));
    355 static simd_double3x3 SIMD_CFUNC simd_inverse(simd_double3x3 __x) __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0));
    356 static simd_double4x4 SIMD_CFUNC simd_inverse(simd_double4x4 __x) __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0));
    /* Alias: matrix_invert expands to simd_inverse (note the differing suffix). */
    357 #define matrix_invert simd_inverse
    358 
    /* simd_mul(x, y) with a matrix first and a vector second: per the function
     * table at the top of this header, the matrix product x*y with y treated as
     * a column vector.  In these declarations the length of __y equals the
     * first number of the matrix type (its column count) and the length of the
     * result equals the second number, e.g. simd_half3x2 times simd_half3
     * returns simd_half2. */
    359 static simd_half2 SIMD_CFUNC simd_mul(simd_half2x2 __x, simd_half2 __y);
    360 static simd_half2 SIMD_CFUNC simd_mul(simd_half3x2 __x, simd_half3 __y);
    361 static simd_half2 SIMD_CFUNC simd_mul(simd_half4x2 __x, simd_half4 __y);
    362 static simd_half3 SIMD_CFUNC simd_mul(simd_half2x3 __x, simd_half2 __y);
    363 static simd_half3 SIMD_CFUNC simd_mul(simd_half3x3 __x, simd_half3 __y);
    364 static simd_half3 SIMD_CFUNC simd_mul(simd_half4x3 __x, simd_half4 __y);
    365 static simd_half4 SIMD_CFUNC simd_mul(simd_half2x4 __x, simd_half2 __y);
    366 static simd_half4 SIMD_CFUNC simd_mul(simd_half3x4 __x, simd_half3 __y);
    367 static simd_half4 SIMD_CFUNC simd_mul(simd_half4x4 __x, simd_half4 __y);
    368 static simd_float2 SIMD_CFUNC simd_mul(simd_float2x2 __x, simd_float2 __y);
    369 static simd_float2 SIMD_CFUNC simd_mul(simd_float3x2 __x, simd_float3 __y);
    370 static simd_float2 SIMD_CFUNC simd_mul(simd_float4x2 __x, simd_float4 __y);
    371 static simd_float3 SIMD_CFUNC simd_mul(simd_float2x3 __x, simd_float2 __y);
    372 static simd_float3 SIMD_CFUNC simd_mul(simd_float3x3 __x, simd_float3 __y);
    373 static simd_float3 SIMD_CFUNC simd_mul(simd_float4x3 __x, simd_float4 __y);
    374 static simd_float4 SIMD_CFUNC simd_mul(simd_float2x4 __x, simd_float2 __y);
    375 static simd_float4 SIMD_CFUNC simd_mul(simd_float3x4 __x, simd_float3 __y);
    376 static simd_float4 SIMD_CFUNC simd_mul(simd_float4x4 __x, simd_float4 __y);
    377 static simd_double2 SIMD_CFUNC simd_mul(simd_double2x2 __x, simd_double2 __y);
    378 static simd_double2 SIMD_CFUNC simd_mul(simd_double3x2 __x, simd_double3 __y);
    379 static simd_double2 SIMD_CFUNC simd_mul(simd_double4x2 __x, simd_double4 __y);
    380 static simd_double3 SIMD_CFUNC simd_mul(simd_double2x3 __x, simd_double2 __y);
    381 static simd_double3 SIMD_CFUNC simd_mul(simd_double3x3 __x, simd_double3 __y);
    382 static simd_double3 SIMD_CFUNC simd_mul(simd_double4x3 __x, simd_double4 __y);
    383 static simd_double4 SIMD_CFUNC simd_mul(simd_double2x4 __x, simd_double2 __y);
    384 static simd_double4 SIMD_CFUNC simd_mul(simd_double3x4 __x, simd_double3 __y);
    385 static simd_double4 SIMD_CFUNC simd_mul(simd_double4x4 __x, simd_double4 __y);
    /* simd_mul(x, y) with a vector first and a matrix second: per the function
     * table at the top of this header, the product x*y with x interpreted as a
     * row vector.  In these declarations the length of __x equals the second
     * number of the matrix type and the length of the result equals the first
     * number (its column count), e.g. simd_half2 times simd_half3x2 returns
     * simd_half3. */
    386 static simd_half2 SIMD_CFUNC simd_mul(simd_half2 __x, simd_half2x2 __y);
    387 static simd_half3 SIMD_CFUNC simd_mul(simd_half2 __x, simd_half3x2 __y);
    388 static simd_half4 SIMD_CFUNC simd_mul(simd_half2 __x, simd_half4x2 __y);
    389 static simd_half2 SIMD_CFUNC simd_mul(simd_half3 __x, simd_half2x3 __y);
    390 static simd_half3 SIMD_CFUNC simd_mul(simd_half3 __x, simd_half3x3 __y);
    391 static simd_half4 SIMD_CFUNC simd_mul(simd_half3 __x, simd_half4x3 __y);
    392 static simd_half2 SIMD_CFUNC simd_mul(simd_half4 __x, simd_half2x4 __y);
    393 static simd_half3 SIMD_CFUNC simd_mul(simd_half4 __x, simd_half3x4 __y);
    394 static simd_half4 SIMD_CFUNC simd_mul(simd_half4 __x, simd_half4x4 __y);
    395 static simd_float2 SIMD_CFUNC simd_mul(simd_float2 __x, simd_float2x2 __y);
    396 static simd_float3 SIMD_CFUNC simd_mul(simd_float2 __x, simd_float3x2 __y);
    397 static simd_float4 SIMD_CFUNC simd_mul(simd_float2 __x, simd_float4x2 __y);
    398 static simd_float2 SIMD_CFUNC simd_mul(simd_float3 __x, simd_float2x3 __y);
    399 static simd_float3 SIMD_CFUNC simd_mul(simd_float3 __x, simd_float3x3 __y);
    400 static simd_float4 SIMD_CFUNC simd_mul(simd_float3 __x, simd_float4x3 __y);
    401 static simd_float2 SIMD_CFUNC simd_mul(simd_float4 __x, simd_float2x4 __y);
    402 static simd_float3 SIMD_CFUNC simd_mul(simd_float4 __x, simd_float3x4 __y);
    403 static simd_float4 SIMD_CFUNC simd_mul(simd_float4 __x, simd_float4x4 __y);
    404 static simd_double2 SIMD_CFUNC simd_mul(simd_double2 __x, simd_double2x2 __y);
    405 static simd_double3 SIMD_CFUNC simd_mul(simd_double2 __x, simd_double3x2 __y);
    406 static simd_double4 SIMD_CFUNC simd_mul(simd_double2 __x, simd_double4x2 __y);
    407 static simd_double2 SIMD_CFUNC simd_mul(simd_double3 __x, simd_double2x3 __y);
    408 static simd_double3 SIMD_CFUNC simd_mul(simd_double3 __x, simd_double3x3 __y);
    409 static simd_double4 SIMD_CFUNC simd_mul(simd_double3 __x, simd_double4x3 __y);
    410 static simd_double2 SIMD_CFUNC simd_mul(simd_double4 __x, simd_double2x4 __y);
    411 static simd_double3 SIMD_CFUNC simd_mul(simd_double4 __x, simd_double3x4 __y);
    412 static simd_double4 SIMD_CFUNC simd_mul(simd_double4 __x, simd_double4x4 __y);
    /* simd_mul(matrix, matrix): matrix product x*y.
     * Matrix type names read CxR. In every overload the first number of x
     * matches the second number of y, and the result takes its first number
     * from y and its second number from x (e.g. 2x3 times 3x2 gives 3x3).
     * The list is grouped by the first number of x (2, then 3, then 4), and
     * within each group by element type (half, float, double). */
    413 static simd_half2x2 SIMD_CFUNC simd_mul(simd_half2x2 __x, simd_half2x2 __y);
    414 static simd_half3x2 SIMD_CFUNC simd_mul(simd_half2x2 __x, simd_half3x2 __y);
    415 static simd_half4x2 SIMD_CFUNC simd_mul(simd_half2x2 __x, simd_half4x2 __y);
    416 static simd_half2x3 SIMD_CFUNC simd_mul(simd_half2x3 __x, simd_half2x2 __y);
    417 static simd_half3x3 SIMD_CFUNC simd_mul(simd_half2x3 __x, simd_half3x2 __y);
    418 static simd_half4x3 SIMD_CFUNC simd_mul(simd_half2x3 __x, simd_half4x2 __y);
    419 static simd_half2x4 SIMD_CFUNC simd_mul(simd_half2x4 __x, simd_half2x2 __y);
    420 static simd_half3x4 SIMD_CFUNC simd_mul(simd_half2x4 __x, simd_half3x2 __y);
    421 static simd_half4x4 SIMD_CFUNC simd_mul(simd_half2x4 __x, simd_half4x2 __y);
    422 static simd_float2x2 SIMD_CFUNC simd_mul(simd_float2x2 __x, simd_float2x2 __y);
    423 static simd_float3x2 SIMD_CFUNC simd_mul(simd_float2x2 __x, simd_float3x2 __y);
    424 static simd_float4x2 SIMD_CFUNC simd_mul(simd_float2x2 __x, simd_float4x2 __y);
    425 static simd_float2x3 SIMD_CFUNC simd_mul(simd_float2x3 __x, simd_float2x2 __y);
    426 static simd_float3x3 SIMD_CFUNC simd_mul(simd_float2x3 __x, simd_float3x2 __y);
    427 static simd_float4x3 SIMD_CFUNC simd_mul(simd_float2x3 __x, simd_float4x2 __y);
    428 static simd_float2x4 SIMD_CFUNC simd_mul(simd_float2x4 __x, simd_float2x2 __y);
    429 static simd_float3x4 SIMD_CFUNC simd_mul(simd_float2x4 __x, simd_float3x2 __y);
    430 static simd_float4x4 SIMD_CFUNC simd_mul(simd_float2x4 __x, simd_float4x2 __y);
    431 static simd_double2x2 SIMD_CFUNC simd_mul(simd_double2x2 __x, simd_double2x2 __y);
    432 static simd_double3x2 SIMD_CFUNC simd_mul(simd_double2x2 __x, simd_double3x2 __y);
    433 static simd_double4x2 SIMD_CFUNC simd_mul(simd_double2x2 __x, simd_double4x2 __y);
    434 static simd_double2x3 SIMD_CFUNC simd_mul(simd_double2x3 __x, simd_double2x2 __y);
    435 static simd_double3x3 SIMD_CFUNC simd_mul(simd_double2x3 __x, simd_double3x2 __y);
    436 static simd_double4x3 SIMD_CFUNC simd_mul(simd_double2x3 __x, simd_double4x2 __y);
    437 static simd_double2x4 SIMD_CFUNC simd_mul(simd_double2x4 __x, simd_double2x2 __y);
    438 static simd_double3x4 SIMD_CFUNC simd_mul(simd_double2x4 __x, simd_double3x2 __y);
    439 static simd_double4x4 SIMD_CFUNC simd_mul(simd_double2x4 __x, simd_double4x2 __y);
    440 static simd_half2x2 SIMD_CFUNC simd_mul(simd_half3x2 __x, simd_half2x3 __y);
    441 static simd_half3x2 SIMD_CFUNC simd_mul(simd_half3x2 __x, simd_half3x3 __y);
    442 static simd_half4x2 SIMD_CFUNC simd_mul(simd_half3x2 __x, simd_half4x3 __y);
    443 static simd_half2x3 SIMD_CFUNC simd_mul(simd_half3x3 __x, simd_half2x3 __y);
    444 static simd_half3x3 SIMD_CFUNC simd_mul(simd_half3x3 __x, simd_half3x3 __y);
    445 static simd_half4x3 SIMD_CFUNC simd_mul(simd_half3x3 __x, simd_half4x3 __y);
    446 static simd_half2x4 SIMD_CFUNC simd_mul(simd_half3x4 __x, simd_half2x3 __y);
    447 static simd_half3x4 SIMD_CFUNC simd_mul(simd_half3x4 __x, simd_half3x3 __y);
    448 static simd_half4x4 SIMD_CFUNC simd_mul(simd_half3x4 __x, simd_half4x3 __y);
    449 static simd_float2x2 SIMD_CFUNC simd_mul(simd_float3x2 __x, simd_float2x3 __y);
    450 static simd_float3x2 SIMD_CFUNC simd_mul(simd_float3x2 __x, simd_float3x3 __y);
    451 static simd_float4x2 SIMD_CFUNC simd_mul(simd_float3x2 __x, simd_float4x3 __y);
    452 static simd_float2x3 SIMD_CFUNC simd_mul(simd_float3x3 __x, simd_float2x3 __y);
    453 static simd_float3x3 SIMD_CFUNC simd_mul(simd_float3x3 __x, simd_float3x3 __y);
    454 static simd_float4x3 SIMD_CFUNC simd_mul(simd_float3x3 __x, simd_float4x3 __y);
    455 static simd_float2x4 SIMD_CFUNC simd_mul(simd_float3x4 __x, simd_float2x3 __y);
    456 static simd_float3x4 SIMD_CFUNC simd_mul(simd_float3x4 __x, simd_float3x3 __y);
    457 static simd_float4x4 SIMD_CFUNC simd_mul(simd_float3x4 __x, simd_float4x3 __y);
    458 static simd_double2x2 SIMD_CFUNC simd_mul(simd_double3x2 __x, simd_double2x3 __y);
    459 static simd_double3x2 SIMD_CFUNC simd_mul(simd_double3x2 __x, simd_double3x3 __y);
    460 static simd_double4x2 SIMD_CFUNC simd_mul(simd_double3x2 __x, simd_double4x3 __y);
    461 static simd_double2x3 SIMD_CFUNC simd_mul(simd_double3x3 __x, simd_double2x3 __y);
    462 static simd_double3x3 SIMD_CFUNC simd_mul(simd_double3x3 __x, simd_double3x3 __y);
    463 static simd_double4x3 SIMD_CFUNC simd_mul(simd_double3x3 __x, simd_double4x3 __y);
    464 static simd_double2x4 SIMD_CFUNC simd_mul(simd_double3x4 __x, simd_double2x3 __y);
    465 static simd_double3x4 SIMD_CFUNC simd_mul(simd_double3x4 __x, simd_double3x3 __y);
    466 static simd_double4x4 SIMD_CFUNC simd_mul(simd_double3x4 __x, simd_double4x3 __y);
    467 static simd_half2x2 SIMD_CFUNC simd_mul(simd_half4x2 __x, simd_half2x4 __y);
    468 static simd_half3x2 SIMD_CFUNC simd_mul(simd_half4x2 __x, simd_half3x4 __y);
    469 static simd_half4x2 SIMD_CFUNC simd_mul(simd_half4x2 __x, simd_half4x4 __y);
    470 static simd_half2x3 SIMD_CFUNC simd_mul(simd_half4x3 __x, simd_half2x4 __y);
    471 static simd_half3x3 SIMD_CFUNC simd_mul(simd_half4x3 __x, simd_half3x4 __y);
    472 static simd_half4x3 SIMD_CFUNC simd_mul(simd_half4x3 __x, simd_half4x4 __y);
    473 static simd_half2x4 SIMD_CFUNC simd_mul(simd_half4x4 __x, simd_half2x4 __y);
    474 static simd_half3x4 SIMD_CFUNC simd_mul(simd_half4x4 __x, simd_half3x4 __y);
    475 static simd_half4x4 SIMD_CFUNC simd_mul(simd_half4x4 __x, simd_half4x4 __y);
    476 static simd_float2x2 SIMD_CFUNC simd_mul(simd_float4x2 __x, simd_float2x4 __y);
    477 static simd_float3x2 SIMD_CFUNC simd_mul(simd_float4x2 __x, simd_float3x4 __y);
    478 static simd_float4x2 SIMD_CFUNC simd_mul(simd_float4x2 __x, simd_float4x4 __y);
    479 static simd_float2x3 SIMD_CFUNC simd_mul(simd_float4x3 __x, simd_float2x4 __y);
    480 static simd_float3x3 SIMD_CFUNC simd_mul(simd_float4x3 __x, simd_float3x4 __y);
    481 static simd_float4x3 SIMD_CFUNC simd_mul(simd_float4x3 __x, simd_float4x4 __y);
    482 static simd_float2x4 SIMD_CFUNC simd_mul(simd_float4x4 __x, simd_float2x4 __y);
    483 static simd_float3x4 SIMD_CFUNC simd_mul(simd_float4x4 __x, simd_float3x4 __y);
    484 static simd_float4x4 SIMD_CFUNC simd_mul(simd_float4x4 __x, simd_float4x4 __y);
    485 static simd_double2x2 SIMD_CFUNC simd_mul(simd_double4x2 __x, simd_double2x4 __y);
    486 static simd_double3x2 SIMD_CFUNC simd_mul(simd_double4x2 __x, simd_double3x4 __y);
    487 static simd_double4x2 SIMD_CFUNC simd_mul(simd_double4x2 __x, simd_double4x4 __y);
    488 static simd_double2x3 SIMD_CFUNC simd_mul(simd_double4x3 __x, simd_double2x4 __y);
    489 static simd_double3x3 SIMD_CFUNC simd_mul(simd_double4x3 __x, simd_double3x4 __y);
    490 static simd_double4x3 SIMD_CFUNC simd_mul(simd_double4x3 __x, simd_double4x4 __y);
    491 static simd_double2x4 SIMD_CFUNC simd_mul(simd_double4x4 __x, simd_double2x4 __y);
    492 static simd_double3x4 SIMD_CFUNC simd_mul(simd_double4x4 __x, simd_double3x4 __y);
    493 static simd_double4x4 SIMD_CFUNC simd_mul(simd_double4x4 __x, simd_double4x4 __y);
    494     
    /* simd_equal(x, y): compares two matrices of the same type and returns a
     * simd_bool. One overload exists for every half/float/double matrix shape.
     * NOTE(review): presumably true only when every element of x equals the
     * corresponding element of y; the definitions are outside this section, so
     * confirm there. The legacy name matrix_equal is kept as a macro alias. */
    495 static simd_bool SIMD_CFUNC simd_equal(simd_half2x2 __x, simd_half2x2 __y);
    496 static simd_bool SIMD_CFUNC simd_equal(simd_half2x3 __x, simd_half2x3 __y);
    497 static simd_bool SIMD_CFUNC simd_equal(simd_half2x4 __x, simd_half2x4 __y);
    498 static simd_bool SIMD_CFUNC simd_equal(simd_half3x2 __x, simd_half3x2 __y);
    499 static simd_bool SIMD_CFUNC simd_equal(simd_half3x3 __x, simd_half3x3 __y);
    500 static simd_bool SIMD_CFUNC simd_equal(simd_half3x4 __x, simd_half3x4 __y);
    501 static simd_bool SIMD_CFUNC simd_equal(simd_half4x2 __x, simd_half4x2 __y);
    502 static simd_bool SIMD_CFUNC simd_equal(simd_half4x3 __x, simd_half4x3 __y);
    503 static simd_bool SIMD_CFUNC simd_equal(simd_half4x4 __x, simd_half4x4 __y);
    504 static simd_bool SIMD_CFUNC simd_equal(simd_float2x2 __x, simd_float2x2 __y);
    505 static simd_bool SIMD_CFUNC simd_equal(simd_float2x3 __x, simd_float2x3 __y);
    506 static simd_bool SIMD_CFUNC simd_equal(simd_float2x4 __x, simd_float2x4 __y);
    507 static simd_bool SIMD_CFUNC simd_equal(simd_float3x2 __x, simd_float3x2 __y);
    508 static simd_bool SIMD_CFUNC simd_equal(simd_float3x3 __x, simd_float3x3 __y);
    509 static simd_bool SIMD_CFUNC simd_equal(simd_float3x4 __x, simd_float3x4 __y);
    510 static simd_bool SIMD_CFUNC simd_equal(simd_float4x2 __x, simd_float4x2 __y);
    511 static simd_bool SIMD_CFUNC simd_equal(simd_float4x3 __x, simd_float4x3 __y);
    512 static simd_bool SIMD_CFUNC simd_equal(simd_float4x4 __x, simd_float4x4 __y);
    513 static simd_bool SIMD_CFUNC simd_equal(simd_double2x2 __x, simd_double2x2 __y);
    514 static simd_bool SIMD_CFUNC simd_equal(simd_double2x3 __x, simd_double2x3 __y);
    515 static simd_bool SIMD_CFUNC simd_equal(simd_double2x4 __x, simd_double2x4 __y);
    516 static simd_bool SIMD_CFUNC simd_equal(simd_double3x2 __x, simd_double3x2 __y);
    517 static simd_bool SIMD_CFUNC simd_equal(simd_double3x3 __x, simd_double3x3 __y);
    518 static simd_bool SIMD_CFUNC simd_equal(simd_double3x4 __x, simd_double3x4 __y);
    519 static simd_bool SIMD_CFUNC simd_equal(simd_double4x2 __x, simd_double4x2 __y);
    520 static simd_bool SIMD_CFUNC simd_equal(simd_double4x3 __x, simd_double4x3 __y);
    521 static simd_bool SIMD_CFUNC simd_equal(simd_double4x4 __x, simd_double4x4 __y);
    522 #define matrix_equal simd_equal
    523       
    /* simd_almost_equal_elements(x, y, tol): approximate comparison of two
     * matrices of the same type, returning a simd_bool. The tolerance __tol
     * has the matrix's scalar type (_Float16, float or double).
     * NOTE(review): presumably an absolute per-element tolerance test, i.e.
     * |x[i][j] - y[i][j]| bounded by tol; the definitions are outside this
     * section, so confirm the exact comparison there. The legacy name
     * matrix_almost_equal_elements is kept as a macro alias. */
    524 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_half2x2 __x, simd_half2x2 __y, _Float16 __tol);
    525 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_half2x3 __x, simd_half2x3 __y, _Float16 __tol);
    526 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_half2x4 __x, simd_half2x4 __y, _Float16 __tol);
    527 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_half3x2 __x, simd_half3x2 __y, _Float16 __tol);
    528 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_half3x3 __x, simd_half3x3 __y, _Float16 __tol);
    529 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_half3x4 __x, simd_half3x4 __y, _Float16 __tol);
    530 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_half4x2 __x, simd_half4x2 __y, _Float16 __tol);
    531 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_half4x3 __x, simd_half4x3 __y, _Float16 __tol);
    532 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_half4x4 __x, simd_half4x4 __y, _Float16 __tol);
    533 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float2x2 __x, simd_float2x2 __y, float __tol);
    534 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float2x3 __x, simd_float2x3 __y, float __tol);
    535 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float2x4 __x, simd_float2x4 __y, float __tol);
    536 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float3x2 __x, simd_float3x2 __y, float __tol);
    537 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float3x3 __x, simd_float3x3 __y, float __tol);
    538 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float3x4 __x, simd_float3x4 __y, float __tol);
    539 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float4x2 __x, simd_float4x2 __y, float __tol);
    540 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float4x3 __x, simd_float4x3 __y, float __tol);
    541 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float4x4 __x, simd_float4x4 __y, float __tol);
    542 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double2x2 __x, simd_double2x2 __y, double __tol);
    543 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double2x3 __x, simd_double2x3 __y, double __tol);
    544 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double2x4 __x, simd_double2x4 __y, double __tol);
    545 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double3x2 __x, simd_double3x2 __y, double __tol);
    546 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double3x3 __x, simd_double3x3 __y, double __tol);
    547 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double3x4 __x, simd_double3x4 __y, double __tol);
    548 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double4x2 __x, simd_double4x2 __y, double __tol);
    549 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double4x3 __x, simd_double4x3 __y, double __tol);
    550 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double4x4 __x, simd_double4x4 __y, double __tol);
    551 #define matrix_almost_equal_elements simd_almost_equal_elements
    552       
    /* simd_almost_equal_elements_relative(x, y, tol): approximate comparison
     * of two matrices of the same type, returning a simd_bool. The tolerance
     * __tol has the matrix's scalar type (_Float16, float or double).
     * NOTE(review): presumably the tolerance is applied relative to element
     * magnitude rather than as an absolute bound; the definitions are outside
     * this section, so confirm the exact formula there. The legacy name
     * matrix_almost_equal_elements_relative is kept as a macro alias. */
    553 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_half2x2 __x, simd_half2x2 __y, _Float16 __tol);
    554 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_half2x3 __x, simd_half2x3 __y, _Float16 __tol);
    555 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_half2x4 __x, simd_half2x4 __y, _Float16 __tol);
    556 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_half3x2 __x, simd_half3x2 __y, _Float16 __tol);
    557 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_half3x3 __x, simd_half3x3 __y, _Float16 __tol);
    558 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_half3x4 __x, simd_half3x4 __y, _Float16 __tol);
    559 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_half4x2 __x, simd_half4x2 __y, _Float16 __tol);
    560 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_half4x3 __x, simd_half4x3 __y, _Float16 __tol);
    561 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_half4x4 __x, simd_half4x4 __y, _Float16 __tol);
    562 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float2x2 __x, simd_float2x2 __y, float __tol);
    563 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float2x3 __x, simd_float2x3 __y, float __tol);
    564 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float2x4 __x, simd_float2x4 __y, float __tol);
    565 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float3x2 __x, simd_float3x2 __y, float __tol);
    566 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float3x3 __x, simd_float3x3 __y, float __tol);
    567 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float3x4 __x, simd_float3x4 __y, float __tol);
    568 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float4x2 __x, simd_float4x2 __y, float __tol);
    569 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float4x3 __x, simd_float4x3 __y, float __tol);
    570 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float4x4 __x, simd_float4x4 __y, float __tol);
    571 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double2x2 __x, simd_double2x2 __y, double __tol);
    572 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double2x3 __x, simd_double2x3 __y, double __tol);
    573 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double2x4 __x, simd_double2x4 __y, double __tol);
    574 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double3x2 __x, simd_double3x2 __y, double __tol);
    575 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double3x3 __x, simd_double3x3 __y, double __tol);
    576 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double3x4 __x, simd_double3x4 __y, double __tol);
    577 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double4x2 __x, simd_double4x2 __y, double __tol);
    578 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double4x3 __x, simd_double4x3 __y, double __tol);
    579 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double4x4 __x, simd_double4x4 __y, double __tol);
    580 #define matrix_almost_equal_elements_relative simd_almost_equal_elements_relative
    581 
    582 #ifdef __cplusplus
    583 } /* extern "C" */
    584 
    585 namespace simd {
    /* Matrix addition for the half-precision C++ matrix types.
     * Each overload evaluates ::simd_linear_combination(1, x, 1, y), i.e.
     * 1*x + 1*y, and wraps the C result in the matching simd:: matrix type. */
    586   static SIMD_CPPFUNC half2x2 operator+(const half2x2 x, const half2x2 y) { return half2x2(::simd_linear_combination(1, x, 1, y)); }
    587   static SIMD_CPPFUNC half2x3 operator+(const half2x3 x, const half2x3 y) { return half2x3(::simd_linear_combination(1, x, 1, y)); }
    588   static SIMD_CPPFUNC half2x4 operator+(const half2x4 x, const half2x4 y) { return half2x4(::simd_linear_combination(1, x, 1, y)); }
    589   static SIMD_CPPFUNC half3x2 operator+(const half3x2 x, const half3x2 y) { return half3x2(::simd_linear_combination(1, x, 1, y)); }
    590   static SIMD_CPPFUNC half3x3 operator+(const half3x3 x, const half3x3 y) { return half3x3(::simd_linear_combination(1, x, 1, y)); }
    591   static SIMD_CPPFUNC half3x4 operator+(const half3x4 x, const half3x4 y) { return half3x4(::simd_linear_combination(1, x, 1, y)); }
    592   static SIMD_CPPFUNC half4x2 operator+(const half4x2 x, const half4x2 y) { return half4x2(::simd_linear_combination(1, x, 1, y)); }
    593   static SIMD_CPPFUNC half4x3 operator+(const half4x3 x, const half4x3 y) { return half4x3(::simd_linear_combination(1, x, 1, y)); }
    594   static SIMD_CPPFUNC half4x4 operator+(const half4x4 x, const half4x4 y) { return half4x4(::simd_linear_combination(1, x, 1, y)); }
    595 
    /* Matrix subtraction for the half-precision C++ matrix types.
     * Each overload evaluates ::simd_linear_combination(1, x, -1, y), i.e.
     * 1*x + (-1)*y, and wraps the C result in the matching simd:: matrix type. */
    596   static SIMD_CPPFUNC half2x2 operator-(const half2x2 x, const half2x2 y) { return half2x2(::simd_linear_combination(1, x, -1, y)); }
    597   static SIMD_CPPFUNC half2x3 operator-(const half2x3 x, const half2x3 y) { return half2x3(::simd_linear_combination(1, x, -1, y)); }
    598   static SIMD_CPPFUNC half2x4 operator-(const half2x4 x, const half2x4 y) { return half2x4(::simd_linear_combination(1, x, -1, y)); }
    599   static SIMD_CPPFUNC half3x2 operator-(const half3x2 x, const half3x2 y) { return half3x2(::simd_linear_combination(1, x, -1, y)); }
    600   static SIMD_CPPFUNC half3x3 operator-(const half3x3 x, const half3x3 y) { return half3x3(::simd_linear_combination(1, x, -1, y)); }
    601   static SIMD_CPPFUNC half3x4 operator-(const half3x4 x, const half3x4 y) { return half3x4(::simd_linear_combination(1, x, -1, y)); }
    602   static SIMD_CPPFUNC half4x2 operator-(const half4x2 x, const half4x2 y) { return half4x2(::simd_linear_combination(1, x, -1, y)); }
    603   static SIMD_CPPFUNC half4x3 operator-(const half4x3 x, const half4x3 y) { return half4x3(::simd_linear_combination(1, x, -1, y)); }
    604   static SIMD_CPPFUNC half4x4 operator-(const half4x4 x, const half4x4 y) { return half4x4(::simd_linear_combination(1, x, -1, y)); }
    605 
    /* In-place matrix addition: stores x + y into x and yields x by reference. */
    606   static SIMD_INLINE SIMD_NODEBUG half2x2& operator+=(half2x2& x, const half2x2 y) { return x = x + y; }
    607   static SIMD_INLINE SIMD_NODEBUG half2x3& operator+=(half2x3& x, const half2x3 y) { return x = x + y; }
    608   static SIMD_INLINE SIMD_NODEBUG half2x4& operator+=(half2x4& x, const half2x4 y) { return x = x + y; }
    609   static SIMD_INLINE SIMD_NODEBUG half3x2& operator+=(half3x2& x, const half3x2 y) { return x = x + y; }
    610   static SIMD_INLINE SIMD_NODEBUG half3x3& operator+=(half3x3& x, const half3x3 y) { return x = x + y; }
    611   static SIMD_INLINE SIMD_NODEBUG half3x4& operator+=(half3x4& x, const half3x4 y) { return x = x + y; }
    612   static SIMD_INLINE SIMD_NODEBUG half4x2& operator+=(half4x2& x, const half4x2 y) { return x = x + y; }
    613   static SIMD_INLINE SIMD_NODEBUG half4x3& operator+=(half4x3& x, const half4x3 y) { return x = x + y; }
    614   static SIMD_INLINE SIMD_NODEBUG half4x4& operator+=(half4x4& x, const half4x4 y) { return x = x + y; }
    615 
    /* In-place matrix subtraction: stores x - y into x and yields x by reference. */
    616   static SIMD_INLINE SIMD_NODEBUG half2x2& operator-=(half2x2& x, const half2x2 y) { return x = x - y; }
    617   static SIMD_INLINE SIMD_NODEBUG half2x3& operator-=(half2x3& x, const half2x3 y) { return x = x - y; }
    618   static SIMD_INLINE SIMD_NODEBUG half2x4& operator-=(half2x4& x, const half2x4 y) { return x = x - y; }
    619   static SIMD_INLINE SIMD_NODEBUG half3x2& operator-=(half3x2& x, const half3x2 y) { return x = x - y; }
    620   static SIMD_INLINE SIMD_NODEBUG half3x3& operator-=(half3x3& x, const half3x3 y) { return x = x - y; }
    621   static SIMD_INLINE SIMD_NODEBUG half3x4& operator-=(half3x4& x, const half3x4 y) { return x = x - y; }
    622   static SIMD_INLINE SIMD_NODEBUG half4x2& operator-=(half4x2& x, const half4x2 y) { return x = x - y; }
    623   static SIMD_INLINE SIMD_NODEBUG half4x3& operator-=(half4x3& x, const half4x3 y) { return x = x - y; }
    624   static SIMD_INLINE SIMD_NODEBUG half4x4& operator-=(half4x4& x, const half4x4 y) { return x = x - y; }
    625 
    /* simd::transpose(x): forwards to the C function ::simd_transpose.
     * The result type swaps the two shape numbers of the argument type
     * (e.g. a half3x2 argument yields a half2x3). */
    626   static SIMD_CPPFUNC half2x2 transpose(const half2x2 x) { return ::simd_transpose(x); }
    627   static SIMD_CPPFUNC half2x3 transpose(const half3x2 x) { return ::simd_transpose(x); }
    628   static SIMD_CPPFUNC half2x4 transpose(const half4x2 x) { return ::simd_transpose(x); }
    629   static SIMD_CPPFUNC half3x2 transpose(const half2x3 x) { return ::simd_transpose(x); }
    630   static SIMD_CPPFUNC half3x3 transpose(const half3x3 x) { return ::simd_transpose(x); }
    631   static SIMD_CPPFUNC half3x4 transpose(const half4x3 x) { return ::simd_transpose(x); }
    632   static SIMD_CPPFUNC half4x2 transpose(const half2x4 x) { return ::simd_transpose(x); }
    633   static SIMD_CPPFUNC half4x3 transpose(const half3x4 x) { return ::simd_transpose(x); }
    634   static SIMD_CPPFUNC half4x4 transpose(const half4x4 x) { return ::simd_transpose(x); }
    635 
    /* simd::trace(x): trace of a square half-precision matrix, obtained by
     * forwarding to ::simd_trace. Only the 2x2, 3x3 and 4x4 shapes are offered. */
    636   static SIMD_CPPFUNC _Float16 trace(const half2x2 x) { return ::simd_trace(x); }
    637   static SIMD_CPPFUNC _Float16 trace(const half3x3 x) { return ::simd_trace(x); }
    638   static SIMD_CPPFUNC _Float16 trace(const half4x4 x) { return ::simd_trace(x); }
    639 
    /* simd::determinant(x): determinant of a square half-precision matrix,
     * obtained by forwarding to ::simd_determinant. Only the 2x2, 3x3 and 4x4
     * shapes are offered. */
    640   static SIMD_CPPFUNC _Float16 determinant(const half2x2 x) { return ::simd_determinant(x); }
    641   static SIMD_CPPFUNC _Float16 determinant(const half3x3 x) { return ::simd_determinant(x); }
    642   static SIMD_CPPFUNC _Float16 determinant(const half4x4 x) { return ::simd_determinant(x); }
    643 
    /* simd::inverse(x): inverse of a square half-precision matrix, obtained by
     * forwarding to ::simd_inverse. Per the file's summary comment, the result
     * is undefined when x is singular. The overloads carry an availability
     * annotation (macOS 15.0, iOS 18.0, watchOS 11.0, tvOS 18.0).
     * NOTE(review): the -Wgcc-compat suppression is presumably needed because
     * the availability attribute sits between the declarator and the function
     * body; confirm before removing the pragma guard. */
    644 #pragma clang diagnostic push
    645 #pragma clang diagnostic ignored "-Wgcc-compat"
    646   static SIMD_CPPFUNC half2x2 inverse(const half2x2 x) __API_AVAILABLE(macos(15.0), ios(18.0), watchos(11.0), tvos(18.0)) { return ::simd_inverse(x); }
    647   static SIMD_CPPFUNC half3x3 inverse(const half3x3 x) __API_AVAILABLE(macos(15.0), ios(18.0), watchos(11.0), tvos(18.0)) { return ::simd_inverse(x); }
    648   static SIMD_CPPFUNC half4x4 inverse(const half4x4 x) __API_AVAILABLE(macos(15.0), ios(18.0), watchos(11.0), tvos(18.0)) { return ::simd_inverse(x); }
    649 #pragma clang diagnostic pop
    650 
    /* Scalar-matrix product for the half-precision matrix types.
     * Both operand orders (a * x and x * a) are provided, and both forward to
     * ::simd_mul(a, x) with the scalar as the first argument. */
    651   static SIMD_CPPFUNC half2x2 operator*(const _Float16 a, const half2x2 x) { return ::simd_mul(a, x); }
    652   static SIMD_CPPFUNC half2x3 operator*(const _Float16 a, const half2x3 x) { return ::simd_mul(a, x); }
    653   static SIMD_CPPFUNC half2x4 operator*(const _Float16 a, const half2x4 x) { return ::simd_mul(a, x); }
    654   static SIMD_CPPFUNC half3x2 operator*(const _Float16 a, const half3x2 x) { return ::simd_mul(a, x); }
    655   static SIMD_CPPFUNC half3x3 operator*(const _Float16 a, const half3x3 x) { return ::simd_mul(a, x); }
    656   static SIMD_CPPFUNC half3x4 operator*(const _Float16 a, const half3x4 x) { return ::simd_mul(a, x); }
    657   static SIMD_CPPFUNC half4x2 operator*(const _Float16 a, const half4x2 x) { return ::simd_mul(a, x); }
    658   static SIMD_CPPFUNC half4x3 operator*(const _Float16 a, const half4x3 x) { return ::simd_mul(a, x); }
    659   static SIMD_CPPFUNC half4x4 operator*(const _Float16 a, const half4x4 x) { return ::simd_mul(a, x); }
    660   static SIMD_CPPFUNC half2x2 operator*(const half2x2 x, const _Float16 a) { return ::simd_mul(a, x); }
    661   static SIMD_CPPFUNC half2x3 operator*(const half2x3 x, const _Float16 a) { return ::simd_mul(a, x); }
    662   static SIMD_CPPFUNC half2x4 operator*(const half2x4 x, const _Float16 a) { return ::simd_mul(a, x); }
    663   static SIMD_CPPFUNC half3x2 operator*(const half3x2 x, const _Float16 a) { return ::simd_mul(a, x); }
    664   static SIMD_CPPFUNC half3x3 operator*(const half3x3 x, const _Float16 a) { return ::simd_mul(a, x); }
    665   static SIMD_CPPFUNC half3x4 operator*(const half3x4 x, const _Float16 a) { return ::simd_mul(a, x); }
    666   static SIMD_CPPFUNC half4x2 operator*(const half4x2 x, const _Float16 a) { return ::simd_mul(a, x); }
    667   static SIMD_CPPFUNC half4x3 operator*(const half4x3 x, const _Float16 a) { return ::simd_mul(a, x); }
    668   static SIMD_CPPFUNC half4x4 operator*(const half4x4 x, const _Float16 a) { return ::simd_mul(a, x); }
    /* In-place scaling: stores ::simd_mul(a, x) into x and yields x by reference. */
    669   static SIMD_INLINE SIMD_NODEBUG half2x2& operator*=(half2x2& x, const _Float16 a) { return x = ::simd_mul(a, x); }
    670   static SIMD_INLINE SIMD_NODEBUG half2x3& operator*=(half2x3& x, const _Float16 a) { return x = ::simd_mul(a, x); }
    671   static SIMD_INLINE SIMD_NODEBUG half2x4& operator*=(half2x4& x, const _Float16 a) { return x = ::simd_mul(a, x); }
    672   static SIMD_INLINE SIMD_NODEBUG half3x2& operator*=(half3x2& x, const _Float16 a) { return x = ::simd_mul(a, x); }
    673   static SIMD_INLINE SIMD_NODEBUG half3x3& operator*=(half3x3& x, const _Float16 a) { return x = ::simd_mul(a, x); }
    674   static SIMD_INLINE SIMD_NODEBUG half3x4& operator*=(half3x4& x, const _Float16 a) { return x = ::simd_mul(a, x); }
    675   static SIMD_INLINE SIMD_NODEBUG half4x2& operator*=(half4x2& x, const _Float16 a) { return x = ::simd_mul(a, x); }
    676   static SIMD_INLINE SIMD_NODEBUG half4x3& operator*=(half4x3& x, const _Float16 a) { return x = ::simd_mul(a, x); }
    677   static SIMD_INLINE SIMD_NODEBUG half4x4& operator*=(half4x4& x, const _Float16 a) { return x = ::simd_mul(a, x); }
    678 
    /* Vector-matrix and matrix-vector products for half-precision types.
     * The first nine overloads take the vector on the left (vector * matrix);
     * the last nine take it on the right (matrix * vector). All of them
     * forward to ::simd_mul(x, y) with the operands in the same order. */
    679   static SIMD_CPPFUNC half2 operator*(const half2 x, const half2x2 y) { return ::simd_mul(x, y); }
    680   static SIMD_CPPFUNC half3 operator*(const half2 x, const half3x2 y) { return ::simd_mul(x, y); }
    681   static SIMD_CPPFUNC half4 operator*(const half2 x, const half4x2 y) { return ::simd_mul(x, y); }
    682   static SIMD_CPPFUNC half2 operator*(const half3 x, const half2x3 y) { return ::simd_mul(x, y); }
    683   static SIMD_CPPFUNC half3 operator*(const half3 x, const half3x3 y) { return ::simd_mul(x, y); }
    684   static SIMD_CPPFUNC half4 operator*(const half3 x, const half4x3 y) { return ::simd_mul(x, y); }
    685   static SIMD_CPPFUNC half2 operator*(const half4 x, const half2x4 y) { return ::simd_mul(x, y); }
    686   static SIMD_CPPFUNC half3 operator*(const half4 x, const half3x4 y) { return ::simd_mul(x, y); }
    687   static SIMD_CPPFUNC half4 operator*(const half4 x, const half4x4 y) { return ::simd_mul(x, y); }
    688   static SIMD_CPPFUNC half2 operator*(const half2x2 x, const half2 y) { return ::simd_mul(x, y); }
    689   static SIMD_CPPFUNC half2 operator*(const half3x2 x, const half3 y) { return ::simd_mul(x, y); }
    690   static SIMD_CPPFUNC half2 operator*(const half4x2 x, const half4 y) { return ::simd_mul(x, y); }
    691   static SIMD_CPPFUNC half3 operator*(const half2x3 x, const half2 y) { return ::simd_mul(x, y); }
    692   static SIMD_CPPFUNC half3 operator*(const half3x3 x, const half3 y) { return ::simd_mul(x, y); }
    693   static SIMD_CPPFUNC half3 operator*(const half4x3 x, const half4 y) { return ::simd_mul(x, y); }
    694   static SIMD_CPPFUNC half4 operator*(const half2x4 x, const half2 y) { return ::simd_mul(x, y); }
    695   static SIMD_CPPFUNC half4 operator*(const half3x4 x, const half3 y) { return ::simd_mul(x, y); }
    696   static SIMD_CPPFUNC half4 operator*(const half4x4 x, const half4 y) { return ::simd_mul(x, y); }
    /* In-place vector-by-square-matrix product: stores ::simd_mul(x, y), with
     * the vector as the left operand, into x and yields x by reference. */
    697   static SIMD_INLINE SIMD_NODEBUG half2& operator*=(half2& x, const half2x2 y) { return x = ::simd_mul(x, y); }
    698   static SIMD_INLINE SIMD_NODEBUG half3& operator*=(half3& x, const half3x3 y) { return x = ::simd_mul(x, y); }
    699   static SIMD_INLINE SIMD_NODEBUG half4& operator*=(half4& x, const half4x4 y) { return x = ::simd_mul(x, y); }
    700 
    /* Matrix-matrix product for the half-precision matrix types; every
     * overload forwards to ::simd_mul(x, y). In each signature the first shape
     * number of x matches the second shape number of y, and the result takes
     * its first shape number from y and its second from x. */
    701   static SIMD_CPPFUNC half2x2 operator*(const half2x2 x, const half2x2 y) { return ::simd_mul(x, y); }
    702   static SIMD_CPPFUNC half3x2 operator*(const half2x2 x, const half3x2 y) { return ::simd_mul(x, y); }
    703   static SIMD_CPPFUNC half4x2 operator*(const half2x2 x, const half4x2 y) { return ::simd_mul(x, y); }
    704   static SIMD_CPPFUNC half2x3 operator*(const half2x3 x, const half2x2 y) { return ::simd_mul(x, y); }
    705   static SIMD_CPPFUNC half3x3 operator*(const half2x3 x, const half3x2 y) { return ::simd_mul(x, y); }
    706   static SIMD_CPPFUNC half4x3 operator*(const half2x3 x, const half4x2 y) { return ::simd_mul(x, y); }
    707   static SIMD_CPPFUNC half2x4 operator*(const half2x4 x, const half2x2 y) { return ::simd_mul(x, y); }
    708   static SIMD_CPPFUNC half3x4 operator*(const half2x4 x, const half3x2 y) { return ::simd_mul(x, y); }
    709   static SIMD_CPPFUNC half4x4 operator*(const half2x4 x, const half4x2 y) { return ::simd_mul(x, y); }
    710   static SIMD_CPPFUNC half2x2 operator*(const half3x2 x, const half2x3 y) { return ::simd_mul(x, y); }
    711   static SIMD_CPPFUNC half3x2 operator*(const half3x2 x, const half3x3 y) { return ::simd_mul(x, y); }
    712   static SIMD_CPPFUNC half4x2 operator*(const half3x2 x, const half4x3 y) { return ::simd_mul(x, y); }
    713   static SIMD_CPPFUNC half2x3 operator*(const half3x3 x, const half2x3 y) { return ::simd_mul(x, y); }
    714   static SIMD_CPPFUNC half3x3 operator*(const half3x3 x, const half3x3 y) { return ::simd_mul(x, y); }
    715   static SIMD_CPPFUNC half4x3 operator*(const half3x3 x, const half4x3 y) { return ::simd_mul(x, y); }
    716   static SIMD_CPPFUNC half2x4 operator*(const half3x4 x, const half2x3 y) { return ::simd_mul(x, y); }
    717   static SIMD_CPPFUNC half3x4 operator*(const half3x4 x, const half3x3 y) { return ::simd_mul(x, y); }
    718   static SIMD_CPPFUNC half4x4 operator*(const half3x4 x, const half4x3 y) { return ::simd_mul(x, y); }
    719   static SIMD_CPPFUNC half2x2 operator*(const half4x2 x, const half2x4 y) { return ::simd_mul(x, y); }
    720   static SIMD_CPPFUNC half3x2 operator*(const half4x2 x, const half3x4 y) { return ::simd_mul(x, y); }
    721   static SIMD_CPPFUNC half4x2 operator*(const half4x2 x, const half4x4 y) { return ::simd_mul(x, y); }
    722   static SIMD_CPPFUNC half2x3 operator*(const half4x3 x, const half2x4 y) { return ::simd_mul(x, y); }
    723   static SIMD_CPPFUNC half3x3 operator*(const half4x3 x, const half3x4 y) { return ::simd_mul(x, y); }
    724   static SIMD_CPPFUNC half4x3 operator*(const half4x3 x, const half4x4 y) { return ::simd_mul(x, y); }
    725   static SIMD_CPPFUNC half2x4 operator*(const half4x4 x, const half2x4 y) { return ::simd_mul(x, y); }
    726   static SIMD_CPPFUNC half3x4 operator*(const half4x4 x, const half3x4 y) { return ::simd_mul(x, y); }
    727   static SIMD_CPPFUNC half4x4 operator*(const half4x4 x, const half4x4 y) { return ::simd_mul(x, y); }
    // In-place matrix * matrix for half-precision types: stores ::simd_mul(x, y)
    // back into x and returns a reference to x. The right-hand operand is always
    // a square matrix (2x2, 3x3 or 4x4), so the product has the same type as x.
    728   static SIMD_INLINE SIMD_NODEBUG half2x2& operator*=(half2x2& x, const half2x2 y) { x = ::simd_mul(x, y); return x; }
    729   static SIMD_INLINE SIMD_NODEBUG half2x3& operator*=(half2x3& x, const half2x2 y) { x = ::simd_mul(x, y); return x; }
    730   static SIMD_INLINE SIMD_NODEBUG half2x4& operator*=(half2x4& x, const half2x2 y) { x = ::simd_mul(x, y); return x; }
    731   static SIMD_INLINE SIMD_NODEBUG half3x2& operator*=(half3x2& x, const half3x3 y) { x = ::simd_mul(x, y); return x; }
    732   static SIMD_INLINE SIMD_NODEBUG half3x3& operator*=(half3x3& x, const half3x3 y) { x = ::simd_mul(x, y); return x; }
    733   static SIMD_INLINE SIMD_NODEBUG half3x4& operator*=(half3x4& x, const half3x3 y) { x = ::simd_mul(x, y); return x; }
    734   static SIMD_INLINE SIMD_NODEBUG half4x2& operator*=(half4x2& x, const half4x4 y) { x = ::simd_mul(x, y); return x; }
    735   static SIMD_INLINE SIMD_NODEBUG half4x3& operator*=(half4x3& x, const half4x4 y) { x = ::simd_mul(x, y); return x; }
    736   static SIMD_INLINE SIMD_NODEBUG half4x4& operator*=(half4x4& x, const half4x4 y) { x = ::simd_mul(x, y); return x; }
    737 
    // Equality for half-precision matrices of identical type. Operands are taken
    // by const reference and the comparison itself is delegated to ::simd_equal.
    738   static SIMD_CPPFUNC bool operator==(const half2x2& x, const half2x2& y) { return ::simd_equal(x, y); }
    739   static SIMD_CPPFUNC bool operator==(const half2x3& x, const half2x3& y) { return ::simd_equal(x, y); }
    740   static SIMD_CPPFUNC bool operator==(const half2x4& x, const half2x4& y) { return ::simd_equal(x, y); }
    741   static SIMD_CPPFUNC bool operator==(const half3x2& x, const half3x2& y) { return ::simd_equal(x, y); }
    742   static SIMD_CPPFUNC bool operator==(const half3x3& x, const half3x3& y) { return ::simd_equal(x, y); }
    743   static SIMD_CPPFUNC bool operator==(const half3x4& x, const half3x4& y) { return ::simd_equal(x, y); }
    744   static SIMD_CPPFUNC bool operator==(const half4x2& x, const half4x2& y) { return ::simd_equal(x, y); }
    745   static SIMD_CPPFUNC bool operator==(const half4x3& x, const half4x3& y) { return ::simd_equal(x, y); }
    746   static SIMD_CPPFUNC bool operator==(const half4x4& x, const half4x4& y) { return ::simd_equal(x, y); }
    747 
    // Inequality for half-precision matrices of identical type: the logical
    // negation of the ::simd_equal comparison used for equality.
    748   static SIMD_CPPFUNC bool operator!=(const half2x2& x, const half2x2& y) { return !::simd_equal(x, y); }
    749   static SIMD_CPPFUNC bool operator!=(const half2x3& x, const half2x3& y) { return !::simd_equal(x, y); }
    750   static SIMD_CPPFUNC bool operator!=(const half2x4& x, const half2x4& y) { return !::simd_equal(x, y); }
    751   static SIMD_CPPFUNC bool operator!=(const half3x2& x, const half3x2& y) { return !::simd_equal(x, y); }
    752   static SIMD_CPPFUNC bool operator!=(const half3x3& x, const half3x3& y) { return !::simd_equal(x, y); }
    753   static SIMD_CPPFUNC bool operator!=(const half3x4& x, const half3x4& y) { return !::simd_equal(x, y); }
    754   static SIMD_CPPFUNC bool operator!=(const half4x2& x, const half4x2& y) { return !::simd_equal(x, y); }
    755   static SIMD_CPPFUNC bool operator!=(const half4x3& x, const half4x3& y) { return !::simd_equal(x, y); }
    756   static SIMD_CPPFUNC bool operator!=(const half4x4& x, const half4x4& y) { return !::simd_equal(x, y); }
    757 
    // Tolerance-based comparison of two half-precision matrices of identical type.
    // x, y and the _Float16 tolerance `tol` are passed unchanged to
    // ::simd_almost_equal_elements, whose bool result is returned.
    758   static SIMD_CPPFUNC bool almost_equal_elements(const half2x2 x, const half2x2 y, const _Float16 tol) { return ::simd_almost_equal_elements(x, y, tol); }
    759   static SIMD_CPPFUNC bool almost_equal_elements(const half2x3 x, const half2x3 y, const _Float16 tol) { return ::simd_almost_equal_elements(x, y, tol); }
    760   static SIMD_CPPFUNC bool almost_equal_elements(const half2x4 x, const half2x4 y, const _Float16 tol) { return ::simd_almost_equal_elements(x, y, tol); }
    761   static SIMD_CPPFUNC bool almost_equal_elements(const half3x2 x, const half3x2 y, const _Float16 tol) { return ::simd_almost_equal_elements(x, y, tol); }
    762   static SIMD_CPPFUNC bool almost_equal_elements(const half3x3 x, const half3x3 y, const _Float16 tol) { return ::simd_almost_equal_elements(x, y, tol); }
    763   static SIMD_CPPFUNC bool almost_equal_elements(const half3x4 x, const half3x4 y, const _Float16 tol) { return ::simd_almost_equal_elements(x, y, tol); }
    764   static SIMD_CPPFUNC bool almost_equal_elements(const half4x2 x, const half4x2 y, const _Float16 tol) { return ::simd_almost_equal_elements(x, y, tol); }
    765   static SIMD_CPPFUNC bool almost_equal_elements(const half4x3 x, const half4x3 y, const _Float16 tol) { return ::simd_almost_equal_elements(x, y, tol); }
    766   static SIMD_CPPFUNC bool almost_equal_elements(const half4x4 x, const half4x4 y, const _Float16 tol) { return ::simd_almost_equal_elements(x, y, tol); }
    767 
    // Relative-tolerance comparison of two half-precision matrices of identical
    // type. x, y and the _Float16 tolerance `tol` are passed unchanged to
    // ::simd_almost_equal_elements_relative, whose bool result is returned.
    768   static SIMD_CPPFUNC bool almost_equal_elements_relative(const half2x2 x, const half2x2 y, const _Float16 tol) { return ::simd_almost_equal_elements_relative(x, y, tol); }
    769   static SIMD_CPPFUNC bool almost_equal_elements_relative(const half2x3 x, const half2x3 y, const _Float16 tol) { return ::simd_almost_equal_elements_relative(x, y, tol); }
    770   static SIMD_CPPFUNC bool almost_equal_elements_relative(const half2x4 x, const half2x4 y, const _Float16 tol) { return ::simd_almost_equal_elements_relative(x, y, tol); }
    771   static SIMD_CPPFUNC bool almost_equal_elements_relative(const half3x2 x, const half3x2 y, const _Float16 tol) { return ::simd_almost_equal_elements_relative(x, y, tol); }
    772   static SIMD_CPPFUNC bool almost_equal_elements_relative(const half3x3 x, const half3x3 y, const _Float16 tol) { return ::simd_almost_equal_elements_relative(x, y, tol); }
    773   static SIMD_CPPFUNC bool almost_equal_elements_relative(const half3x4 x, const half3x4 y, const _Float16 tol) { return ::simd_almost_equal_elements_relative(x, y, tol); }
    774   static SIMD_CPPFUNC bool almost_equal_elements_relative(const half4x2 x, const half4x2 y, const _Float16 tol) { return ::simd_almost_equal_elements_relative(x, y, tol); }
    775   static SIMD_CPPFUNC bool almost_equal_elements_relative(const half4x3 x, const half4x3 y, const _Float16 tol) { return ::simd_almost_equal_elements_relative(x, y, tol); }
    776   static SIMD_CPPFUNC bool almost_equal_elements_relative(const half4x4 x, const half4x4 y, const _Float16 tol) { return ::simd_almost_equal_elements_relative(x, y, tol); }
    777 
    // Matrix addition for single-precision matrices of identical type.
    // Implemented as ::simd_linear_combination(1, x, 1, y), i.e. 1*x + 1*y, with
    // the result explicitly wrapped in the matching C++ matrix type.
    778   static SIMD_CPPFUNC float2x2 operator+(const float2x2 x, const float2x2 y) { return float2x2(::simd_linear_combination(1, x, 1, y)); }
    779   static SIMD_CPPFUNC float2x3 operator+(const float2x3 x, const float2x3 y) { return float2x3(::simd_linear_combination(1, x, 1, y)); }
    780   static SIMD_CPPFUNC float2x4 operator+(const float2x4 x, const float2x4 y) { return float2x4(::simd_linear_combination(1, x, 1, y)); }
    781   static SIMD_CPPFUNC float3x2 operator+(const float3x2 x, const float3x2 y) { return float3x2(::simd_linear_combination(1, x, 1, y)); }
    782   static SIMD_CPPFUNC float3x3 operator+(const float3x3 x, const float3x3 y) { return float3x3(::simd_linear_combination(1, x, 1, y)); }
    783   static SIMD_CPPFUNC float3x4 operator+(const float3x4 x, const float3x4 y) { return float3x4(::simd_linear_combination(1, x, 1, y)); }
    784   static SIMD_CPPFUNC float4x2 operator+(const float4x2 x, const float4x2 y) { return float4x2(::simd_linear_combination(1, x, 1, y)); }
    785   static SIMD_CPPFUNC float4x3 operator+(const float4x3 x, const float4x3 y) { return float4x3(::simd_linear_combination(1, x, 1, y)); }
    786   static SIMD_CPPFUNC float4x4 operator+(const float4x4 x, const float4x4 y) { return float4x4(::simd_linear_combination(1, x, 1, y)); }
    787   
    // Matrix subtraction for single-precision matrices of identical type.
    // Implemented as ::simd_linear_combination(1, x, -1, y), i.e. 1*x + (-1)*y,
    // with the result explicitly wrapped in the matching C++ matrix type.
    788   static SIMD_CPPFUNC float2x2 operator-(const float2x2 x, const float2x2 y) { return float2x2(::simd_linear_combination(1, x, -1, y)); }
    789   static SIMD_CPPFUNC float2x3 operator-(const float2x3 x, const float2x3 y) { return float2x3(::simd_linear_combination(1, x, -1, y)); }
    790   static SIMD_CPPFUNC float2x4 operator-(const float2x4 x, const float2x4 y) { return float2x4(::simd_linear_combination(1, x, -1, y)); }
    791   static SIMD_CPPFUNC float3x2 operator-(const float3x2 x, const float3x2 y) { return float3x2(::simd_linear_combination(1, x, -1, y)); }
    792   static SIMD_CPPFUNC float3x3 operator-(const float3x3 x, const float3x3 y) { return float3x3(::simd_linear_combination(1, x, -1, y)); }
    793   static SIMD_CPPFUNC float3x4 operator-(const float3x4 x, const float3x4 y) { return float3x4(::simd_linear_combination(1, x, -1, y)); }
    794   static SIMD_CPPFUNC float4x2 operator-(const float4x2 x, const float4x2 y) { return float4x2(::simd_linear_combination(1, x, -1, y)); }
    795   static SIMD_CPPFUNC float4x3 operator-(const float4x3 x, const float4x3 y) { return float4x3(::simd_linear_combination(1, x, -1, y)); }
    796   static SIMD_CPPFUNC float4x4 operator-(const float4x4 x, const float4x4 y) { return float4x4(::simd_linear_combination(1, x, -1, y)); }
    797   
    // In-place addition for single-precision matrices of identical type: x is
    // overwritten with 1*x + 1*y (via ::simd_linear_combination) and returned by reference.
    798   static SIMD_INLINE SIMD_NODEBUG float2x2& operator+=(float2x2& x, const float2x2 y) { x = float2x2(::simd_linear_combination(1, x, 1, y)); return x; }
    799   static SIMD_INLINE SIMD_NODEBUG float2x3& operator+=(float2x3& x, const float2x3 y) { x = float2x3(::simd_linear_combination(1, x, 1, y)); return x; }
    800   static SIMD_INLINE SIMD_NODEBUG float2x4& operator+=(float2x4& x, const float2x4 y) { x = float2x4(::simd_linear_combination(1, x, 1, y)); return x; }
    801   static SIMD_INLINE SIMD_NODEBUG float3x2& operator+=(float3x2& x, const float3x2 y) { x = float3x2(::simd_linear_combination(1, x, 1, y)); return x; }
    802   static SIMD_INLINE SIMD_NODEBUG float3x3& operator+=(float3x3& x, const float3x3 y) { x = float3x3(::simd_linear_combination(1, x, 1, y)); return x; }
    803   static SIMD_INLINE SIMD_NODEBUG float3x4& operator+=(float3x4& x, const float3x4 y) { x = float3x4(::simd_linear_combination(1, x, 1, y)); return x; }
    804   static SIMD_INLINE SIMD_NODEBUG float4x2& operator+=(float4x2& x, const float4x2 y) { x = float4x2(::simd_linear_combination(1, x, 1, y)); return x; }
    805   static SIMD_INLINE SIMD_NODEBUG float4x3& operator+=(float4x3& x, const float4x3 y) { x = float4x3(::simd_linear_combination(1, x, 1, y)); return x; }
    806   static SIMD_INLINE SIMD_NODEBUG float4x4& operator+=(float4x4& x, const float4x4 y) { x = float4x4(::simd_linear_combination(1, x, 1, y)); return x; }
    807   
    // In-place subtraction for single-precision matrices of identical type: x is
    // overwritten with 1*x + (-1)*y (via ::simd_linear_combination) and returned by reference.
    808   static SIMD_INLINE SIMD_NODEBUG float2x2& operator-=(float2x2& x, const float2x2 y) { x = float2x2(::simd_linear_combination(1, x, -1, y)); return x; }
    809   static SIMD_INLINE SIMD_NODEBUG float2x3& operator-=(float2x3& x, const float2x3 y) { x = float2x3(::simd_linear_combination(1, x, -1, y)); return x; }
    810   static SIMD_INLINE SIMD_NODEBUG float2x4& operator-=(float2x4& x, const float2x4 y) { x = float2x4(::simd_linear_combination(1, x, -1, y)); return x; }
    811   static SIMD_INLINE SIMD_NODEBUG float3x2& operator-=(float3x2& x, const float3x2 y) { x = float3x2(::simd_linear_combination(1, x, -1, y)); return x; }
    812   static SIMD_INLINE SIMD_NODEBUG float3x3& operator-=(float3x3& x, const float3x3 y) { x = float3x3(::simd_linear_combination(1, x, -1, y)); return x; }
    813   static SIMD_INLINE SIMD_NODEBUG float3x4& operator-=(float3x4& x, const float3x4 y) { x = float3x4(::simd_linear_combination(1, x, -1, y)); return x; }
    814   static SIMD_INLINE SIMD_NODEBUG float4x2& operator-=(float4x2& x, const float4x2 y) { x = float4x2(::simd_linear_combination(1, x, -1, y)); return x; }
    815   static SIMD_INLINE SIMD_NODEBUG float4x3& operator-=(float4x3& x, const float4x3 y) { x = float4x3(::simd_linear_combination(1, x, -1, y)); return x; }
    816   static SIMD_INLINE SIMD_NODEBUG float4x4& operator-=(float4x4& x, const float4x4 y) { x = float4x4(::simd_linear_combination(1, x, -1, y)); return x; }
    817   
    // Transpose of a single-precision matrix, forwarded to ::simd_transpose.
    // The return type has the argument's two dimensions swapped (e.g. float3x2
    // in, float2x3 out); square types map to themselves.
    818   static SIMD_CPPFUNC float2x2 transpose(const float2x2 x) { return ::simd_transpose(x); }
    819   static SIMD_CPPFUNC float2x3 transpose(const float3x2 x) { return ::simd_transpose(x); }
    820   static SIMD_CPPFUNC float2x4 transpose(const float4x2 x) { return ::simd_transpose(x); }
    821   static SIMD_CPPFUNC float3x2 transpose(const float2x3 x) { return ::simd_transpose(x); }
    822   static SIMD_CPPFUNC float3x3 transpose(const float3x3 x) { return ::simd_transpose(x); }
    823   static SIMD_CPPFUNC float3x4 transpose(const float4x3 x) { return ::simd_transpose(x); }
    824   static SIMD_CPPFUNC float4x2 transpose(const float2x4 x) { return ::simd_transpose(x); }
    825   static SIMD_CPPFUNC float4x3 transpose(const float3x4 x) { return ::simd_transpose(x); }
    826   static SIMD_CPPFUNC float4x4 transpose(const float4x4 x) { return ::simd_transpose(x); }
    827 
    // Trace of a single-precision matrix, forwarded to ::simd_trace and returned
    // as a float. Overloads are provided for the square types only.
    828   static SIMD_CPPFUNC float trace(const float2x2 x) { return ::simd_trace(x); }
    829   static SIMD_CPPFUNC float trace(const float3x3 x) { return ::simd_trace(x); }
    830   static SIMD_CPPFUNC float trace(const float4x4 x) { return ::simd_trace(x); }
    831 
    // Determinant of a single-precision matrix, forwarded to ::simd_determinant
    // and returned as a float. Overloads are provided for the square types only.
    832   static SIMD_CPPFUNC float determinant(const float2x2 x) { return ::simd_determinant(x); }
    833   static SIMD_CPPFUNC float determinant(const float3x3 x) { return ::simd_determinant(x); }
    834   static SIMD_CPPFUNC float determinant(const float4x4 x) { return ::simd_determinant(x); }
    835   
    // Inverse of a square single-precision matrix, forwarded to ::simd_inverse.
    // Per the header's summary table, the result is undefined when x is singular.
    // Each overload is annotated as available from macOS 10.10, iOS 8.0,
    // watchOS 2.0 and tvOS 9.0. The -Wgcc-compat diagnostic is silenced for just
    // these declarations and restored afterwards.
    // NOTE(review): presumably the warning is triggered by the availability
    // attribute sitting between the declarator and the function body - confirm.
    836 #pragma clang diagnostic push
    837 #pragma clang diagnostic ignored "-Wgcc-compat"
    838   static SIMD_CPPFUNC float2x2 inverse(const float2x2 x) __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0)) { return ::simd_inverse(x); }
    839   static SIMD_CPPFUNC float3x3 inverse(const float3x3 x) __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0)) { return ::simd_inverse(x); }
    840   static SIMD_CPPFUNC float4x4 inverse(const float4x4 x) __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0)) { return ::simd_inverse(x); }
    841 #pragma clang diagnostic pop
    842   
    // Scalar * matrix and matrix * scalar for single-precision matrices.
    // The first nine overloads take the scalar on the left, the last nine take
    // it on the right; both forms call ::simd_mul(a, x) with the scalar first,
    // so the two operand orders produce the same call.
    843   static SIMD_CPPFUNC float2x2 operator*(const float a, const float2x2 x) { return ::simd_mul(a, x); }
    844   static SIMD_CPPFUNC float2x3 operator*(const float a, const float2x3 x) { return ::simd_mul(a, x); }
    845   static SIMD_CPPFUNC float2x4 operator*(const float a, const float2x4 x) { return ::simd_mul(a, x); }
    846   static SIMD_CPPFUNC float3x2 operator*(const float a, const float3x2 x) { return ::simd_mul(a, x); }
    847   static SIMD_CPPFUNC float3x3 operator*(const float a, const float3x3 x) { return ::simd_mul(a, x); }
    848   static SIMD_CPPFUNC float3x4 operator*(const float a, const float3x4 x) { return ::simd_mul(a, x); }
    849   static SIMD_CPPFUNC float4x2 operator*(const float a, const float4x2 x) { return ::simd_mul(a, x); }
    850   static SIMD_CPPFUNC float4x3 operator*(const float a, const float4x3 x) { return ::simd_mul(a, x); }
    851   static SIMD_CPPFUNC float4x4 operator*(const float a, const float4x4 x) { return ::simd_mul(a, x); }
    852   static SIMD_CPPFUNC float2x2 operator*(const float2x2 x, const float a) { return ::simd_mul(a, x); }
    853   static SIMD_CPPFUNC float2x3 operator*(const float2x3 x, const float a) { return ::simd_mul(a, x); }
    854   static SIMD_CPPFUNC float2x4 operator*(const float2x4 x, const float a) { return ::simd_mul(a, x); }
    855   static SIMD_CPPFUNC float3x2 operator*(const float3x2 x, const float a) { return ::simd_mul(a, x); }
    856   static SIMD_CPPFUNC float3x3 operator*(const float3x3 x, const float a) { return ::simd_mul(a, x); }
    857   static SIMD_CPPFUNC float3x4 operator*(const float3x4 x, const float a) { return ::simd_mul(a, x); }
    858   static SIMD_CPPFUNC float4x2 operator*(const float4x2 x, const float a) { return ::simd_mul(a, x); }
    859   static SIMD_CPPFUNC float4x3 operator*(const float4x3 x, const float a) { return ::simd_mul(a, x); }
    860   static SIMD_CPPFUNC float4x4 operator*(const float4x4 x, const float a) { return ::simd_mul(a, x); }
    // In-place scaling of a single-precision matrix by a scalar: stores
    // ::simd_mul(a, x) back into x and returns a reference to x.
    861   static SIMD_INLINE SIMD_NODEBUG float2x2& operator*=(float2x2& x, const float a) { x = ::simd_mul(a, x); return x; }
    862   static SIMD_INLINE SIMD_NODEBUG float2x3& operator*=(float2x3& x, const float a) { x = ::simd_mul(a, x); return x; }
    863   static SIMD_INLINE SIMD_NODEBUG float2x4& operator*=(float2x4& x, const float a) { x = ::simd_mul(a, x); return x; }
    864   static SIMD_INLINE SIMD_NODEBUG float3x2& operator*=(float3x2& x, const float a) { x = ::simd_mul(a, x); return x; }
    865   static SIMD_INLINE SIMD_NODEBUG float3x3& operator*=(float3x3& x, const float a) { x = ::simd_mul(a, x); return x; }
    866   static SIMD_INLINE SIMD_NODEBUG float3x4& operator*=(float3x4& x, const float a) { x = ::simd_mul(a, x); return x; }
    867   static SIMD_INLINE SIMD_NODEBUG float4x2& operator*=(float4x2& x, const float a) { x = ::simd_mul(a, x); return x; }
    868   static SIMD_INLINE SIMD_NODEBUG float4x3& operator*=(float4x3& x, const float a) { x = ::simd_mul(a, x); return x; }
    869   static SIMD_INLINE SIMD_NODEBUG float4x4& operator*=(float4x4& x, const float a) { x = ::simd_mul(a, x); return x; }
    870   
    // Vector * matrix and matrix * vector for single-precision types. Operands
    // are forwarded to ::simd_mul in the order written.
    // First nine overloads (vector on the left): the vector length equals the
    // matrix's second dimension and the result length equals its first.
    // Last nine overloads (matrix on the left): the vector length equals the
    // matrix's first dimension and the result length equals its second.
    871   static SIMD_CPPFUNC float2 operator*(const float2 x, const float2x2 y) { return ::simd_mul(x, y); }
    872   static SIMD_CPPFUNC float3 operator*(const float2 x, const float3x2 y) { return ::simd_mul(x, y); }
    873   static SIMD_CPPFUNC float4 operator*(const float2 x, const float4x2 y) { return ::simd_mul(x, y); }
    874   static SIMD_CPPFUNC float2 operator*(const float3 x, const float2x3 y) { return ::simd_mul(x, y); }
    875   static SIMD_CPPFUNC float3 operator*(const float3 x, const float3x3 y) { return ::simd_mul(x, y); }
    876   static SIMD_CPPFUNC float4 operator*(const float3 x, const float4x3 y) { return ::simd_mul(x, y); }
    877   static SIMD_CPPFUNC float2 operator*(const float4 x, const float2x4 y) { return ::simd_mul(x, y); }
    878   static SIMD_CPPFUNC float3 operator*(const float4 x, const float3x4 y) { return ::simd_mul(x, y); }
    879   static SIMD_CPPFUNC float4 operator*(const float4 x, const float4x4 y) { return ::simd_mul(x, y); }
    880   static SIMD_CPPFUNC float2 operator*(const float2x2 x, const float2 y) { return ::simd_mul(x, y); }
    881   static SIMD_CPPFUNC float2 operator*(const float3x2 x, const float3 y) { return ::simd_mul(x, y); }
    882   static SIMD_CPPFUNC float2 operator*(const float4x2 x, const float4 y) { return ::simd_mul(x, y); }
    883   static SIMD_CPPFUNC float3 operator*(const float2x3 x, const float2 y) { return ::simd_mul(x, y); }
    884   static SIMD_CPPFUNC float3 operator*(const float3x3 x, const float3 y) { return ::simd_mul(x, y); }
    885   static SIMD_CPPFUNC float3 operator*(const float4x3 x, const float4 y) { return ::simd_mul(x, y); }
    886   static SIMD_CPPFUNC float4 operator*(const float2x4 x, const float2 y) { return ::simd_mul(x, y); }
    887   static SIMD_CPPFUNC float4 operator*(const float3x4 x, const float3 y) { return ::simd_mul(x, y); }
    888   static SIMD_CPPFUNC float4 operator*(const float4x4 x, const float4 y) { return ::simd_mul(x, y); }
    // In-place vector * matrix for single-precision types: stores
    // ::simd_mul(x, y) (vector on the left) back into x and returns a reference
    // to x. Only square matrices are accepted, so the vector length is unchanged.
    889   static SIMD_INLINE SIMD_NODEBUG float2& operator*=(float2& x, const float2x2 y) { x = ::simd_mul(x, y); return x; }
    890   static SIMD_INLINE SIMD_NODEBUG float3& operator*=(float3& x, const float3x3 y) { x = ::simd_mul(x, y); return x; }
    891   static SIMD_INLINE SIMD_NODEBUG float4& operator*=(float4& x, const float4x4 y) { x = ::simd_mul(x, y); return x; }
    892   
    // Matrix * matrix for single-precision types. Every overload forwards both
    // operands, in order, to the global ::simd_mul and returns its result.
    // In each signature the result type takes its first dimension from y and its
    // second dimension from x, and x's first dimension equals y's second.
    893   static SIMD_CPPFUNC float2x2 operator*(const float2x2 x, const float2x2 y) { return ::simd_mul(x, y); }
    894   static SIMD_CPPFUNC float3x2 operator*(const float2x2 x, const float3x2 y) { return ::simd_mul(x, y); }
    895   static SIMD_CPPFUNC float4x2 operator*(const float2x2 x, const float4x2 y) { return ::simd_mul(x, y); }
    896   static SIMD_CPPFUNC float2x3 operator*(const float2x3 x, const float2x2 y) { return ::simd_mul(x, y); }
    897   static SIMD_CPPFUNC float3x3 operator*(const float2x3 x, const float3x2 y) { return ::simd_mul(x, y); }
    898   static SIMD_CPPFUNC float4x3 operator*(const float2x3 x, const float4x2 y) { return ::simd_mul(x, y); }
    899   static SIMD_CPPFUNC float2x4 operator*(const float2x4 x, const float2x2 y) { return ::simd_mul(x, y); }
    900   static SIMD_CPPFUNC float3x4 operator*(const float2x4 x, const float3x2 y) { return ::simd_mul(x, y); }
    901   static SIMD_CPPFUNC float4x4 operator*(const float2x4 x, const float4x2 y) { return ::simd_mul(x, y); }
    902   static SIMD_CPPFUNC float2x2 operator*(const float3x2 x, const float2x3 y) { return ::simd_mul(x, y); }
    903   static SIMD_CPPFUNC float3x2 operator*(const float3x2 x, const float3x3 y) { return ::simd_mul(x, y); }
    904   static SIMD_CPPFUNC float4x2 operator*(const float3x2 x, const float4x3 y) { return ::simd_mul(x, y); }
    905   static SIMD_CPPFUNC float2x3 operator*(const float3x3 x, const float2x3 y) { return ::simd_mul(x, y); }
    906   static SIMD_CPPFUNC float3x3 operator*(const float3x3 x, const float3x3 y) { return ::simd_mul(x, y); }
    907   static SIMD_CPPFUNC float4x3 operator*(const float3x3 x, const float4x3 y) { return ::simd_mul(x, y); }
    908   static SIMD_CPPFUNC float2x4 operator*(const float3x4 x, const float2x3 y) { return ::simd_mul(x, y); }
    909   static SIMD_CPPFUNC float3x4 operator*(const float3x4 x, const float3x3 y) { return ::simd_mul(x, y); }
    910   static SIMD_CPPFUNC float4x4 operator*(const float3x4 x, const float4x3 y) { return ::simd_mul(x, y); }
    911   static SIMD_CPPFUNC float2x2 operator*(const float4x2 x, const float2x4 y) { return ::simd_mul(x, y); }
    912   static SIMD_CPPFUNC float3x2 operator*(const float4x2 x, const float3x4 y) { return ::simd_mul(x, y); }
    913   static SIMD_CPPFUNC float4x2 operator*(const float4x2 x, const float4x4 y) { return ::simd_mul(x, y); }
    914   static SIMD_CPPFUNC float2x3 operator*(const float4x3 x, const float2x4 y) { return ::simd_mul(x, y); }
    915   static SIMD_CPPFUNC float3x3 operator*(const float4x3 x, const float3x4 y) { return ::simd_mul(x, y); }
    916   static SIMD_CPPFUNC float4x3 operator*(const float4x3 x, const float4x4 y) { return ::simd_mul(x, y); }
    917   static SIMD_CPPFUNC float2x4 operator*(const float4x4 x, const float2x4 y) { return ::simd_mul(x, y); }
    918   static SIMD_CPPFUNC float3x4 operator*(const float4x4 x, const float3x4 y) { return ::simd_mul(x, y); }
    919   static SIMD_CPPFUNC float4x4 operator*(const float4x4 x, const float4x4 y) { return ::simd_mul(x, y); }
    // In-place matrix * matrix for single-precision types: stores
    // ::simd_mul(x, y) back into x and returns a reference to x. The right-hand
    // operand is always a square matrix (2x2, 3x3 or 4x4), so the product has
    // the same type as x.
    920   static SIMD_INLINE SIMD_NODEBUG float2x2& operator*=(float2x2& x, const float2x2 y) { x = ::simd_mul(x, y); return x; }
    921   static SIMD_INLINE SIMD_NODEBUG float2x3& operator*=(float2x3& x, const float2x2 y) { x = ::simd_mul(x, y); return x; }
    922   static SIMD_INLINE SIMD_NODEBUG float2x4& operator*=(float2x4& x, const float2x2 y) { x = ::simd_mul(x, y); return x; }
    923   static SIMD_INLINE SIMD_NODEBUG float3x2& operator*=(float3x2& x, const float3x3 y) { x = ::simd_mul(x, y); return x; }
    924   static SIMD_INLINE SIMD_NODEBUG float3x3& operator*=(float3x3& x, const float3x3 y) { x = ::simd_mul(x, y); return x; }
    925   static SIMD_INLINE SIMD_NODEBUG float3x4& operator*=(float3x4& x, const float3x3 y) { x = ::simd_mul(x, y); return x; }
    926   static SIMD_INLINE SIMD_NODEBUG float4x2& operator*=(float4x2& x, const float4x4 y) { x = ::simd_mul(x, y); return x; }
    927   static SIMD_INLINE SIMD_NODEBUG float4x3& operator*=(float4x3& x, const float4x4 y) { x = ::simd_mul(x, y); return x; }
    928   static SIMD_INLINE SIMD_NODEBUG float4x4& operator*=(float4x4& x, const float4x4 y) { x = ::simd_mul(x, y); return x; }
    929   
    // Equality for single-precision matrices of identical type. Operands are
    // taken by const reference and the comparison itself is delegated to
    // ::simd_equal.
    930   static SIMD_CPPFUNC bool operator==(const float2x2& x, const float2x2& y) { return ::simd_equal(x, y); }
    931   static SIMD_CPPFUNC bool operator==(const float2x3& x, const float2x3& y) { return ::simd_equal(x, y); }
    932   static SIMD_CPPFUNC bool operator==(const float2x4& x, const float2x4& y) { return ::simd_equal(x, y); }
    933   static SIMD_CPPFUNC bool operator==(const float3x2& x, const float3x2& y) { return ::simd_equal(x, y); }
    934   static SIMD_CPPFUNC bool operator==(const float3x3& x, const float3x3& y) { return ::simd_equal(x, y); }
    935   static SIMD_CPPFUNC bool operator==(const float3x4& x, const float3x4& y) { return ::simd_equal(x, y); }
    936   static SIMD_CPPFUNC bool operator==(const float4x2& x, const float4x2& y) { return ::simd_equal(x, y); }
    937   static SIMD_CPPFUNC bool operator==(const float4x3& x, const float4x3& y) { return ::simd_equal(x, y); }
    938   static SIMD_CPPFUNC bool operator==(const float4x4& x, const float4x4& y) { return ::simd_equal(x, y); }
    939   
    // Inequality for single-precision matrices of identical type: the logical
    // negation of the ::simd_equal comparison used for equality.
    940   static SIMD_CPPFUNC bool operator!=(const float2x2& x, const float2x2& y) { return !::simd_equal(x, y); }
    941   static SIMD_CPPFUNC bool operator!=(const float2x3& x, const float2x3& y) { return !::simd_equal(x, y); }
    942   static SIMD_CPPFUNC bool operator!=(const float2x4& x, const float2x4& y) { return !::simd_equal(x, y); }
    943   static SIMD_CPPFUNC bool operator!=(const float3x2& x, const float3x2& y) { return !::simd_equal(x, y); }
    944   static SIMD_CPPFUNC bool operator!=(const float3x3& x, const float3x3& y) { return !::simd_equal(x, y); }
    945   static SIMD_CPPFUNC bool operator!=(const float3x4& x, const float3x4& y) { return !::simd_equal(x, y); }
    946   static SIMD_CPPFUNC bool operator!=(const float4x2& x, const float4x2& y) { return !::simd_equal(x, y); }
    947   static SIMD_CPPFUNC bool operator!=(const float4x3& x, const float4x3& y) { return !::simd_equal(x, y); }
    948   static SIMD_CPPFUNC bool operator!=(const float4x4& x, const float4x4& y) { return !::simd_equal(x, y); }
    949   
    // Tolerance-based comparison of two single-precision matrices of identical
    // type. x, y and the float tolerance `tol` are passed unchanged to
    // ::simd_almost_equal_elements, whose bool result is returned.
    950   static SIMD_CPPFUNC bool almost_equal_elements(const float2x2 x, const float2x2 y, const float tol) { return ::simd_almost_equal_elements(x, y, tol); }
    951   static SIMD_CPPFUNC bool almost_equal_elements(const float2x3 x, const float2x3 y, const float tol) { return ::simd_almost_equal_elements(x, y, tol); }
    952   static SIMD_CPPFUNC bool almost_equal_elements(const float2x4 x, const float2x4 y, const float tol) { return ::simd_almost_equal_elements(x, y, tol); }
    953   static SIMD_CPPFUNC bool almost_equal_elements(const float3x2 x, const float3x2 y, const float tol) { return ::simd_almost_equal_elements(x, y, tol); }
    954   static SIMD_CPPFUNC bool almost_equal_elements(const float3x3 x, const float3x3 y, const float tol) { return ::simd_almost_equal_elements(x, y, tol); }
    955   static SIMD_CPPFUNC bool almost_equal_elements(const float3x4 x, const float3x4 y, const float tol) { return ::simd_almost_equal_elements(x, y, tol); }
    956   static SIMD_CPPFUNC bool almost_equal_elements(const float4x2 x, const float4x2 y, const float tol) { return ::simd_almost_equal_elements(x, y, tol); }
    957   static SIMD_CPPFUNC bool almost_equal_elements(const float4x3 x, const float4x3 y, const float tol) { return ::simd_almost_equal_elements(x, y, tol); }
    958   static SIMD_CPPFUNC bool almost_equal_elements(const float4x4 x, const float4x4 y, const float tol) { return ::simd_almost_equal_elements(x, y, tol); }
    959     
    // Relative-tolerance comparison of two single-precision matrices of
    // identical type. x, y and the float tolerance `tol` are passed unchanged to
    // ::simd_almost_equal_elements_relative, whose bool result is returned.
    960   static SIMD_CPPFUNC bool almost_equal_elements_relative(const float2x2 x, const float2x2 y, const float tol) { return ::simd_almost_equal_elements_relative(x, y, tol); }
    961   static SIMD_CPPFUNC bool almost_equal_elements_relative(const float2x3 x, const float2x3 y, const float tol) { return ::simd_almost_equal_elements_relative(x, y, tol); }
    962   static SIMD_CPPFUNC bool almost_equal_elements_relative(const float2x4 x, const float2x4 y, const float tol) { return ::simd_almost_equal_elements_relative(x, y, tol); }
    963   static SIMD_CPPFUNC bool almost_equal_elements_relative(const float3x2 x, const float3x2 y, const float tol) { return ::simd_almost_equal_elements_relative(x, y, tol); }
    964   static SIMD_CPPFUNC bool almost_equal_elements_relative(const float3x3 x, const float3x3 y, const float tol) { return ::simd_almost_equal_elements_relative(x, y, tol); }
    965   static SIMD_CPPFUNC bool almost_equal_elements_relative(const float3x4 x, const float3x4 y, const float tol) { return ::simd_almost_equal_elements_relative(x, y, tol); }
    966   static SIMD_CPPFUNC bool almost_equal_elements_relative(const float4x2 x, const float4x2 y, const float tol) { return ::simd_almost_equal_elements_relative(x, y, tol); }
    967   static SIMD_CPPFUNC bool almost_equal_elements_relative(const float4x3 x, const float4x3 y, const float tol) { return ::simd_almost_equal_elements_relative(x, y, tol); }
    968   static SIMD_CPPFUNC bool almost_equal_elements_relative(const float4x4 x, const float4x4 y, const float tol) { return ::simd_almost_equal_elements_relative(x, y, tol); }
    969   
    // Matrix addition for double-precision matrices of identical type.
    // Implemented as ::simd_linear_combination(1, x, 1, y), i.e. 1*x + 1*y, with
    // the result explicitly wrapped in the matching C++ matrix type.
    970   static SIMD_CPPFUNC double2x2 operator+(const double2x2 x, const double2x2 y) { return double2x2(::simd_linear_combination(1, x, 1, y)); }
    971   static SIMD_CPPFUNC double2x3 operator+(const double2x3 x, const double2x3 y) { return double2x3(::simd_linear_combination(1, x, 1, y)); }
    972   static SIMD_CPPFUNC double2x4 operator+(const double2x4 x, const double2x4 y) { return double2x4(::simd_linear_combination(1, x, 1, y)); }
    973   static SIMD_CPPFUNC double3x2 operator+(const double3x2 x, const double3x2 y) { return double3x2(::simd_linear_combination(1, x, 1, y)); }
    974   static SIMD_CPPFUNC double3x3 operator+(const double3x3 x, const double3x3 y) { return double3x3(::simd_linear_combination(1, x, 1, y)); }
    975   static SIMD_CPPFUNC double3x4 operator+(const double3x4 x, const double3x4 y) { return double3x4(::simd_linear_combination(1, x, 1, y)); }
    976   static SIMD_CPPFUNC double4x2 operator+(const double4x2 x, const double4x2 y) { return double4x2(::simd_linear_combination(1, x, 1, y)); }
    977   static SIMD_CPPFUNC double4x3 operator+(const double4x3 x, const double4x3 y) { return double4x3(::simd_linear_combination(1, x, 1, y)); }
    978   static SIMD_CPPFUNC double4x4 operator+(const double4x4 x, const double4x4 y) { return double4x4(::simd_linear_combination(1, x, 1, y)); }
    979   
    // Matrix subtraction for double-precision matrices of identical type.
    // Implemented as ::simd_linear_combination(1, x, -1, y), i.e. 1*x + (-1)*y,
    // with the result explicitly wrapped in the matching C++ matrix type.
    980   static SIMD_CPPFUNC double2x2 operator-(const double2x2 x, const double2x2 y) { return double2x2(::simd_linear_combination(1, x, -1, y)); }
    981   static SIMD_CPPFUNC double2x3 operator-(const double2x3 x, const double2x3 y) { return double2x3(::simd_linear_combination(1, x, -1, y)); }
    982   static SIMD_CPPFUNC double2x4 operator-(const double2x4 x, const double2x4 y) { return double2x4(::simd_linear_combination(1, x, -1, y)); }
    983   static SIMD_CPPFUNC double3x2 operator-(const double3x2 x, const double3x2 y) { return double3x2(::simd_linear_combination(1, x, -1, y)); }
    984   static SIMD_CPPFUNC double3x3 operator-(const double3x3 x, const double3x3 y) { return double3x3(::simd_linear_combination(1, x, -1, y)); }
    985   static SIMD_CPPFUNC double3x4 operator-(const double3x4 x, const double3x4 y) { return double3x4(::simd_linear_combination(1, x, -1, y)); }
    986   static SIMD_CPPFUNC double4x2 operator-(const double4x2 x, const double4x2 y) { return double4x2(::simd_linear_combination(1, x, -1, y)); }
    987   static SIMD_CPPFUNC double4x3 operator-(const double4x3 x, const double4x3 y) { return double4x3(::simd_linear_combination(1, x, -1, y)); }
    988   static SIMD_CPPFUNC double4x4 operator-(const double4x4 x, const double4x4 y) { return double4x4(::simd_linear_combination(1, x, -1, y)); }
    989   
    // In-place addition for double-precision matrices of identical type: x is
    // overwritten with 1*x + 1*y (via ::simd_linear_combination) and returned by reference.
    990   static SIMD_INLINE SIMD_NODEBUG double2x2& operator+=(double2x2& x, const double2x2 y) { x = double2x2(::simd_linear_combination(1, x, 1, y)); return x; }
    991   static SIMD_INLINE SIMD_NODEBUG double2x3& operator+=(double2x3& x, const double2x3 y) { x = double2x3(::simd_linear_combination(1, x, 1, y)); return x; }
    992   static SIMD_INLINE SIMD_NODEBUG double2x4& operator+=(double2x4& x, const double2x4 y) { x = double2x4(::simd_linear_combination(1, x, 1, y)); return x; }
    993   static SIMD_INLINE SIMD_NODEBUG double3x2& operator+=(double3x2& x, const double3x2 y) { x = double3x2(::simd_linear_combination(1, x, 1, y)); return x; }
    994   static SIMD_INLINE SIMD_NODEBUG double3x3& operator+=(double3x3& x, const double3x3 y) { x = double3x3(::simd_linear_combination(1, x, 1, y)); return x; }
    995   static SIMD_INLINE SIMD_NODEBUG double3x4& operator+=(double3x4& x, const double3x4 y) { x = double3x4(::simd_linear_combination(1, x, 1, y)); return x; }
    996   static SIMD_INLINE SIMD_NODEBUG double4x2& operator+=(double4x2& x, const double4x2 y) { x = double4x2(::simd_linear_combination(1, x, 1, y)); return x; }
    997   static SIMD_INLINE SIMD_NODEBUG double4x3& operator+=(double4x3& x, const double4x3 y) { x = double4x3(::simd_linear_combination(1, x, 1, y)); return x; }
    998   static SIMD_INLINE SIMD_NODEBUG double4x4& operator+=(double4x4& x, const double4x4 y) { x = double4x4(::simd_linear_combination(1, x, 1, y)); return x; }
    999   
   /* Compound subtraction for every double matrix shape: stores x - y back
    * into x (using the binary operator- for the same type) and returns a
    * reference to x so the expression can be chained. */
   1000   static SIMD_INLINE SIMD_NODEBUG double2x2& operator-=(double2x2& x, const double2x2 y) { x = x - y; return x; }
   1001   static SIMD_INLINE SIMD_NODEBUG double2x3& operator-=(double2x3& x, const double2x3 y) { x = x - y; return x; }
   1002   static SIMD_INLINE SIMD_NODEBUG double2x4& operator-=(double2x4& x, const double2x4 y) { x = x - y; return x; }
   1003   static SIMD_INLINE SIMD_NODEBUG double3x2& operator-=(double3x2& x, const double3x2 y) { x = x - y; return x; }
   1004   static SIMD_INLINE SIMD_NODEBUG double3x3& operator-=(double3x3& x, const double3x3 y) { x = x - y; return x; }
   1005   static SIMD_INLINE SIMD_NODEBUG double3x4& operator-=(double3x4& x, const double3x4 y) { x = x - y; return x; }
   1006   static SIMD_INLINE SIMD_NODEBUG double4x2& operator-=(double4x2& x, const double4x2 y) { x = x - y; return x; }
   1007   static SIMD_INLINE SIMD_NODEBUG double4x3& operator-=(double4x3& x, const double4x3 y) { x = x - y; return x; }
   1008   static SIMD_INLINE SIMD_NODEBUG double4x4& operator-=(double4x4& x, const double4x4 y) { x = x - y; return x; }
   1009   
   /* Transpose of a double matrix, forwarded to ::simd_transpose.  For the
    * non-square overloads the two dimension digits of the type name swap
    * between the argument and the result (for example double3x2 in,
    * double2x3 out); square types map to themselves. */
   1010   static SIMD_CPPFUNC double2x2 transpose(const double2x2 x) { return ::simd_transpose(x); }
   1011   static SIMD_CPPFUNC double2x3 transpose(const double3x2 x) { return ::simd_transpose(x); }
   1012   static SIMD_CPPFUNC double2x4 transpose(const double4x2 x) { return ::simd_transpose(x); }
   1013   static SIMD_CPPFUNC double3x2 transpose(const double2x3 x) { return ::simd_transpose(x); }
   1014   static SIMD_CPPFUNC double3x3 transpose(const double3x3 x) { return ::simd_transpose(x); }
   1015   static SIMD_CPPFUNC double3x4 transpose(const double4x3 x) { return ::simd_transpose(x); }
   1016   static SIMD_CPPFUNC double4x2 transpose(const double2x4 x) { return ::simd_transpose(x); }
   1017   static SIMD_CPPFUNC double4x3 transpose(const double3x4 x) { return ::simd_transpose(x); }
   1018   static SIMD_CPPFUNC double4x4 transpose(const double4x4 x) { return ::simd_transpose(x); }
   1019 
   /* Trace of a square double matrix, forwarded to ::simd_trace.  Overloads
    * exist here for the 2x2, 3x3 and 4x4 types only; the result is a scalar
    * double. */
   1020   static SIMD_CPPFUNC double trace(const double2x2 x) { return ::simd_trace(x); }
   1021   static SIMD_CPPFUNC double trace(const double3x3 x) { return ::simd_trace(x); }
   1022   static SIMD_CPPFUNC double trace(const double4x4 x) { return ::simd_trace(x); }
   1023 
   /* Determinant of a square double matrix, forwarded to ::simd_determinant.
    * Overloads exist here for the 2x2, 3x3 and 4x4 types only; the result is a
    * scalar double. */
   1024   static SIMD_CPPFUNC double determinant(const double2x2 x) { return ::simd_determinant(x); }
   1025   static SIMD_CPPFUNC double determinant(const double3x3 x) { return ::simd_determinant(x); }
   1026   static SIMD_CPPFUNC double determinant(const double4x4 x) { return ::simd_determinant(x); }
   1027   
   /* Inverse of a square double matrix, forwarded to ::simd_inverse.  Per the
    * function table at the top of this header, the result is undefined when x
    * is singular.  Each overload carries an __API_AVAILABLE annotation written
    * after the parameter list.  The pragma push / ignored / pop around the
    * three overloads disables clang's -Wgcc-compat diagnostic for exactly these
    * lines and then restores the previous diagnostic state.
    * NOTE(review): the warning is presumably triggered by the attribute's
    * position between the declarator and the function body -- confirm. */
   1028 #pragma clang diagnostic push
   1029 #pragma clang diagnostic ignored "-Wgcc-compat"
   1030   static SIMD_CPPFUNC double2x2 inverse(const double2x2 x) __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0)) { return ::simd_inverse(x); }
   1031   static SIMD_CPPFUNC double3x3 inverse(const double3x3 x) __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0)) { return ::simd_inverse(x); }
   1032   static SIMD_CPPFUNC double4x4 inverse(const double4x4 x) __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0)) { return ::simd_inverse(x); }
   1033 #pragma clang diagnostic pop
   1034   
   /* Scalar-times-matrix product a * x with the scalar on the left, forwarded
    * to ::simd_mul(a, x).  The result has the same matrix type as x. */
   1035   static SIMD_CPPFUNC double2x2 operator*(const double a, const double2x2 x) { return ::simd_mul(a, x); }
   1036   static SIMD_CPPFUNC double2x3 operator*(const double a, const double2x3 x) { return ::simd_mul(a, x); }
   1037   static SIMD_CPPFUNC double2x4 operator*(const double a, const double2x4 x) { return ::simd_mul(a, x); }
   1038   static SIMD_CPPFUNC double3x2 operator*(const double a, const double3x2 x) { return ::simd_mul(a, x); }
   1039   static SIMD_CPPFUNC double3x3 operator*(const double a, const double3x3 x) { return ::simd_mul(a, x); }
   1040   static SIMD_CPPFUNC double3x4 operator*(const double a, const double3x4 x) { return ::simd_mul(a, x); }
   1041   static SIMD_CPPFUNC double4x2 operator*(const double a, const double4x2 x) { return ::simd_mul(a, x); }
   1042   static SIMD_CPPFUNC double4x3 operator*(const double a, const double4x3 x) { return ::simd_mul(a, x); }
   1043   static SIMD_CPPFUNC double4x4 operator*(const double a, const double4x4 x) { return ::simd_mul(a, x); }
   /* Matrix-times-scalar product x * a with the scalar on the right.  The
    * operands are swapped when forwarding, so these overloads call the same
    * ::simd_mul(a, x) as the scalar-on-the-left forms.  The result has the
    * same matrix type as x. */
   1044   static SIMD_CPPFUNC double2x2 operator*(const double2x2 x, const double a) { return ::simd_mul(a, x); }
   1045   static SIMD_CPPFUNC double2x3 operator*(const double2x3 x, const double a) { return ::simd_mul(a, x); }
   1046   static SIMD_CPPFUNC double2x4 operator*(const double2x4 x, const double a) { return ::simd_mul(a, x); }
   1047   static SIMD_CPPFUNC double3x2 operator*(const double3x2 x, const double a) { return ::simd_mul(a, x); }
   1048   static SIMD_CPPFUNC double3x3 operator*(const double3x3 x, const double a) { return ::simd_mul(a, x); }
   1049   static SIMD_CPPFUNC double3x4 operator*(const double3x4 x, const double a) { return ::simd_mul(a, x); }
   1050   static SIMD_CPPFUNC double4x2 operator*(const double4x2 x, const double a) { return ::simd_mul(a, x); }
   1051   static SIMD_CPPFUNC double4x3 operator*(const double4x3 x, const double a) { return ::simd_mul(a, x); }
   1052   static SIMD_CPPFUNC double4x4 operator*(const double4x4 x, const double a) { return ::simd_mul(a, x); }
   /* In-place scaling of a matrix by a scalar: stores ::simd_mul(a, x) back
    * into x and returns a reference to x so the expression can be chained. */
   1053   static SIMD_INLINE SIMD_NODEBUG double2x2& operator*=(double2x2& x, const double a) { x = ::simd_mul(a, x); return x; }
   1054   static SIMD_INLINE SIMD_NODEBUG double2x3& operator*=(double2x3& x, const double a) { x = ::simd_mul(a, x); return x; }
   1055   static SIMD_INLINE SIMD_NODEBUG double2x4& operator*=(double2x4& x, const double a) { x = ::simd_mul(a, x); return x; }
   1056   static SIMD_INLINE SIMD_NODEBUG double3x2& operator*=(double3x2& x, const double a) { x = ::simd_mul(a, x); return x; }
   1057   static SIMD_INLINE SIMD_NODEBUG double3x3& operator*=(double3x3& x, const double a) { x = ::simd_mul(a, x); return x; }
   1058   static SIMD_INLINE SIMD_NODEBUG double3x4& operator*=(double3x4& x, const double a) { x = ::simd_mul(a, x); return x; }
   1059   static SIMD_INLINE SIMD_NODEBUG double4x2& operator*=(double4x2& x, const double a) { x = ::simd_mul(a, x); return x; }
   1060   static SIMD_INLINE SIMD_NODEBUG double4x3& operator*=(double4x3& x, const double a) { x = ::simd_mul(a, x); return x; }
   1061   static SIMD_INLINE SIMD_NODEBUG double4x4& operator*=(double4x4& x, const double a) { x = ::simd_mul(a, x); return x; }
   1062   
   /* Vector-times-matrix product x * y with the vector on the left, forwarded
    * to ::simd_mul(x, y).  Reading a matrix type as doubleCxR (C columns of
    * length-R vectors), every overload takes a left operand of length R and
    * returns a vector of length C. */
   1063   static SIMD_CPPFUNC double2 operator*(const double2 x, const double2x2 y) { return ::simd_mul(x, y); }
   1064   static SIMD_CPPFUNC double3 operator*(const double2 x, const double3x2 y) { return ::simd_mul(x, y); }
   1065   static SIMD_CPPFUNC double4 operator*(const double2 x, const double4x2 y) { return ::simd_mul(x, y); }
   1066   static SIMD_CPPFUNC double2 operator*(const double3 x, const double2x3 y) { return ::simd_mul(x, y); }
   1067   static SIMD_CPPFUNC double3 operator*(const double3 x, const double3x3 y) { return ::simd_mul(x, y); }
   1068   static SIMD_CPPFUNC double4 operator*(const double3 x, const double4x3 y) { return ::simd_mul(x, y); }
   1069   static SIMD_CPPFUNC double2 operator*(const double4 x, const double2x4 y) { return ::simd_mul(x, y); }
   1070   static SIMD_CPPFUNC double3 operator*(const double4 x, const double3x4 y) { return ::simd_mul(x, y); }
   1071   static SIMD_CPPFUNC double4 operator*(const double4 x, const double4x4 y) { return ::simd_mul(x, y); }
   /* Matrix-times-vector product x * y with the vector on the right (treated
    * as a column vector per the table at the top of this header), forwarded to
    * ::simd_mul(x, y).  Reading a matrix type as doubleCxR (C columns of
    * length-R vectors), every overload takes a right operand of length C and
    * returns a vector of length R. */
   1072   static SIMD_CPPFUNC double2 operator*(const double2x2 x, const double2 y) { return ::simd_mul(x, y); }
   1073   static SIMD_CPPFUNC double2 operator*(const double3x2 x, const double3 y) { return ::simd_mul(x, y); }
   1074   static SIMD_CPPFUNC double2 operator*(const double4x2 x, const double4 y) { return ::simd_mul(x, y); }
   1075   static SIMD_CPPFUNC double3 operator*(const double2x3 x, const double2 y) { return ::simd_mul(x, y); }
   1076   static SIMD_CPPFUNC double3 operator*(const double3x3 x, const double3 y) { return ::simd_mul(x, y); }
   1077   static SIMD_CPPFUNC double3 operator*(const double4x3 x, const double4 y) { return ::simd_mul(x, y); }
   1078   static SIMD_CPPFUNC double4 operator*(const double2x4 x, const double2 y) { return ::simd_mul(x, y); }
   1079   static SIMD_CPPFUNC double4 operator*(const double3x4 x, const double3 y) { return ::simd_mul(x, y); }
   1080   static SIMD_CPPFUNC double4 operator*(const double4x4 x, const double4 y) { return ::simd_mul(x, y); }
   /* In-place vector-times-matrix product: stores ::simd_mul(x, y), with the
    * vector x as the left operand, back into x and returns a reference to x.
    * Only square matrices are accepted, so the product keeps x's length. */
   1081   static SIMD_INLINE SIMD_NODEBUG double2& operator*=(double2& x, const double2x2 y) { x = ::simd_mul(x, y); return x; }
   1082   static SIMD_INLINE SIMD_NODEBUG double3& operator*=(double3& x, const double3x3 y) { x = ::simd_mul(x, y); return x; }
   1083   static SIMD_INLINE SIMD_NODEBUG double4& operator*=(double4& x, const double4x4 y) { x = ::simd_mul(x, y); return x; }
   1084   
   /* Matrix-times-matrix product x * y, forwarded to ::simd_mul(x, y).
    * Reading a matrix type as doubleCxR (C columns of length-R vectors), an
    * overload exists for every pair where x is doubleKxR and y is doubleCxK;
    * the result is doubleCxR.  In other words the first digit of x's type
    * must equal the second digit of y's type, and the result takes its first
    * digit from y and its second digit from x. */
   1085   static SIMD_CPPFUNC double2x2 operator*(const double2x2 x, const double2x2 y) { return ::simd_mul(x, y); }
   1086   static SIMD_CPPFUNC double3x2 operator*(const double2x2 x, const double3x2 y) { return ::simd_mul(x, y); }
   1087   static SIMD_CPPFUNC double4x2 operator*(const double2x2 x, const double4x2 y) { return ::simd_mul(x, y); }
   1088   static SIMD_CPPFUNC double2x3 operator*(const double2x3 x, const double2x2 y) { return ::simd_mul(x, y); }
   1089   static SIMD_CPPFUNC double3x3 operator*(const double2x3 x, const double3x2 y) { return ::simd_mul(x, y); }
   1090   static SIMD_CPPFUNC double4x3 operator*(const double2x3 x, const double4x2 y) { return ::simd_mul(x, y); }
   1091   static SIMD_CPPFUNC double2x4 operator*(const double2x4 x, const double2x2 y) { return ::simd_mul(x, y); }
   1092   static SIMD_CPPFUNC double3x4 operator*(const double2x4 x, const double3x2 y) { return ::simd_mul(x, y); }
   1093   static SIMD_CPPFUNC double4x4 operator*(const double2x4 x, const double4x2 y) { return ::simd_mul(x, y); }
   1094   static SIMD_CPPFUNC double2x2 operator*(const double3x2 x, const double2x3 y) { return ::simd_mul(x, y); }
   1095   static SIMD_CPPFUNC double3x2 operator*(const double3x2 x, const double3x3 y) { return ::simd_mul(x, y); }
   1096   static SIMD_CPPFUNC double4x2 operator*(const double3x2 x, const double4x3 y) { return ::simd_mul(x, y); }
   1097   static SIMD_CPPFUNC double2x3 operator*(const double3x3 x, const double2x3 y) { return ::simd_mul(x, y); }
   1098   static SIMD_CPPFUNC double3x3 operator*(const double3x3 x, const double3x3 y) { return ::simd_mul(x, y); }
   1099   static SIMD_CPPFUNC double4x3 operator*(const double3x3 x, const double4x3 y) { return ::simd_mul(x, y); }
   1100   static SIMD_CPPFUNC double2x4 operator*(const double3x4 x, const double2x3 y) { return ::simd_mul(x, y); }
   1101   static SIMD_CPPFUNC double3x4 operator*(const double3x4 x, const double3x3 y) { return ::simd_mul(x, y); }
   1102   static SIMD_CPPFUNC double4x4 operator*(const double3x4 x, const double4x3 y) { return ::simd_mul(x, y); }
   1103   static SIMD_CPPFUNC double2x2 operator*(const double4x2 x, const double2x4 y) { return ::simd_mul(x, y); }
   1104   static SIMD_CPPFUNC double3x2 operator*(const double4x2 x, const double3x4 y) { return ::simd_mul(x, y); }
   1105   static SIMD_CPPFUNC double4x2 operator*(const double4x2 x, const double4x4 y) { return ::simd_mul(x, y); }
   1106   static SIMD_CPPFUNC double2x3 operator*(const double4x3 x, const double2x4 y) { return ::simd_mul(x, y); }
   1107   static SIMD_CPPFUNC double3x3 operator*(const double4x3 x, const double3x4 y) { return ::simd_mul(x, y); }
   1108   static SIMD_CPPFUNC double4x3 operator*(const double4x3 x, const double4x4 y) { return ::simd_mul(x, y); }
   1109   static SIMD_CPPFUNC double2x4 operator*(const double4x4 x, const double2x4 y) { return ::simd_mul(x, y); }
   1110   static SIMD_CPPFUNC double3x4 operator*(const double4x4 x, const double3x4 y) { return ::simd_mul(x, y); }
   1111   static SIMD_CPPFUNC double4x4 operator*(const double4x4 x, const double4x4 y) { return ::simd_mul(x, y); }
   /* In-place matrix-times-matrix product: stores ::simd_mul(x, y) back into x
    * and returns a reference to x.  The right operand is always the square
    * matrix whose size equals the first digit of x's type, which is the only
    * choice for which the product has the same type as x. */
   1112   static SIMD_INLINE SIMD_NODEBUG double2x2& operator*=(double2x2& x, const double2x2 y) { x = ::simd_mul(x, y); return x; }
   1113   static SIMD_INLINE SIMD_NODEBUG double2x3& operator*=(double2x3& x, const double2x2 y) { x = ::simd_mul(x, y); return x; }
   1114   static SIMD_INLINE SIMD_NODEBUG double2x4& operator*=(double2x4& x, const double2x2 y) { x = ::simd_mul(x, y); return x; }
   1115   static SIMD_INLINE SIMD_NODEBUG double3x2& operator*=(double3x2& x, const double3x3 y) { x = ::simd_mul(x, y); return x; }
   1116   static SIMD_INLINE SIMD_NODEBUG double3x3& operator*=(double3x3& x, const double3x3 y) { x = ::simd_mul(x, y); return x; }
   1117   static SIMD_INLINE SIMD_NODEBUG double3x4& operator*=(double3x4& x, const double3x3 y) { x = ::simd_mul(x, y); return x; }
   1118   static SIMD_INLINE SIMD_NODEBUG double4x2& operator*=(double4x2& x, const double4x4 y) { x = ::simd_mul(x, y); return x; }
   1119   static SIMD_INLINE SIMD_NODEBUG double4x3& operator*=(double4x3& x, const double4x4 y) { x = ::simd_mul(x, y); return x; }
   1120   static SIMD_INLINE SIMD_NODEBUG double4x4& operator*=(double4x4& x, const double4x4 y) { x = ::simd_mul(x, y); return x; }
   1121   
   /* Equality comparison of two matrices of the same type, forwarded to
    * ::simd_equal(x, y).  Operands are taken by const reference and the
    * result is a bool. */
   1122   static SIMD_CPPFUNC bool operator==(const double2x2& x, const double2x2& y) { return ::simd_equal(x, y); }
   1123   static SIMD_CPPFUNC bool operator==(const double2x3& x, const double2x3& y) { return ::simd_equal(x, y); }
   1124   static SIMD_CPPFUNC bool operator==(const double2x4& x, const double2x4& y) { return ::simd_equal(x, y); }
   1125   static SIMD_CPPFUNC bool operator==(const double3x2& x, const double3x2& y) { return ::simd_equal(x, y); }
   1126   static SIMD_CPPFUNC bool operator==(const double3x3& x, const double3x3& y) { return ::simd_equal(x, y); }
   1127   static SIMD_CPPFUNC bool operator==(const double3x4& x, const double3x4& y) { return ::simd_equal(x, y); }
   1128   static SIMD_CPPFUNC bool operator==(const double4x2& x, const double4x2& y) { return ::simd_equal(x, y); }
   1129   static SIMD_CPPFUNC bool operator==(const double4x3& x, const double4x3& y) { return ::simd_equal(x, y); }
   1130   static SIMD_CPPFUNC bool operator==(const double4x4& x, const double4x4& y) { return ::simd_equal(x, y); }
   1131   
   /* Inequality comparison of two matrices of the same type, defined as the
    * logical negation of operator== for that type. */
   1132   static SIMD_CPPFUNC bool operator!=(const double2x2& x, const double2x2& y) { return !(x == y); }
   1133   static SIMD_CPPFUNC bool operator!=(const double2x3& x, const double2x3& y) { return !(x == y); }
   1134   static SIMD_CPPFUNC bool operator!=(const double2x4& x, const double2x4& y) { return !(x == y); }
   1135   static SIMD_CPPFUNC bool operator!=(const double3x2& x, const double3x2& y) { return !(x == y); }
   1136   static SIMD_CPPFUNC bool operator!=(const double3x3& x, const double3x3& y) { return !(x == y); }
   1137   static SIMD_CPPFUNC bool operator!=(const double3x4& x, const double3x4& y) { return !(x == y); }
   1138   static SIMD_CPPFUNC bool operator!=(const double4x2& x, const double4x2& y) { return !(x == y); }
   1139   static SIMD_CPPFUNC bool operator!=(const double4x3& x, const double4x3& y) { return !(x == y); }
   1140   static SIMD_CPPFUNC bool operator!=(const double4x4& x, const double4x4& y) { return !(x == y); }
   1141   
   /* Approximate comparison of two matrices of the same type with tolerance
    * tol, forwarded unchanged to ::simd_almost_equal_elements(x, y, tol).
    * NOTE(review): the name suggests an element-wise absolute-difference test
    * against tol; the exact criterion lives in the C function -- confirm
    * there. */
   1142   static SIMD_CPPFUNC bool almost_equal_elements(const double2x2 x, const double2x2 y, const double tol) { return ::simd_almost_equal_elements(x, y, tol); }
   1143   static SIMD_CPPFUNC bool almost_equal_elements(const double2x3 x, const double2x3 y, const double tol) { return ::simd_almost_equal_elements(x, y, tol); }
   1144   static SIMD_CPPFUNC bool almost_equal_elements(const double2x4 x, const double2x4 y, const double tol) { return ::simd_almost_equal_elements(x, y, tol); }
   1145   static SIMD_CPPFUNC bool almost_equal_elements(const double3x2 x, const double3x2 y, const double tol) { return ::simd_almost_equal_elements(x, y, tol); }
   1146   static SIMD_CPPFUNC bool almost_equal_elements(const double3x3 x, const double3x3 y, const double tol) { return ::simd_almost_equal_elements(x, y, tol); }
   1147   static SIMD_CPPFUNC bool almost_equal_elements(const double3x4 x, const double3x4 y, const double tol) { return ::simd_almost_equal_elements(x, y, tol); }
   1148   static SIMD_CPPFUNC bool almost_equal_elements(const double4x2 x, const double4x2 y, const double tol) { return ::simd_almost_equal_elements(x, y, tol); }
   1149   static SIMD_CPPFUNC bool almost_equal_elements(const double4x3 x, const double4x3 y, const double tol) { return ::simd_almost_equal_elements(x, y, tol); }
   1150   static SIMD_CPPFUNC bool almost_equal_elements(const double4x4 x, const double4x4 y, const double tol) { return ::simd_almost_equal_elements(x, y, tol); }
   1151   
   /* Approximate comparison of two matrices of the same type with tolerance
    * tol, forwarded unchanged to
    * ::simd_almost_equal_elements_relative(x, y, tol).
    * NOTE(review): the name suggests tol is applied relative to the element
    * magnitudes; the exact criterion lives in the C function -- confirm
    * there. */
   1152   static SIMD_CPPFUNC bool almost_equal_elements_relative(const double2x2 x, const double2x2 y, const double tol) { return ::simd_almost_equal_elements_relative(x, y, tol); }
   1153   static SIMD_CPPFUNC bool almost_equal_elements_relative(const double2x3 x, const double2x3 y, const double tol) { return ::simd_almost_equal_elements_relative(x, y, tol); }
   1154   static SIMD_CPPFUNC bool almost_equal_elements_relative(const double2x4 x, const double2x4 y, const double tol) { return ::simd_almost_equal_elements_relative(x, y, tol); }
   1155   static SIMD_CPPFUNC bool almost_equal_elements_relative(const double3x2 x, const double3x2 y, const double tol) { return ::simd_almost_equal_elements_relative(x, y, tol); }
   1156   static SIMD_CPPFUNC bool almost_equal_elements_relative(const double3x3 x, const double3x3 y, const double tol) { return ::simd_almost_equal_elements_relative(x, y, tol); }
   1157   static SIMD_CPPFUNC bool almost_equal_elements_relative(const double3x4 x, const double3x4 y, const double tol) { return ::simd_almost_equal_elements_relative(x, y, tol); }
   1158   static SIMD_CPPFUNC bool almost_equal_elements_relative(const double4x2 x, const double4x2 y, const double tol) { return ::simd_almost_equal_elements_relative(x, y, tol); }
   1159   static SIMD_CPPFUNC bool almost_equal_elements_relative(const double4x3 x, const double4x3 y, const double tol) { return ::simd_almost_equal_elements_relative(x, y, tol); }
   1160   static SIMD_CPPFUNC bool almost_equal_elements_relative(const double4x4 x, const double4x4 y, const double tol) { return ::simd_almost_equal_elements_relative(x, y, tol); }
   1161 }
   1162 
   1163 extern "C" {
   1164 #endif /* __cplusplus */
   1165 
   1166 #pragma mark - Implementation
   1167 
   /* Builds a square matrix whose diagonal is the vector __x and whose other
    * entries are 0.  Column i is written out as a brace initializer holding
    * the i-th component of __x (.x, .y, .z, .w) in position i and literal 0
    * everywhere else.  Overloads cover half, float and double in sizes 2, 3
    * and 4. */
   1168 static  simd_half2x2 SIMD_CFUNC simd_diagonal_matrix(simd_half2  __x) {  simd_half2x2 __r = { .columns[0] = {__x.x,0}, .columns[1] = {0,__x.y} }; return __r; }
   1169 static  simd_float2x2 SIMD_CFUNC simd_diagonal_matrix(simd_float2  __x) {  simd_float2x2 __r = { .columns[0] = {__x.x,0}, .columns[1] = {0,__x.y} }; return __r; }
   1170 static simd_double2x2 SIMD_CFUNC simd_diagonal_matrix(simd_double2 __x) { simd_double2x2 __r = { .columns[0] = {__x.x,0}, .columns[1] = {0,__x.y} }; return __r; }
   1171 static  simd_half3x3 SIMD_CFUNC simd_diagonal_matrix(simd_half3  __x) {  simd_half3x3 __r = { .columns[0] = {__x.x,0,0}, .columns[1] = {0,__x.y,0}, .columns[2] = {0,0,__x.z} }; return __r; }
   1172 static  simd_float3x3 SIMD_CFUNC simd_diagonal_matrix(simd_float3  __x) {  simd_float3x3 __r = { .columns[0] = {__x.x,0,0}, .columns[1] = {0,__x.y,0}, .columns[2] = {0,0,__x.z} }; return __r; }
   1173 static simd_double3x3 SIMD_CFUNC simd_diagonal_matrix(simd_double3 __x) { simd_double3x3 __r = { .columns[0] = {__x.x,0,0}, .columns[1] = {0,__x.y,0}, .columns[2] = {0,0,__x.z} }; return __r; }
   1174 static  simd_half4x4 SIMD_CFUNC simd_diagonal_matrix(simd_half4  __x) {  simd_half4x4 __r = { .columns[0] = {__x.x,0,0,0}, .columns[1] = {0,__x.y,0,0}, .columns[2] = {0,0,__x.z,0}, .columns[3] = {0,0,0,__x.w} }; return __r; }
   1175 static  simd_float4x4 SIMD_CFUNC simd_diagonal_matrix(simd_float4  __x) {  simd_float4x4 __r = { .columns[0] = {__x.x,0,0,0}, .columns[1] = {0,__x.y,0,0}, .columns[2] = {0,0,__x.z,0}, .columns[3] = {0,0,0,__x.w} }; return __r; }
   1176 static simd_double4x4 SIMD_CFUNC simd_diagonal_matrix(simd_double4 __x) { simd_double4x4 __r = { .columns[0] = {__x.x,0,0,0}, .columns[1] = {0,__x.y,0,0}, .columns[2] = {0,0,__x.z,0}, .columns[3] = {0,0,0,__x.w} }; return __r; }
   1177 
   /* Builds a matrix from its column vectors: argument colK is stored
    * unchanged in .columns[K] of the result.  The overload is selected by the
    * number of arguments (2, 3 or 4 columns) and by their vector type, so N
    * arguments of length M yield the NxM matrix type of the same element type
    * (for example two simd_half3 columns give a simd_half2x3). */
   1178 static  simd_half2x2 SIMD_CFUNC simd_matrix(simd_half2  col0, simd_half2  col1) {  simd_half2x2 __r = { .columns[0] = col0, .columns[1] = col1 }; return __r; }
   1179 static  simd_half2x3 SIMD_CFUNC simd_matrix(simd_half3  col0, simd_half3  col1) {  simd_half2x3 __r = { .columns[0] = col0, .columns[1] = col1 }; return __r; }
   1180 static  simd_half2x4 SIMD_CFUNC simd_matrix(simd_half4  col0, simd_half4  col1) {  simd_half2x4 __r = { .columns[0] = col0, .columns[1] = col1 }; return __r; }
   1181 static  simd_float2x2 SIMD_CFUNC simd_matrix(simd_float2  col0, simd_float2  col1) {  simd_float2x2 __r = { .columns[0] = col0, .columns[1] = col1 }; return __r; }
   1182 static  simd_float2x3 SIMD_CFUNC simd_matrix(simd_float3  col0, simd_float3  col1) {  simd_float2x3 __r = { .columns[0] = col0, .columns[1] = col1 }; return __r; }
   1183 static  simd_float2x4 SIMD_CFUNC simd_matrix(simd_float4  col0, simd_float4  col1) {  simd_float2x4 __r = { .columns[0] = col0, .columns[1] = col1 }; return __r; }
   1184 static simd_double2x2 SIMD_CFUNC simd_matrix(simd_double2 col0, simd_double2 col1) { simd_double2x2 __r = { .columns[0] = col0, .columns[1] = col1 }; return __r; }
   1185 static simd_double2x3 SIMD_CFUNC simd_matrix(simd_double3 col0, simd_double3 col1) { simd_double2x3 __r = { .columns[0] = col0, .columns[1] = col1 }; return __r; }
   1186 static simd_double2x4 SIMD_CFUNC simd_matrix(simd_double4 col0, simd_double4 col1) { simd_double2x4 __r = { .columns[0] = col0, .columns[1] = col1 }; return __r; }
   1187 static  simd_half3x2 SIMD_CFUNC simd_matrix(simd_half2  col0, simd_half2  col1, simd_half2  col2) {  simd_half3x2 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2 }; return __r; }
   1188 static  simd_half3x3 SIMD_CFUNC simd_matrix(simd_half3  col0, simd_half3  col1, simd_half3  col2) {  simd_half3x3 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2 }; return __r; }
   1189 static  simd_half3x4 SIMD_CFUNC simd_matrix(simd_half4  col0, simd_half4  col1, simd_half4  col2) {  simd_half3x4 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2 }; return __r; }
   1190 static  simd_float3x2 SIMD_CFUNC simd_matrix(simd_float2  col0, simd_float2  col1, simd_float2  col2) {  simd_float3x2 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2 }; return __r; }
   1191 static  simd_float3x3 SIMD_CFUNC simd_matrix(simd_float3  col0, simd_float3  col1, simd_float3  col2) {  simd_float3x3 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2 }; return __r; }
   1192 static  simd_float3x4 SIMD_CFUNC simd_matrix(simd_float4  col0, simd_float4  col1, simd_float4  col2) {  simd_float3x4 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2 }; return __r; }
   1193 static simd_double3x2 SIMD_CFUNC simd_matrix(simd_double2 col0, simd_double2 col1, simd_double2 col2) { simd_double3x2 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2 }; return __r; }
   1194 static simd_double3x3 SIMD_CFUNC simd_matrix(simd_double3 col0, simd_double3 col1, simd_double3 col2) { simd_double3x3 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2 }; return __r; }
   1195 static simd_double3x4 SIMD_CFUNC simd_matrix(simd_double4 col0, simd_double4 col1, simd_double4 col2) { simd_double3x4 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2 }; return __r; }
   1196 static  simd_half4x2 SIMD_CFUNC simd_matrix(simd_half2  col0, simd_half2  col1, simd_half2  col2, simd_half2  col3) {  simd_half4x2 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2, .columns[3] = col3 }; return __r; }
   1197 static  simd_half4x3 SIMD_CFUNC simd_matrix(simd_half3  col0, simd_half3  col1, simd_half3  col2, simd_half3  col3) {  simd_half4x3 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2, .columns[3] = col3 }; return __r; }
   1198 static  simd_half4x4 SIMD_CFUNC simd_matrix(simd_half4  col0, simd_half4  col1, simd_half4  col2, simd_half4  col3) {  simd_half4x4 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2, .columns[3] = col3 }; return __r; }
   1199 static  simd_float4x2 SIMD_CFUNC simd_matrix(simd_float2  col0, simd_float2  col1, simd_float2  col2, simd_float2  col3) {  simd_float4x2 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2, .columns[3] = col3 }; return __r; }
   1200 static  simd_float4x3 SIMD_CFUNC simd_matrix(simd_float3  col0, simd_float3  col1, simd_float3  col2, simd_float3  col3) {  simd_float4x3 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2, .columns[3] = col3 }; return __r; }
   1201 static  simd_float4x4 SIMD_CFUNC simd_matrix(simd_float4  col0, simd_float4  col1, simd_float4  col2, simd_float4  col3) {  simd_float4x4 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2, .columns[3] = col3 }; return __r; }
   1202 static simd_double4x2 SIMD_CFUNC simd_matrix(simd_double2 col0, simd_double2 col1, simd_double2 col2, simd_double2 col3) { simd_double4x2 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2, .columns[3] = col3 }; return __r; }
   1203 static simd_double4x3 SIMD_CFUNC simd_matrix(simd_double3 col0, simd_double3 col1, simd_double3 col2, simd_double3 col3) { simd_double4x3 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2, .columns[3] = col3 }; return __r; }
   1204 static simd_double4x4 SIMD_CFUNC simd_matrix(simd_double4 col0, simd_double4 col1, simd_double4 col2, simd_double4 col3) { simd_double4x4 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2, .columns[3] = col3 }; return __r; }
   1205 
   /* Builds a matrix from its row vectors.  Each overload first packs the rows
    * as columns with simd_matrix(...) and then transposes that matrix, so K
    * rows of length N yield the NxK matrix type of the same element type (for
    * example two simd_half3 rows give a simd_half3x2). */
   1206 static  simd_half2x2 SIMD_CFUNC simd_matrix_from_rows(simd_half2  row0, simd_half2  row1) { return simd_transpose(simd_matrix(row0, row1)); }
   1207 static  simd_half3x2 SIMD_CFUNC simd_matrix_from_rows(simd_half3  row0, simd_half3  row1) { return simd_transpose(simd_matrix(row0, row1)); }
   1208 static  simd_half4x2 SIMD_CFUNC simd_matrix_from_rows(simd_half4  row0, simd_half4  row1) { return simd_transpose(simd_matrix(row0, row1)); }
   1209 static  simd_float2x2 SIMD_CFUNC simd_matrix_from_rows(simd_float2  row0, simd_float2  row1) { return simd_transpose(simd_matrix(row0, row1)); }
   1210 static  simd_float3x2 SIMD_CFUNC simd_matrix_from_rows(simd_float3  row0, simd_float3  row1) { return simd_transpose(simd_matrix(row0, row1)); }
   1211 static  simd_float4x2 SIMD_CFUNC simd_matrix_from_rows(simd_float4  row0, simd_float4  row1) { return simd_transpose(simd_matrix(row0, row1)); }
   1212 static simd_double2x2 SIMD_CFUNC simd_matrix_from_rows(simd_double2 row0, simd_double2 row1) { return simd_transpose(simd_matrix(row0, row1)); }
   1213 static simd_double3x2 SIMD_CFUNC simd_matrix_from_rows(simd_double3 row0, simd_double3 row1) { return simd_transpose(simd_matrix(row0, row1)); }
   1214 static simd_double4x2 SIMD_CFUNC simd_matrix_from_rows(simd_double4 row0, simd_double4 row1) { return simd_transpose(simd_matrix(row0, row1)); }
   1215 static  simd_half2x3 SIMD_CFUNC simd_matrix_from_rows(simd_half2  row0, simd_half2  row1, simd_half2  row2) { return simd_transpose(simd_matrix(row0, row1, row2)); }
   1216 static  simd_half3x3 SIMD_CFUNC simd_matrix_from_rows(simd_half3  row0, simd_half3  row1, simd_half3  row2) { return simd_transpose(simd_matrix(row0, row1, row2)); }
   1217 static  simd_half4x3 SIMD_CFUNC simd_matrix_from_rows(simd_half4  row0, simd_half4  row1, simd_half4  row2) { return simd_transpose(simd_matrix(row0, row1, row2)); }
   1218 static  simd_float2x3 SIMD_CFUNC simd_matrix_from_rows(simd_float2  row0, simd_float2  row1, simd_float2  row2) { return simd_transpose(simd_matrix(row0, row1, row2)); }
   1219 static  simd_float3x3 SIMD_CFUNC simd_matrix_from_rows(simd_float3  row0, simd_float3  row1, simd_float3  row2) { return simd_transpose(simd_matrix(row0, row1, row2)); }
   1220 static  simd_float4x3 SIMD_CFUNC simd_matrix_from_rows(simd_float4  row0, simd_float4  row1, simd_float4  row2) { return simd_transpose(simd_matrix(row0, row1, row2)); }
   1221 static simd_double2x3 SIMD_CFUNC simd_matrix_from_rows(simd_double2 row0, simd_double2 row1, simd_double2 row2) { return simd_transpose(simd_matrix(row0, row1, row2)); }
   1222 static simd_double3x3 SIMD_CFUNC simd_matrix_from_rows(simd_double3 row0, simd_double3 row1, simd_double3 row2) { return simd_transpose(simd_matrix(row0, row1, row2)); }
   1223 static simd_double4x3 SIMD_CFUNC simd_matrix_from_rows(simd_double4 row0, simd_double4 row1, simd_double4 row2) { return simd_transpose(simd_matrix(row0, row1, row2)); }
   1224 static  simd_half2x4 SIMD_CFUNC simd_matrix_from_rows(simd_half2  row0, simd_half2  row1, simd_half2  row2, simd_half2  row3) { return simd_transpose(simd_matrix(row0, row1, row2, row3)); }
   1225 static  simd_half3x4 SIMD_CFUNC simd_matrix_from_rows(simd_half3  row0, simd_half3  row1, simd_half3  row2, simd_half3  row3) { return simd_transpose(simd_matrix(row0, row1, row2, row3)); }
   1226 static  simd_half4x4 SIMD_CFUNC simd_matrix_from_rows(simd_half4  row0, simd_half4  row1, simd_half4  row2, simd_half4  row3) { return simd_transpose(simd_matrix(row0, row1, row2, row3)); }
   1227 static  simd_float2x4 SIMD_CFUNC simd_matrix_from_rows(simd_float2  row0, simd_float2  row1, simd_float2  row2, simd_float2  row3) { return simd_transpose(simd_matrix(row0, row1, row2, row3)); }
   1228 static  simd_float3x4 SIMD_CFUNC simd_matrix_from_rows(simd_float3  row0, simd_float3  row1, simd_float3  row2, simd_float3  row3) { return simd_transpose(simd_matrix(row0, row1, row2, row3)); }
   1229 static  simd_float4x4 SIMD_CFUNC simd_matrix_from_rows(simd_float4  row0, simd_float4  row1, simd_float4  row2, simd_float4  row3) { return simd_transpose(simd_matrix(row0, row1, row2, row3)); }
   1230 static simd_double2x4 SIMD_CFUNC simd_matrix_from_rows(simd_double2 row0, simd_double2 row1, simd_double2 row2, simd_double2 row3) { return simd_transpose(simd_matrix(row0, row1, row2, row3)); }
   1231 static simd_double3x4 SIMD_CFUNC simd_matrix_from_rows(simd_double3 row0, simd_double3 row1, simd_double3 row2, simd_double3 row3) { return simd_transpose(simd_matrix(row0, row1, row2, row3)); }
   1232 static simd_double4x4 SIMD_CFUNC simd_matrix_from_rows(simd_double4 row0, simd_double4 row1, simd_double4 row2, simd_double4 row3) { return simd_transpose(simd_matrix(row0, row1, row2, row3)); }
   1233   
   /* 3x3 matrix for the half-precision quaternion q.  Builds the 4x4 form with
    * simd_matrix4x4(q) and returns the xyz lanes of its first three columns,
    * i.e. the upper-left 3x3 block. */
   1234 static  simd_half3x3 SIMD_NOINLINE simd_matrix3x3(simd_quath q) {
   1235   simd_half4x4 r = simd_matrix4x4(q);
   1236   return (simd_half3x3){ r.columns[0].xyz, r.columns[1].xyz, r.columns[2].xyz };
   1237 }
   1238 
   /* Expands the half-precision quaternion q into a 4x4 matrix.  With
    * v = q.vector, the upper-left 3x3 entries are built from products of v's
    * components: each diagonal entry is a signed sum of the four squared
    * components, and each off-diagonal entry is twice the sum or difference of
    * two pairwise products.  The fourth lane of the first three columns is 0
    * and the fourth column is (0, 0, 0, 1).
    * NOTE(review): this matches the usual rotation-matrix expansion with
    * (x, y, z) as the vector part and w as the scalar part; nothing here
    * normalizes q, so unit length is presumably the caller's job -- confirm. */
   1239 static  simd_half4x4 SIMD_NOINLINE simd_matrix4x4(simd_quath q) {
   1240   simd_half4 v = q.vector;
   1241   simd_half4x4 r = {
   1242     .columns[0] = { v.x*v.x - v.y*v.y - v.z*v.z + v.w*v.w,
   1243                         2*(v.x*v.y + v.z*v.w),
   1244                         2*(v.x*v.z - v.y*v.w), 0 },
   1245     .columns[1] = {     2*(v.x*v.y - v.z*v.w),
   1246                     v.y*v.y - v.z*v.z + v.w*v.w - v.x*v.x,
   1247                         2*(v.y*v.z + v.x*v.w), 0 },
   1248     .columns[2] = {     2*(v.z*v.x + v.y*v.w),
   1249                         2*(v.y*v.z - v.x*v.w),
   1250                     v.z*v.z + v.w*v.w - v.x*v.x - v.y*v.y, 0 },
   1251     .columns[3] = { 0, 0, 0, 1 }
   1252   };
   1253   return r;
   1254 }
   1255 
   1256 static  simd_float3x3 SIMD_NOINLINE simd_matrix3x3(simd_quatf q) {
   1257   simd_float4x4 r = simd_matrix4x4(q);
   1258   return (simd_float3x3){ r.columns[0].xyz, r.columns[1].xyz, r.columns[2].xyz };
   1259 }
   1260 
   1261 static  simd_float4x4 SIMD_NOINLINE simd_matrix4x4(simd_quatf q) {
   1262   simd_float4 v = q.vector;
   1263   simd_float4x4 r = {
   1264     .columns[0] = { v.x*v.x - v.y*v.y - v.z*v.z + v.w*v.w,
   1265                         2*(v.x*v.y + v.z*v.w),
   1266                         2*(v.x*v.z - v.y*v.w), 0 },
   1267     .columns[1] = {     2*(v.x*v.y - v.z*v.w),
   1268                     v.y*v.y - v.z*v.z + v.w*v.w - v.x*v.x,
   1269                         2*(v.y*v.z + v.x*v.w), 0 },
   1270     .columns[2] = {     2*(v.z*v.x + v.y*v.w),
   1271                         2*(v.y*v.z - v.x*v.w),
   1272                     v.z*v.z + v.w*v.w - v.x*v.x - v.y*v.y, 0 },
   1273     .columns[3] = { 0, 0, 0, 1 }
   1274   };
   1275   return r;
   1276 }
   1277   
   1278 static simd_double3x3 SIMD_NOINLINE simd_matrix3x3(simd_quatd q) {
   1279   simd_double4x4 r = simd_matrix4x4(q);
   1280   return (simd_double3x3){ r.columns[0].xyz, r.columns[1].xyz, r.columns[2].xyz };
   1281 }
   1282 
   1283 static simd_double4x4 SIMD_NOINLINE simd_matrix4x4(simd_quatd q) {
   1284   simd_double4 v = q.vector;
   1285   simd_double4x4 r = {
   1286     .columns[0] = { v.x*v.x - v.y*v.y - v.z*v.z + v.w*v.w,
   1287                         2*(v.x*v.y + v.z*v.w),
   1288                         2*(v.x*v.z - v.y*v.w), 0 },
   1289     .columns[1] = {     2*(v.x*v.y - v.z*v.w),
   1290                     v.y*v.y - v.z*v.z + v.w*v.w - v.x*v.x,
   1291                         2*(v.y*v.z + v.x*v.w), 0 },
   1292     .columns[2] = {     2*(v.z*v.x + v.y*v.w),
   1293                         2*(v.y*v.z - v.x*v.w),
   1294                     v.z*v.z + v.w*v.w - v.x*v.x - v.y*v.y, 0 },
   1295     .columns[3] = { 0, 0, 0, 1 }
   1296   };
   1297   return r;
   1298 }
   1299 
   1300 static   simd_half2x2 SIMD_CFUNC matrix_scale(_Float16  __a, simd_half2x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; }
   1301 static   simd_half3x2 SIMD_CFUNC matrix_scale(_Float16  __a, simd_half3x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; }
   1302 static   simd_half4x2 SIMD_CFUNC matrix_scale(_Float16  __a, simd_half4x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; }
   1303 static   simd_half2x3 SIMD_CFUNC matrix_scale(_Float16  __a, simd_half2x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; }
   1304 static   simd_half3x3 SIMD_CFUNC matrix_scale(_Float16  __a, simd_half3x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; }
   1305 static   simd_half4x3 SIMD_CFUNC matrix_scale(_Float16  __a, simd_half4x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; }
   1306 static   simd_half2x4 SIMD_CFUNC matrix_scale(_Float16  __a, simd_half2x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; }
   1307 static   simd_half3x4 SIMD_CFUNC matrix_scale(_Float16  __a, simd_half3x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; }
   1308 static   simd_half4x4 SIMD_CFUNC matrix_scale(_Float16  __a, simd_half4x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; }
   1309 static  simd_float2x2 SIMD_CFUNC matrix_scale(float  __a,  simd_float2x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; }
   1310 static  simd_float3x2 SIMD_CFUNC matrix_scale(float  __a,  simd_float3x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; }
   1311 static  simd_float4x2 SIMD_CFUNC matrix_scale(float  __a,  simd_float4x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; }
   1312 static  simd_float2x3 SIMD_CFUNC matrix_scale(float  __a,  simd_float2x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; }
   1313 static  simd_float3x3 SIMD_CFUNC matrix_scale(float  __a,  simd_float3x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; }
   1314 static  simd_float4x3 SIMD_CFUNC matrix_scale(float  __a,  simd_float4x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; }
   1315 static  simd_float2x4 SIMD_CFUNC matrix_scale(float  __a,  simd_float2x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; }
   1316 static  simd_float3x4 SIMD_CFUNC matrix_scale(float  __a,  simd_float3x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; }
   1317 static  simd_float4x4 SIMD_CFUNC matrix_scale(float  __a,  simd_float4x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; }
   1318 static simd_double2x2 SIMD_CFUNC matrix_scale(double __a, simd_double2x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; }
   1319 static simd_double3x2 SIMD_CFUNC matrix_scale(double __a, simd_double3x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; }
   1320 static simd_double4x2 SIMD_CFUNC matrix_scale(double __a, simd_double4x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; }
   1321 static simd_double2x3 SIMD_CFUNC matrix_scale(double __a, simd_double2x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; }
   1322 static simd_double3x3 SIMD_CFUNC matrix_scale(double __a, simd_double3x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; }
   1323 static simd_double4x3 SIMD_CFUNC matrix_scale(double __a, simd_double4x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; }
   1324 static simd_double2x4 SIMD_CFUNC matrix_scale(double __a, simd_double2x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; }
   1325 static simd_double3x4 SIMD_CFUNC matrix_scale(double __a, simd_double3x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; }
   1326 static simd_double4x4 SIMD_CFUNC matrix_scale(double __a, simd_double4x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; }
   1327   
   1328 static   simd_half2x2 SIMD_CFUNC simd_mul(_Float16  __a, simd_half2x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; }
   1329 static   simd_half3x2 SIMD_CFUNC simd_mul(_Float16  __a, simd_half3x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; }
   1330 static   simd_half4x2 SIMD_CFUNC simd_mul(_Float16  __a, simd_half4x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; }
   1331 static   simd_half2x3 SIMD_CFUNC simd_mul(_Float16  __a, simd_half2x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; }
   1332 static   simd_half3x3 SIMD_CFUNC simd_mul(_Float16  __a, simd_half3x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; }
   1333 static   simd_half4x3 SIMD_CFUNC simd_mul(_Float16  __a, simd_half4x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; }
   1334 static   simd_half2x4 SIMD_CFUNC simd_mul(_Float16  __a, simd_half2x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; }
   1335 static   simd_half3x4 SIMD_CFUNC simd_mul(_Float16  __a, simd_half3x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; }
   1336 static   simd_half4x4 SIMD_CFUNC simd_mul(_Float16  __a, simd_half4x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; }
   1337 static  simd_float2x2 SIMD_CFUNC simd_mul(float  __a,  simd_float2x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; }
   1338 static  simd_float3x2 SIMD_CFUNC simd_mul(float  __a,  simd_float3x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; }
   1339 static  simd_float4x2 SIMD_CFUNC simd_mul(float  __a,  simd_float4x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; }
   1340 static  simd_float2x3 SIMD_CFUNC simd_mul(float  __a,  simd_float2x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; }
   1341 static  simd_float3x3 SIMD_CFUNC simd_mul(float  __a,  simd_float3x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; }
   1342 static  simd_float4x3 SIMD_CFUNC simd_mul(float  __a,  simd_float4x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; }
   1343 static  simd_float2x4 SIMD_CFUNC simd_mul(float  __a,  simd_float2x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; }
   1344 static  simd_float3x4 SIMD_CFUNC simd_mul(float  __a,  simd_float3x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; }
   1345 static  simd_float4x4 SIMD_CFUNC simd_mul(float  __a,  simd_float4x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; }
   1346 static simd_double2x2 SIMD_CFUNC simd_mul(double __a, simd_double2x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; }
   1347 static simd_double3x2 SIMD_CFUNC simd_mul(double __a, simd_double3x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; }
   1348 static simd_double4x2 SIMD_CFUNC simd_mul(double __a, simd_double4x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; }
   1349 static simd_double2x3 SIMD_CFUNC simd_mul(double __a, simd_double2x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; }
   1350 static simd_double3x3 SIMD_CFUNC simd_mul(double __a, simd_double3x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; }
   1351 static simd_double4x3 SIMD_CFUNC simd_mul(double __a, simd_double4x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; }
   1352 static simd_double2x4 SIMD_CFUNC simd_mul(double __a, simd_double2x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; }
   1353 static simd_double3x4 SIMD_CFUNC simd_mul(double __a, simd_double3x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; }
   1354 static simd_double4x4 SIMD_CFUNC simd_mul(double __a, simd_double4x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; }
   1355 
   1356 static  simd_half2x2 SIMD_CFUNC simd_linear_combination(_Float16  __a,  simd_half2x2 __x, _Float16  __b,  simd_half2x2 __y) {
   1357     __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0];
   1358     __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1];
   1359     return __x;
   1360 }
   1361 static  simd_half3x2 SIMD_CFUNC simd_linear_combination(_Float16  __a,  simd_half3x2 __x, _Float16  __b,  simd_half3x2 __y) {
   1362     __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0];
   1363     __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1];
   1364     __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2];
   1365     return __x;
   1366 }
   1367 static  simd_half4x2 SIMD_CFUNC simd_linear_combination(_Float16  __a,  simd_half4x2 __x, _Float16  __b,  simd_half4x2 __y) {
   1368     __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0];
   1369     __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1];
   1370     __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2];
   1371     __x.columns[3] = __a*__x.columns[3] + __b*__y.columns[3];
   1372     return __x;
   1373 }
   1374 static  simd_half2x3 SIMD_CFUNC simd_linear_combination(_Float16  __a,  simd_half2x3 __x, _Float16  __b,  simd_half2x3 __y) {
   1375     __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0];
   1376     __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1];
   1377     return __x;
   1378 }
   1379 static  simd_half3x3 SIMD_CFUNC simd_linear_combination(_Float16  __a,  simd_half3x3 __x, _Float16  __b,  simd_half3x3 __y) {
   1380     __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0];
   1381     __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1];
   1382     __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2];
   1383     return __x;
   1384 }
   1385 static  simd_half4x3 SIMD_CFUNC simd_linear_combination(_Float16  __a,  simd_half4x3 __x, _Float16  __b,  simd_half4x3 __y) {
   1386     __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0];
   1387     __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1];
   1388     __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2];
   1389     __x.columns[3] = __a*__x.columns[3] + __b*__y.columns[3];
   1390     return __x;
   1391 }
   1392 static  simd_half2x4 SIMD_CFUNC simd_linear_combination(_Float16  __a,  simd_half2x4 __x, _Float16  __b,  simd_half2x4 __y) {
   1393     __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0];
   1394     __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1];
   1395     return __x;
   1396 }
   1397 static  simd_half3x4 SIMD_CFUNC simd_linear_combination(_Float16  __a,  simd_half3x4 __x, _Float16  __b,  simd_half3x4 __y) {
   1398     __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0];
   1399     __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1];
   1400     __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2];
   1401     return __x;
   1402 }
   1403 static  simd_half4x4 SIMD_CFUNC simd_linear_combination(_Float16  __a,  simd_half4x4 __x, _Float16  __b,  simd_half4x4 __y) {
   1404     __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0];
   1405     __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1];
   1406     __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2];
   1407     __x.columns[3] = __a*__x.columns[3] + __b*__y.columns[3];
   1408     return __x;
   1409 }
   1410 static  simd_float2x2 SIMD_CFUNC simd_linear_combination(float  __a,  simd_float2x2 __x, float  __b,  simd_float2x2 __y) {
   1411     __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0];
   1412     __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1];
   1413     return __x;
   1414 }
   1415 static  simd_float3x2 SIMD_CFUNC simd_linear_combination(float  __a,  simd_float3x2 __x, float  __b,  simd_float3x2 __y) {
   1416     __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0];
   1417     __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1];
   1418     __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2];
   1419     return __x;
   1420 }
   1421 static  simd_float4x2 SIMD_CFUNC simd_linear_combination(float  __a,  simd_float4x2 __x, float  __b,  simd_float4x2 __y) {
   1422     __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0];
   1423     __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1];
   1424     __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2];
   1425     __x.columns[3] = __a*__x.columns[3] + __b*__y.columns[3];
   1426     return __x;
   1427 }
   1428 static  simd_float2x3 SIMD_CFUNC simd_linear_combination(float  __a,  simd_float2x3 __x, float  __b,  simd_float2x3 __y) {
   1429     __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0];
   1430     __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1];
   1431     return __x;
   1432 }
   1433 static  simd_float3x3 SIMD_CFUNC simd_linear_combination(float  __a,  simd_float3x3 __x, float  __b,  simd_float3x3 __y) {
   1434     __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0];
   1435     __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1];
   1436     __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2];
   1437     return __x;
   1438 }
   1439 static  simd_float4x3 SIMD_CFUNC simd_linear_combination(float  __a,  simd_float4x3 __x, float  __b,  simd_float4x3 __y) {
   1440     __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0];
   1441     __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1];
   1442     __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2];
   1443     __x.columns[3] = __a*__x.columns[3] + __b*__y.columns[3];
   1444     return __x;
   1445 }
   1446 static  simd_float2x4 SIMD_CFUNC simd_linear_combination(float  __a,  simd_float2x4 __x, float  __b,  simd_float2x4 __y) {
   1447     __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0];
   1448     __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1];
   1449     return __x;
   1450 }
   1451 static  simd_float3x4 SIMD_CFUNC simd_linear_combination(float  __a,  simd_float3x4 __x, float  __b,  simd_float3x4 __y) {
   1452     __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0];
   1453     __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1];
   1454     __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2];
   1455     return __x;
   1456 }
   1457 static  simd_float4x4 SIMD_CFUNC simd_linear_combination(float  __a,  simd_float4x4 __x, float  __b,  simd_float4x4 __y) {
   1458     __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0];
   1459     __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1];
   1460     __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2];
   1461     __x.columns[3] = __a*__x.columns[3] + __b*__y.columns[3];
   1462     return __x;
   1463 }
   1464 static simd_double2x2 SIMD_CFUNC simd_linear_combination(double __a, simd_double2x2 __x, double __b, simd_double2x2 __y) {
   1465     __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0];
   1466     __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1];
   1467     return __x;
   1468 }
   1469 static simd_double3x2 SIMD_CFUNC simd_linear_combination(double __a, simd_double3x2 __x, double __b, simd_double3x2 __y) {
   1470     __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0];
   1471     __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1];
   1472     __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2];
   1473     return __x;
   1474 }
   1475 static simd_double4x2 SIMD_CFUNC simd_linear_combination(double __a, simd_double4x2 __x, double __b, simd_double4x2 __y) {
   1476     __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0];
   1477     __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1];
   1478     __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2];
   1479     __x.columns[3] = __a*__x.columns[3] + __b*__y.columns[3];
   1480     return __x;
   1481 }
   1482 static simd_double2x3 SIMD_CFUNC simd_linear_combination(double __a, simd_double2x3 __x, double __b, simd_double2x3 __y) {
   1483     __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0];
   1484     __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1];
   1485     return __x;
   1486 }
   1487 static simd_double3x3 SIMD_CFUNC simd_linear_combination(double __a, simd_double3x3 __x, double __b, simd_double3x3 __y) {
   1488     __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0];
   1489     __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1];
   1490     __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2];
   1491     return __x;
   1492 }
   1493 static simd_double4x3 SIMD_CFUNC simd_linear_combination(double __a, simd_double4x3 __x, double __b, simd_double4x3 __y) {
   1494     __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0];
   1495     __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1];
   1496     __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2];
   1497     __x.columns[3] = __a*__x.columns[3] + __b*__y.columns[3];
   1498     return __x;
   1499 }
   1500 static simd_double2x4 SIMD_CFUNC simd_linear_combination(double __a, simd_double2x4 __x, double __b, simd_double2x4 __y) {
   1501     __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0];
   1502     __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1];
   1503     return __x;
   1504 }
   1505 static simd_double3x4 SIMD_CFUNC simd_linear_combination(double __a, simd_double3x4 __x, double __b, simd_double3x4 __y) {
   1506     __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0];
   1507     __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1];
   1508     __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2];
   1509     return __x;
   1510 }
   1511 static simd_double4x4 SIMD_CFUNC simd_linear_combination(double __a, simd_double4x4 __x, double __b, simd_double4x4 __y) {
   1512     __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0];
   1513     __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1];
   1514     __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2];
   1515     __x.columns[3] = __a*__x.columns[3] + __b*__y.columns[3];
   1516     return __x;
   1517 }
   1518   
   1519 static simd_half2x2 SIMD_CFUNC simd_add(simd_half2x2 __x, simd_half2x2 __y) { return simd_linear_combination(1, __x, 1, __y); }
   1520 static simd_half3x2 SIMD_CFUNC simd_add(simd_half3x2 __x, simd_half3x2 __y) { return simd_linear_combination(1, __x, 1, __y); }
   1521 static simd_half4x2 SIMD_CFUNC simd_add(simd_half4x2 __x, simd_half4x2 __y) { return simd_linear_combination(1, __x, 1, __y); }
   1522 static simd_half2x3 SIMD_CFUNC simd_add(simd_half2x3 __x, simd_half2x3 __y) { return simd_linear_combination(1, __x, 1, __y); }
   1523 static simd_half3x3 SIMD_CFUNC simd_add(simd_half3x3 __x, simd_half3x3 __y) { return simd_linear_combination(1, __x, 1, __y); }
   1524 static simd_half4x3 SIMD_CFUNC simd_add(simd_half4x3 __x, simd_half4x3 __y) { return simd_linear_combination(1, __x, 1, __y); }
   1525 static simd_half2x4 SIMD_CFUNC simd_add(simd_half2x4 __x, simd_half2x4 __y) { return simd_linear_combination(1, __x, 1, __y); }
   1526 static simd_half3x4 SIMD_CFUNC simd_add(simd_half3x4 __x, simd_half3x4 __y) { return simd_linear_combination(1, __x, 1, __y); }
   1527 static simd_half4x4 SIMD_CFUNC simd_add(simd_half4x4 __x, simd_half4x4 __y) { return simd_linear_combination(1, __x, 1, __y); }
   1528 static simd_float2x2 SIMD_CFUNC simd_add(simd_float2x2 __x, simd_float2x2 __y) { return simd_linear_combination(1, __x, 1, __y); }
   1529 static simd_float3x2 SIMD_CFUNC simd_add(simd_float3x2 __x, simd_float3x2 __y) { return simd_linear_combination(1, __x, 1, __y); }
   1530 static simd_float4x2 SIMD_CFUNC simd_add(simd_float4x2 __x, simd_float4x2 __y) { return simd_linear_combination(1, __x, 1, __y); }
   1531 static simd_float2x3 SIMD_CFUNC simd_add(simd_float2x3 __x, simd_float2x3 __y) { return simd_linear_combination(1, __x, 1, __y); }
   1532 static simd_float3x3 SIMD_CFUNC simd_add(simd_float3x3 __x, simd_float3x3 __y) { return simd_linear_combination(1, __x, 1, __y); }
   1533 static simd_float4x3 SIMD_CFUNC simd_add(simd_float4x3 __x, simd_float4x3 __y) { return simd_linear_combination(1, __x, 1, __y); }
   1534 static simd_float2x4 SIMD_CFUNC simd_add(simd_float2x4 __x, simd_float2x4 __y) { return simd_linear_combination(1, __x, 1, __y); }
   1535 static simd_float3x4 SIMD_CFUNC simd_add(simd_float3x4 __x, simd_float3x4 __y) { return simd_linear_combination(1, __x, 1, __y); }
   1536 static simd_float4x4 SIMD_CFUNC simd_add(simd_float4x4 __x, simd_float4x4 __y) { return simd_linear_combination(1, __x, 1, __y); }
   1537 static simd_double2x2 SIMD_CFUNC simd_add(simd_double2x2 __x, simd_double2x2 __y) { return simd_linear_combination(1, __x, 1, __y); }
   1538 static simd_double3x2 SIMD_CFUNC simd_add(simd_double3x2 __x, simd_double3x2 __y) { return simd_linear_combination(1, __x, 1, __y); }
   1539 static simd_double4x2 SIMD_CFUNC simd_add(simd_double4x2 __x, simd_double4x2 __y) { return simd_linear_combination(1, __x, 1, __y); }
   1540 static simd_double2x3 SIMD_CFUNC simd_add(simd_double2x3 __x, simd_double2x3 __y) { return simd_linear_combination(1, __x, 1, __y); }
   1541 static simd_double3x3 SIMD_CFUNC simd_add(simd_double3x3 __x, simd_double3x3 __y) { return simd_linear_combination(1, __x, 1, __y); }
   1542 static simd_double4x3 SIMD_CFUNC simd_add(simd_double4x3 __x, simd_double4x3 __y) { return simd_linear_combination(1, __x, 1, __y); }
   1543 static simd_double2x4 SIMD_CFUNC simd_add(simd_double2x4 __x, simd_double2x4 __y) { return simd_linear_combination(1, __x, 1, __y); }
   1544 static simd_double3x4 SIMD_CFUNC simd_add(simd_double3x4 __x, simd_double3x4 __y) { return simd_linear_combination(1, __x, 1, __y); }
   1545 static simd_double4x4 SIMD_CFUNC simd_add(simd_double4x4 __x, simd_double4x4 __y) { return simd_linear_combination(1, __x, 1, __y); }
   1546       
   1547 static simd_half2x2 SIMD_CFUNC simd_sub(simd_half2x2 __x, simd_half2x2 __y) { return simd_linear_combination(1, __x, -1, __y); }
   1548 static simd_half3x2 SIMD_CFUNC simd_sub(simd_half3x2 __x, simd_half3x2 __y) { return simd_linear_combination(1, __x, -1, __y); }
   1549 static simd_half4x2 SIMD_CFUNC simd_sub(simd_half4x2 __x, simd_half4x2 __y) { return simd_linear_combination(1, __x, -1, __y); }
   1550 static simd_half2x3 SIMD_CFUNC simd_sub(simd_half2x3 __x, simd_half2x3 __y) { return simd_linear_combination(1, __x, -1, __y); }
   1551 static simd_half3x3 SIMD_CFUNC simd_sub(simd_half3x3 __x, simd_half3x3 __y) { return simd_linear_combination(1, __x, -1, __y); }
   1552 static simd_half4x3 SIMD_CFUNC simd_sub(simd_half4x3 __x, simd_half4x3 __y) { return simd_linear_combination(1, __x, -1, __y); }
   1553 static simd_half2x4 SIMD_CFUNC simd_sub(simd_half2x4 __x, simd_half2x4 __y) { return simd_linear_combination(1, __x, -1, __y); }
   1554 static simd_half3x4 SIMD_CFUNC simd_sub(simd_half3x4 __x, simd_half3x4 __y) { return simd_linear_combination(1, __x, -1, __y); }
   1555 static simd_half4x4 SIMD_CFUNC simd_sub(simd_half4x4 __x, simd_half4x4 __y) { return simd_linear_combination(1, __x, -1, __y); }
   1556 static simd_float2x2 SIMD_CFUNC simd_sub(simd_float2x2 __x, simd_float2x2 __y) { return simd_linear_combination(1, __x, -1, __y); }
   1557 static simd_float3x2 SIMD_CFUNC simd_sub(simd_float3x2 __x, simd_float3x2 __y) { return simd_linear_combination(1, __x, -1, __y); }
   1558 static simd_float4x2 SIMD_CFUNC simd_sub(simd_float4x2 __x, simd_float4x2 __y) { return simd_linear_combination(1, __x, -1, __y); }
   1559 static simd_float2x3 SIMD_CFUNC simd_sub(simd_float2x3 __x, simd_float2x3 __y) { return simd_linear_combination(1, __x, -1, __y); }
   1560 static simd_float3x3 SIMD_CFUNC simd_sub(simd_float3x3 __x, simd_float3x3 __y) { return simd_linear_combination(1, __x, -1, __y); }
   1561 static simd_float4x3 SIMD_CFUNC simd_sub(simd_float4x3 __x, simd_float4x3 __y) { return simd_linear_combination(1, __x, -1, __y); }
   1562 static simd_float2x4 SIMD_CFUNC simd_sub(simd_float2x4 __x, simd_float2x4 __y) { return simd_linear_combination(1, __x, -1, __y); }
   1563 static simd_float3x4 SIMD_CFUNC simd_sub(simd_float3x4 __x, simd_float3x4 __y) { return simd_linear_combination(1, __x, -1, __y); }
   1564 static simd_float4x4 SIMD_CFUNC simd_sub(simd_float4x4 __x, simd_float4x4 __y) { return simd_linear_combination(1, __x, -1, __y); }
   1565 static simd_double2x2 SIMD_CFUNC simd_sub(simd_double2x2 __x, simd_double2x2 __y) { return simd_linear_combination(1, __x, -1, __y); }
   1566 static simd_double3x2 SIMD_CFUNC simd_sub(simd_double3x2 __x, simd_double3x2 __y) { return simd_linear_combination(1, __x, -1, __y); }
   1567 static simd_double4x2 SIMD_CFUNC simd_sub(simd_double4x2 __x, simd_double4x2 __y) { return simd_linear_combination(1, __x, -1, __y); }
   1568 static simd_double2x3 SIMD_CFUNC simd_sub(simd_double2x3 __x, simd_double2x3 __y) { return simd_linear_combination(1, __x, -1, __y); }
   1569 static simd_double3x3 SIMD_CFUNC simd_sub(simd_double3x3 __x, simd_double3x3 __y) { return simd_linear_combination(1, __x, -1, __y); }
   1570 static simd_double4x3 SIMD_CFUNC simd_sub(simd_double4x3 __x, simd_double4x3 __y) { return simd_linear_combination(1, __x, -1, __y); }
   1571 static simd_double2x4 SIMD_CFUNC simd_sub(simd_double2x4 __x, simd_double2x4 __y) { return simd_linear_combination(1, __x, -1, __y); }
   1572 static simd_double3x4 SIMD_CFUNC simd_sub(simd_double3x4 __x, simd_double3x4 __y) { return simd_linear_combination(1, __x, -1, __y); }
   1573 static simd_double4x4 SIMD_CFUNC simd_sub(simd_double4x4 __x, simd_double4x4 __y) { return simd_linear_combination(1, __x, -1, __y); }
   1574 
   1575 static simd_half2x2 SIMD_CFUNC simd_transpose(simd_half2x2 __x) {
   1576     simd_half4 __x0, __x1;
   1577     __x0.xy = __x.columns[0];
   1578     __x1.xy = __x.columns[1];
   1579     simd_half4 __r01 = { __x0[0], __x1[0], __x0[1], __x1[1] };
   1580     return simd_matrix(__r01.lo, __r01.hi);
   1581 }
   1582 
   1583 static simd_half3x2 SIMD_CFUNC simd_transpose(simd_half2x3 __x) {
   1584     simd_half4 __x0, __x1;
   1585     __x0.xyz = __x.columns[0];
   1586     __x1.xyz = __x.columns[1];
   1587 #if defined __arm64e__
   1588     simd_half4 __r01 = vzip1_f16(__x0, __x1);
   1589     simd_half4 __r2x = vzip2_f16(__x0, __x1);
   1590 #else
   1591     simd_half4 __r01 = { __x0[0], __x1[0], __x0[1], __x1[1] };
   1592     simd_half4 __r2x = { __x0[2], __x1[2] };
   1593 #endif
   1594     return simd_matrix(__r01.lo, __r01.hi, __r2x.lo);
   1595 }
   1596 
   1597 static simd_half4x2 SIMD_CFUNC simd_transpose(simd_half2x4 __x) {
   1598 #if defined __arm64e__
   1599     simd_half4 __r01 = vzip1_f16(__x.columns[0], __x.columns[1]);
   1600     simd_half4 __r23 = vzip2_f16(__x.columns[0], __x.columns[1]);
   1601 #else
   1602     simd_half4 __r01 = { __x.columns[0][0], __x.columns[1][0], __x.columns[0][1], __x.columns[1][1] };
   1603     simd_half4 __r23 = { __x.columns[0][2], __x.columns[1][2], __x.columns[0][3], __x.columns[1][3] };
   1604 #endif
   1605     return simd_matrix(__r01.lo, __r01.hi, __r23.lo, __r23.hi);
   1606 }
   1607 
   1608 static simd_half2x3 SIMD_CFUNC simd_transpose(simd_half3x2 __x) {
   1609     simd_half4 __x0, __x1, __x2;
   1610     __x0.xy = __x.columns[0];
   1611     __x1.xy = __x.columns[1];
   1612     __x2.xy = __x.columns[2];
   1613 #if defined __arm64e__
   1614     simd_half4 padding = { 0 };
   1615     simd_half4 __t0 = vzip1_f16(__x0,__x2);
   1616     simd_half4 __t1 = vzip1_f16(__x1,padding);
   1617     simd_half4 __r0 = vzip1_f16(__t0,__t1);
   1618     simd_half4 __r1 = vzip2_f16(__t0,__t1);
   1619 #else
   1620     simd_half4 __r0 = { __x0[0], __x1[0], __x2[0] };
   1621     simd_half4 __r1 = { __x0[1], __x1[1], __x2[1] };
   1622 #endif
   1623     return simd_matrix(__r0.xyz, __r1.xyz);
   1624 }
   1625 
   1626 static simd_half3x3 SIMD_CFUNC simd_transpose(simd_half3x3 __x) {
   1627     simd_half4 __x0, __x1, __x2;
   1628     __x0.xyz = __x.columns[0];
   1629     __x1.xyz = __x.columns[1];
   1630     __x2.xyz = __x.columns[2];
   1631 #if defined __arm64e__
   1632     simd_half4 padding = { 0 };
   1633     simd_half4 __t0 = vzip1_f16(__x0,__x2);
   1634     simd_half4 __t1 = vzip2_f16(__x0,__x2);
   1635     simd_half4 __t2 = vzip1_f16(__x1,padding);
   1636     simd_half4 __t3 = vzip2_f16(__x1,padding);
   1637     simd_half4 __r0 = vzip1_f16(__t0,__t2);
   1638     simd_half4 __r1 = vzip2_f16(__t0,__t2);
   1639     simd_half4 __r2 = vzip1_f16(__t1,__t3);
   1640 #else
   1641     simd_half4 __r0 = {__x0[0], __x1[0], __x2[0]};
   1642     simd_half4 __r1 = {__x0[1], __x1[1], __x2[1]};
   1643     simd_half4 __r2 = {__x0[2], __x1[2], __x2[2]};
   1644 #endif
   1645     return simd_matrix(__r0.xyz, __r1.xyz, __r2.xyz);
   1646 }
   1647 
   1648 static simd_half4x3 SIMD_CFUNC simd_transpose(simd_half3x4 __x) {
   1649 #if defined __arm64e__
   1650     simd_half4 padding = { 0 };
   1651     simd_half4 __t0 = vzip1_f16(__x.columns[0],__x.columns[2]);
   1652     simd_half4 __t1 = vzip2_f16(__x.columns[0],__x.columns[2]);
   1653     simd_half4 __t2 = vzip1_f16(__x.columns[1],padding);
   1654     simd_half4 __t3 = vzip2_f16(__x.columns[1],padding);
   1655     simd_half4 __r0 = vzip1_f16(__t0,__t2);
   1656     simd_half4 __r1 = vzip2_f16(__t0,__t2);
   1657     simd_half4 __r2 = vzip1_f16(__t1,__t3);
   1658     simd_half4 __r3 = vzip2_f16(__t1,__t3);
   1659 #else
   1660     simd_half4 __r0 = {__x.columns[0][0], __x.columns[1][0], __x.columns[2][0]};
   1661     simd_half4 __r1 = {__x.columns[0][1], __x.columns[1][1], __x.columns[2][1]};
   1662     simd_half4 __r2 = {__x.columns[0][2], __x.columns[1][2], __x.columns[2][2]};
   1663     simd_half4 __r3 = {__x.columns[0][3], __x.columns[1][3], __x.columns[2][3]};
   1664 #endif
   1665     return simd_matrix(__r0.xyz, __r1.xyz, __r2.xyz, __r3.xyz);
   1666 }
   1667 
   1668 static simd_half2x4 SIMD_CFUNC simd_transpose(simd_half4x2 __x) {
   1669     simd_half4 __x0, __x1, __x2, __x3;
   1670     __x0.xy = __x.columns[0];
   1671     __x1.xy = __x.columns[1];
   1672     __x2.xy = __x.columns[2];
   1673     __x3.xy = __x.columns[3];
   1674 #if defined __arm64e__
   1675     simd_half4 __t0 = vzip1_f16(__x0,__x2);
   1676     simd_half4 __t1 = vzip1_f16(__x1,__x3);
   1677     simd_half4 __r0 = vzip1_f16(__t0,__t1);
   1678     simd_half4 __r1 = vzip2_f16(__t0,__t1);
   1679 #else
   1680     simd_half4 __r0 = {__x.columns[0][0], __x.columns[1][0], __x.columns[2][0], __x.columns[3][0]};
   1681     simd_half4 __r1 = {__x.columns[0][1], __x.columns[1][1], __x.columns[2][1], __x.columns[3][1]};
   1682 #endif
   1683     return simd_matrix(__r0,__r1);
   1684 }
   1685 
   1686 static simd_half3x4 SIMD_CFUNC simd_transpose(simd_half4x3 __x) {
   1687     simd_half4 __x0, __x1, __x2, __x3;
   1688     __x0.xyz = __x.columns[0];
   1689     __x1.xyz = __x.columns[1];
   1690     __x2.xyz = __x.columns[2];
   1691     __x3.xyz = __x.columns[3];
   1692 #if defined __arm64e__
   1693     simd_half4 __t0 = vzip1_f16(__x0,__x2);
   1694     simd_half4 __t1 = vzip2_f16(__x0,__x2);
   1695     simd_half4 __t2 = vzip1_f16(__x1,__x3);
   1696     simd_half4 __t3 = vzip2_f16(__x1,__x3);
   1697     simd_half4 __r0 = vzip1_f16(__t0,__t2);
   1698     simd_half4 __r1 = vzip2_f16(__t0,__t2);
   1699     simd_half4 __r2 = vzip1_f16(__t1,__t3);
   1700 #else
   1701     simd_half4 __r0 = {__x.columns[0][0], __x.columns[1][0], __x.columns[2][0], __x.columns[3][0]};
   1702     simd_half4 __r1 = {__x.columns[0][1], __x.columns[1][1], __x.columns[2][1], __x.columns[3][1]};
   1703     simd_half4 __r2 = {__x.columns[0][2], __x.columns[1][2], __x.columns[2][2], __x.columns[3][2]};
   1704 #endif
   1705     return simd_matrix(__r0,__r1,__r2);
   1706 }
   1707 
   1708 static simd_half4x4 SIMD_CFUNC simd_transpose(simd_half4x4 __x) {
   1709 #if defined __arm64e__
   1710     simd_half4 __t0 = vzip1_f16(__x.columns[0],__x.columns[2]);
   1711     simd_half4 __t1 = vzip2_f16(__x.columns[0],__x.columns[2]);
   1712     simd_half4 __t2 = vzip1_f16(__x.columns[1],__x.columns[3]);
   1713     simd_half4 __t3 = vzip2_f16(__x.columns[1],__x.columns[3]);
   1714     simd_half4 __r0 = vzip1_f16(__t0,__t2);
   1715     simd_half4 __r1 = vzip2_f16(__t0,__t2);
   1716     simd_half4 __r2 = vzip1_f16(__t1,__t3);
   1717     simd_half4 __r3 = vzip2_f16(__t1,__t3);
   1718 #else
   1719     simd_half4 __r0 = {__x.columns[0][0], __x.columns[1][0], __x.columns[2][0], __x.columns[3][0]};
   1720     simd_half4 __r1 = {__x.columns[0][1], __x.columns[1][1], __x.columns[2][1], __x.columns[3][1]};
   1721     simd_half4 __r2 = {__x.columns[0][2], __x.columns[1][2], __x.columns[2][2], __x.columns[3][2]};
   1722     simd_half4 __r3 = {__x.columns[0][3], __x.columns[1][3], __x.columns[2][3], __x.columns[3][3]};
   1723 #endif
   1724     return simd_matrix(__r0,__r1,__r2,__r3);
   1725 }
   1726 
   1727 static simd_float2x2 SIMD_CFUNC simd_transpose(simd_float2x2 __x) {
   1728     simd_float4 __x0, __x1;
   1729     __x0.xy = __x.columns[0];
   1730     __x1.xy = __x.columns[1];
   1731 #if defined __SSE__
   1732     simd_float4 __r01 = _mm_unpacklo_ps(__x0, __x1);
   1733 #elif defined __ARM_NEON__ && defined __arm64__
   1734     simd_float4 __r01 = vzip1q_f32(__x0, __x1);
   1735 #else
   1736     simd_float4 __r01 = { __x0[0], __x1[0], __x0[1], __x1[1] };
   1737 #endif
   1738     return simd_matrix(__r01.lo, __r01.hi);
   1739 }
   1740     
   1741 static simd_float3x2 SIMD_CFUNC simd_transpose(simd_float2x3 __x) {
   1742     simd_float4 __x0, __x1;
   1743     __x0.xyz = __x.columns[0];
   1744     __x1.xyz = __x.columns[1];
   1745 #if defined __SSE__
   1746     simd_float4 __r01 = _mm_unpacklo_ps(__x0, __x1);
   1747     simd_float4 __r2x = _mm_unpackhi_ps(__x0, __x1);
   1748 #elif defined __ARM_NEON__ && defined __arm64__
   1749     simd_float4 __r01 = vzip1q_f32(__x0, __x1);
   1750     simd_float4 __r2x = vzip2q_f32(__x0, __x1);
   1751 #else
   1752     simd_float4 __r01 = { __x0[0], __x1[0], __x0[1], __x1[1] };
   1753     simd_float4 __r2x = { __x0[2], __x1[2] };
   1754 #endif
   1755     return simd_matrix(__r01.lo, __r01.hi, __r2x.lo);
   1756 }
   1757     
   1758 static simd_float4x2 SIMD_CFUNC simd_transpose(simd_float2x4 __x) {
   1759 #if defined __SSE__
   1760     simd_float4 __r01 = _mm_unpacklo_ps(__x.columns[0], __x.columns[1]);
   1761     simd_float4 __r23 = _mm_unpackhi_ps(__x.columns[0], __x.columns[1]);
   1762 #elif defined __ARM_NEON__ && defined __arm64__
   1763     simd_float4 __r01 = vzip1q_f32(__x.columns[0], __x.columns[1]);
   1764     simd_float4 __r23 = vzip2q_f32(__x.columns[0], __x.columns[1]);
   1765 #else
   1766     simd_float4 __r01 = { __x.columns[0][0], __x.columns[1][0], __x.columns[0][1], __x.columns[1][1] };
   1767     simd_float4 __r23 = { __x.columns[0][2], __x.columns[1][2], __x.columns[0][3], __x.columns[1][3] };
   1768 #endif
   1769     return simd_matrix(__r01.lo, __r01.hi, __r23.lo, __r23.hi);
   1770 }
   1771     
   1772 static simd_float2x3 SIMD_CFUNC simd_transpose(simd_float3x2 __x) {
   1773     simd_float4 __x0, __x1, __x2;
   1774     __x0.xy = __x.columns[0];
   1775     __x1.xy = __x.columns[1];
   1776     __x2.xy = __x.columns[2];
   1777 #if defined __SSE__
   1778     simd_float4 __t = _mm_unpacklo_ps(__x0, __x1);
   1779     simd_float4 __r0 = _mm_shuffle_ps(__t,__x2,0xc4);
   1780     simd_float4 __r1 = _mm_shuffle_ps(__t,__x2,0xde);
   1781 #elif defined __ARM_NEON__ && defined __arm64__
   1782     simd_float4 padding = { 0 };
   1783     simd_float4 __t0 = vzip1q_f32(__x0,__x2);
   1784     simd_float4 __t1 = vzip1q_f32(__x1,padding);
   1785     simd_float4 __r0 = vzip1q_f32(__t0,__t1);
   1786     simd_float4 __r1 = vzip2q_f32(__t0,__t1);
   1787 #else
   1788     simd_float4 __r0 = { __x0[0], __x1[0], __x2[0] };
   1789     simd_float4 __r1 = { __x0[1], __x1[1], __x2[1] };
   1790 #endif
   1791     return simd_matrix(__r0.xyz, __r1.xyz);
   1792 }
   1793     
   1794 static simd_float3x3 SIMD_CFUNC simd_transpose(simd_float3x3 __x) {
   1795     simd_float4 __x0, __x1, __x2;
   1796     __x0.xyz = __x.columns[0];
   1797     __x1.xyz = __x.columns[1];
   1798     __x2.xyz = __x.columns[2];
   1799 #if defined __SSE__
   1800     simd_float4 __t0 = _mm_unpacklo_ps(__x0, __x1);
   1801     simd_float4 __t1 = _mm_unpackhi_ps(__x0, __x1);
   1802     simd_float4 __r0 = __t0; __r0.hi = __x2.lo;
   1803     simd_float4 __r1 = _mm_shuffle_ps(__t0, __x2, 0xde);
   1804     simd_float4 __r2 = __x2; __r2.lo = __t1.lo;
   1805 #elif defined __ARM_NEON__ && defined __arm64__
   1806     simd_float4 padding = { 0 };
   1807     simd_float4 __t0 = vzip1q_f32(__x0,__x2);
   1808     simd_float4 __t1 = vzip2q_f32(__x0,__x2);
   1809     simd_float4 __t2 = vzip1q_f32(__x1,padding);
   1810     simd_float4 __t3 = vzip2q_f32(__x1,padding);
   1811     simd_float4 __r0 = vzip1q_f32(__t0,__t2);
   1812     simd_float4 __r1 = vzip2q_f32(__t0,__t2);
   1813     simd_float4 __r2 = vzip1q_f32(__t1,__t3);
   1814 #else
   1815     simd_float4 __r0 = {__x0[0], __x1[0], __x2[0]};
   1816     simd_float4 __r1 = {__x0[1], __x1[1], __x2[1]};
   1817     simd_float4 __r2 = {__x0[2], __x1[2], __x2[2]};
   1818 #endif
   1819     return simd_matrix(__r0.xyz, __r1.xyz, __r2.xyz);
   1820 }
   1821     
   1822 static simd_float4x3 SIMD_CFUNC simd_transpose(simd_float3x4 __x) {
   1823 #if defined __SSE__
   1824     simd_float4 __t0 = _mm_unpacklo_ps(__x.columns[0],__x.columns[1]); /* 00 10 01 11 */
   1825     simd_float4 __t1 = _mm_unpackhi_ps(__x.columns[0],__x.columns[1]); /* 02 12 03 13 */
   1826     simd_float4 __r0 = __t0; __r0.hi = __x.columns[2].lo;
   1827     simd_float4 __r1 = _mm_shuffle_ps(__t0, __x.columns[2], 0xde);
   1828     simd_float4 __r2 = __x.columns[2]; __r2.lo = __t1.lo;
   1829     simd_float4 __r3 = _mm_shuffle_ps(__t1, __x.columns[2], 0xfe);
   1830 #elif defined __ARM_NEON__ && defined __arm64__
   1831     simd_float4 padding = { 0 };
   1832     simd_float4 __t0 = vzip1q_f32(__x.columns[0],__x.columns[2]);
   1833     simd_float4 __t1 = vzip2q_f32(__x.columns[0],__x.columns[2]);
   1834     simd_float4 __t2 = vzip1q_f32(__x.columns[1],padding);
   1835     simd_float4 __t3 = vzip2q_f32(__x.columns[1],padding);
   1836     simd_float4 __r0 = vzip1q_f32(__t0,__t2);
   1837     simd_float4 __r1 = vzip2q_f32(__t0,__t2);
   1838     simd_float4 __r2 = vzip1q_f32(__t1,__t3);
   1839     simd_float4 __r3 = vzip2q_f32(__t1,__t3);
   1840 #else
   1841     simd_float4 __r0 = {__x.columns[0][0], __x.columns[1][0], __x.columns[2][0]};
   1842     simd_float4 __r1 = {__x.columns[0][1], __x.columns[1][1], __x.columns[2][1]};
   1843     simd_float4 __r2 = {__x.columns[0][2], __x.columns[1][2], __x.columns[2][2]};
   1844     simd_float4 __r3 = {__x.columns[0][3], __x.columns[1][3], __x.columns[2][3]};
   1845 #endif
   1846     return simd_matrix(__r0.xyz, __r1.xyz, __r2.xyz, __r3.xyz);
   1847 }
   1848 
   1849 static simd_float2x4 SIMD_CFUNC simd_transpose(simd_float4x2 __x) {
   1850     simd_float4 __x0, __x1, __x2, __x3;
   1851     __x0.xy = __x.columns[0];
   1852     __x1.xy = __x.columns[1];
   1853     __x2.xy = __x.columns[2];
   1854     __x3.xy = __x.columns[3];
   1855 #if defined __SSE__
   1856     simd_float4 __t0 = _mm_unpacklo_ps(__x0,__x2);
   1857     simd_float4 __t1 = _mm_unpacklo_ps(__x1,__x3);
   1858     simd_float4 __r0 = _mm_unpacklo_ps(__t0,__t1);
   1859     simd_float4 __r1 = _mm_unpackhi_ps(__t0,__t1);
   1860 #elif defined __ARM_NEON__ && defined __arm64__
   1861     simd_float4 __t0 = vzip1q_f32(__x0,__x2);
   1862     simd_float4 __t1 = vzip1q_f32(__x1,__x3);
   1863     simd_float4 __r0 = vzip1q_f32(__t0,__t1);
   1864     simd_float4 __r1 = vzip2q_f32(__t0,__t1);
   1865 #else
   1866     simd_float4 __r0 = {__x.columns[0][0], __x.columns[1][0], __x.columns[2][0], __x.columns[3][0]};
   1867     simd_float4 __r1 = {__x.columns[0][1], __x.columns[1][1], __x.columns[2][1], __x.columns[3][1]};
   1868 #endif
   1869     return simd_matrix(__r0,__r1);
   1870 }
   1871 
   1872 static simd_float3x4 SIMD_CFUNC simd_transpose(simd_float4x3 __x) {
   1873     simd_float4 __x0, __x1, __x2, __x3;
   1874     __x0.xyz = __x.columns[0];
   1875     __x1.xyz = __x.columns[1];
   1876     __x2.xyz = __x.columns[2];
   1877     __x3.xyz = __x.columns[3];
   1878 #if defined __SSE__
   1879     simd_float4 __t0 = _mm_unpacklo_ps(__x0,__x2);
   1880     simd_float4 __t1 = _mm_unpackhi_ps(__x0,__x2);
   1881     simd_float4 __t2 = _mm_unpacklo_ps(__x1,__x3);
   1882     simd_float4 __t3 = _mm_unpackhi_ps(__x1,__x3);
   1883     simd_float4 __r0 = _mm_unpacklo_ps(__t0,__t2);
   1884     simd_float4 __r1 = _mm_unpackhi_ps(__t0,__t2);
   1885     simd_float4 __r2 = _mm_unpacklo_ps(__t1,__t3);
   1886 #elif defined __ARM_NEON__ && defined __arm64__
   1887     simd_float4 __t0 = vzip1q_f32(__x0,__x2);
   1888     simd_float4 __t1 = vzip2q_f32(__x0,__x2);
   1889     simd_float4 __t2 = vzip1q_f32(__x1,__x3);
   1890     simd_float4 __t3 = vzip2q_f32(__x1,__x3);
   1891     simd_float4 __r0 = vzip1q_f32(__t0,__t2);
   1892     simd_float4 __r1 = vzip2q_f32(__t0,__t2);
   1893     simd_float4 __r2 = vzip1q_f32(__t1,__t3);
   1894 #else
   1895     simd_float4 __r0 = {__x.columns[0][0], __x.columns[1][0], __x.columns[2][0], __x.columns[3][0]};
   1896     simd_float4 __r1 = {__x.columns[0][1], __x.columns[1][1], __x.columns[2][1], __x.columns[3][1]};
   1897     simd_float4 __r2 = {__x.columns[0][2], __x.columns[1][2], __x.columns[2][2], __x.columns[3][2]};
   1898 #endif
   1899     return simd_matrix(__r0,__r1,__r2);
   1900 }
   1901 
   1902 static simd_float4x4 SIMD_CFUNC simd_transpose(simd_float4x4 __x) {
   1903 #if defined __SSE__
   1904     simd_float4 __t0 = _mm_unpacklo_ps(__x.columns[0],__x.columns[2]);
   1905     simd_float4 __t1 = _mm_unpackhi_ps(__x.columns[0],__x.columns[2]);
   1906     simd_float4 __t2 = _mm_unpacklo_ps(__x.columns[1],__x.columns[3]);
   1907     simd_float4 __t3 = _mm_unpackhi_ps(__x.columns[1],__x.columns[3]);
   1908     simd_float4 __r0 = _mm_unpacklo_ps(__t0,__t2);
   1909     simd_float4 __r1 = _mm_unpackhi_ps(__t0,__t2);
   1910     simd_float4 __r2 = _mm_unpacklo_ps(__t1,__t3);
   1911     simd_float4 __r3 = _mm_unpackhi_ps(__t1,__t3);
   1912 #elif defined __ARM_NEON__ && defined __arm64__
   1913     simd_float4 __t0 = vzip1q_f32(__x.columns[0],__x.columns[2]);
   1914     simd_float4 __t1 = vzip2q_f32(__x.columns[0],__x.columns[2]);
   1915     simd_float4 __t2 = vzip1q_f32(__x.columns[1],__x.columns[3]);
   1916     simd_float4 __t3 = vzip2q_f32(__x.columns[1],__x.columns[3]);
   1917     simd_float4 __r0 = vzip1q_f32(__t0,__t2);
   1918     simd_float4 __r1 = vzip2q_f32(__t0,__t2);
   1919     simd_float4 __r2 = vzip1q_f32(__t1,__t3);
   1920     simd_float4 __r3 = vzip2q_f32(__t1,__t3);
   1921 #else
   1922     simd_float4 __r0 = {__x.columns[0][0], __x.columns[1][0], __x.columns[2][0], __x.columns[3][0]};
   1923     simd_float4 __r1 = {__x.columns[0][1], __x.columns[1][1], __x.columns[2][1], __x.columns[3][1]};
   1924     simd_float4 __r2 = {__x.columns[0][2], __x.columns[1][2], __x.columns[2][2], __x.columns[3][2]};
   1925     simd_float4 __r3 = {__x.columns[0][3], __x.columns[1][3], __x.columns[2][3], __x.columns[3][3]};
   1926 #endif
   1927     return simd_matrix(__r0,__r1,__r2,__r3);
   1928 }
   1929 
   1930 static simd_double2x2 SIMD_CFUNC simd_transpose(simd_double2x2 __x) {
   1931     simd_double2 __x0, __x1;
   1932     __x0 = __x.columns[0];
   1933     __x1 = __x.columns[1];
   1934 #if defined __ARM_NEON__ && defined __arm64__
   1935     simd_double2 __r0 = vzip1q_f64(__x0, __x1);
   1936     simd_double2 __r1 = vzip2q_f64(__x0, __x1);
   1937 #else
   1938     simd_double2 __r0 = { __x0[0], __x1[0] };
   1939     simd_double2 __r1 = { __x0[1], __x1[1] };
   1940 #endif
   1941     return simd_matrix(__r0, __r1);
   1942 }
   1943 
   1944 static simd_double3x2 SIMD_CFUNC simd_transpose(simd_double2x3 __x) {
   1945     simd_double4 __x0, __x1;
   1946     __x0.xyz = __x.columns[0];
   1947     __x1.xyz = __x.columns[1];
   1948 #if defined __ARM_NEON__ && defined __arm64__
   1949     simd_double2 __r0 = vzip1q_f64(__x0.lo,__x1.lo);
   1950     simd_double2 __r1 = vzip2q_f64(__x0.lo,__x1.lo);
   1951     simd_double2 __r2 = vzip1q_f64(__x0.hi,__x1.hi);
   1952 #else
   1953     simd_double2 __r0 = {__x0[0], __x1[0]};
   1954     simd_double2 __r1 = {__x0[1], __x1[1]};
   1955     simd_double2 __r2 = {__x0[2], __x1[2]};
   1956 #endif
   1957     return simd_matrix(__r0,__r1,__r2);
   1958 }
   1959 
   1960 static simd_double4x2 SIMD_CFUNC simd_transpose(simd_double2x4 __x) {
   1961     simd_double4 __x0, __x1;
   1962     __x0 = __x.columns[0];
   1963     __x1 = __x.columns[1];
   1964 #if defined __ARM_NEON__ && defined __arm64__
   1965     simd_double2 __r0 = vzip1q_f64(__x0.lo,__x1.lo);
   1966     simd_double2 __r1 = vzip2q_f64(__x0.lo,__x1.lo);
   1967     simd_double2 __r2 = vzip1q_f64(__x0.hi,__x1.hi);
   1968     simd_double2 __r3 = vzip2q_f64(__x0.hi,__x1.hi);
   1969 #else
   1970     simd_double2 __r0 = {__x0[0], __x1[0]};
   1971     simd_double2 __r1 = {__x0[1], __x1[1]};
   1972     simd_double2 __r2 = {__x0[2], __x1[2]};
   1973     simd_double2 __r3 = {__x0[3], __x1[3]};
   1974 #endif
   1975     return simd_matrix(__r0,__r1,__r2,__r3);
   1976 }
   1977 
   1978 static simd_double2x3 SIMD_CFUNC simd_transpose(simd_double3x2 __x) {
   1979     simd_double2 __x0, __x1, __x2;
   1980     __x0 = __x.columns[0];
   1981     __x1 = __x.columns[1];
   1982     __x2 = __x.columns[2];
   1983 #if defined __ARM_NEON__ && defined __arm64__
   1984     simd_double2 padding = { 0 };
   1985     simd_double4 __r0,__r1;
   1986     __r0.lo = vzip1q_f64(__x0,__x1);
   1987     __r1.lo = vzip2q_f64(__x0,__x1);
   1988     __r0.hi = vzip1q_f64(__x2,padding);
   1989     __r1.hi = vzip2q_f64(__x2,padding);
   1990 #else
   1991     simd_double4 __r0 = {__x0[0], __x1[0], __x2[0]};
   1992     simd_double4 __r1 = {__x0[1], __x1[1], __x2[1]};
   1993 #endif
   1994     return simd_matrix(__r0.xyz,__r1.xyz);
   1995 }
   1996 
   1997 static simd_double3x3 SIMD_CFUNC simd_transpose(simd_double3x3 __x) {
   1998     simd_double4 __x0, __x1, __x2;
   1999     __x0.xyz = __x.columns[0];
   2000     __x1.xyz = __x.columns[1];
   2001     __x2.xyz = __x.columns[2];
   2002 #if defined __ARM_NEON__ && defined __arm64__
   2003     simd_double2 padding = { 0 };
   2004     simd_double4 __r0,__r1,__r2;
   2005     __r0.lo = vzip1q_f64(__x0.lo,__x1.lo);
   2006     __r1.lo = vzip2q_f64(__x0.lo,__x1.lo);
   2007     __r2.lo = vzip1q_f64(__x0.hi,__x1.hi);
   2008     __r0.hi = vzip1q_f64(__x2.lo,padding);
   2009     __r1.hi = vzip2q_f64(__x2.lo,padding);
   2010     __r2.hi = vzip1q_f64(__x2.hi,padding);
   2011 #else
   2012     simd_double4 __r0 = {__x0[0], __x1[0], __x2[0]};
   2013     simd_double4 __r1 = {__x0[1], __x1[1], __x2[1]};
   2014     simd_double4 __r2 = {__x0[2], __x1[2], __x2[2]};
   2015 #endif
   2016     return simd_matrix(__r0.xyz,__r1.xyz,__r2.xyz);
   2017 }
   2018 
   2019 static simd_double4x3 SIMD_CFUNC simd_transpose(simd_double3x4 __x) {
   2020     simd_double4 __x0, __x1, __x2;
   2021     __x0 = __x.columns[0];
   2022     __x1 = __x.columns[1];
   2023     __x2 = __x.columns[2];
   2024 #if defined __ARM_NEON__ && defined __arm64__
   2025     simd_double2 padding = { 0 };
   2026     simd_double4 __r0,__r1,__r2,__r3;
   2027     __r0.lo = vzip1q_f64(__x0.lo,__x1.lo);
   2028     __r1.lo = vzip2q_f64(__x0.lo,__x1.lo);
   2029     __r2.lo = vzip1q_f64(__x0.hi,__x1.hi);
   2030     __r3.lo = vzip2q_f64(__x0.hi,__x1.hi);
   2031     __r0.hi = vzip1q_f64(__x2.lo,padding);
   2032     __r1.hi = vzip2q_f64(__x2.lo,padding);
   2033     __r2.hi = vzip1q_f64(__x2.hi,padding);
   2034     __r3.hi = vzip2q_f64(__x2.hi,padding);
   2035 #else
   2036     simd_double4 __r0 = {__x0[0], __x1[0], __x2[0]};
   2037     simd_double4 __r1 = {__x0[1], __x1[1], __x2[1]};
   2038     simd_double4 __r2 = {__x0[2], __x1[2], __x2[2]};
   2039     simd_double4 __r3 = {__x0[3], __x1[3], __x2[3]};
   2040 #endif
   2041     return simd_matrix(__r0.xyz,__r1.xyz,__r2.xyz,__r3.xyz);
   2042 }
   2043 
   2044 static simd_double2x4 SIMD_CFUNC simd_transpose(simd_double4x2 __x) {
   2045     simd_double2 __x0, __x1, __x2, __x3;
   2046     __x0 = __x.columns[0];
   2047     __x1 = __x.columns[1];
   2048     __x2 = __x.columns[2];
   2049     __x3 = __x.columns[3];
   2050 #if defined __ARM_NEON__ && defined __arm64__
   2051     simd_double4 __r0,__r1;
   2052     __r0.lo = vzip1q_f64(__x0,__x1);
   2053     __r1.lo = vzip2q_f64(__x0,__x1);
   2054     __r0.hi = vzip1q_f64(__x2,__x3);
   2055     __r1.hi = vzip2q_f64(__x2,__x3);
   2056 #else
   2057     simd_double4 __r0 = {__x0[0], __x1[0], __x2[0], __x3[0]};
   2058     simd_double4 __r1 = {__x0[1], __x1[1], __x2[1], __x3[1]};
   2059 #endif
   2060     return simd_matrix(__r0,__r1);
   2061 }
   2062 
   2063 static simd_double3x4 SIMD_CFUNC simd_transpose(simd_double4x3 __x) {
   2064     simd_double4 __x0, __x1, __x2, __x3;
   2065     __x0.xyz = __x.columns[0];
   2066     __x1.xyz = __x.columns[1];
   2067     __x2.xyz = __x.columns[2];
   2068     __x3.xyz = __x.columns[3];
   2069 #if defined __ARM_NEON__ && defined __arm64__
   2070     simd_double4 __r0,__r1,__r2;
   2071     __r0.lo = vzip1q_f64(__x0.lo,__x1.lo);
   2072     __r1.lo = vzip2q_f64(__x0.lo,__x1.lo);
   2073     __r2.lo = vzip1q_f64(__x0.hi,__x1.hi);
   2074     __r0.hi = vzip1q_f64(__x2.lo,__x3.lo);
   2075     __r1.hi = vzip2q_f64(__x2.lo,__x3.lo);
   2076     __r2.hi = vzip1q_f64(__x2.hi,__x3.hi);
   2077 #else
   2078     simd_double4 __r0 = {__x0[0], __x1[0], __x2[0], __x3[0]};
   2079     simd_double4 __r1 = {__x0[1], __x1[1], __x2[1], __x3[1]};
   2080     simd_double4 __r2 = {__x0[2], __x1[2], __x2[2], __x3[2]};
   2081 #endif
   2082     return simd_matrix(__r0,__r1,__r2);
   2083 }
   2084 
   2085 static simd_double4x4 SIMD_CFUNC simd_transpose(simd_double4x4 __x) {
   2086     simd_double4 __x0, __x1, __x2, __x3;
   2087     __x0 = __x.columns[0];
   2088     __x1 = __x.columns[1];
   2089     __x2 = __x.columns[2];
   2090     __x3 = __x.columns[3];
   2091 #if defined __ARM_NEON__ && defined __arm64__
   2092     simd_double4 __r0,__r1,__r2,__r3;
   2093     __r0.lo = vzip1q_f64(__x0.lo,__x1.lo);
   2094     __r1.lo = vzip2q_f64(__x0.lo,__x1.lo);
   2095     __r2.lo = vzip1q_f64(__x0.hi,__x1.hi);
   2096     __r3.lo = vzip2q_f64(__x0.hi,__x1.hi);
   2097     __r0.hi = vzip1q_f64(__x2.lo,__x3.lo);
   2098     __r1.hi = vzip2q_f64(__x2.lo,__x3.lo);
   2099     __r2.hi = vzip1q_f64(__x2.hi,__x3.hi);
   2100     __r3.hi = vzip2q_f64(__x2.hi,__x3.hi);
   2101 #else
   2102     simd_double4 __r0 = {__x0[0], __x1[0], __x2[0], __x3[0]};
   2103     simd_double4 __r1 = {__x0[1], __x1[1], __x2[1], __x3[1]};
   2104     simd_double4 __r2 = {__x0[2], __x1[2], __x2[2], __x3[2]};
   2105     simd_double4 __r3 = {__x0[3], __x1[3], __x2[3], __x3[3]};
   2106 #endif
   2107     return simd_matrix(__r0,__r1,__r2,__r3);
   2108 }
   2109 
   2110 static  simd_half3  SIMD_CFUNC __rotate1(  simd_half3 __x) { return __builtin_shufflevector(__x,__x,1,2,0); }
   2111 static  simd_half3  SIMD_CFUNC __rotate2(  simd_half3 __x) { return __builtin_shufflevector(__x,__x,2,0,1); }
   2112 static  simd_half4  SIMD_CFUNC __rotate1(  simd_half4 __x) { return __builtin_shufflevector(__x,__x,1,2,3,0); }
   2113 static  simd_half4  SIMD_CFUNC __rotate2(  simd_half4 __x) { return __builtin_shufflevector(__x,__x,2,3,0,1); }
   2114 static  simd_half4  SIMD_CFUNC __rotate3(  simd_half4 __x) { return __builtin_shufflevector(__x,__x,3,0,1,2); }
   2115 static  simd_float3 SIMD_CFUNC __rotate1( simd_float3 __x) { return __builtin_shufflevector(__x,__x,1,2,0); }
   2116 static  simd_float3 SIMD_CFUNC __rotate2( simd_float3 __x) { return __builtin_shufflevector(__x,__x,2,0,1); }
   2117 static  simd_float4 SIMD_CFUNC __rotate1( simd_float4 __x) { return __builtin_shufflevector(__x,__x,1,2,3,0); }
   2118 static  simd_float4 SIMD_CFUNC __rotate2( simd_float4 __x) { return __builtin_shufflevector(__x,__x,2,3,0,1); }
   2119 static  simd_float4 SIMD_CFUNC __rotate3( simd_float4 __x) { return __builtin_shufflevector(__x,__x,3,0,1,2); }
   2120 static simd_double3 SIMD_CFUNC __rotate1(simd_double3 __x) { return __builtin_shufflevector(__x,__x,1,2,0); }
   2121 static simd_double3 SIMD_CFUNC __rotate2(simd_double3 __x) { return __builtin_shufflevector(__x,__x,2,0,1); }
   2122 static simd_double4 SIMD_CFUNC __rotate1(simd_double4 __x) { return __builtin_shufflevector(__x,__x,1,2,3,0); }
   2123 static simd_double4 SIMD_CFUNC __rotate2(simd_double4 __x) { return __builtin_shufflevector(__x,__x,2,3,0,1); }
   2124 static simd_double4 SIMD_CFUNC __rotate3(simd_double4 __x) { return __builtin_shufflevector(__x,__x,3,0,1,2); }
   2125 
   2126 static _Float16 SIMD_CFUNC simd_trace(simd_half2x2 __x) { return __x.columns[0][0] + __x.columns[1][1]; }
   2127 static  float SIMD_CFUNC simd_trace( simd_float2x2 __x) { return __x.columns[0][0] + __x.columns[1][1]; }
   2128 static double SIMD_CFUNC simd_trace(simd_double2x2 __x) { return __x.columns[0][0] + __x.columns[1][1]; }
   2129 static _Float16 SIMD_CFUNC simd_trace(simd_half3x3 __x) { return __x.columns[0][0] + __x.columns[1][1] + __x.columns[2][2]; }
   2130 static  float SIMD_CFUNC simd_trace( simd_float3x3 __x) { return __x.columns[0][0] + __x.columns[1][1] + __x.columns[2][2]; }
   2131 static double SIMD_CFUNC simd_trace(simd_double3x3 __x) { return __x.columns[0][0] + __x.columns[1][1] + __x.columns[2][2]; }
   2132 static _Float16 SIMD_CFUNC simd_trace(simd_half4x4 __x) { return __x.columns[0][0] + __x.columns[1][1] + __x.columns[2][2] + __x.columns[3][3]; }
   2133 static  float SIMD_CFUNC simd_trace( simd_float4x4 __x) { return __x.columns[0][0] + __x.columns[1][1] + __x.columns[2][2] + __x.columns[3][3]; }
   2134 static double SIMD_CFUNC simd_trace(simd_double4x4 __x) { return __x.columns[0][0] + __x.columns[1][1] + __x.columns[2][2] + __x.columns[3][3]; }
   2135 
   2136 static _Float16 SIMD_CFUNC simd_determinant(simd_half2x2 __x) { return __x.columns[0][0]*__x.columns[1][1] - __x.columns[0][1]*__x.columns[1][0]; }
   2137 static  float SIMD_CFUNC simd_determinant( simd_float2x2 __x) { return __x.columns[0][0]*__x.columns[1][1] - __x.columns[0][1]*__x.columns[1][0]; }
   2138 static double SIMD_CFUNC simd_determinant(simd_double2x2 __x) { return __x.columns[0][0]*__x.columns[1][1] - __x.columns[0][1]*__x.columns[1][0]; }
   2139 static _Float16 SIMD_CFUNC simd_determinant(simd_half3x3 __x) { return simd_reduce_add(__x.columns[0]*(__rotate1(__x.columns[1])*__rotate2(__x.columns[2]) - __rotate2(__x.columns[1])*__rotate1(__x.columns[2]))); }
   2140 static  float SIMD_CFUNC simd_determinant( simd_float3x3 __x) { return simd_reduce_add(__x.columns[0]*(__rotate1(__x.columns[1])*__rotate2(__x.columns[2]) - __rotate2(__x.columns[1])*__rotate1(__x.columns[2]))); }
   2141 static double SIMD_CFUNC simd_determinant(simd_double3x3 __x) { return simd_reduce_add(__x.columns[0]*(__rotate1(__x.columns[1])*__rotate2(__x.columns[2]) - __rotate2(__x.columns[1])*__rotate1(__x.columns[2]))); }
   2142 static _Float16 SIMD_CFUNC simd_determinant( simd_half4x4 __x) {
   2143     simd_half4 codet = __x.columns[0]*(__rotate1(__x.columns[1])*(__rotate2(__x.columns[2])*__rotate3(__x.columns[3])-__rotate3(__x.columns[2])*__rotate2(__x.columns[3])) +
   2144                                        __rotate2(__x.columns[1])*(__rotate3(__x.columns[2])*__rotate1(__x.columns[3])-__rotate1(__x.columns[2])*__rotate3(__x.columns[3])) +
   2145                                        __rotate3(__x.columns[1])*(__rotate1(__x.columns[2])*__rotate2(__x.columns[3])-__rotate2(__x.columns[2])*__rotate1(__x.columns[3])));
   2146     return simd_reduce_add(codet.even - codet.odd);
   2147 }
   2148 static  float SIMD_CFUNC simd_determinant( simd_float4x4 __x) {
   2149     simd_float4 codet = __x.columns[0]*(__rotate1(__x.columns[1])*(__rotate2(__x.columns[2])*__rotate3(__x.columns[3])-__rotate3(__x.columns[2])*__rotate2(__x.columns[3])) +
   2150                                           __rotate2(__x.columns[1])*(__rotate3(__x.columns[2])*__rotate1(__x.columns[3])-__rotate1(__x.columns[2])*__rotate3(__x.columns[3])) +
   2151                                           __rotate3(__x.columns[1])*(__rotate1(__x.columns[2])*__rotate2(__x.columns[3])-__rotate2(__x.columns[2])*__rotate1(__x.columns[3])));
   2152     return simd_reduce_add(codet.even - codet.odd);
   2153 }
   2154 static double SIMD_CFUNC simd_determinant(simd_double4x4 __x) {
   2155     simd_double4 codet = __x.columns[0]*(__rotate1(__x.columns[1])*(__rotate2(__x.columns[2])*__rotate3(__x.columns[3])-__rotate3(__x.columns[2])*__rotate2(__x.columns[3])) +
   2156                                            __rotate2(__x.columns[1])*(__rotate3(__x.columns[2])*__rotate1(__x.columns[3])-__rotate1(__x.columns[2])*__rotate3(__x.columns[3])) +
   2157                                            __rotate3(__x.columns[1])*(__rotate1(__x.columns[2])*__rotate2(__x.columns[3])-__rotate2(__x.columns[2])*__rotate1(__x.columns[3])));
   2158     return simd_reduce_add(codet.even - codet.odd);
   2159 }
   2160 
   2161 #if SIMD_LIBRARY_VERSION >= 6
   2162 static  simd_half2x2  SIMD_CFUNC simd_inverse(  simd_half2x2 __x) { return __invert_h2(__x); }
   2163 static  simd_half3x3  SIMD_CFUNC simd_inverse(  simd_half3x3 __x) { return __invert_h3(__x); }
   2164 static  simd_half4x4  SIMD_CFUNC simd_inverse(  simd_half4x4 __x) { return __invert_h4(__x); }
   2165 #endif /* SIMD_LIBRARY_VERSION */
   2166 static  simd_float2x2 SIMD_CFUNC simd_inverse( simd_float2x2 __x) { return __invert_f2(__x); }
   2167 static  simd_float3x3 SIMD_CFUNC simd_inverse( simd_float3x3 __x) { return __invert_f3(__x); }
   2168 static  simd_float4x4 SIMD_CFUNC simd_inverse( simd_float4x4 __x) { return __invert_f4(__x); }
   2169 static simd_double2x2 SIMD_CFUNC simd_inverse(simd_double2x2 __x) { return __invert_d2(__x); }
   2170 static simd_double3x3 SIMD_CFUNC simd_inverse(simd_double3x3 __x) { return __invert_d3(__x); }
   2171 static simd_double4x4 SIMD_CFUNC simd_inverse(simd_double4x4 __x) { return __invert_d4(__x); }
   2172 
   /* simd_mul(matrix, vector): matrix times column vector.
    *
    * The result starts as column 0 of __x scaled by __y[0]; every further
    * column k is then folded in with simd_muladd(__x.columns[k], __y[k], acc).
    * The calls are nested so that the innermost one handles column 1 and the
    * outermost one handles the last column, i.e. columns are accumulated in
    * ascending order.
    * NOTE(review): simd_muladd is defined elsewhere; presumably a*b + c. */
   2173 static  simd_half2 SIMD_CFUNC simd_mul( simd_half2x2 __x,  simd_half2 __y) { return simd_muladd(__x.columns[1], __y[1], __x.columns[0]*__y[0]); }
   2174 static  simd_half3 SIMD_CFUNC simd_mul( simd_half2x3 __x,  simd_half2 __y) { return simd_muladd(__x.columns[1], __y[1], __x.columns[0]*__y[0]); }
   2175 static  simd_half4 SIMD_CFUNC simd_mul( simd_half2x4 __x,  simd_half2 __y) { return simd_muladd(__x.columns[1], __y[1], __x.columns[0]*__y[0]); }
   2176 static  simd_half2 SIMD_CFUNC simd_mul( simd_half3x2 __x,  simd_half3 __y) { return simd_muladd(__x.columns[2], __y[2], simd_muladd(__x.columns[1], __y[1], __x.columns[0]*__y[0])); }
   2177 static  simd_half3 SIMD_CFUNC simd_mul( simd_half3x3 __x,  simd_half3 __y) { return simd_muladd(__x.columns[2], __y[2], simd_muladd(__x.columns[1], __y[1], __x.columns[0]*__y[0])); }
   2178 static  simd_half4 SIMD_CFUNC simd_mul( simd_half3x4 __x,  simd_half3 __y) { return simd_muladd(__x.columns[2], __y[2], simd_muladd(__x.columns[1], __y[1], __x.columns[0]*__y[0])); }
   2179 static  simd_half2 SIMD_CFUNC simd_mul( simd_half4x2 __x,  simd_half4 __y) { return simd_muladd(__x.columns[3], __y[3], simd_muladd(__x.columns[2], __y[2], simd_muladd(__x.columns[1], __y[1], __x.columns[0]*__y[0]))); }
   2180 static  simd_half3 SIMD_CFUNC simd_mul( simd_half4x3 __x,  simd_half4 __y) { return simd_muladd(__x.columns[3], __y[3], simd_muladd(__x.columns[2], __y[2], simd_muladd(__x.columns[1], __y[1], __x.columns[0]*__y[0]))); }
   2181 static  simd_half4 SIMD_CFUNC simd_mul( simd_half4x4 __x,  simd_half4 __y) { return simd_muladd(__x.columns[3], __y[3], simd_muladd(__x.columns[2], __y[2], simd_muladd(__x.columns[1], __y[1], __x.columns[0]*__y[0]))); }
   2182 static  simd_float2 SIMD_CFUNC simd_mul( simd_float2x2 __x,  simd_float2 __y) { return simd_muladd(__x.columns[1], __y[1], __x.columns[0]*__y[0]); }
   2183 static  simd_float3 SIMD_CFUNC simd_mul( simd_float2x3 __x,  simd_float2 __y) { return simd_muladd(__x.columns[1], __y[1], __x.columns[0]*__y[0]); }
   2184 static  simd_float4 SIMD_CFUNC simd_mul( simd_float2x4 __x,  simd_float2 __y) { return simd_muladd(__x.columns[1], __y[1], __x.columns[0]*__y[0]); }
   2185 static  simd_float2 SIMD_CFUNC simd_mul( simd_float3x2 __x,  simd_float3 __y) { return simd_muladd(__x.columns[2], __y[2], simd_muladd(__x.columns[1], __y[1], __x.columns[0]*__y[0])); }
   2186 static  simd_float3 SIMD_CFUNC simd_mul( simd_float3x3 __x,  simd_float3 __y) { return simd_muladd(__x.columns[2], __y[2], simd_muladd(__x.columns[1], __y[1], __x.columns[0]*__y[0])); }
   2187 static  simd_float4 SIMD_CFUNC simd_mul( simd_float3x4 __x,  simd_float3 __y) { return simd_muladd(__x.columns[2], __y[2], simd_muladd(__x.columns[1], __y[1], __x.columns[0]*__y[0])); }
   2188 static  simd_float2 SIMD_CFUNC simd_mul( simd_float4x2 __x,  simd_float4 __y) { return simd_muladd(__x.columns[3], __y[3], simd_muladd(__x.columns[2], __y[2], simd_muladd(__x.columns[1], __y[1], __x.columns[0]*__y[0]))); }
   2189 static  simd_float3 SIMD_CFUNC simd_mul( simd_float4x3 __x,  simd_float4 __y) { return simd_muladd(__x.columns[3], __y[3], simd_muladd(__x.columns[2], __y[2], simd_muladd(__x.columns[1], __y[1], __x.columns[0]*__y[0]))); }
   2190 static  simd_float4 SIMD_CFUNC simd_mul( simd_float4x4 __x,  simd_float4 __y) { return simd_muladd(__x.columns[3], __y[3], simd_muladd(__x.columns[2], __y[2], simd_muladd(__x.columns[1], __y[1], __x.columns[0]*__y[0]))); }
   2191 static simd_double2 SIMD_CFUNC simd_mul(simd_double2x2 __x, simd_double2 __y) { return simd_muladd(__x.columns[1], __y[1], __x.columns[0]*__y[0]); }
   2192 static simd_double3 SIMD_CFUNC simd_mul(simd_double2x3 __x, simd_double2 __y) { return simd_muladd(__x.columns[1], __y[1], __x.columns[0]*__y[0]); }
   2193 static simd_double4 SIMD_CFUNC simd_mul(simd_double2x4 __x, simd_double2 __y) { return simd_muladd(__x.columns[1], __y[1], __x.columns[0]*__y[0]); }
   2194 static simd_double2 SIMD_CFUNC simd_mul(simd_double3x2 __x, simd_double3 __y) { return simd_muladd(__x.columns[2], __y[2], simd_muladd(__x.columns[1], __y[1], __x.columns[0]*__y[0])); }
   2195 static simd_double3 SIMD_CFUNC simd_mul(simd_double3x3 __x, simd_double3 __y) { return simd_muladd(__x.columns[2], __y[2], simd_muladd(__x.columns[1], __y[1], __x.columns[0]*__y[0])); }
   2196 static simd_double4 SIMD_CFUNC simd_mul(simd_double3x4 __x, simd_double3 __y) { return simd_muladd(__x.columns[2], __y[2], simd_muladd(__x.columns[1], __y[1], __x.columns[0]*__y[0])); }
   2197 static simd_double2 SIMD_CFUNC simd_mul(simd_double4x2 __x, simd_double4 __y) { return simd_muladd(__x.columns[3], __y[3], simd_muladd(__x.columns[2], __y[2], simd_muladd(__x.columns[1], __y[1], __x.columns[0]*__y[0]))); }
   2198 static simd_double3 SIMD_CFUNC simd_mul(simd_double4x3 __x, simd_double4 __y) { return simd_muladd(__x.columns[3], __y[3], simd_muladd(__x.columns[2], __y[2], simd_muladd(__x.columns[1], __y[1], __x.columns[0]*__y[0]))); }
   2199 static simd_double4 SIMD_CFUNC simd_mul(simd_double4x4 __x, simd_double4 __y) { return simd_muladd(__x.columns[3], __y[3], simd_muladd(__x.columns[2], __y[2], simd_muladd(__x.columns[1], __y[1], __x.columns[0]*__y[0]))); }
   2200 
   /* simd_mul(vector, matrix): vector times matrix.
    *
    * Implemented by transposing the matrix operand and reusing the
    * matrix-times-vector overloads: the transposed matrix is stored in the
    * local __yt (an NxM input becomes MxN), and the result is
    * simd_mul(__yt, __x). */
   2201 static   simd_half2 SIMD_CFUNC simd_mul(  simd_half2 __x,   simd_half2x2 __y) { simd_half2x2 __yt = simd_transpose(__y); return simd_mul(__yt, __x); }
   2202 static   simd_half3 SIMD_CFUNC simd_mul(  simd_half2 __x,   simd_half3x2 __y) { simd_half2x3 __yt = simd_transpose(__y); return simd_mul(__yt, __x); }
   2203 static   simd_half4 SIMD_CFUNC simd_mul(  simd_half2 __x,   simd_half4x2 __y) { simd_half2x4 __yt = simd_transpose(__y); return simd_mul(__yt, __x); }
   2204 static   simd_half2 SIMD_CFUNC simd_mul(  simd_half3 __x,   simd_half2x3 __y) { simd_half3x2 __yt = simd_transpose(__y); return simd_mul(__yt, __x); }
   2205 static   simd_half3 SIMD_CFUNC simd_mul(  simd_half3 __x,   simd_half3x3 __y) { simd_half3x3 __yt = simd_transpose(__y); return simd_mul(__yt, __x); }
   2206 static   simd_half4 SIMD_CFUNC simd_mul(  simd_half3 __x,   simd_half4x3 __y) { simd_half3x4 __yt = simd_transpose(__y); return simd_mul(__yt, __x); }
   2207 static   simd_half2 SIMD_CFUNC simd_mul(  simd_half4 __x,   simd_half2x4 __y) { simd_half4x2 __yt = simd_transpose(__y); return simd_mul(__yt, __x); }
   2208 static   simd_half3 SIMD_CFUNC simd_mul(  simd_half4 __x,   simd_half3x4 __y) { simd_half4x3 __yt = simd_transpose(__y); return simd_mul(__yt, __x); }
   2209 static   simd_half4 SIMD_CFUNC simd_mul(  simd_half4 __x,   simd_half4x4 __y) { simd_half4x4 __yt = simd_transpose(__y); return simd_mul(__yt, __x); }
   2210 static  simd_float2 SIMD_CFUNC simd_mul( simd_float2 __x,  simd_float2x2 __y) { simd_float2x2 __yt = simd_transpose(__y); return simd_mul(__yt, __x); }
   2211 static  simd_float3 SIMD_CFUNC simd_mul( simd_float2 __x,  simd_float3x2 __y) { simd_float2x3 __yt = simd_transpose(__y); return simd_mul(__yt, __x); }
   2212 static  simd_float4 SIMD_CFUNC simd_mul( simd_float2 __x,  simd_float4x2 __y) { simd_float2x4 __yt = simd_transpose(__y); return simd_mul(__yt, __x); }
   2213 static  simd_float2 SIMD_CFUNC simd_mul( simd_float3 __x,  simd_float2x3 __y) { simd_float3x2 __yt = simd_transpose(__y); return simd_mul(__yt, __x); }
   2214 static  simd_float3 SIMD_CFUNC simd_mul( simd_float3 __x,  simd_float3x3 __y) { simd_float3x3 __yt = simd_transpose(__y); return simd_mul(__yt, __x); }
   2215 static  simd_float4 SIMD_CFUNC simd_mul( simd_float3 __x,  simd_float4x3 __y) { simd_float3x4 __yt = simd_transpose(__y); return simd_mul(__yt, __x); }
   2216 static  simd_float2 SIMD_CFUNC simd_mul( simd_float4 __x,  simd_float2x4 __y) { simd_float4x2 __yt = simd_transpose(__y); return simd_mul(__yt, __x); }
   2217 static  simd_float3 SIMD_CFUNC simd_mul( simd_float4 __x,  simd_float3x4 __y) { simd_float4x3 __yt = simd_transpose(__y); return simd_mul(__yt, __x); }
   2218 static  simd_float4 SIMD_CFUNC simd_mul( simd_float4 __x,  simd_float4x4 __y) { simd_float4x4 __yt = simd_transpose(__y); return simd_mul(__yt, __x); }
   2219 static simd_double2 SIMD_CFUNC simd_mul(simd_double2 __x, simd_double2x2 __y) { simd_double2x2 __yt = simd_transpose(__y); return simd_mul(__yt, __x); }
   2220 static simd_double3 SIMD_CFUNC simd_mul(simd_double2 __x, simd_double3x2 __y) { simd_double2x3 __yt = simd_transpose(__y); return simd_mul(__yt, __x); }
   2221 static simd_double4 SIMD_CFUNC simd_mul(simd_double2 __x, simd_double4x2 __y) { simd_double2x4 __yt = simd_transpose(__y); return simd_mul(__yt, __x); }
   2222 static simd_double2 SIMD_CFUNC simd_mul(simd_double3 __x, simd_double2x3 __y) { simd_double3x2 __yt = simd_transpose(__y); return simd_mul(__yt, __x); }
   2223 static simd_double3 SIMD_CFUNC simd_mul(simd_double3 __x, simd_double3x3 __y) { simd_double3x3 __yt = simd_transpose(__y); return simd_mul(__yt, __x); }
   2224 static simd_double4 SIMD_CFUNC simd_mul(simd_double3 __x, simd_double4x3 __y) { simd_double3x4 __yt = simd_transpose(__y); return simd_mul(__yt, __x); }
   2225 static simd_double2 SIMD_CFUNC simd_mul(simd_double4 __x, simd_double2x4 __y) { simd_double4x2 __yt = simd_transpose(__y); return simd_mul(__yt, __x); }
   2226 static simd_double3 SIMD_CFUNC simd_mul(simd_double4 __x, simd_double3x4 __y) { simd_double4x3 __yt = simd_transpose(__y); return simd_mul(__yt, __x); }
   2227 static simd_double4 SIMD_CFUNC simd_mul(simd_double4 __x, simd_double4x4 __y) { simd_double4x4 __yt = simd_transpose(__y); return simd_mul(__yt, __x); }
   2228 
   /* simd_mul(matrix, matrix) where the right-hand operand has two columns.
    *
    * Column k of the result is simd_mul(__x, column k of __y), i.e. the
    * product is built one column at a time from the matrix-times-vector
    * overloads.  Both columns of __r are written before it is returned.
    *
    * The loop index is spelled __i, like every other local in this header,
    * so that a macro named `i` defined by the including program cannot
    * break these definitions. */
   2229 static   simd_half2x2 SIMD_CFUNC simd_mul(  simd_half2x2 __x,   simd_half2x2 __y) {   simd_half2x2 __r; for (int __i=0; __i<2; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2230 static  simd_float2x2 SIMD_CFUNC simd_mul( simd_float2x2 __x,  simd_float2x2 __y) {  simd_float2x2 __r; for (int __i=0; __i<2; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2231 static simd_double2x2 SIMD_CFUNC simd_mul(simd_double2x2 __x, simd_double2x2 __y) { simd_double2x2 __r; for (int __i=0; __i<2; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2232 static   simd_half2x3 SIMD_CFUNC simd_mul(  simd_half2x3 __x,   simd_half2x2 __y) {   simd_half2x3 __r; for (int __i=0; __i<2; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2233 static  simd_float2x3 SIMD_CFUNC simd_mul( simd_float2x3 __x,  simd_float2x2 __y) {  simd_float2x3 __r; for (int __i=0; __i<2; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2234 static simd_double2x3 SIMD_CFUNC simd_mul(simd_double2x3 __x, simd_double2x2 __y) { simd_double2x3 __r; for (int __i=0; __i<2; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2235 static   simd_half2x4 SIMD_CFUNC simd_mul(  simd_half2x4 __x,   simd_half2x2 __y) {   simd_half2x4 __r; for (int __i=0; __i<2; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2236 static  simd_float2x4 SIMD_CFUNC simd_mul( simd_float2x4 __x,  simd_float2x2 __y) {  simd_float2x4 __r; for (int __i=0; __i<2; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2237 static simd_double2x4 SIMD_CFUNC simd_mul(simd_double2x4 __x, simd_double2x2 __y) { simd_double2x4 __r; for (int __i=0; __i<2; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2238 static   simd_half2x2 SIMD_CFUNC simd_mul(  simd_half3x2 __x,   simd_half2x3 __y) {   simd_half2x2 __r; for (int __i=0; __i<2; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2239 static  simd_float2x2 SIMD_CFUNC simd_mul( simd_float3x2 __x,  simd_float2x3 __y) {  simd_float2x2 __r; for (int __i=0; __i<2; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2240 static simd_double2x2 SIMD_CFUNC simd_mul(simd_double3x2 __x, simd_double2x3 __y) { simd_double2x2 __r; for (int __i=0; __i<2; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2241 static   simd_half2x3 SIMD_CFUNC simd_mul(  simd_half3x3 __x,   simd_half2x3 __y) {   simd_half2x3 __r; for (int __i=0; __i<2; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2242 static  simd_float2x3 SIMD_CFUNC simd_mul( simd_float3x3 __x,  simd_float2x3 __y) {  simd_float2x3 __r; for (int __i=0; __i<2; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2243 static simd_double2x3 SIMD_CFUNC simd_mul(simd_double3x3 __x, simd_double2x3 __y) { simd_double2x3 __r; for (int __i=0; __i<2; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2244 static   simd_half2x4 SIMD_CFUNC simd_mul(  simd_half3x4 __x,   simd_half2x3 __y) {   simd_half2x4 __r; for (int __i=0; __i<2; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2245 static  simd_float2x4 SIMD_CFUNC simd_mul( simd_float3x4 __x,  simd_float2x3 __y) {  simd_float2x4 __r; for (int __i=0; __i<2; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2246 static simd_double2x4 SIMD_CFUNC simd_mul(simd_double3x4 __x, simd_double2x3 __y) { simd_double2x4 __r; for (int __i=0; __i<2; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2247 static   simd_half2x2 SIMD_CFUNC simd_mul(  simd_half4x2 __x,   simd_half2x4 __y) {   simd_half2x2 __r; for (int __i=0; __i<2; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2248 static  simd_float2x2 SIMD_CFUNC simd_mul( simd_float4x2 __x,  simd_float2x4 __y) {  simd_float2x2 __r; for (int __i=0; __i<2; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2249 static simd_double2x2 SIMD_CFUNC simd_mul(simd_double4x2 __x, simd_double2x4 __y) { simd_double2x2 __r; for (int __i=0; __i<2; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2250 static   simd_half2x3 SIMD_CFUNC simd_mul(  simd_half4x3 __x,   simd_half2x4 __y) {   simd_half2x3 __r; for (int __i=0; __i<2; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2251 static  simd_float2x3 SIMD_CFUNC simd_mul( simd_float4x3 __x,  simd_float2x4 __y) {  simd_float2x3 __r; for (int __i=0; __i<2; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2252 static simd_double2x3 SIMD_CFUNC simd_mul(simd_double4x3 __x, simd_double2x4 __y) { simd_double2x3 __r; for (int __i=0; __i<2; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2253 static   simd_half2x4 SIMD_CFUNC simd_mul(  simd_half4x4 __x,   simd_half2x4 __y) {   simd_half2x4 __r; for (int __i=0; __i<2; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2254 static  simd_float2x4 SIMD_CFUNC simd_mul( simd_float4x4 __x,  simd_float2x4 __y) {  simd_float2x4 __r; for (int __i=0; __i<2; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2255 static simd_double2x4 SIMD_CFUNC simd_mul(simd_double4x4 __x, simd_double2x4 __y) { simd_double2x4 __r; for (int __i=0; __i<2; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2256 
   /* simd_mul(matrix, matrix) where the right-hand operand has three columns.
    *
    * Column k of the result is simd_mul(__x, column k of __y), i.e. the
    * product is built one column at a time from the matrix-times-vector
    * overloads.  All three columns of __r are written before it is returned.
    *
    * The loop index is spelled __i, like every other local in this header,
    * so that a macro named `i` defined by the including program cannot
    * break these definitions. */
   2257 static   simd_half3x2 SIMD_CFUNC simd_mul(  simd_half2x2 __x,   simd_half3x2 __y) {   simd_half3x2 __r; for (int __i=0; __i<3; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2258 static  simd_float3x2 SIMD_CFUNC simd_mul( simd_float2x2 __x,  simd_float3x2 __y) {  simd_float3x2 __r; for (int __i=0; __i<3; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2259 static simd_double3x2 SIMD_CFUNC simd_mul(simd_double2x2 __x, simd_double3x2 __y) { simd_double3x2 __r; for (int __i=0; __i<3; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2260 static   simd_half3x3 SIMD_CFUNC simd_mul(  simd_half2x3 __x,   simd_half3x2 __y) {   simd_half3x3 __r; for (int __i=0; __i<3; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2261 static  simd_float3x3 SIMD_CFUNC simd_mul( simd_float2x3 __x,  simd_float3x2 __y) {  simd_float3x3 __r; for (int __i=0; __i<3; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2262 static simd_double3x3 SIMD_CFUNC simd_mul(simd_double2x3 __x, simd_double3x2 __y) { simd_double3x3 __r; for (int __i=0; __i<3; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2263 static   simd_half3x4 SIMD_CFUNC simd_mul(  simd_half2x4 __x,   simd_half3x2 __y) {   simd_half3x4 __r; for (int __i=0; __i<3; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2264 static  simd_float3x4 SIMD_CFUNC simd_mul( simd_float2x4 __x,  simd_float3x2 __y) {  simd_float3x4 __r; for (int __i=0; __i<3; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2265 static simd_double3x4 SIMD_CFUNC simd_mul(simd_double2x4 __x, simd_double3x2 __y) { simd_double3x4 __r; for (int __i=0; __i<3; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2266 static   simd_half3x2 SIMD_CFUNC simd_mul(  simd_half3x2 __x,   simd_half3x3 __y) {   simd_half3x2 __r; for (int __i=0; __i<3; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2267 static  simd_float3x2 SIMD_CFUNC simd_mul( simd_float3x2 __x,  simd_float3x3 __y) {  simd_float3x2 __r; for (int __i=0; __i<3; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2268 static simd_double3x2 SIMD_CFUNC simd_mul(simd_double3x2 __x, simd_double3x3 __y) { simd_double3x2 __r; for (int __i=0; __i<3; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2269 static   simd_half3x3 SIMD_CFUNC simd_mul(  simd_half3x3 __x,   simd_half3x3 __y) {   simd_half3x3 __r; for (int __i=0; __i<3; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2270 static  simd_float3x3 SIMD_CFUNC simd_mul( simd_float3x3 __x,  simd_float3x3 __y) {  simd_float3x3 __r; for (int __i=0; __i<3; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2271 static simd_double3x3 SIMD_CFUNC simd_mul(simd_double3x3 __x, simd_double3x3 __y) { simd_double3x3 __r; for (int __i=0; __i<3; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2272 static   simd_half3x4 SIMD_CFUNC simd_mul(  simd_half3x4 __x,   simd_half3x3 __y) {   simd_half3x4 __r; for (int __i=0; __i<3; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2273 static  simd_float3x4 SIMD_CFUNC simd_mul( simd_float3x4 __x,  simd_float3x3 __y) {  simd_float3x4 __r; for (int __i=0; __i<3; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2274 static simd_double3x4 SIMD_CFUNC simd_mul(simd_double3x4 __x, simd_double3x3 __y) { simd_double3x4 __r; for (int __i=0; __i<3; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2275 static   simd_half3x2 SIMD_CFUNC simd_mul(  simd_half4x2 __x,   simd_half3x4 __y) {   simd_half3x2 __r; for (int __i=0; __i<3; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2276 static  simd_float3x2 SIMD_CFUNC simd_mul( simd_float4x2 __x,  simd_float3x4 __y) {  simd_float3x2 __r; for (int __i=0; __i<3; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2277 static simd_double3x2 SIMD_CFUNC simd_mul(simd_double4x2 __x, simd_double3x4 __y) { simd_double3x2 __r; for (int __i=0; __i<3; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2278 static   simd_half3x3 SIMD_CFUNC simd_mul(  simd_half4x3 __x,   simd_half3x4 __y) {   simd_half3x3 __r; for (int __i=0; __i<3; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2279 static  simd_float3x3 SIMD_CFUNC simd_mul( simd_float4x3 __x,  simd_float3x4 __y) {  simd_float3x3 __r; for (int __i=0; __i<3; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2280 static simd_double3x3 SIMD_CFUNC simd_mul(simd_double4x3 __x, simd_double3x4 __y) { simd_double3x3 __r; for (int __i=0; __i<3; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2281 static   simd_half3x4 SIMD_CFUNC simd_mul(  simd_half4x4 __x,   simd_half3x4 __y) {   simd_half3x4 __r; for (int __i=0; __i<3; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2282 static  simd_float3x4 SIMD_CFUNC simd_mul( simd_float4x4 __x,  simd_float3x4 __y) {  simd_float3x4 __r; for (int __i=0; __i<3; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2283 static simd_double3x4 SIMD_CFUNC simd_mul(simd_double4x4 __x, simd_double3x4 __y) { simd_double3x4 __r; for (int __i=0; __i<3; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2284 
   /* simd_mul(matrix, matrix) where the right-hand operand has four columns.
    *
    * Column k of the result is simd_mul(__x, column k of __y), i.e. the
    * product is built one column at a time from the matrix-times-vector
    * overloads.  All four columns of __r are written before it is returned.
    *
    * The loop index is spelled __i, like every other local in this header,
    * so that a macro named `i` defined by the including program cannot
    * break these definitions. */
   2285 static   simd_half4x2 SIMD_CFUNC simd_mul(  simd_half2x2 __x,   simd_half4x2 __y) {   simd_half4x2 __r; for (int __i=0; __i<4; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2286 static  simd_float4x2 SIMD_CFUNC simd_mul( simd_float2x2 __x,  simd_float4x2 __y) {  simd_float4x2 __r; for (int __i=0; __i<4; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2287 static simd_double4x2 SIMD_CFUNC simd_mul(simd_double2x2 __x, simd_double4x2 __y) { simd_double4x2 __r; for (int __i=0; __i<4; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2288 static   simd_half4x3 SIMD_CFUNC simd_mul(  simd_half2x3 __x,   simd_half4x2 __y) {   simd_half4x3 __r; for (int __i=0; __i<4; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2289 static  simd_float4x3 SIMD_CFUNC simd_mul( simd_float2x3 __x,  simd_float4x2 __y) {  simd_float4x3 __r; for (int __i=0; __i<4; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2290 static simd_double4x3 SIMD_CFUNC simd_mul(simd_double2x3 __x, simd_double4x2 __y) { simd_double4x3 __r; for (int __i=0; __i<4; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2291 static   simd_half4x4 SIMD_CFUNC simd_mul(  simd_half2x4 __x,   simd_half4x2 __y) {   simd_half4x4 __r; for (int __i=0; __i<4; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2292 static  simd_float4x4 SIMD_CFUNC simd_mul( simd_float2x4 __x,  simd_float4x2 __y) {  simd_float4x4 __r; for (int __i=0; __i<4; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2293 static simd_double4x4 SIMD_CFUNC simd_mul(simd_double2x4 __x, simd_double4x2 __y) { simd_double4x4 __r; for (int __i=0; __i<4; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2294 static   simd_half4x2 SIMD_CFUNC simd_mul(  simd_half3x2 __x,   simd_half4x3 __y) {   simd_half4x2 __r; for (int __i=0; __i<4; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2295 static  simd_float4x2 SIMD_CFUNC simd_mul( simd_float3x2 __x,  simd_float4x3 __y) {  simd_float4x2 __r; for (int __i=0; __i<4; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2296 static simd_double4x2 SIMD_CFUNC simd_mul(simd_double3x2 __x, simd_double4x3 __y) { simd_double4x2 __r; for (int __i=0; __i<4; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2297 static   simd_half4x3 SIMD_CFUNC simd_mul(  simd_half3x3 __x,   simd_half4x3 __y) {   simd_half4x3 __r; for (int __i=0; __i<4; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2298 static  simd_float4x3 SIMD_CFUNC simd_mul( simd_float3x3 __x,  simd_float4x3 __y) {  simd_float4x3 __r; for (int __i=0; __i<4; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2299 static simd_double4x3 SIMD_CFUNC simd_mul(simd_double3x3 __x, simd_double4x3 __y) { simd_double4x3 __r; for (int __i=0; __i<4; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2300 static   simd_half4x4 SIMD_CFUNC simd_mul(  simd_half3x4 __x,   simd_half4x3 __y) {   simd_half4x4 __r; for (int __i=0; __i<4; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2301 static  simd_float4x4 SIMD_CFUNC simd_mul( simd_float3x4 __x,  simd_float4x3 __y) {  simd_float4x4 __r; for (int __i=0; __i<4; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2302 static simd_double4x4 SIMD_CFUNC simd_mul(simd_double3x4 __x, simd_double4x3 __y) { simd_double4x4 __r; for (int __i=0; __i<4; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2303 static   simd_half4x2 SIMD_CFUNC simd_mul(  simd_half4x2 __x,   simd_half4x4 __y) {   simd_half4x2 __r; for (int __i=0; __i<4; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2304 static  simd_float4x2 SIMD_CFUNC simd_mul( simd_float4x2 __x,  simd_float4x4 __y) {  simd_float4x2 __r; for (int __i=0; __i<4; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2305 static simd_double4x2 SIMD_CFUNC simd_mul(simd_double4x2 __x, simd_double4x4 __y) { simd_double4x2 __r; for (int __i=0; __i<4; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2306 static   simd_half4x3 SIMD_CFUNC simd_mul(  simd_half4x3 __x,   simd_half4x4 __y) {   simd_half4x3 __r; for (int __i=0; __i<4; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2307 static  simd_float4x3 SIMD_CFUNC simd_mul( simd_float4x3 __x,  simd_float4x4 __y) {  simd_float4x3 __r; for (int __i=0; __i<4; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2308 static simd_double4x3 SIMD_CFUNC simd_mul(simd_double4x3 __x, simd_double4x4 __y) { simd_double4x3 __r; for (int __i=0; __i<4; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2309 static   simd_half4x4 SIMD_CFUNC simd_mul(  simd_half4x4 __x,   simd_half4x4 __y) {   simd_half4x4 __r; for (int __i=0; __i<4; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2310 static  simd_float4x4 SIMD_CFUNC simd_mul( simd_float4x4 __x,  simd_float4x4 __y) {  simd_float4x4 __r; for (int __i=0; __i<4; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2311 static simd_double4x4 SIMD_CFUNC simd_mul(simd_double4x4 __x, simd_double4x4 __y) { simd_double4x4 __r; for (int __i=0; __i<4; ++__i) __r.columns[__i] = simd_mul(__x, __y.columns[__i]); return __r; }
   2312   
   /* matrix_multiply(matrix, vector): same operation under the
    * matrix_multiply name.  Each overload forwards both arguments, in
    * order, to the simd_mul overload with the identical signature and
    * returns its result unchanged. */
   2313 static   simd_half2 SIMD_CFUNC matrix_multiply(  simd_half2x2 __x,   simd_half2 __y) { simd_half2 __product = simd_mul(__x, __y); return __product; }
   2314 static   simd_half3 SIMD_CFUNC matrix_multiply(  simd_half2x3 __x,   simd_half2 __y) { simd_half3 __product = simd_mul(__x, __y); return __product; }
   2315 static   simd_half4 SIMD_CFUNC matrix_multiply(  simd_half2x4 __x,   simd_half2 __y) { simd_half4 __product = simd_mul(__x, __y); return __product; }
   2316 static   simd_half2 SIMD_CFUNC matrix_multiply(  simd_half3x2 __x,   simd_half3 __y) { simd_half2 __product = simd_mul(__x, __y); return __product; }
   2317 static   simd_half3 SIMD_CFUNC matrix_multiply(  simd_half3x3 __x,   simd_half3 __y) { simd_half3 __product = simd_mul(__x, __y); return __product; }
   2318 static   simd_half4 SIMD_CFUNC matrix_multiply(  simd_half3x4 __x,   simd_half3 __y) { simd_half4 __product = simd_mul(__x, __y); return __product; }
   2319 static   simd_half2 SIMD_CFUNC matrix_multiply(  simd_half4x2 __x,   simd_half4 __y) { simd_half2 __product = simd_mul(__x, __y); return __product; }
   2320 static   simd_half3 SIMD_CFUNC matrix_multiply(  simd_half4x3 __x,   simd_half4 __y) { simd_half3 __product = simd_mul(__x, __y); return __product; }
   2321 static   simd_half4 SIMD_CFUNC matrix_multiply(  simd_half4x4 __x,   simd_half4 __y) { simd_half4 __product = simd_mul(__x, __y); return __product; }
   2322 static  simd_float2 SIMD_CFUNC matrix_multiply( simd_float2x2 __x,  simd_float2 __y) { simd_float2 __product = simd_mul(__x, __y); return __product; }
   2323 static  simd_float3 SIMD_CFUNC matrix_multiply( simd_float2x3 __x,  simd_float2 __y) { simd_float3 __product = simd_mul(__x, __y); return __product; }
   2324 static  simd_float4 SIMD_CFUNC matrix_multiply( simd_float2x4 __x,  simd_float2 __y) { simd_float4 __product = simd_mul(__x, __y); return __product; }
   2325 static  simd_float2 SIMD_CFUNC matrix_multiply( simd_float3x2 __x,  simd_float3 __y) { simd_float2 __product = simd_mul(__x, __y); return __product; }
   2326 static  simd_float3 SIMD_CFUNC matrix_multiply( simd_float3x3 __x,  simd_float3 __y) { simd_float3 __product = simd_mul(__x, __y); return __product; }
   2327 static  simd_float4 SIMD_CFUNC matrix_multiply( simd_float3x4 __x,  simd_float3 __y) { simd_float4 __product = simd_mul(__x, __y); return __product; }
   2328 static  simd_float2 SIMD_CFUNC matrix_multiply( simd_float4x2 __x,  simd_float4 __y) { simd_float2 __product = simd_mul(__x, __y); return __product; }
   2329 static  simd_float3 SIMD_CFUNC matrix_multiply( simd_float4x3 __x,  simd_float4 __y) { simd_float3 __product = simd_mul(__x, __y); return __product; }
   2330 static  simd_float4 SIMD_CFUNC matrix_multiply( simd_float4x4 __x,  simd_float4 __y) { simd_float4 __product = simd_mul(__x, __y); return __product; }
   2331 static simd_double2 SIMD_CFUNC matrix_multiply(simd_double2x2 __x, simd_double2 __y) { simd_double2 __product = simd_mul(__x, __y); return __product; }
   2332 static simd_double3 SIMD_CFUNC matrix_multiply(simd_double2x3 __x, simd_double2 __y) { simd_double3 __product = simd_mul(__x, __y); return __product; }
   2333 static simd_double4 SIMD_CFUNC matrix_multiply(simd_double2x4 __x, simd_double2 __y) { simd_double4 __product = simd_mul(__x, __y); return __product; }
   2334 static simd_double2 SIMD_CFUNC matrix_multiply(simd_double3x2 __x, simd_double3 __y) { simd_double2 __product = simd_mul(__x, __y); return __product; }
   2335 static simd_double3 SIMD_CFUNC matrix_multiply(simd_double3x3 __x, simd_double3 __y) { simd_double3 __product = simd_mul(__x, __y); return __product; }
   2336 static simd_double4 SIMD_CFUNC matrix_multiply(simd_double3x4 __x, simd_double3 __y) { simd_double4 __product = simd_mul(__x, __y); return __product; }
   2337 static simd_double2 SIMD_CFUNC matrix_multiply(simd_double4x2 __x, simd_double4 __y) { simd_double2 __product = simd_mul(__x, __y); return __product; }
   2338 static simd_double3 SIMD_CFUNC matrix_multiply(simd_double4x3 __x, simd_double4 __y) { simd_double3 __product = simd_mul(__x, __y); return __product; }
   2339 static simd_double4 SIMD_CFUNC matrix_multiply(simd_double4x4 __x, simd_double4 __y) { simd_double4 __product = simd_mul(__x, __y); return __product; }
   2340   
   /* matrix_multiply(vector, matrix): same operation under the
    * matrix_multiply name.  Each overload forwards both arguments, in
    * order, to the simd_mul overload with the identical signature and
    * returns its result unchanged. */
   2341 static   simd_half2 SIMD_CFUNC matrix_multiply(  simd_half2 __x,   simd_half2x2 __y) { simd_half2 __product = simd_mul(__x, __y); return __product; }
   2342 static   simd_half3 SIMD_CFUNC matrix_multiply(  simd_half2 __x,   simd_half3x2 __y) { simd_half3 __product = simd_mul(__x, __y); return __product; }
   2343 static   simd_half4 SIMD_CFUNC matrix_multiply(  simd_half2 __x,   simd_half4x2 __y) { simd_half4 __product = simd_mul(__x, __y); return __product; }
   2344 static   simd_half2 SIMD_CFUNC matrix_multiply(  simd_half3 __x,   simd_half2x3 __y) { simd_half2 __product = simd_mul(__x, __y); return __product; }
   2345 static   simd_half3 SIMD_CFUNC matrix_multiply(  simd_half3 __x,   simd_half3x3 __y) { simd_half3 __product = simd_mul(__x, __y); return __product; }
   2346 static   simd_half4 SIMD_CFUNC matrix_multiply(  simd_half3 __x,   simd_half4x3 __y) { simd_half4 __product = simd_mul(__x, __y); return __product; }
   2347 static   simd_half2 SIMD_CFUNC matrix_multiply(  simd_half4 __x,   simd_half2x4 __y) { simd_half2 __product = simd_mul(__x, __y); return __product; }
   2348 static   simd_half3 SIMD_CFUNC matrix_multiply(  simd_half4 __x,   simd_half3x4 __y) { simd_half3 __product = simd_mul(__x, __y); return __product; }
   2349 static   simd_half4 SIMD_CFUNC matrix_multiply(  simd_half4 __x,   simd_half4x4 __y) { simd_half4 __product = simd_mul(__x, __y); return __product; }
   2350 static  simd_float2 SIMD_CFUNC matrix_multiply( simd_float2 __x,  simd_float2x2 __y) { simd_float2 __product = simd_mul(__x, __y); return __product; }
   2351 static  simd_float3 SIMD_CFUNC matrix_multiply( simd_float2 __x,  simd_float3x2 __y) { simd_float3 __product = simd_mul(__x, __y); return __product; }
   2352 static  simd_float4 SIMD_CFUNC matrix_multiply( simd_float2 __x,  simd_float4x2 __y) { simd_float4 __product = simd_mul(__x, __y); return __product; }
   2353 static  simd_float2 SIMD_CFUNC matrix_multiply( simd_float3 __x,  simd_float2x3 __y) { simd_float2 __product = simd_mul(__x, __y); return __product; }
   2354 static  simd_float3 SIMD_CFUNC matrix_multiply( simd_float3 __x,  simd_float3x3 __y) { simd_float3 __product = simd_mul(__x, __y); return __product; }
   2355 static  simd_float4 SIMD_CFUNC matrix_multiply( simd_float3 __x,  simd_float4x3 __y) { simd_float4 __product = simd_mul(__x, __y); return __product; }
   2356 static  simd_float2 SIMD_CFUNC matrix_multiply( simd_float4 __x,  simd_float2x4 __y) { simd_float2 __product = simd_mul(__x, __y); return __product; }
   2357 static  simd_float3 SIMD_CFUNC matrix_multiply( simd_float4 __x,  simd_float3x4 __y) { simd_float3 __product = simd_mul(__x, __y); return __product; }
   2358 static  simd_float4 SIMD_CFUNC matrix_multiply( simd_float4 __x,  simd_float4x4 __y) { simd_float4 __product = simd_mul(__x, __y); return __product; }
   2359 static simd_double2 SIMD_CFUNC matrix_multiply(simd_double2 __x, simd_double2x2 __y) { simd_double2 __product = simd_mul(__x, __y); return __product; }
   2360 static simd_double3 SIMD_CFUNC matrix_multiply(simd_double2 __x, simd_double3x2 __y) { simd_double3 __product = simd_mul(__x, __y); return __product; }
   2361 static simd_double4 SIMD_CFUNC matrix_multiply(simd_double2 __x, simd_double4x2 __y) { simd_double4 __product = simd_mul(__x, __y); return __product; }
   2362 static simd_double2 SIMD_CFUNC matrix_multiply(simd_double3 __x, simd_double2x3 __y) { simd_double2 __product = simd_mul(__x, __y); return __product; }
   2363 static simd_double3 SIMD_CFUNC matrix_multiply(simd_double3 __x, simd_double3x3 __y) { simd_double3 __product = simd_mul(__x, __y); return __product; }
   2364 static simd_double4 SIMD_CFUNC matrix_multiply(simd_double3 __x, simd_double4x3 __y) { simd_double4 __product = simd_mul(__x, __y); return __product; }
   2365 static simd_double2 SIMD_CFUNC matrix_multiply(simd_double4 __x, simd_double2x4 __y) { simd_double2 __product = simd_mul(__x, __y); return __product; }
   2366 static simd_double3 SIMD_CFUNC matrix_multiply(simd_double4 __x, simd_double3x4 __y) { simd_double3 __product = simd_mul(__x, __y); return __product; }
   2367 static simd_double4 SIMD_CFUNC matrix_multiply(simd_double4 __x, simd_double4x4 __y) { simd_double4 __product = simd_mul(__x, __y); return __product; }
   2368 
   /* matrix_multiply(matrix, matrix), overloads whose right-hand operand has
    * two columns: same operation under the matrix_multiply name.  Each
    * overload forwards both arguments, in order, to the simd_mul overload
    * with the identical signature and returns its result unchanged. */
   2369 static   simd_half2x2 SIMD_CFUNC matrix_multiply(  simd_half2x2 __x,   simd_half2x2 __y) { simd_half2x2 __product = simd_mul(__x, __y); return __product; }
   2370 static  simd_float2x2 SIMD_CFUNC matrix_multiply( simd_float2x2 __x,  simd_float2x2 __y) { simd_float2x2 __product = simd_mul(__x, __y); return __product; }
   2371 static simd_double2x2 SIMD_CFUNC matrix_multiply(simd_double2x2 __x, simd_double2x2 __y) { simd_double2x2 __product = simd_mul(__x, __y); return __product; }
   2372 static   simd_half2x3 SIMD_CFUNC matrix_multiply(  simd_half2x3 __x,   simd_half2x2 __y) { simd_half2x3 __product = simd_mul(__x, __y); return __product; }
   2373 static  simd_float2x3 SIMD_CFUNC matrix_multiply( simd_float2x3 __x,  simd_float2x2 __y) { simd_float2x3 __product = simd_mul(__x, __y); return __product; }
   2374 static simd_double2x3 SIMD_CFUNC matrix_multiply(simd_double2x3 __x, simd_double2x2 __y) { simd_double2x3 __product = simd_mul(__x, __y); return __product; }
   2375 static   simd_half2x4 SIMD_CFUNC matrix_multiply(  simd_half2x4 __x,   simd_half2x2 __y) { simd_half2x4 __product = simd_mul(__x, __y); return __product; }
   2376 static  simd_float2x4 SIMD_CFUNC matrix_multiply( simd_float2x4 __x,  simd_float2x2 __y) { simd_float2x4 __product = simd_mul(__x, __y); return __product; }
   2377 static simd_double2x4 SIMD_CFUNC matrix_multiply(simd_double2x4 __x, simd_double2x2 __y) { simd_double2x4 __product = simd_mul(__x, __y); return __product; }
   2378 static   simd_half2x2 SIMD_CFUNC matrix_multiply(  simd_half3x2 __x,   simd_half2x3 __y) { simd_half2x2 __product = simd_mul(__x, __y); return __product; }
   2379 static  simd_float2x2 SIMD_CFUNC matrix_multiply( simd_float3x2 __x,  simd_float2x3 __y) { simd_float2x2 __product = simd_mul(__x, __y); return __product; }
   2380 static simd_double2x2 SIMD_CFUNC matrix_multiply(simd_double3x2 __x, simd_double2x3 __y) { simd_double2x2 __product = simd_mul(__x, __y); return __product; }
   2381 static   simd_half2x3 SIMD_CFUNC matrix_multiply(  simd_half3x3 __x,   simd_half2x3 __y) { simd_half2x3 __product = simd_mul(__x, __y); return __product; }
   2382 static  simd_float2x3 SIMD_CFUNC matrix_multiply( simd_float3x3 __x,  simd_float2x3 __y) { simd_float2x3 __product = simd_mul(__x, __y); return __product; }
   2383 static simd_double2x3 SIMD_CFUNC matrix_multiply(simd_double3x3 __x, simd_double2x3 __y) { simd_double2x3 __product = simd_mul(__x, __y); return __product; }
   2384 static   simd_half2x4 SIMD_CFUNC matrix_multiply(  simd_half3x4 __x,   simd_half2x3 __y) { simd_half2x4 __product = simd_mul(__x, __y); return __product; }
   2385 static  simd_float2x4 SIMD_CFUNC matrix_multiply( simd_float3x4 __x,  simd_float2x3 __y) { simd_float2x4 __product = simd_mul(__x, __y); return __product; }
   2386 static simd_double2x4 SIMD_CFUNC matrix_multiply(simd_double3x4 __x, simd_double2x3 __y) { simd_double2x4 __product = simd_mul(__x, __y); return __product; }
   2387 static   simd_half2x2 SIMD_CFUNC matrix_multiply(  simd_half4x2 __x,   simd_half2x4 __y) { simd_half2x2 __product = simd_mul(__x, __y); return __product; }
   2388 static  simd_float2x2 SIMD_CFUNC matrix_multiply( simd_float4x2 __x,  simd_float2x4 __y) { simd_float2x2 __product = simd_mul(__x, __y); return __product; }
   2389 static simd_double2x2 SIMD_CFUNC matrix_multiply(simd_double4x2 __x, simd_double2x4 __y) { simd_double2x2 __product = simd_mul(__x, __y); return __product; }
   2390 static   simd_half2x3 SIMD_CFUNC matrix_multiply(  simd_half4x3 __x,   simd_half2x4 __y) { simd_half2x3 __product = simd_mul(__x, __y); return __product; }
   2391 static  simd_float2x3 SIMD_CFUNC matrix_multiply( simd_float4x3 __x,  simd_float2x4 __y) { simd_float2x3 __product = simd_mul(__x, __y); return __product; }
   2392 static simd_double2x3 SIMD_CFUNC matrix_multiply(simd_double4x3 __x, simd_double2x4 __y) { simd_double2x3 __product = simd_mul(__x, __y); return __product; }
   2393 static   simd_half2x4 SIMD_CFUNC matrix_multiply(  simd_half4x4 __x,   simd_half2x4 __y) { return simd_mul(__x, __y); }
   2394 static  simd_float2x4 SIMD_CFUNC matrix_multiply( simd_float4x4 __x,  simd_float2x4 __y) { return simd_mul(__x, __y); }
   2395 static simd_double2x4 SIMD_CFUNC matrix_multiply(simd_double4x4 __x, simd_double2x4 __y) { return simd_mul(__x, __y); }
   2396   
   2397 static   simd_half3x2 SIMD_CFUNC matrix_multiply(  simd_half2x2 __x,   simd_half3x2 __y) { return simd_mul(__x, __y); }
   2398 static  simd_float3x2 SIMD_CFUNC matrix_multiply( simd_float2x2 __x,  simd_float3x2 __y) { return simd_mul(__x, __y); }
   2399 static simd_double3x2 SIMD_CFUNC matrix_multiply(simd_double2x2 __x, simd_double3x2 __y) { return simd_mul(__x, __y); }
   2400 static   simd_half3x3 SIMD_CFUNC matrix_multiply(  simd_half2x3 __x,   simd_half3x2 __y) { return simd_mul(__x, __y); }
   2401 static  simd_float3x3 SIMD_CFUNC matrix_multiply( simd_float2x3 __x,  simd_float3x2 __y) { return simd_mul(__x, __y); }
   2402 static simd_double3x3 SIMD_CFUNC matrix_multiply(simd_double2x3 __x, simd_double3x2 __y) { return simd_mul(__x, __y); }
   2403 static   simd_half3x4 SIMD_CFUNC matrix_multiply(  simd_half2x4 __x,   simd_half3x2 __y) { return simd_mul(__x, __y); }
   2404 static  simd_float3x4 SIMD_CFUNC matrix_multiply( simd_float2x4 __x,  simd_float3x2 __y) { return simd_mul(__x, __y); }
   2405 static simd_double3x4 SIMD_CFUNC matrix_multiply(simd_double2x4 __x, simd_double3x2 __y) { return simd_mul(__x, __y); }
   2406 static   simd_half3x2 SIMD_CFUNC matrix_multiply(  simd_half3x2 __x,   simd_half3x3 __y) { return simd_mul(__x, __y); }
   2407 static  simd_float3x2 SIMD_CFUNC matrix_multiply( simd_float3x2 __x,  simd_float3x3 __y) { return simd_mul(__x, __y); }
   2408 static simd_double3x2 SIMD_CFUNC matrix_multiply(simd_double3x2 __x, simd_double3x3 __y) { return simd_mul(__x, __y); }
   2409 static   simd_half3x3 SIMD_CFUNC matrix_multiply(  simd_half3x3 __x,   simd_half3x3 __y) { return simd_mul(__x, __y); }
   2410 static  simd_float3x3 SIMD_CFUNC matrix_multiply( simd_float3x3 __x,  simd_float3x3 __y) { return simd_mul(__x, __y); }
   2411 static simd_double3x3 SIMD_CFUNC matrix_multiply(simd_double3x3 __x, simd_double3x3 __y) { return simd_mul(__x, __y); }
   2412 static   simd_half3x4 SIMD_CFUNC matrix_multiply(  simd_half3x4 __x,   simd_half3x3 __y) { return simd_mul(__x, __y); }
   2413 static  simd_float3x4 SIMD_CFUNC matrix_multiply( simd_float3x4 __x,  simd_float3x3 __y) { return simd_mul(__x, __y); }
   2414 static simd_double3x4 SIMD_CFUNC matrix_multiply(simd_double3x4 __x, simd_double3x3 __y) { return simd_mul(__x, __y); }
   2415 static   simd_half3x2 SIMD_CFUNC matrix_multiply(  simd_half4x2 __x,   simd_half3x4 __y) { return simd_mul(__x, __y); }
   2416 static  simd_float3x2 SIMD_CFUNC matrix_multiply( simd_float4x2 __x,  simd_float3x4 __y) { return simd_mul(__x, __y); }
   2417 static simd_double3x2 SIMD_CFUNC matrix_multiply(simd_double4x2 __x, simd_double3x4 __y) { return simd_mul(__x, __y); }
   2418 static   simd_half3x3 SIMD_CFUNC matrix_multiply(  simd_half4x3 __x,   simd_half3x4 __y) { return simd_mul(__x, __y); }
   2419 static  simd_float3x3 SIMD_CFUNC matrix_multiply( simd_float4x3 __x,  simd_float3x4 __y) { return simd_mul(__x, __y); }
   2420 static simd_double3x3 SIMD_CFUNC matrix_multiply(simd_double4x3 __x, simd_double3x4 __y) { return simd_mul(__x, __y); }
   2421 static   simd_half3x4 SIMD_CFUNC matrix_multiply(  simd_half4x4 __x,   simd_half3x4 __y) { return simd_mul(__x, __y); }
   2422 static  simd_float3x4 SIMD_CFUNC matrix_multiply( simd_float4x4 __x,  simd_float3x4 __y) { return simd_mul(__x, __y); }
   2423 static simd_double3x4 SIMD_CFUNC matrix_multiply(simd_double4x4 __x, simd_double3x4 __y) { return simd_mul(__x, __y); }
   2424   
   2425 static   simd_half4x2 SIMD_CFUNC matrix_multiply(  simd_half2x2 __x,   simd_half4x2 __y) { return simd_mul(__x, __y); }
   2426 static  simd_float4x2 SIMD_CFUNC matrix_multiply( simd_float2x2 __x,  simd_float4x2 __y) { return simd_mul(__x, __y); }
   2427 static simd_double4x2 SIMD_CFUNC matrix_multiply(simd_double2x2 __x, simd_double4x2 __y) { return simd_mul(__x, __y); }
   2428 static   simd_half4x3 SIMD_CFUNC matrix_multiply(  simd_half2x3 __x,   simd_half4x2 __y) { return simd_mul(__x, __y); }
   2429 static  simd_float4x3 SIMD_CFUNC matrix_multiply( simd_float2x3 __x,  simd_float4x2 __y) { return simd_mul(__x, __y); }
   2430 static simd_double4x3 SIMD_CFUNC matrix_multiply(simd_double2x3 __x, simd_double4x2 __y) { return simd_mul(__x, __y); }
   2431 static   simd_half4x4 SIMD_CFUNC matrix_multiply(  simd_half2x4 __x,   simd_half4x2 __y) { return simd_mul(__x, __y); }
   2432 static  simd_float4x4 SIMD_CFUNC matrix_multiply( simd_float2x4 __x,  simd_float4x2 __y) { return simd_mul(__x, __y); }
   2433 static simd_double4x4 SIMD_CFUNC matrix_multiply(simd_double2x4 __x, simd_double4x2 __y) { return simd_mul(__x, __y); }
   2434 static   simd_half4x2 SIMD_CFUNC matrix_multiply(  simd_half3x2 __x,   simd_half4x3 __y) { return simd_mul(__x, __y); }
   2435 static  simd_float4x2 SIMD_CFUNC matrix_multiply( simd_float3x2 __x,  simd_float4x3 __y) { return simd_mul(__x, __y); }
   2436 static simd_double4x2 SIMD_CFUNC matrix_multiply(simd_double3x2 __x, simd_double4x3 __y) { return simd_mul(__x, __y); }
   2437 static   simd_half4x3 SIMD_CFUNC matrix_multiply(  simd_half3x3 __x,   simd_half4x3 __y) { return simd_mul(__x, __y); }
   2438 static  simd_float4x3 SIMD_CFUNC matrix_multiply( simd_float3x3 __x,  simd_float4x3 __y) { return simd_mul(__x, __y); }
   2439 static simd_double4x3 SIMD_CFUNC matrix_multiply(simd_double3x3 __x, simd_double4x3 __y) { return simd_mul(__x, __y); }
   2440 static   simd_half4x4 SIMD_CFUNC matrix_multiply(  simd_half3x4 __x,   simd_half4x3 __y) { return simd_mul(__x, __y); }
   2441 static  simd_float4x4 SIMD_CFUNC matrix_multiply( simd_float3x4 __x,  simd_float4x3 __y) { return simd_mul(__x, __y); }
   2442 static simd_double4x4 SIMD_CFUNC matrix_multiply(simd_double3x4 __x, simd_double4x3 __y) { return simd_mul(__x, __y); }
   2443 static   simd_half4x2 SIMD_CFUNC matrix_multiply(  simd_half4x2 __x,   simd_half4x4 __y) { return simd_mul(__x, __y); }
   2444 static  simd_float4x2 SIMD_CFUNC matrix_multiply( simd_float4x2 __x,  simd_float4x4 __y) { return simd_mul(__x, __y); }
   2445 static simd_double4x2 SIMD_CFUNC matrix_multiply(simd_double4x2 __x, simd_double4x4 __y) { return simd_mul(__x, __y); }
   2446 static   simd_half4x3 SIMD_CFUNC matrix_multiply(  simd_half4x3 __x,   simd_half4x4 __y) { return simd_mul(__x, __y); }
   2447 static  simd_float4x3 SIMD_CFUNC matrix_multiply( simd_float4x3 __x,  simd_float4x4 __y) { return simd_mul(__x, __y); }
   2448 static simd_double4x3 SIMD_CFUNC matrix_multiply(simd_double4x3 __x, simd_double4x4 __y) { return simd_mul(__x, __y); }
   2449 static   simd_half4x4 SIMD_CFUNC matrix_multiply(  simd_half4x4 __x,   simd_half4x4 __y) { return simd_mul(__x, __y); }
   2450 static  simd_float4x4 SIMD_CFUNC matrix_multiply( simd_float4x4 __x,  simd_float4x4 __y) { return simd_mul(__x, __y); }
   2451 static simd_double4x4 SIMD_CFUNC matrix_multiply(simd_double4x4 __x, simd_double4x4 __y) { return simd_mul(__x, __y); }
   2452 
   2453 static simd_bool SIMD_CFUNC simd_equal(simd_half2x2 __x, simd_half2x2 __y) {
   2454     return simd_all((__x.columns[0] == __y.columns[0]) &
   2455                       (__x.columns[1] == __y.columns[1]));
   2456 }
   2457 static simd_bool SIMD_CFUNC simd_equal(simd_half2x3 __x, simd_half2x3 __y) {
   2458     return simd_all((__x.columns[0] == __y.columns[0]) &
   2459                       (__x.columns[1] == __y.columns[1]));
   2460 }
   2461 static simd_bool SIMD_CFUNC simd_equal(simd_half2x4 __x, simd_half2x4 __y) {
   2462     return simd_all((__x.columns[0] == __y.columns[0]) &
   2463                       (__x.columns[1] == __y.columns[1]));
   2464 }
   2465 static simd_bool SIMD_CFUNC simd_equal(simd_half3x2 __x, simd_half3x2 __y) {
   2466     return simd_all((__x.columns[0] == __y.columns[0]) &
   2467                       (__x.columns[1] == __y.columns[1]) &
   2468                       (__x.columns[2] == __y.columns[2]));
   2469 }
   2470 static simd_bool SIMD_CFUNC simd_equal(simd_half3x3 __x, simd_half3x3 __y) {
   2471     return simd_all((__x.columns[0] == __y.columns[0]) &
   2472                       (__x.columns[1] == __y.columns[1]) &
   2473                       (__x.columns[2] == __y.columns[2]));
   2474 }
   2475 static simd_bool SIMD_CFUNC simd_equal(simd_half3x4 __x, simd_half3x4 __y) {
   2476     return simd_all((__x.columns[0] == __y.columns[0]) &
   2477                       (__x.columns[1] == __y.columns[1]) &
   2478                       (__x.columns[2] == __y.columns[2]));
   2479 }
   2480 static simd_bool SIMD_CFUNC simd_equal(simd_half4x2 __x, simd_half4x2 __y) {
   2481     return simd_all((__x.columns[0] == __y.columns[0]) &
   2482                       (__x.columns[1] == __y.columns[1]) &
   2483                       (__x.columns[2] == __y.columns[2]) &
   2484                       (__x.columns[3] == __y.columns[3]));
   2485 }
   2486 static simd_bool SIMD_CFUNC simd_equal(simd_half4x3 __x, simd_half4x3 __y) {
   2487     return simd_all((__x.columns[0] == __y.columns[0]) &
   2488                       (__x.columns[1] == __y.columns[1]) &
   2489                       (__x.columns[2] == __y.columns[2]) &
   2490                       (__x.columns[3] == __y.columns[3]));
   2491 }
   2492 static simd_bool SIMD_CFUNC simd_equal(simd_half4x4 __x, simd_half4x4 __y) {
   2493     return simd_all((__x.columns[0] == __y.columns[0]) &
   2494                       (__x.columns[1] == __y.columns[1]) &
   2495                       (__x.columns[2] == __y.columns[2]) &
   2496                       (__x.columns[3] == __y.columns[3]));
   2497 }
   2498 static simd_bool SIMD_CFUNC simd_equal(simd_float2x2 __x, simd_float2x2 __y) {
   2499     return simd_all((__x.columns[0] == __y.columns[0]) &
   2500                       (__x.columns[1] == __y.columns[1]));
   2501 }
   2502 static simd_bool SIMD_CFUNC simd_equal(simd_float2x3 __x, simd_float2x3 __y) {
   2503     return simd_all((__x.columns[0] == __y.columns[0]) &
   2504                       (__x.columns[1] == __y.columns[1]));
   2505 }
   2506 static simd_bool SIMD_CFUNC simd_equal(simd_float2x4 __x, simd_float2x4 __y) {
   2507     return simd_all((__x.columns[0] == __y.columns[0]) &
   2508                       (__x.columns[1] == __y.columns[1]));
   2509 }
   2510 static simd_bool SIMD_CFUNC simd_equal(simd_float3x2 __x, simd_float3x2 __y) {
   2511     return simd_all((__x.columns[0] == __y.columns[0]) &
   2512                       (__x.columns[1] == __y.columns[1]) &
   2513                       (__x.columns[2] == __y.columns[2]));
   2514 }
   2515 static simd_bool SIMD_CFUNC simd_equal(simd_float3x3 __x, simd_float3x3 __y) {
   2516     return simd_all((__x.columns[0] == __y.columns[0]) &
   2517                       (__x.columns[1] == __y.columns[1]) &
   2518                       (__x.columns[2] == __y.columns[2]));
   2519 }
   2520 static simd_bool SIMD_CFUNC simd_equal(simd_float3x4 __x, simd_float3x4 __y) {
   2521     return simd_all((__x.columns[0] == __y.columns[0]) &
   2522                       (__x.columns[1] == __y.columns[1]) &
   2523                       (__x.columns[2] == __y.columns[2]));
   2524 }
   2525 static simd_bool SIMD_CFUNC simd_equal(simd_float4x2 __x, simd_float4x2 __y) {
   2526     return simd_all((__x.columns[0] == __y.columns[0]) &
   2527                       (__x.columns[1] == __y.columns[1]) &
   2528                       (__x.columns[2] == __y.columns[2]) &
   2529                       (__x.columns[3] == __y.columns[3]));
   2530 }
   2531 static simd_bool SIMD_CFUNC simd_equal(simd_float4x3 __x, simd_float4x3 __y) {
   2532     return simd_all((__x.columns[0] == __y.columns[0]) &
   2533                       (__x.columns[1] == __y.columns[1]) &
   2534                       (__x.columns[2] == __y.columns[2]) &
   2535                       (__x.columns[3] == __y.columns[3]));
   2536 }
   2537 static simd_bool SIMD_CFUNC simd_equal(simd_float4x4 __x, simd_float4x4 __y) {
   2538     return simd_all((__x.columns[0] == __y.columns[0]) &
   2539                       (__x.columns[1] == __y.columns[1]) &
   2540                       (__x.columns[2] == __y.columns[2]) &
   2541                       (__x.columns[3] == __y.columns[3]));
   2542 }
   2543 static simd_bool SIMD_CFUNC simd_equal(simd_double2x2 __x, simd_double2x2 __y) {
   2544     return simd_all((__x.columns[0] == __y.columns[0]) &
   2545                       (__x.columns[1] == __y.columns[1]));
   2546 }
   2547 static simd_bool SIMD_CFUNC simd_equal(simd_double2x3 __x, simd_double2x3 __y) {
   2548     return simd_all((__x.columns[0] == __y.columns[0]) &
   2549                       (__x.columns[1] == __y.columns[1]));
   2550 }
   2551 static simd_bool SIMD_CFUNC simd_equal(simd_double2x4 __x, simd_double2x4 __y) {
   2552     return simd_all((__x.columns[0] == __y.columns[0]) &
   2553                       (__x.columns[1] == __y.columns[1]));
   2554 }
   2555 static simd_bool SIMD_CFUNC simd_equal(simd_double3x2 __x, simd_double3x2 __y) {
   2556     return simd_all((__x.columns[0] == __y.columns[0]) &
   2557                       (__x.columns[1] == __y.columns[1]) &
   2558                       (__x.columns[2] == __y.columns[2]));
   2559 }
   2560 static simd_bool SIMD_CFUNC simd_equal(simd_double3x3 __x, simd_double3x3 __y) {
   2561     return simd_all((__x.columns[0] == __y.columns[0]) &
   2562                       (__x.columns[1] == __y.columns[1]) &
   2563                       (__x.columns[2] == __y.columns[2]));
   2564 }
   2565 static simd_bool SIMD_CFUNC simd_equal(simd_double3x4 __x, simd_double3x4 __y) {
   2566     return simd_all((__x.columns[0] == __y.columns[0]) &
   2567                       (__x.columns[1] == __y.columns[1]) &
   2568                       (__x.columns[2] == __y.columns[2]));
   2569 }
   2570 static simd_bool SIMD_CFUNC simd_equal(simd_double4x2 __x, simd_double4x2 __y) {
   2571     return simd_all((__x.columns[0] == __y.columns[0]) &
   2572                       (__x.columns[1] == __y.columns[1]) &
   2573                       (__x.columns[2] == __y.columns[2]) &
   2574                       (__x.columns[3] == __y.columns[3]));
   2575 }
   2576 static simd_bool SIMD_CFUNC simd_equal(simd_double4x3 __x, simd_double4x3 __y) {
   2577     return simd_all((__x.columns[0] == __y.columns[0]) &
   2578                       (__x.columns[1] == __y.columns[1]) &
   2579                       (__x.columns[2] == __y.columns[2]) &
   2580                       (__x.columns[3] == __y.columns[3]));
   2581 }
   2582 static simd_bool SIMD_CFUNC simd_equal(simd_double4x4 __x, simd_double4x4 __y) {
   2583     return simd_all((__x.columns[0] == __y.columns[0]) &
   2584                       (__x.columns[1] == __y.columns[1]) &
   2585                       (__x.columns[2] == __y.columns[2]) &
   2586                       (__x.columns[3] == __y.columns[3]));
   2587 }
   2588 
   2589 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_half2x2 __x, simd_half2x2 __y, _Float16 __tol) {
   2590     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) &
   2591                     (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol));
   2592 }
   2593 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_half2x3 __x, simd_half2x3 __y, _Float16 __tol) {
   2594     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) &
   2595                     (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol));
   2596 }
   2597 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_half2x4 __x, simd_half2x4 __y, _Float16 __tol) {
   2598     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) &
   2599                     (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol));
   2600 }
   2601 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_half3x2 __x, simd_half3x2 __y, _Float16 __tol) {
   2602     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) &
   2603                     (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) &
   2604                     (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol));
   2605 }
   2606 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_half3x3 __x, simd_half3x3 __y, _Float16 __tol) {
   2607     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) &
   2608                     (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) &
   2609                     (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol));
   2610 }
   2611 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_half3x4 __x, simd_half3x4 __y, _Float16 __tol) {
   2612     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) &
   2613                     (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) &
   2614                     (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol));
   2615 }
   2616 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_half4x2 __x, simd_half4x2 __y, _Float16 __tol) {
   2617     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) &
   2618                     (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) &
   2619                     (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol) &
   2620                     (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol));
   2621 }
   2622 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_half4x3 __x, simd_half4x3 __y, _Float16 __tol) {
   2623     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) &
   2624                     (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) &
   2625                     (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol) &
   2626                     (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol));
   2627 }
   2628 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_half4x4 __x, simd_half4x4 __y, _Float16 __tol) {
   2629     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) &
   2630                     (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) &
   2631                     (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol) &
   2632                     (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol));
   2633 }
   2634 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float2x2 __x, simd_float2x2 __y, float __tol) {
   2635     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) &
   2636                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol));
   2637 }
   2638 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float2x3 __x, simd_float2x3 __y, float __tol) {
   2639     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) &
   2640                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol));
   2641 }
   2642 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float2x4 __x, simd_float2x4 __y, float __tol) {
   2643     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) &
   2644                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol));
   2645 }
   2646 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float3x2 __x, simd_float3x2 __y, float __tol) {
   2647     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) &
   2648                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) &
   2649                       (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol));
   2650 }
   2651 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float3x3 __x, simd_float3x3 __y, float __tol) {
   2652     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) &
   2653                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) &
   2654                       (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol));
   2655 }
   2656 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float3x4 __x, simd_float3x4 __y, float __tol) {
   2657     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) &
   2658                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) &
   2659                       (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol));
   2660 }
   2661 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float4x2 __x, simd_float4x2 __y, float __tol) {
   2662     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) &
   2663                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) &
   2664                       (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol) &
   2665                       (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol));
   2666 }
   2667 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float4x3 __x, simd_float4x3 __y, float __tol) {
   2668     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) &
   2669                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) &
   2670                       (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol) &
   2671                       (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol));
   2672 }
   2673 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float4x4 __x, simd_float4x4 __y, float __tol) {
   2674     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) &
   2675                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) &
   2676                       (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol) &
   2677                       (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol));
   2678 }
   2679 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double2x2 __x, simd_double2x2 __y, double __tol) {
   2680     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) &
   2681                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol));
   2682 }
   2683 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double2x3 __x, simd_double2x3 __y, double __tol) {
   2684     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) &
   2685                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol));
   2686 }
   2687 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double2x4 __x, simd_double2x4 __y, double __tol) {
   2688     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) &
   2689                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol));
   2690 }
   2691 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double3x2 __x, simd_double3x2 __y, double __tol) {
   2692     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) &
   2693                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) &
   2694                       (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol));
   2695 }
   2696 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double3x3 __x, simd_double3x3 __y, double __tol) {
   2697     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) &
   2698                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) &
   2699                       (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol));
   2700 }
   2701 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double3x4 __x, simd_double3x4 __y, double __tol) {
   2702     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) &
   2703                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) &
   2704                       (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol));
   2705 }
   2706 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double4x2 __x, simd_double4x2 __y, double __tol) {
   2707     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) &
   2708                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) &
   2709                       (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol) &
   2710                       (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol));
   2711 }
   2712 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double4x3 __x, simd_double4x3 __y, double __tol) {
   2713     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) &
   2714                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) &
   2715                       (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol) &
   2716                       (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol));
   2717 }
   2718 static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double4x4 __x, simd_double4x4 __y, double __tol) {
   2719     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) &
   2720                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) &
   2721                       (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol) &
   2722                       (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol));
   2723 }
   2724 
   2725 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_half2x2 __x, simd_half2x2 __y, _Float16 __tol) {
   2726     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) &
   2727                     (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])));
   2728 }
   2729 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_half2x3 __x, simd_half2x3 __y, _Float16 __tol) {
   2730     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) &
   2731                     (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])));
   2732 }
   2733 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_half2x4 __x, simd_half2x4 __y, _Float16 __tol) {
   2734     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) &
   2735                     (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])));
   2736 }
   2737 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_half3x2 __x, simd_half3x2 __y, _Float16 __tol) {
   2738     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) &
   2739                     (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) &
   2740                     (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2])));
   2741 }
   2742 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_half3x3 __x, simd_half3x3 __y, _Float16 __tol) {
   2743     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) &
   2744                     (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) &
   2745                     (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2])));
   2746 }
   2747 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_half3x4 __x, simd_half3x4 __y, _Float16 __tol) {
   2748     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) &
   2749                     (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) &
   2750                     (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2])));
   2751 }
   2752 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_half4x2 __x, simd_half4x2 __y, _Float16 __tol) {
   2753     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) &
   2754                     (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) &
   2755                     (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2])) &
   2756                     (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol*__tg_fabs(__x.columns[3])));
   2757 }
   2758 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_half4x3 __x, simd_half4x3 __y, _Float16 __tol) {
   2759     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) &
   2760                     (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) &
   2761                     (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2])) &
   2762                     (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol*__tg_fabs(__x.columns[3])));
   2763 }
   2764 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_half4x4 __x, simd_half4x4 __y, _Float16 __tol) {
   2765     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) &
   2766                     (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) &
   2767                     (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2])) &
   2768                     (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol*__tg_fabs(__x.columns[3])));
   2769 }
   2770 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float2x2 __x, simd_float2x2 __y, float __tol) {
   2771     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) &
   2772                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])));
   2773 }
   2774 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float2x3 __x, simd_float2x3 __y, float __tol) {
   2775     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) &
   2776                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])));
   2777 }
   2778 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float2x4 __x, simd_float2x4 __y, float __tol) {
   2779     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) &
   2780                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])));
   2781 }
   2782 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float3x2 __x, simd_float3x2 __y, float __tol) {
   2783     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) &
   2784                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) &
   2785                       (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2])));
   2786 }
   2787 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float3x3 __x, simd_float3x3 __y, float __tol) {
   2788     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) &
   2789                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) &
   2790                       (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2])));
   2791 }
   2792 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float3x4 __x, simd_float3x4 __y, float __tol) {
   2793     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) &
   2794                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) &
   2795                       (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2])));
   2796 }
   2797 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float4x2 __x, simd_float4x2 __y, float __tol) {
   2798     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) &
   2799                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) &
   2800                       (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2])) &
   2801                       (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol*__tg_fabs(__x.columns[3])));
   2802 }
   2803 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float4x3 __x, simd_float4x3 __y, float __tol) {
   2804     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) &
   2805                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) &
   2806                       (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2])) &
   2807                       (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol*__tg_fabs(__x.columns[3])));
   2808 }
   2809 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float4x4 __x, simd_float4x4 __y, float __tol) {
   2810     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) &
   2811                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) &
   2812                       (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2])) &
   2813                       (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol*__tg_fabs(__x.columns[3])));
   2814 }
   2815 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double2x2 __x, simd_double2x2 __y, double __tol) {
   2816     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) &
   2817                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])));
   2818 }
   2819 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double2x3 __x, simd_double2x3 __y, double __tol) {
   2820     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) &
   2821                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])));
   2822 }
   2823 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double2x4 __x, simd_double2x4 __y, double __tol) {
   2824     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) &
   2825                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])));
   2826 }
   2827 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double3x2 __x, simd_double3x2 __y, double __tol) {
   2828     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) &
   2829                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) &
   2830                       (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2])));
   2831 }
   2832 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double3x3 __x, simd_double3x3 __y, double __tol) {
   2833     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) &
   2834                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) &
   2835                       (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2])));
   2836 }
   2837 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double3x4 __x, simd_double3x4 __y, double __tol) {
   2838     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) &
   2839                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) &
   2840                       (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2])));
   2841 }
   2842 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double4x2 __x, simd_double4x2 __y, double __tol) {
   2843     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) &
   2844                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) &
   2845                       (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2])) &
   2846                       (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol*__tg_fabs(__x.columns[3])));
   2847 }
   2848 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double4x3 __x, simd_double4x3 __y, double __tol) {
   2849     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) &
   2850                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) &
   2851                       (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2])) &
   2852                       (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol*__tg_fabs(__x.columns[3])));
   2853 }
   2854 static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double4x4 __x, simd_double4x4 __y, double __tol) {
   2855     return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) &
   2856                       (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) &
   2857                       (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2])) &
   2858                       (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol*__tg_fabs(__x.columns[3])));
   2859 }
   2860     
   2861 #ifdef __cplusplus
   2862 }
   2863 #endif
   2864 #endif /* SIMD_COMPILER_HAS_REQUIRED_FEATURES */
   2865 #endif /* __SIMD_HEADER__ */