diff --git a/cmake/Findclang.cmake b/cmake/Findclang.cmake index b4bd80773d..3ce52df893 100644 --- a/cmake/Findclang.cmake +++ b/cmake/Findclang.cmake @@ -9,27 +9,27 @@ find_path(CLANG_INCLUDE_DIRS NAMES clang/Frontend/ASTUnit.h PATHS - /usr/lib/llvm/11/include - /usr/lib/llvm-11/include - /usr/lib/llvm-11.0/include - /usr/local/llvm110/include - /usr/local/llvm11/include + /usr/lib/llvm/12/include + /usr/lib/llvm-12/include + /usr/lib/llvm-12.0/include + /usr/local/llvm120/include + /usr/local/llvm12/include /mingw64/include ) if(ZIG_PREFER_CLANG_CPP_DYLIB) find_library(CLANG_LIBRARIES NAMES - clang-cpp-11.0 - clang-cpp110 + clang-cpp-12.0 + clang-cpp120 clang-cpp PATHS ${CLANG_LIBDIRS} - /usr/lib/llvm/11/lib - /usr/lib/llvm/11/lib64 - /usr/lib/llvm-11/lib - /usr/local/llvm110/lib - /usr/local/llvm11/lib + /usr/lib/llvm/12/lib + /usr/lib/llvm/12/lib64 + /usr/lib/llvm-12/lib + /usr/local/llvm120/lib + /usr/local/llvm12/lib ) endif() @@ -39,11 +39,11 @@ if(NOT CLANG_LIBRARIES) find_library(CLANG_${_prettylibname_}_LIB NAMES ${_libname_} PATHS ${CLANG_LIBDIRS} - /usr/lib/llvm/11/lib - /usr/lib/llvm-11/lib - /usr/lib/llvm-11.0/lib - /usr/local/llvm110/lib - /usr/local/llvm11/lib + /usr/lib/llvm/12/lib + /usr/lib/llvm-12/lib + /usr/lib/llvm-12.0/lib + /usr/local/llvm120/lib + /usr/local/llvm12/lib /mingw64/lib /c/msys64/mingw64/lib c:\\msys64\\mingw64\\lib diff --git a/cmake/Findlld.cmake b/cmake/Findlld.cmake index 3103601ff8..72724ecd1e 100644 --- a/cmake/Findlld.cmake +++ b/cmake/Findlld.cmake @@ -8,16 +8,16 @@ find_path(LLD_INCLUDE_DIRS NAMES lld/Common/Driver.h PATHS - /usr/lib/llvm-11/include - /usr/local/llvm110/include - /usr/local/llvm11/include + /usr/lib/llvm-12/include + /usr/local/llvm120/include + /usr/local/llvm12/include /mingw64/include) -find_library(LLD_LIBRARY NAMES lld-11.0 lld110 lld +find_library(LLD_LIBRARY NAMES lld-12.0 lld120 lld PATHS - /usr/lib/llvm-11/lib - /usr/local/llvm110/lib - /usr/local/llvm11/lib + /usr/lib/llvm-12/lib + /usr/local/llvm120/lib + /usr/local/llvm12/lib ) if(EXISTS ${LLD_LIBRARY}) set(LLD_LIBRARIES ${LLD_LIBRARY}) @@ -27,9 +27,9 @@ else() find_library(LLD_${_prettylibname_}_LIB NAMES ${_libname_} PATHS ${LLD_LIBDIRS} - /usr/lib/llvm-11/lib - /usr/local/llvm110/lib - /usr/local/llvm11/lib + /usr/lib/llvm-12/lib + /usr/local/llvm120/lib + /usr/local/llvm12/lib /mingw64/lib /c/msys64/mingw64/lib c:/msys64/mingw64/lib) diff --git a/cmake/Findllvm.cmake b/cmake/Findllvm.cmake index 4984723ec2..bfde645cad 100644 --- a/cmake/Findllvm.cmake +++ b/cmake/Findllvm.cmake @@ -9,37 +9,37 @@ find_path(LLVM_INCLUDE_DIRS NAMES llvm/IR/IRBuilder.h PATHS - /usr/lib/llvm/11/include - /usr/lib/llvm-11/include - /usr/lib/llvm-11.0/include - /usr/local/llvm11/include - /usr/local/llvm110/include + /usr/lib/llvm/12/include + /usr/lib/llvm-12/include + /usr/lib/llvm-12.0/include + /usr/local/llvm12/include + /usr/local/llvm120/include /mingw64/include ) if(ZIG_PREFER_CLANG_CPP_DYLIB) find_library(LLVM_LIBRARIES NAMES - LLVM-11.0 - LLVM-11 - LLVM-110 + LLVM-12.0 + LLVM-12 + LLVM-120 LLVM PATHS ${LLVM_LIBDIRS} - /usr/lib/llvm/11/lib - /usr/lib/llvm/11/lib64 - /usr/lib/llvm-11/lib - /usr/local/llvm11/lib - /usr/local/llvm110/lib + /usr/lib/llvm/12/lib + /usr/lib/llvm/12/lib64 + /usr/lib/llvm-12/lib + /usr/local/llvm12/lib + /usr/local/llvm120/lib ) find_program(LLVM_CONFIG_EXE - NAMES llvm-config-11 llvm-config-11.0 llvm-config110 llvm-config11 llvm-config + NAMES llvm-config-12 llvm-config-12.0 llvm-config120 llvm-config12 llvm-config PATHS "/mingw64/bin" 
"/c/msys64/mingw64/bin" "c:/msys64/mingw64/bin" - "C:/Libraries/llvm-11.0.0/bin") + "C:/Libraries/llvm-12.0.0/bin") if ("${LLVM_CONFIG_EXE}" STREQUAL "LLVM_CONFIG_EXE-NOTFOUND") message(FATAL_ERROR "unable to find llvm-config") @@ -54,23 +54,23 @@ if(ZIG_PREFER_CLANG_CPP_DYLIB) OUTPUT_VARIABLE LLVM_CONFIG_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) - if("${LLVM_CONFIG_VERSION}" VERSION_LESS 11) - message(FATAL_ERROR "expected LLVM 11.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}") + if("${LLVM_CONFIG_VERSION}" VERSION_LESS 12) + message(FATAL_ERROR "expected LLVM 12.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}") endif() - if("${LLVM_CONFIG_VERSION}" VERSION_EQUAL 12) - message(FATAL_ERROR "expected LLVM 11.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}") + if("${LLVM_CONFIG_VERSION}" VERSION_EQUAL 13) + message(FATAL_ERROR "expected LLVM 12.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}") endif() - if("${LLVM_CONFIG_VERSION}" VERSION_GREATER 12) - message(FATAL_ERROR "expected LLVM 11.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}") + if("${LLVM_CONFIG_VERSION}" VERSION_GREATER 13) + message(FATAL_ERROR "expected LLVM 12.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}") endif() elseif(ZIG_USE_LLVM_CONFIG) find_program(LLVM_CONFIG_EXE - NAMES llvm-config-11 llvm-config-11.0 llvm-config110 llvm-config11 llvm-config + NAMES llvm-config-12 llvm-config-12.0 llvm-config120 llvm-config12 llvm-config PATHS "/mingw64/bin" "/c/msys64/mingw64/bin" "c:/msys64/mingw64/bin" - "C:/Libraries/llvm-11.0.0/bin") + "C:/Libraries/llvm-12.0.0/bin") if ("${LLVM_CONFIG_EXE}" STREQUAL "LLVM_CONFIG_EXE-NOTFOUND") message(FATAL_ERROR "unable to find llvm-config") @@ -85,14 +85,14 @@ elseif(ZIG_USE_LLVM_CONFIG) OUTPUT_VARIABLE LLVM_CONFIG_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) - if("${LLVM_CONFIG_VERSION}" VERSION_LESS 11) - message(FATAL_ERROR "expected LLVM 11.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}") + if("${LLVM_CONFIG_VERSION}" VERSION_LESS 12) + message(FATAL_ERROR "expected LLVM 12.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}") endif() - if("${LLVM_CONFIG_VERSION}" VERSION_EQUAL 12) - message(FATAL_ERROR "expected LLVM 11.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}") + if("${LLVM_CONFIG_VERSION}" VERSION_EQUAL 13) + message(FATAL_ERROR "expected LLVM 12.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}") endif() - if("${LLVM_CONFIG_VERSION}" VERSION_GREATER 12) - message(FATAL_ERROR "expected LLVM 11.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}") + if("${LLVM_CONFIG_VERSION}" VERSION_GREATER 13) + message(FATAL_ERROR "expected LLVM 12.x but found ${LLVM_CONFIG_VERSION} using ${LLVM_CONFIG_EXE}") endif() execute_process( @@ -166,7 +166,7 @@ elseif(ZIG_USE_LLVM_CONFIG) set(LLVM_LIBRARIES ${LLVM_LIBRARIES} ${LLVM_SYSTEM_LIBS}) if(NOT LLVM_LIBRARIES) - find_library(LLVM_LIBRARIES NAMES LLVM LLVM-11 LLVM-11.0) + find_library(LLVM_LIBRARIES NAMES LLVM LLVM-12 LLVM-12.0) endif() link_directories("${CMAKE_PREFIX_PATH}/lib") @@ -180,11 +180,11 @@ else() find_library(LLVM_${_prettylibname_}_LIB NAMES ${_libname_} PATHS ${LLVM_LIBDIRS} - /usr/lib/llvm/11/lib - /usr/lib/llvm-11/lib - /usr/lib/llvm-11.0/lib - /usr/local/llvm110/lib - /usr/local/llvm11/lib + /usr/lib/llvm/12/lib + /usr/lib/llvm-12/lib + /usr/lib/llvm-12.0/lib + /usr/local/llvm120/lib + /usr/local/llvm12/lib /mingw64/lib /c/msys64/mingw64/lib c:\\msys64\\mingw64\\lib) @@ -194,78 +194,57 @@ else() # This 
list can be re-generated with `llvm-config --libfiles` and then # reformatting using your favorite text editor. Note we do not execute # `llvm-config` here because we are cross compiling. - FIND_AND_ADD_LLVM_LIB(LLVMXRay) FIND_AND_ADD_LLVM_LIB(LLVMWindowsManifest) - FIND_AND_ADD_LLVM_LIB(LLVMSymbolize) - FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoPDB) - FIND_AND_ADD_LLVM_LIB(LLVMOrcJIT) - FIND_AND_ADD_LLVM_LIB(LLVMOrcError) - FIND_AND_ADD_LLVM_LIB(LLVMJITLink) - FIND_AND_ADD_LLVM_LIB(LLVMObjectYAML) - FIND_AND_ADD_LLVM_LIB(LLVMMCA) - FIND_AND_ADD_LLVM_LIB(LLVMLTO) - FIND_AND_ADD_LLVM_LIB(LLVMPasses) - FIND_AND_ADD_LLVM_LIB(LLVMCoroutines) - FIND_AND_ADD_LLVM_LIB(LLVMObjCARCOpts) - FIND_AND_ADD_LLVM_LIB(LLVMExtensions) - FIND_AND_ADD_LLVM_LIB(LLVMLineEditor) + FIND_AND_ADD_LLVM_LIB(LLVMXRay) FIND_AND_ADD_LLVM_LIB(LLVMLibDriver) - FIND_AND_ADD_LLVM_LIB(LLVMInterpreter) - FIND_AND_ADD_LLVM_LIB(LLVMFuzzMutate) - FIND_AND_ADD_LLVM_LIB(LLVMMCJIT) - FIND_AND_ADD_LLVM_LIB(LLVMExecutionEngine) - FIND_AND_ADD_LLVM_LIB(LLVMRuntimeDyld) - FIND_AND_ADD_LLVM_LIB(LLVMDWARFLinker) FIND_AND_ADD_LLVM_LIB(LLVMDlltoolDriver) - FIND_AND_ADD_LLVM_LIB(LLVMOption) - FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoGSYM) FIND_AND_ADD_LLVM_LIB(LLVMCoverage) + FIND_AND_ADD_LLVM_LIB(LLVMLineEditor) FIND_AND_ADD_LLVM_LIB(LLVMXCoreDisassembler) FIND_AND_ADD_LLVM_LIB(LLVMXCoreCodeGen) FIND_AND_ADD_LLVM_LIB(LLVMXCoreDesc) FIND_AND_ADD_LLVM_LIB(LLVMXCoreInfo) FIND_AND_ADD_LLVM_LIB(LLVMX86Disassembler) - FIND_AND_ADD_LLVM_LIB(LLVMX86CodeGen) FIND_AND_ADD_LLVM_LIB(LLVMX86AsmParser) + FIND_AND_ADD_LLVM_LIB(LLVMX86CodeGen) FIND_AND_ADD_LLVM_LIB(LLVMX86Desc) FIND_AND_ADD_LLVM_LIB(LLVMX86Info) FIND_AND_ADD_LLVM_LIB(LLVMWebAssemblyDisassembler) + FIND_AND_ADD_LLVM_LIB(LLVMWebAssemblyAsmParser) FIND_AND_ADD_LLVM_LIB(LLVMWebAssemblyCodeGen) FIND_AND_ADD_LLVM_LIB(LLVMWebAssemblyDesc) - FIND_AND_ADD_LLVM_LIB(LLVMWebAssemblyAsmParser) FIND_AND_ADD_LLVM_LIB(LLVMWebAssemblyInfo) FIND_AND_ADD_LLVM_LIB(LLVMSystemZDisassembler) - FIND_AND_ADD_LLVM_LIB(LLVMSystemZCodeGen) FIND_AND_ADD_LLVM_LIB(LLVMSystemZAsmParser) + FIND_AND_ADD_LLVM_LIB(LLVMSystemZCodeGen) FIND_AND_ADD_LLVM_LIB(LLVMSystemZDesc) FIND_AND_ADD_LLVM_LIB(LLVMSystemZInfo) FIND_AND_ADD_LLVM_LIB(LLVMSparcDisassembler) - FIND_AND_ADD_LLVM_LIB(LLVMSparcCodeGen) FIND_AND_ADD_LLVM_LIB(LLVMSparcAsmParser) + FIND_AND_ADD_LLVM_LIB(LLVMSparcCodeGen) FIND_AND_ADD_LLVM_LIB(LLVMSparcDesc) FIND_AND_ADD_LLVM_LIB(LLVMSparcInfo) FIND_AND_ADD_LLVM_LIB(LLVMRISCVDisassembler) - FIND_AND_ADD_LLVM_LIB(LLVMRISCVCodeGen) FIND_AND_ADD_LLVM_LIB(LLVMRISCVAsmParser) + FIND_AND_ADD_LLVM_LIB(LLVMRISCVCodeGen) FIND_AND_ADD_LLVM_LIB(LLVMRISCVDesc) - FIND_AND_ADD_LLVM_LIB(LLVMRISCVUtils) FIND_AND_ADD_LLVM_LIB(LLVMRISCVInfo) FIND_AND_ADD_LLVM_LIB(LLVMPowerPCDisassembler) - FIND_AND_ADD_LLVM_LIB(LLVMPowerPCCodeGen) FIND_AND_ADD_LLVM_LIB(LLVMPowerPCAsmParser) + FIND_AND_ADD_LLVM_LIB(LLVMPowerPCCodeGen) FIND_AND_ADD_LLVM_LIB(LLVMPowerPCDesc) FIND_AND_ADD_LLVM_LIB(LLVMPowerPCInfo) FIND_AND_ADD_LLVM_LIB(LLVMNVPTXCodeGen) FIND_AND_ADD_LLVM_LIB(LLVMNVPTXDesc) FIND_AND_ADD_LLVM_LIB(LLVMNVPTXInfo) FIND_AND_ADD_LLVM_LIB(LLVMMSP430Disassembler) - FIND_AND_ADD_LLVM_LIB(LLVMMSP430CodeGen) FIND_AND_ADD_LLVM_LIB(LLVMMSP430AsmParser) + FIND_AND_ADD_LLVM_LIB(LLVMMSP430CodeGen) FIND_AND_ADD_LLVM_LIB(LLVMMSP430Desc) FIND_AND_ADD_LLVM_LIB(LLVMMSP430Info) FIND_AND_ADD_LLVM_LIB(LLVMMipsDisassembler) - FIND_AND_ADD_LLVM_LIB(LLVMMipsCodeGen) FIND_AND_ADD_LLVM_LIB(LLVMMipsAsmParser) + FIND_AND_ADD_LLVM_LIB(LLVMMipsCodeGen) 
FIND_AND_ADD_LLVM_LIB(LLVMMipsDesc) FIND_AND_ADD_LLVM_LIB(LLVMMipsInfo) FIND_AND_ADD_LLVM_LIB(LLVMLanaiDisassembler) @@ -279,44 +258,73 @@ else() FIND_AND_ADD_LLVM_LIB(LLVMHexagonDesc) FIND_AND_ADD_LLVM_LIB(LLVMHexagonInfo) FIND_AND_ADD_LLVM_LIB(LLVMBPFDisassembler) - FIND_AND_ADD_LLVM_LIB(LLVMBPFCodeGen) FIND_AND_ADD_LLVM_LIB(LLVMBPFAsmParser) + FIND_AND_ADD_LLVM_LIB(LLVMBPFCodeGen) FIND_AND_ADD_LLVM_LIB(LLVMBPFDesc) FIND_AND_ADD_LLVM_LIB(LLVMBPFInfo) FIND_AND_ADD_LLVM_LIB(LLVMAVRDisassembler) - FIND_AND_ADD_LLVM_LIB(LLVMAVRCodeGen) FIND_AND_ADD_LLVM_LIB(LLVMAVRAsmParser) + FIND_AND_ADD_LLVM_LIB(LLVMAVRCodeGen) FIND_AND_ADD_LLVM_LIB(LLVMAVRDesc) FIND_AND_ADD_LLVM_LIB(LLVMAVRInfo) FIND_AND_ADD_LLVM_LIB(LLVMARMDisassembler) - FIND_AND_ADD_LLVM_LIB(LLVMARMCodeGen) FIND_AND_ADD_LLVM_LIB(LLVMARMAsmParser) + FIND_AND_ADD_LLVM_LIB(LLVMARMCodeGen) FIND_AND_ADD_LLVM_LIB(LLVMARMDesc) FIND_AND_ADD_LLVM_LIB(LLVMARMUtils) FIND_AND_ADD_LLVM_LIB(LLVMARMInfo) FIND_AND_ADD_LLVM_LIB(LLVMAMDGPUDisassembler) - FIND_AND_ADD_LLVM_LIB(LLVMAMDGPUCodeGen) - FIND_AND_ADD_LLVM_LIB(LLVMMIRParser) - FIND_AND_ADD_LLVM_LIB(LLVMipo) - FIND_AND_ADD_LLVM_LIB(LLVMInstrumentation) - FIND_AND_ADD_LLVM_LIB(LLVMVectorize) - FIND_AND_ADD_LLVM_LIB(LLVMLinker) - FIND_AND_ADD_LLVM_LIB(LLVMIRReader) - FIND_AND_ADD_LLVM_LIB(LLVMAsmParser) - FIND_AND_ADD_LLVM_LIB(LLVMFrontendOpenMP) FIND_AND_ADD_LLVM_LIB(LLVMAMDGPUAsmParser) + FIND_AND_ADD_LLVM_LIB(LLVMAMDGPUCodeGen) FIND_AND_ADD_LLVM_LIB(LLVMAMDGPUDesc) FIND_AND_ADD_LLVM_LIB(LLVMAMDGPUUtils) FIND_AND_ADD_LLVM_LIB(LLVMAMDGPUInfo) FIND_AND_ADD_LLVM_LIB(LLVMAArch64Disassembler) - FIND_AND_ADD_LLVM_LIB(LLVMMCDisassembler) + FIND_AND_ADD_LLVM_LIB(LLVMAArch64AsmParser) FIND_AND_ADD_LLVM_LIB(LLVMAArch64CodeGen) + FIND_AND_ADD_LLVM_LIB(LLVMAArch64Desc) + FIND_AND_ADD_LLVM_LIB(LLVMAArch64Utils) + FIND_AND_ADD_LLVM_LIB(LLVMAArch64Info) + FIND_AND_ADD_LLVM_LIB(LLVMOrcJIT) + FIND_AND_ADD_LLVM_LIB(LLVMMCJIT) + FIND_AND_ADD_LLVM_LIB(LLVMJITLink) + FIND_AND_ADD_LLVM_LIB(LLVMOrcTargetProcess) + FIND_AND_ADD_LLVM_LIB(LLVMOrcShared) + FIND_AND_ADD_LLVM_LIB(LLVMInterpreter) + FIND_AND_ADD_LLVM_LIB(LLVMExecutionEngine) + FIND_AND_ADD_LLVM_LIB(LLVMRuntimeDyld) + FIND_AND_ADD_LLVM_LIB(LLVMSymbolize) + FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoPDB) + FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoGSYM) + FIND_AND_ADD_LLVM_LIB(LLVMOption) + FIND_AND_ADD_LLVM_LIB(LLVMObjectYAML) + FIND_AND_ADD_LLVM_LIB(LLVMMCA) + FIND_AND_ADD_LLVM_LIB(LLVMMCDisassembler) + FIND_AND_ADD_LLVM_LIB(LLVMLTO) + FIND_AND_ADD_LLVM_LIB(LLVMPasses) FIND_AND_ADD_LLVM_LIB(LLVMCFGuard) + FIND_AND_ADD_LLVM_LIB(LLVMCoroutines) + FIND_AND_ADD_LLVM_LIB(LLVMObjCARCOpts) + FIND_AND_ADD_LLVM_LIB(LLVMHelloNew) + FIND_AND_ADD_LLVM_LIB(LLVMipo) + FIND_AND_ADD_LLVM_LIB(LLVMVectorize) + FIND_AND_ADD_LLVM_LIB(LLVMLinker) + FIND_AND_ADD_LLVM_LIB(LLVMInstrumentation) + FIND_AND_ADD_LLVM_LIB(LLVMFrontendOpenMP) + FIND_AND_ADD_LLVM_LIB(LLVMFrontendOpenACC) + FIND_AND_ADD_LLVM_LIB(LLVMExtensions) + FIND_AND_ADD_LLVM_LIB(LLVMDWARFLinker) FIND_AND_ADD_LLVM_LIB(LLVMGlobalISel) - FIND_AND_ADD_LLVM_LIB(LLVMSelectionDAG) + FIND_AND_ADD_LLVM_LIB(LLVMMIRParser) FIND_AND_ADD_LLVM_LIB(LLVMAsmPrinter) FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoDWARF) + FIND_AND_ADD_LLVM_LIB(LLVMSelectionDAG) FIND_AND_ADD_LLVM_LIB(LLVMCodeGen) + FIND_AND_ADD_LLVM_LIB(LLVMIRReader) + FIND_AND_ADD_LLVM_LIB(LLVMAsmParser) + FIND_AND_ADD_LLVM_LIB(LLVMInterfaceStub) + FIND_AND_ADD_LLVM_LIB(LLVMFileCheck) + FIND_AND_ADD_LLVM_LIB(LLVMFuzzMutate) FIND_AND_ADD_LLVM_LIB(LLVMTarget) FIND_AND_ADD_LLVM_LIB(LLVMScalarOpts) 
FIND_AND_ADD_LLVM_LIB(LLVMInstCombine) @@ -327,19 +335,15 @@ else() FIND_AND_ADD_LLVM_LIB(LLVMProfileData) FIND_AND_ADD_LLVM_LIB(LLVMObject) FIND_AND_ADD_LLVM_LIB(LLVMTextAPI) + FIND_AND_ADD_LLVM_LIB(LLVMMCParser) + FIND_AND_ADD_LLVM_LIB(LLVMMC) + FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoCodeView) + FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoMSF) FIND_AND_ADD_LLVM_LIB(LLVMBitReader) FIND_AND_ADD_LLVM_LIB(LLVMCore) FIND_AND_ADD_LLVM_LIB(LLVMRemarks) FIND_AND_ADD_LLVM_LIB(LLVMBitstreamReader) - FIND_AND_ADD_LLVM_LIB(LLVMAArch64AsmParser) - FIND_AND_ADD_LLVM_LIB(LLVMMCParser) - FIND_AND_ADD_LLVM_LIB(LLVMAArch64Desc) - FIND_AND_ADD_LLVM_LIB(LLVMMC) - FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoCodeView) - FIND_AND_ADD_LLVM_LIB(LLVMDebugInfoMSF) FIND_AND_ADD_LLVM_LIB(LLVMBinaryFormat) - FIND_AND_ADD_LLVM_LIB(LLVMAArch64Utils) - FIND_AND_ADD_LLVM_LIB(LLVMAArch64Info) FIND_AND_ADD_LLVM_LIB(LLVMSupport) FIND_AND_ADD_LLVM_LIB(LLVMDemangle) endif() diff --git a/lib/include/__clang_cuda_builtin_vars.h b/lib/include/__clang_cuda_builtin_vars.h index 2ba1521f25..412e823a82 100644 --- a/lib/include/__clang_cuda_builtin_vars.h +++ b/lib/include/__clang_cuda_builtin_vars.h @@ -55,7 +55,9 @@ struct __cuda_builtin_threadIdx_t { __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_tid_z()); // threadIdx should be convertible to uint3 (in fact in nvcc, it *is* a // uint3). This function is defined after we pull in vector_types.h. + __attribute__((device)) operator dim3() const; __attribute__((device)) operator uint3() const; + private: __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_threadIdx_t); }; @@ -66,7 +68,9 @@ struct __cuda_builtin_blockIdx_t { __CUDA_DEVICE_BUILTIN(z,__nvvm_read_ptx_sreg_ctaid_z()); // blockIdx should be convertible to uint3 (in fact in nvcc, it *is* a // uint3). This function is defined after we pull in vector_types.h. + __attribute__((device)) operator dim3() const; __attribute__((device)) operator uint3() const; + private: __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockIdx_t); }; @@ -78,6 +82,8 @@ struct __cuda_builtin_blockDim_t { // blockDim should be convertible to dim3 (in fact in nvcc, it *is* a // dim3). This function is defined after we pull in vector_types.h. __attribute__((device)) operator dim3() const; + __attribute__((device)) operator uint3() const; + private: __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_blockDim_t); }; @@ -89,6 +95,8 @@ struct __cuda_builtin_gridDim_t { // gridDim should be convertible to dim3 (in fact in nvcc, it *is* a // dim3). This function is defined after we pull in vector_types.h. __attribute__((device)) operator dim3() const; + __attribute__((device)) operator uint3() const; + private: __CUDA_DISALLOW_BUILTINVAR_ACCESS(__cuda_builtin_gridDim_t); }; @@ -108,5 +116,6 @@ __attribute__((device)) const int warpSize = 32; #undef __CUDA_DEVICE_BUILTIN #undef __CUDA_BUILTIN_VAR #undef __CUDA_DISALLOW_BUILTINVAR_ACCESS +#undef __DELETE #endif /* __CUDA_BUILTIN_VARS_H */ diff --git a/lib/include/__clang_cuda_cmath.h b/lib/include/__clang_cuda_cmath.h index 8ba182689a..5bbb59a93b 100644 --- a/lib/include/__clang_cuda_cmath.h +++ b/lib/include/__clang_cuda_cmath.h @@ -66,10 +66,38 @@ __DEVICE__ float frexp(float __arg, int *__exp) { } // For inscrutable reasons, the CUDA headers define these functions for us on -// Windows. For OpenMP we omit these as some old system headers have -// non-conforming `isinf(float)` and `isnan(float)` implementations that return -// an `int`. The system versions of these functions should be fine anyway. 
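// Illustrative sketch, not part of the patch: the __clang_cuda_builtin_vars.h hunk
// above declares dim3/uint3 conversion operators on the builtin-variable wrapper
// types (threadIdx/blockIdx gain operator dim3(), blockDim/gridDim gain operator
// uint3()); the out-of-line definitions land later in __clang_cuda_runtime_wrapper.h.
// A minimal device-side usage sketch, assuming normal CUDA compilation with clang;
// the kernel and buffer names are hypothetical.
__global__ void linear_index_demo(unsigned int *out) {
  dim3 block_pos = blockIdx;   // new: blockIdx/threadIdx convert to dim3
  uint3 block_sz = blockDim;   // new: blockDim/gridDim convert to uint3
  unsigned int i = block_pos.x * block_sz.x + threadIdx.x;
  out[i] = i;                  // write the flattened 1-D thread index
}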
-#if !defined(_MSC_VER) && !defined(__OPENMP_NVPTX__) +// Windows. +#if !defined(_MSC_VER) || defined(__OPENMP_NVPTX__) + +// For OpenMP we work around some old system headers that have non-conforming +// `isinf(float)` and `isnan(float)` implementations that return an `int`. We do +// this by providing two versions of these functions, differing only in the +// return type. To avoid conflicting definitions we disable implicit base +// function generation. That means we will end up with two specializations, one +// per type, but only one has a base function defined by the system header. +#if defined(__OPENMP_NVPTX__) +#pragma omp begin declare variant match( \ + implementation = {extension(disable_implicit_base)}) + +// FIXME: We lack an extension to customize the mangling of the variants, e.g., +// add a suffix. This means we would clash with the names of the variants +// (note that we do not create implicit base functions here). To avoid +// this clash we add a new trait to some of them that is always true +// (this is LLVM after all ;)). It will only influence the mangled name +// of the variants inside the inner region and avoid the clash. +#pragma omp begin declare variant match(implementation = {vendor(llvm)}) + +__DEVICE__ int isinf(float __x) { return ::__isinff(__x); } +__DEVICE__ int isinf(double __x) { return ::__isinf(__x); } +__DEVICE__ int isfinite(float __x) { return ::__finitef(__x); } +__DEVICE__ int isfinite(double __x) { return ::__isfinited(__x); } +__DEVICE__ int isnan(float __x) { return ::__isnanf(__x); } +__DEVICE__ int isnan(double __x) { return ::__isnan(__x); } + +#pragma omp end declare variant + +#endif + __DEVICE__ bool isinf(float __x) { return ::__isinff(__x); } __DEVICE__ bool isinf(double __x) { return ::__isinf(__x); } __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); } @@ -79,6 +107,11 @@ __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); } __DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); } __DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); } __DEVICE__ bool isnan(double __x) { return ::__isnan(__x); } + +#if defined(__OPENMP_NVPTX__) +#pragma omp end declare variant +#endif + #endif __DEVICE__ bool isgreater(float __x, float __y) { @@ -142,6 +175,15 @@ __DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); } __DEVICE__ float tan(float __x) { return ::tanf(__x); } __DEVICE__ float tanh(float __x) { return ::tanhf(__x); } +// There was a redefinition error for this this overload in CUDA mode. +// We restrict it to OpenMP mode for now, that is where it is actually needed +// anyway. +#ifdef __OPENMP_NVPTX__ +__DEVICE__ float remquo(float __n, float __d, int *__q) { + return ::remquof(__n, __d, __q); +} +#endif + // Notably missing above is nexttoward. We omit it because // libdevice doesn't provide an implementation, and we don't want to be in the // business of implementing tricky libm functions in this header. diff --git a/lib/include/__clang_cuda_complex_builtins.h b/lib/include/__clang_cuda_complex_builtins.h index d924487ab2..2b701fef0e 100644 --- a/lib/include/__clang_cuda_complex_builtins.h +++ b/lib/include/__clang_cuda_complex_builtins.h @@ -41,6 +41,27 @@ #define _ABSf std::abs #define _LOGBd std::logb #define _LOGBf std::logb +// Rather than pulling in std::max from algorithm everytime, use available ::max. 
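// Illustrative sketch, not part of the patch: the declare-variant pragmas above work
// around the fact that an overload set cannot contain two functions that differ only
// in return type, so the int-returning (old libc style) and bool-returning
// (C++-conforming) isnan/isinf/isfinite need separate OpenMP variant contexts rather
// than plain overloading. The names below are hypothetical stand-ins.
#include <type_traits>

int my_isnan(float x) { return x != x ? 1 : 0; }  // what an old system header declares
// bool my_isnan(float x) { return x != x; }      // error: differs only in return
                                                  // type, cannot overload
bool my_isnan_device(float x) { return x != x; }  // the conforming device version

static_assert(!std::is_same<decltype(my_isnan(0.0f)),
                            decltype(my_isnan_device(0.0f))>::value,
              "int- and bool-returning versions really do have distinct signatures");

int main() { return my_isnan_device(1.0f) ? 1 : 0; }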
+#define _fmaxd max +#define _fmaxf max +#else +#ifdef __AMDGCN__ +#define _ISNANd __ocml_isnan_f64 +#define _ISNANf __ocml_isnan_f32 +#define _ISINFd __ocml_isinf_f64 +#define _ISINFf __ocml_isinf_f32 +#define _ISFINITEd __ocml_isfinite_f64 +#define _ISFINITEf __ocml_isfinite_f32 +#define _COPYSIGNd __ocml_copysign_f64 +#define _COPYSIGNf __ocml_copysign_f32 +#define _SCALBNd __ocml_scalbn_f64 +#define _SCALBNf __ocml_scalbn_f32 +#define _ABSd __ocml_fabs_f64 +#define _ABSf __ocml_fabs_f32 +#define _LOGBd __ocml_logb_f64 +#define _LOGBf __ocml_logb_f32 +#define _fmaxd __ocml_fmax_f64 +#define _fmaxf __ocml_fmax_f32 #else #define _ISNANd __nv_isnand #define _ISNANf __nv_isnanf @@ -56,6 +77,9 @@ #define _ABSf __nv_fabsf #define _LOGBd __nv_logb #define _LOGBf __nv_logbf +#define _fmaxd __nv_fmax +#define _fmaxf __nv_fmaxf +#endif #endif #if defined(__cplusplus) @@ -167,7 +191,7 @@ __DEVICE__ double _Complex __divdc3(double __a, double __b, double __c, // Can't use std::max, because that's defined in , and we don't // want to pull that in for every compile. The CUDA headers define // ::max(float, float) and ::max(double, double), which is sufficient for us. - double __logbw = _LOGBd(max(_ABSd(__c), _ABSd(__d))); + double __logbw = _LOGBd(_fmaxd(_ABSd(__c), _ABSd(__d))); if (_ISFINITEd(__logbw)) { __ilogbw = (int)__logbw; __c = _SCALBNd(__c, -__ilogbw); @@ -200,7 +224,7 @@ __DEVICE__ double _Complex __divdc3(double __a, double __b, double __c, __DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) { int __ilogbw = 0; - float __logbw = _LOGBf(max(_ABSf(__c), _ABSf(__d))); + float __logbw = _LOGBf(_fmaxf(_ABSf(__c), _ABSf(__d))); if (_ISFINITEf(__logbw)) { __ilogbw = (int)__logbw; __c = _SCALBNf(__c, -__ilogbw); @@ -249,6 +273,8 @@ __DEVICE__ float _Complex __divsc3(float __a, float __b, float __c, float __d) { #undef _ABSf #undef _LOGBd #undef _LOGBf +#undef _fmaxd +#undef _fmaxf #ifdef __OPENMP_NVPTX__ #pragma omp end declare target diff --git a/lib/include/__clang_cuda_math.h b/lib/include/__clang_cuda_math.h index 332e616702..acb26ad345 100644 --- a/lib/include/__clang_cuda_math.h +++ b/lib/include/__clang_cuda_math.h @@ -195,8 +195,8 @@ __DEVICE__ int max(int __a, int __b) { return __nv_max(__a, __b); } __DEVICE__ int min(int __a, int __b) { return __nv_min(__a, __b); } __DEVICE__ double modf(double __a, double *__b) { return __nv_modf(__a, __b); } __DEVICE__ float modff(float __a, float *__b) { return __nv_modff(__a, __b); } -__DEVICE__ double nearbyint(double __a) { return __nv_nearbyint(__a); } -__DEVICE__ float nearbyintf(float __a) { return __nv_nearbyintf(__a); } +__DEVICE__ double nearbyint(double __a) { return __builtin_nearbyint(__a); } +__DEVICE__ float nearbyintf(float __a) { return __builtin_nearbyintf(__a); } __DEVICE__ double nextafter(double __a, double __b) { return __nv_nextafter(__a, __b); } @@ -249,8 +249,9 @@ __DEVICE__ double rhypot(double __a, double __b) { __DEVICE__ float rhypotf(float __a, float __b) { return __nv_rhypotf(__a, __b); } -__DEVICE__ double rint(double __a) { return __nv_rint(__a); } -__DEVICE__ float rintf(float __a) { return __nv_rintf(__a); } +// __nv_rint* in libdevice is buggy and produces incorrect results. 
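// Illustrative host-side sketch, not part of the patch: the __clang_cuda_math.h hunk
// above routes nearbyint/rint through the compiler builtins instead of the libdevice
// __nv_* entry points, since __nv_rint* is noted as buggy. The expected semantics are
// "round to nearest, ties to even" under the default rounding mode, as this
// standalone check shows.
#include <cfenv>
#include <cmath>
#include <cstdio>

int main(void) {
  std::fesetround(FE_TONEAREST);                                 // default rounding
  std::printf("rint(2.5)       = %.1f\n", std::rint(2.5));       // 2.0, not 3.0
  std::printf("rint(3.5)       = %.1f\n", std::rint(3.5));       // 4.0
  std::printf("nearbyint(-0.5) = %.1f\n", std::nearbyint(-0.5)); // -0.0
  return 0;
}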
+__DEVICE__ double rint(double __a) { return __builtin_rint(__a); } +__DEVICE__ float rintf(float __a) { return __builtin_rintf(__a); } __DEVICE__ double rnorm(int __a, const double *__b) { return __nv_rnorm(__a, __b); } diff --git a/lib/include/__clang_cuda_math_forward_declares.h b/lib/include/__clang_cuda_math_forward_declares.h index 8a270859e4..c0f1f47cc9 100644 --- a/lib/include/__clang_cuda_math_forward_declares.h +++ b/lib/include/__clang_cuda_math_forward_declares.h @@ -160,6 +160,9 @@ __DEVICE__ double scalbln(double, long); __DEVICE__ float scalbln(float, long); __DEVICE__ double scalbn(double, int); __DEVICE__ float scalbn(float, int); +#ifdef _MSC_VER +__DEVICE__ bool signbit(long double); +#endif __DEVICE__ bool signbit(double); __DEVICE__ bool signbit(float); __DEVICE__ double sin(double); diff --git a/lib/include/__clang_cuda_runtime_wrapper.h b/lib/include/__clang_cuda_runtime_wrapper.h index f43ed55de4..f88c39a9b6 100644 --- a/lib/include/__clang_cuda_runtime_wrapper.h +++ b/lib/include/__clang_cuda_runtime_wrapper.h @@ -377,30 +377,38 @@ __device__ static inline void *malloc(size_t __size) { // Out-of-line implementations from __clang_cuda_builtin_vars.h. These need to // come after we've pulled in the definition of uint3 and dim3. +__device__ inline __cuda_builtin_threadIdx_t::operator dim3() const { + return dim3(x, y, z); +} + __device__ inline __cuda_builtin_threadIdx_t::operator uint3() const { - uint3 ret; - ret.x = x; - ret.y = y; - ret.z = z; - return ret; + return {x, y, z}; +} + +__device__ inline __cuda_builtin_blockIdx_t::operator dim3() const { + return dim3(x, y, z); } __device__ inline __cuda_builtin_blockIdx_t::operator uint3() const { - uint3 ret; - ret.x = x; - ret.y = y; - ret.z = z; - return ret; + return {x, y, z}; } __device__ inline __cuda_builtin_blockDim_t::operator dim3() const { return dim3(x, y, z); } +__device__ inline __cuda_builtin_blockDim_t::operator uint3() const { + return {x, y, z}; +} + __device__ inline __cuda_builtin_gridDim_t::operator dim3() const { return dim3(x, y, z); } +__device__ inline __cuda_builtin_gridDim_t::operator uint3() const { + return {x, y, z}; +} + #include <__clang_cuda_cmath.h> #include <__clang_cuda_intrinsics.h> #include <__clang_cuda_complex_builtins.h> diff --git a/lib/include/__clang_hip_cmath.h b/lib/include/__clang_hip_cmath.h new file mode 100644 index 0000000000..cd22a2df95 --- /dev/null +++ b/lib/include/__clang_hip_cmath.h @@ -0,0 +1,664 @@ +/*===---- __clang_hip_cmath.h - HIP cmath decls -----------------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __CLANG_HIP_CMATH_H__ +#define __CLANG_HIP_CMATH_H__ + +#if !defined(__HIP__) +#error "This file is for HIP and OpenMP AMDGCN device compilation only." +#endif + +#if defined(__cplusplus) +#include +#include +#include +#endif +#include +#include + +#pragma push_macro("__DEVICE__") +#define __DEVICE__ static __device__ inline __attribute__((always_inline)) + +// Start with functions that cannot be defined by DEF macros below. 
+#if defined(__cplusplus) +__DEVICE__ double abs(double __x) { return ::fabs(__x); } +__DEVICE__ float abs(float __x) { return ::fabsf(__x); } +__DEVICE__ long long abs(long long __n) { return ::llabs(__n); } +__DEVICE__ long abs(long __n) { return ::labs(__n); } +__DEVICE__ float fma(float __x, float __y, float __z) { + return ::fmaf(__x, __y, __z); +} +__DEVICE__ int fpclassify(float __x) { + return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, + FP_ZERO, __x); +} +__DEVICE__ int fpclassify(double __x) { + return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, + FP_ZERO, __x); +} +__DEVICE__ float frexp(float __arg, int *__exp) { + return ::frexpf(__arg, __exp); +} +__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); } +__DEVICE__ bool isfinite(double __x) { return ::__finite(__x); } +__DEVICE__ bool isgreater(float __x, float __y) { + return __builtin_isgreater(__x, __y); +} +__DEVICE__ bool isgreater(double __x, double __y) { + return __builtin_isgreater(__x, __y); +} +__DEVICE__ bool isgreaterequal(float __x, float __y) { + return __builtin_isgreaterequal(__x, __y); +} +__DEVICE__ bool isgreaterequal(double __x, double __y) { + return __builtin_isgreaterequal(__x, __y); +} +__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); } +__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); } +__DEVICE__ bool isless(float __x, float __y) { + return __builtin_isless(__x, __y); +} +__DEVICE__ bool isless(double __x, double __y) { + return __builtin_isless(__x, __y); +} +__DEVICE__ bool islessequal(float __x, float __y) { + return __builtin_islessequal(__x, __y); +} +__DEVICE__ bool islessequal(double __x, double __y) { + return __builtin_islessequal(__x, __y); +} +__DEVICE__ bool islessgreater(float __x, float __y) { + return __builtin_islessgreater(__x, __y); +} +__DEVICE__ bool islessgreater(double __x, double __y) { + return __builtin_islessgreater(__x, __y); +} +__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); } +__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); } +__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); } +__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); } +__DEVICE__ bool isunordered(float __x, float __y) { + return __builtin_isunordered(__x, __y); +} +__DEVICE__ bool isunordered(double __x, double __y) { + return __builtin_isunordered(__x, __y); +} +__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); } +__DEVICE__ float pow(float __base, int __iexp) { + return ::powif(__base, __iexp); +} +__DEVICE__ double pow(double __base, int __iexp) { + return ::powi(__base, __iexp); +} +__DEVICE__ float remquo(float __x, float __y, int *__quo) { + return ::remquof(__x, __y, __quo); +} +__DEVICE__ float scalbln(float __x, long int __n) { + return ::scalblnf(__x, __n); +} +__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); } +__DEVICE__ bool signbit(double __x) { return ::__signbit(__x); } + +// Notably missing above is nexttoward. We omit it because +// ocml doesn't provide an implementation, and we don't want to be in the +// business of implementing tricky libm functions in this header. + +// Other functions. 
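// Illustrative host-side sketch, not part of the patch: the fpclassify wrappers above
// rely on __builtin_fpclassify, which takes the five category constants first
// (NaN, infinite, normal, subnormal, zero) and the operand last, and returns whichever
// constant describes the value. Helper names here are hypothetical.
#include <cmath>
#include <cstdio>

static const char *classify(double v) {
  switch (__builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                               FP_ZERO, v)) {
  case FP_ZERO:      return "zero";
  case FP_SUBNORMAL: return "subnormal";
  case FP_NORMAL:    return "normal";
  case FP_INFINITE:  return "infinite";
  default:           return "nan";
  }
}

int main(void) {
  std::printf("%s %s %s %s %s\n", classify(0.0), classify(1e-310), classify(1.5),
              classify(INFINITY), classify(NAN));
  // prints: zero subnormal normal infinite nan
  return 0;
}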
+__DEVICE__ _Float16 fma(_Float16 __x, _Float16 __y, _Float16 __z) { + return __ocml_fma_f16(__x, __y, __z); +} +__DEVICE__ _Float16 pow(_Float16 __base, int __iexp) { + return __ocml_pown_f16(__base, __iexp); +} + +// BEGIN DEF_FUN and HIP_OVERLOAD + +// BEGIN DEF_FUN + +#pragma push_macro("__DEF_FUN1") +#pragma push_macro("__DEF_FUN2") +#pragma push_macro("__DEF_FUN2_FI") + +// Define cmath functions with float argument and returns __retty. +#define __DEF_FUN1(__retty, __func) \ + __DEVICE__ \ + __retty __func(float __x) { return __func##f(__x); } + +// Define cmath functions with two float arguments and returns __retty. +#define __DEF_FUN2(__retty, __func) \ + __DEVICE__ \ + __retty __func(float __x, float __y) { return __func##f(__x, __y); } + +// Define cmath functions with a float and an int argument and returns __retty. +#define __DEF_FUN2_FI(__retty, __func) \ + __DEVICE__ \ + __retty __func(float __x, int __y) { return __func##f(__x, __y); } + +__DEF_FUN1(float, acos) +__DEF_FUN1(float, acosh) +__DEF_FUN1(float, asin) +__DEF_FUN1(float, asinh) +__DEF_FUN1(float, atan) +__DEF_FUN2(float, atan2) +__DEF_FUN1(float, atanh) +__DEF_FUN1(float, cbrt) +__DEF_FUN1(float, ceil) +__DEF_FUN2(float, copysign) +__DEF_FUN1(float, cos) +__DEF_FUN1(float, cosh) +__DEF_FUN1(float, erf) +__DEF_FUN1(float, erfc) +__DEF_FUN1(float, exp) +__DEF_FUN1(float, exp2) +__DEF_FUN1(float, expm1) +__DEF_FUN1(float, fabs) +__DEF_FUN2(float, fdim) +__DEF_FUN1(float, floor) +__DEF_FUN2(float, fmax) +__DEF_FUN2(float, fmin) +__DEF_FUN2(float, fmod) +__DEF_FUN2(float, hypot) +__DEF_FUN1(int, ilogb) +__DEF_FUN2_FI(float, ldexp) +__DEF_FUN1(float, lgamma) +__DEF_FUN1(float, log) +__DEF_FUN1(float, log10) +__DEF_FUN1(float, log1p) +__DEF_FUN1(float, log2) +__DEF_FUN1(float, logb) +__DEF_FUN1(long long, llrint) +__DEF_FUN1(long long, llround) +__DEF_FUN1(long, lrint) +__DEF_FUN1(long, lround) +__DEF_FUN1(float, nearbyint) +__DEF_FUN2(float, nextafter) +__DEF_FUN2(float, pow) +__DEF_FUN2(float, remainder) +__DEF_FUN1(float, rint) +__DEF_FUN1(float, round) +__DEF_FUN2_FI(float, scalbn) +__DEF_FUN1(float, sin) +__DEF_FUN1(float, sinh) +__DEF_FUN1(float, sqrt) +__DEF_FUN1(float, tan) +__DEF_FUN1(float, tanh) +__DEF_FUN1(float, tgamma) +__DEF_FUN1(float, trunc) + +#pragma pop_macro("__DEF_FUN1") +#pragma pop_macro("__DEF_FUN2") +#pragma pop_macro("__DEF_FUN2_FI") + +// END DEF_FUN + +// BEGIN HIP_OVERLOAD + +#pragma push_macro("__HIP_OVERLOAD1") +#pragma push_macro("__HIP_OVERLOAD2") + +// __hip_enable_if::type is a type function which returns __T if __B is true. +template struct __hip_enable_if {}; + +template struct __hip_enable_if { typedef __T type; }; + +// decltype is only available in C++11 and above. +#if __cplusplus >= 201103L +// __hip_promote +namespace __hip { + +template struct __numeric_type { + static void __test(...); + static _Float16 __test(_Float16); + static float __test(float); + static double __test(char); + static double __test(int); + static double __test(unsigned); + static double __test(long); + static double __test(unsigned long); + static double __test(long long); + static double __test(unsigned long long); + static double __test(double); + // No support for long double, use double instead. 
+ static double __test(long double); + + typedef decltype(__test(std::declval<_Tp>())) type; + static const bool value = !std::is_same::value; +}; + +template <> struct __numeric_type { static const bool value = true; }; + +template ::value &&__numeric_type<_A2>::value + &&__numeric_type<_A3>::value> +class __promote_imp { +public: + static const bool value = false; +}; + +template +class __promote_imp<_A1, _A2, _A3, true> { +private: + typedef typename __promote_imp<_A1>::type __type1; + typedef typename __promote_imp<_A2>::type __type2; + typedef typename __promote_imp<_A3>::type __type3; + +public: + typedef decltype(__type1() + __type2() + __type3()) type; + static const bool value = true; +}; + +template class __promote_imp<_A1, _A2, void, true> { +private: + typedef typename __promote_imp<_A1>::type __type1; + typedef typename __promote_imp<_A2>::type __type2; + +public: + typedef decltype(__type1() + __type2()) type; + static const bool value = true; +}; + +template class __promote_imp<_A1, void, void, true> { +public: + typedef typename __numeric_type<_A1>::type type; + static const bool value = true; +}; + +template +class __promote : public __promote_imp<_A1, _A2, _A3> {}; + +} // namespace __hip +#endif //__cplusplus >= 201103L + +// __HIP_OVERLOAD1 is used to resolve function calls with integer argument to +// avoid compilation error due to ambibuity. e.g. floor(5) is resolved with +// floor(double). +#define __HIP_OVERLOAD1(__retty, __fn) \ + template \ + __DEVICE__ typename __hip_enable_if::is_integer, \ + __retty>::type \ + __fn(__T __x) { \ + return ::__fn((double)__x); \ + } + +// __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double +// or integer argument to avoid compilation error due to ambibuity. e.g. +// max(5.0f, 6.0) is resolved with max(double, double). 
+#if __cplusplus >= 201103L +#define __HIP_OVERLOAD2(__retty, __fn) \ + template \ + __DEVICE__ typename __hip_enable_if< \ + std::numeric_limits<__T1>::is_specialized && \ + std::numeric_limits<__T2>::is_specialized, \ + typename __hip::__promote<__T1, __T2>::type>::type \ + __fn(__T1 __x, __T2 __y) { \ + typedef typename __hip::__promote<__T1, __T2>::type __result_type; \ + return __fn((__result_type)__x, (__result_type)__y); \ + } +#else +#define __HIP_OVERLOAD2(__retty, __fn) \ + template \ + __DEVICE__ \ + typename __hip_enable_if::is_specialized && \ + std::numeric_limits<__T2>::is_specialized, \ + __retty>::type \ + __fn(__T1 __x, __T2 __y) { \ + return __fn((double)__x, (double)__y); \ + } +#endif + +__HIP_OVERLOAD1(double, abs) +__HIP_OVERLOAD1(double, acos) +__HIP_OVERLOAD1(double, acosh) +__HIP_OVERLOAD1(double, asin) +__HIP_OVERLOAD1(double, asinh) +__HIP_OVERLOAD1(double, atan) +__HIP_OVERLOAD2(double, atan2) +__HIP_OVERLOAD1(double, atanh) +__HIP_OVERLOAD1(double, cbrt) +__HIP_OVERLOAD1(double, ceil) +__HIP_OVERLOAD2(double, copysign) +__HIP_OVERLOAD1(double, cos) +__HIP_OVERLOAD1(double, cosh) +__HIP_OVERLOAD1(double, erf) +__HIP_OVERLOAD1(double, erfc) +__HIP_OVERLOAD1(double, exp) +__HIP_OVERLOAD1(double, exp2) +__HIP_OVERLOAD1(double, expm1) +__HIP_OVERLOAD1(double, fabs) +__HIP_OVERLOAD2(double, fdim) +__HIP_OVERLOAD1(double, floor) +__HIP_OVERLOAD2(double, fmax) +__HIP_OVERLOAD2(double, fmin) +__HIP_OVERLOAD2(double, fmod) +__HIP_OVERLOAD1(int, fpclassify) +__HIP_OVERLOAD2(double, hypot) +__HIP_OVERLOAD1(int, ilogb) +__HIP_OVERLOAD1(bool, isfinite) +__HIP_OVERLOAD2(bool, isgreater) +__HIP_OVERLOAD2(bool, isgreaterequal) +__HIP_OVERLOAD1(bool, isinf) +__HIP_OVERLOAD2(bool, isless) +__HIP_OVERLOAD2(bool, islessequal) +__HIP_OVERLOAD2(bool, islessgreater) +__HIP_OVERLOAD1(bool, isnan) +__HIP_OVERLOAD1(bool, isnormal) +__HIP_OVERLOAD2(bool, isunordered) +__HIP_OVERLOAD1(double, lgamma) +__HIP_OVERLOAD1(double, log) +__HIP_OVERLOAD1(double, log10) +__HIP_OVERLOAD1(double, log1p) +__HIP_OVERLOAD1(double, log2) +__HIP_OVERLOAD1(double, logb) +__HIP_OVERLOAD1(long long, llrint) +__HIP_OVERLOAD1(long long, llround) +__HIP_OVERLOAD1(long, lrint) +__HIP_OVERLOAD1(long, lround) +__HIP_OVERLOAD1(double, nearbyint) +__HIP_OVERLOAD2(double, nextafter) +__HIP_OVERLOAD2(double, pow) +__HIP_OVERLOAD2(double, remainder) +__HIP_OVERLOAD1(double, rint) +__HIP_OVERLOAD1(double, round) +__HIP_OVERLOAD1(bool, signbit) +__HIP_OVERLOAD1(double, sin) +__HIP_OVERLOAD1(double, sinh) +__HIP_OVERLOAD1(double, sqrt) +__HIP_OVERLOAD1(double, tan) +__HIP_OVERLOAD1(double, tanh) +__HIP_OVERLOAD1(double, tgamma) +__HIP_OVERLOAD1(double, trunc) + +// Overload these but don't add them to std, they are not part of cmath. +__HIP_OVERLOAD2(double, max) +__HIP_OVERLOAD2(double, min) + +// Additional Overloads that don't quite match HIP_OVERLOAD. 
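// Illustrative host-side sketch, not part of the patch, of the trait logic the
// __HIP_OVERLOAD1/__HIP_OVERLOAD2 macros above build on: integral arguments are
// accepted via numeric_limits<>::is_integer and forwarded as double, and mixed
// float/double calls are promoted with ordinary arithmetic, which is why
// max(5.0f, 6.0) ends up calling max(double, double).
#include <limits>
#include <type_traits>

static_assert(std::numeric_limits<int>::is_integer,
              "OVERLOAD1: floor(5) is accepted and its argument cast to double");
static_assert(std::numeric_limits<float>::is_specialized &&
                  std::numeric_limits<double>::is_specialized,
              "OVERLOAD2: both argument types must be arithmetic-like");
static_assert(std::is_same<decltype(float() + double()), double>::value,
              "the promoted common type of (float, double) is double");

int main() { return 0; }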
+#if __cplusplus >= 201103L +template +__DEVICE__ typename __hip_enable_if< + std::numeric_limits<__T1>::is_specialized && + std::numeric_limits<__T2>::is_specialized && + std::numeric_limits<__T3>::is_specialized, + typename __hip::__promote<__T1, __T2, __T3>::type>::type +fma(__T1 __x, __T2 __y, __T3 __z) { + typedef typename __hip::__promote<__T1, __T2, __T3>::type __result_type; + return ::fma((__result_type)__x, (__result_type)__y, (__result_type)__z); +} +#else +template +__DEVICE__ + typename __hip_enable_if::is_specialized && + std::numeric_limits<__T2>::is_specialized && + std::numeric_limits<__T3>::is_specialized, + double>::type + fma(__T1 __x, __T2 __y, __T3 __z) { + return ::fma((double)__x, (double)__y, (double)__z); +} +#endif + +template +__DEVICE__ + typename __hip_enable_if::is_integer, double>::type + frexp(__T __x, int *__exp) { + return ::frexp((double)__x, __exp); +} + +template +__DEVICE__ + typename __hip_enable_if::is_integer, double>::type + ldexp(__T __x, int __exp) { + return ::ldexp((double)__x, __exp); +} + +template +__DEVICE__ + typename __hip_enable_if::is_integer, double>::type + modf(__T __x, double *__exp) { + return ::modf((double)__x, __exp); +} + +#if __cplusplus >= 201103L +template +__DEVICE__ + typename __hip_enable_if::is_specialized && + std::numeric_limits<__T2>::is_specialized, + typename __hip::__promote<__T1, __T2>::type>::type + remquo(__T1 __x, __T2 __y, int *__quo) { + typedef typename __hip::__promote<__T1, __T2>::type __result_type; + return ::remquo((__result_type)__x, (__result_type)__y, __quo); +} +#else +template +__DEVICE__ + typename __hip_enable_if::is_specialized && + std::numeric_limits<__T2>::is_specialized, + double>::type + remquo(__T1 __x, __T2 __y, int *__quo) { + return ::remquo((double)__x, (double)__y, __quo); +} +#endif + +template +__DEVICE__ + typename __hip_enable_if::is_integer, double>::type + scalbln(__T __x, long int __exp) { + return ::scalbln((double)__x, __exp); +} + +template +__DEVICE__ + typename __hip_enable_if::is_integer, double>::type + scalbn(__T __x, int __exp) { + return ::scalbn((double)__x, __exp); +} + +#pragma pop_macro("__HIP_OVERLOAD1") +#pragma pop_macro("__HIP_OVERLOAD2") + +// END HIP_OVERLOAD + +// END DEF_FUN and HIP_OVERLOAD + +#endif // defined(__cplusplus) + +// Define these overloads inside the namespace our standard library uses. +#ifdef _LIBCPP_BEGIN_NAMESPACE_STD +_LIBCPP_BEGIN_NAMESPACE_STD +#else +namespace std { +#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION +_GLIBCXX_BEGIN_NAMESPACE_VERSION +#endif +#endif + +// Pull the new overloads we defined above into namespace std. +// using ::abs; - This may be considered for C++. +using ::acos; +using ::acosh; +using ::asin; +using ::asinh; +using ::atan; +using ::atan2; +using ::atanh; +using ::cbrt; +using ::ceil; +using ::copysign; +using ::cos; +using ::cosh; +using ::erf; +using ::erfc; +using ::exp; +using ::exp2; +using ::expm1; +using ::fabs; +using ::fdim; +using ::floor; +using ::fma; +using ::fmax; +using ::fmin; +using ::fmod; +using ::fpclassify; +using ::frexp; +using ::hypot; +using ::ilogb; +using ::isfinite; +using ::isgreater; +using ::isgreaterequal; +using ::isless; +using ::islessequal; +using ::islessgreater; +using ::isnormal; +using ::isunordered; +using ::ldexp; +using ::lgamma; +using ::llrint; +using ::llround; +using ::log; +using ::log10; +using ::log1p; +using ::log2; +using ::logb; +using ::lrint; +using ::lround; +using ::modf; +// using ::nan; - This may be considered for C++. 
+// using ::nanf; - This may be considered for C++. +// using ::nanl; - This is not yet defined. +using ::nearbyint; +using ::nextafter; +// using ::nexttoward; - Omit this since we do not have a definition. +using ::pow; +using ::remainder; +using ::remquo; +using ::rint; +using ::round; +using ::scalbln; +using ::scalbn; +using ::signbit; +using ::sin; +using ::sinh; +using ::sqrt; +using ::tan; +using ::tanh; +using ::tgamma; +using ::trunc; + +// Well this is fun: We need to pull these symbols in for libc++, but we can't +// pull them in with libstdc++, because its ::isinf and ::isnan are different +// than its std::isinf and std::isnan. +#ifndef __GLIBCXX__ +using ::isinf; +using ::isnan; +#endif + +// Finally, pull the "foobarf" functions that HIP defines into std. +using ::acosf; +using ::acoshf; +using ::asinf; +using ::asinhf; +using ::atan2f; +using ::atanf; +using ::atanhf; +using ::cbrtf; +using ::ceilf; +using ::copysignf; +using ::cosf; +using ::coshf; +using ::erfcf; +using ::erff; +using ::exp2f; +using ::expf; +using ::expm1f; +using ::fabsf; +using ::fdimf; +using ::floorf; +using ::fmaf; +using ::fmaxf; +using ::fminf; +using ::fmodf; +using ::frexpf; +using ::hypotf; +using ::ilogbf; +using ::ldexpf; +using ::lgammaf; +using ::llrintf; +using ::llroundf; +using ::log10f; +using ::log1pf; +using ::log2f; +using ::logbf; +using ::logf; +using ::lrintf; +using ::lroundf; +using ::modff; +using ::nearbyintf; +using ::nextafterf; +// using ::nexttowardf; - Omit this since we do not have a definition. +using ::powf; +using ::remainderf; +using ::remquof; +using ::rintf; +using ::roundf; +using ::scalblnf; +using ::scalbnf; +using ::sinf; +using ::sinhf; +using ::sqrtf; +using ::tanf; +using ::tanhf; +using ::tgammaf; +using ::truncf; + +#ifdef _LIBCPP_END_NAMESPACE_STD +_LIBCPP_END_NAMESPACE_STD +#else +#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION +_GLIBCXX_END_NAMESPACE_VERSION +#endif +} // namespace std +#endif + +// Define device-side math functions from on MSVC. +#if defined(_MSC_VER) + +// Before VS2019, `` is also included in `` and other headers. +// But, from VS2019, it's only included in ``. Need to include +// `` here to ensure C functions declared there won't be markded as +// `__host__` and `__device__` through `` wrapper. 
+#include + +#if defined(__cplusplus) +extern "C" { +#endif // defined(__cplusplus) +__DEVICE__ __attribute__((overloadable)) double _Cosh(double x, double y) { + return cosh(x) * y; +} +__DEVICE__ __attribute__((overloadable)) float _FCosh(float x, float y) { + return coshf(x) * y; +} +__DEVICE__ __attribute__((overloadable)) short _Dtest(double *p) { + return fpclassify(*p); +} +__DEVICE__ __attribute__((overloadable)) short _FDtest(float *p) { + return fpclassify(*p); +} +__DEVICE__ __attribute__((overloadable)) double _Sinh(double x, double y) { + return sinh(x) * y; +} +__DEVICE__ __attribute__((overloadable)) float _FSinh(float x, float y) { + return sinhf(x) * y; +} +#if defined(__cplusplus) +} +#endif // defined(__cplusplus) +#endif // defined(_MSC_VER) + +#pragma pop_macro("__DEVICE__") + +#endif // __CLANG_HIP_CMATH_H__ diff --git a/lib/include/__clang_hip_libdevice_declares.h b/lib/include/__clang_hip_libdevice_declares.h index e1cd49a39c..ac98907ad5 100644 --- a/lib/include/__clang_hip_libdevice_declares.h +++ b/lib/include/__clang_hip_libdevice_declares.h @@ -10,7 +10,9 @@ #ifndef __CLANG_HIP_LIBDEVICE_DECLARES_H__ #define __CLANG_HIP_LIBDEVICE_DECLARES_H__ +#ifdef __cplusplus extern "C" { +#endif // BEGIN FLOAT __device__ __attribute__((const)) float __ocml_acos_f32(float); @@ -78,6 +80,7 @@ __device__ __attribute__((const)) float __ocml_len4_f32(float, float, float, __device__ __attribute__((pure)) float __ocml_ncdf_f32(float); __device__ __attribute__((pure)) float __ocml_ncdfinv_f32(float); __device__ __attribute__((pure)) float __ocml_pow_f32(float, float); +__device__ __attribute__((pure)) float __ocml_pown_f32(float, int); __device__ __attribute__((pure)) float __ocml_rcbrt_f32(float); __device__ __attribute__((const)) float __ocml_remainder_f32(float, float); __device__ float __ocml_remquo_f32(float, float, @@ -126,10 +129,10 @@ __device__ __attribute__((const)) float __ocml_div_rte_f32(float, float); __device__ __attribute__((const)) float __ocml_div_rtn_f32(float, float); __device__ __attribute__((const)) float __ocml_div_rtp_f32(float, float); __device__ __attribute__((const)) float __ocml_div_rtz_f32(float, float); -__device__ __attribute__((const)) float __ocml_sqrt_rte_f32(float, float); -__device__ __attribute__((const)) float __ocml_sqrt_rtn_f32(float, float); -__device__ __attribute__((const)) float __ocml_sqrt_rtp_f32(float, float); -__device__ __attribute__((const)) float __ocml_sqrt_rtz_f32(float, float); +__device__ __attribute__((const)) float __ocml_sqrt_rte_f32(float); +__device__ __attribute__((const)) float __ocml_sqrt_rtn_f32(float); +__device__ __attribute__((const)) float __ocml_sqrt_rtp_f32(float); +__device__ __attribute__((const)) float __ocml_sqrt_rtz_f32(float); __device__ __attribute__((const)) float __ocml_fma_rte_f32(float, float, float); __device__ __attribute__((const)) float __ocml_fma_rtn_f32(float, float, float); __device__ __attribute__((const)) float __ocml_fma_rtp_f32(float, float, float); @@ -205,6 +208,7 @@ __device__ __attribute__((const)) double __ocml_len4_f64(double, double, double, __device__ __attribute__((pure)) double __ocml_ncdf_f64(double); __device__ __attribute__((pure)) double __ocml_ncdfinv_f64(double); __device__ __attribute__((pure)) double __ocml_pow_f64(double, double); +__device__ __attribute__((pure)) double __ocml_pown_f64(double, int); __device__ __attribute__((pure)) double __ocml_rcbrt_f64(double); __device__ __attribute__((const)) double __ocml_remainder_f64(double, double); __device__ double 
__ocml_remquo_f64(double, double, @@ -252,10 +256,10 @@ __device__ __attribute__((const)) double __ocml_div_rte_f64(double, double); __device__ __attribute__((const)) double __ocml_div_rtn_f64(double, double); __device__ __attribute__((const)) double __ocml_div_rtp_f64(double, double); __device__ __attribute__((const)) double __ocml_div_rtz_f64(double, double); -__device__ __attribute__((const)) double __ocml_sqrt_rte_f64(double, double); -__device__ __attribute__((const)) double __ocml_sqrt_rtn_f64(double, double); -__device__ __attribute__((const)) double __ocml_sqrt_rtp_f64(double, double); -__device__ __attribute__((const)) double __ocml_sqrt_rtz_f64(double, double); +__device__ __attribute__((const)) double __ocml_sqrt_rte_f64(double); +__device__ __attribute__((const)) double __ocml_sqrt_rtn_f64(double); +__device__ __attribute__((const)) double __ocml_sqrt_rtp_f64(double); +__device__ __attribute__((const)) double __ocml_sqrt_rtz_f64(double); __device__ __attribute__((const)) double __ocml_fma_rte_f64(double, double, double); __device__ __attribute__((const)) double __ocml_fma_rtn_f64(double, double, @@ -290,6 +294,7 @@ __device__ __attribute__((const)) _Float16 __ocml_rsqrt_f16(_Float16); __device__ _Float16 __ocml_sin_f16(_Float16); __device__ __attribute__((const)) _Float16 __ocml_sqrt_f16(_Float16); __device__ __attribute__((const)) _Float16 __ocml_trunc_f16(_Float16); +__device__ __attribute__((pure)) _Float16 __ocml_pown_f16(_Float16, int); typedef _Float16 __2f16 __attribute__((ext_vector_type(2))); typedef short __2i16 __attribute__((ext_vector_type(2))); @@ -313,14 +318,17 @@ __device__ __attribute__((pure)) __2f16 __ocml_log2_2f16(__2f16); __device__ inline __2f16 __llvm_amdgcn_rcp_2f16(__2f16 __x) // Not currently exposed by ROCDL. { - return __2f16{__llvm_amdgcn_rcp_f16(__x.x), __llvm_amdgcn_rcp_f16(__x.y)}; + return (__2f16)(__llvm_amdgcn_rcp_f16(__x.x), __llvm_amdgcn_rcp_f16(__x.y)); } __device__ __attribute__((const)) __2f16 __ocml_rint_2f16(__2f16); __device__ __attribute__((const)) __2f16 __ocml_rsqrt_2f16(__2f16); __device__ __2f16 __ocml_sin_2f16(__2f16); __device__ __attribute__((const)) __2f16 __ocml_sqrt_2f16(__2f16); __device__ __attribute__((const)) __2f16 __ocml_trunc_2f16(__2f16); +__device__ __attribute__((const)) __2f16 __ocml_pown_2f16(__2f16, __2i16); +#ifdef __cplusplus } // extern "C" +#endif #endif // __CLANG_HIP_LIBDEVICE_DECLARES_H__ diff --git a/lib/include/__clang_hip_math.h b/lib/include/__clang_hip_math.h index cf7014b9ae..14d91c66b3 100644 --- a/lib/include/__clang_hip_math.h +++ b/lib/include/__clang_hip_math.h @@ -1,4 +1,4 @@ -/*===---- __clang_hip_math.h - HIP math decls -------------------------------=== +/*===---- __clang_hip_math.h - Device-side HIP math support ----------------=== * * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. * See https://llvm.org/LICENSE.txt for license information. @@ -6,24 +6,57 @@ * *===-----------------------------------------------------------------------=== */ - #ifndef __CLANG_HIP_MATH_H__ #define __CLANG_HIP_MATH_H__ +#if !defined(__HIP__) +#error "This file is for HIP and OpenMP AMDGCN device compilation only." 
+#endif + +#if defined(__cplusplus) #include +#endif #include -#include #include #pragma push_macro("__DEVICE__") -#pragma push_macro("__RETURN_TYPE") +#define __DEVICE__ static __device__ inline __attribute__((always_inline)) -// to be consistent with __clang_cuda_math_forward_declares -#define __DEVICE__ static __device__ +// A few functions return bool type starting only in C++11. +#pragma push_macro("__RETURN_TYPE") +#if defined(__cplusplus) #define __RETURN_TYPE bool +#else +#define __RETURN_TYPE int +#endif + +#if defined (__cplusplus) && __cplusplus < 201103L +// emulate static_assert on type sizes +template +struct __compare_result{}; +template<> +struct __compare_result { + static const bool valid; +}; __DEVICE__ -inline uint64_t __make_mantissa_base8(const char *__tagp) { +void __suppress_unused_warning(bool b){}; +template +__DEVICE__ void __static_assert_equal_size() { + __suppress_unused_warning(__compare_result::valid); +} + +#define __static_assert_type_size_equal(A, B) \ + __static_assert_equal_size() + +#else +#define __static_assert_type_size_equal(A,B) \ + static_assert((A) == (B), "") + +#endif + +__DEVICE__ +uint64_t __make_mantissa_base8(const char *__tagp) { uint64_t __r = 0; while (__tagp) { char __tmp = *__tagp; @@ -40,7 +73,7 @@ inline uint64_t __make_mantissa_base8(const char *__tagp) { } __DEVICE__ -inline uint64_t __make_mantissa_base10(const char *__tagp) { +uint64_t __make_mantissa_base10(const char *__tagp) { uint64_t __r = 0; while (__tagp) { char __tmp = *__tagp; @@ -57,7 +90,7 @@ inline uint64_t __make_mantissa_base10(const char *__tagp) { } __DEVICE__ -inline uint64_t __make_mantissa_base16(const char *__tagp) { +uint64_t __make_mantissa_base16(const char *__tagp) { uint64_t __r = 0; while (__tagp) { char __tmp = *__tagp; @@ -78,7 +111,7 @@ inline uint64_t __make_mantissa_base16(const char *__tagp) { } __DEVICE__ -inline uint64_t __make_mantissa(const char *__tagp) { +uint64_t __make_mantissa(const char *__tagp) { if (!__tagp) return 0u; @@ -95,78 +128,124 @@ inline uint64_t __make_mantissa(const char *__tagp) { } // BEGIN FLOAT +#if defined(__cplusplus) __DEVICE__ -inline float abs(float __x) { return __ocml_fabs_f32(__x); } -__DEVICE__ -inline float acosf(float __x) { return __ocml_acos_f32(__x); } -__DEVICE__ -inline float acoshf(float __x) { return __ocml_acosh_f32(__x); } -__DEVICE__ -inline float asinf(float __x) { return __ocml_asin_f32(__x); } -__DEVICE__ -inline float asinhf(float __x) { return __ocml_asinh_f32(__x); } -__DEVICE__ -inline float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); } -__DEVICE__ -inline float atanf(float __x) { return __ocml_atan_f32(__x); } -__DEVICE__ -inline float atanhf(float __x) { return __ocml_atanh_f32(__x); } -__DEVICE__ -inline float cbrtf(float __x) { return __ocml_cbrt_f32(__x); } -__DEVICE__ -inline float ceilf(float __x) { return __ocml_ceil_f32(__x); } -__DEVICE__ -inline float copysignf(float __x, float __y) { - return __ocml_copysign_f32(__x, __y); +int abs(int __x) { + int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1); + return (__x ^ __sgn) - __sgn; } __DEVICE__ -inline float cosf(float __x) { return __ocml_cos_f32(__x); } +long labs(long __x) { + long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1); + return (__x ^ __sgn) - __sgn; +} __DEVICE__ -inline float coshf(float __x) { return __ocml_cosh_f32(__x); } +long long llabs(long long __x) { + long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1); + return (__x ^ __sgn) - __sgn; +} +#endif + __DEVICE__ -inline float cospif(float 
__x) { return __ocml_cospi_f32(__x); } +float acosf(float __x) { return __ocml_acos_f32(__x); } + __DEVICE__ -inline float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); } +float acoshf(float __x) { return __ocml_acosh_f32(__x); } + __DEVICE__ -inline float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); } +float asinf(float __x) { return __ocml_asin_f32(__x); } + __DEVICE__ -inline float erfcf(float __x) { return __ocml_erfc_f32(__x); } +float asinhf(float __x) { return __ocml_asinh_f32(__x); } + __DEVICE__ -inline float erfcinvf(float __x) { return __ocml_erfcinv_f32(__x); } +float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); } + __DEVICE__ -inline float erfcxf(float __x) { return __ocml_erfcx_f32(__x); } +float atanf(float __x) { return __ocml_atan_f32(__x); } + __DEVICE__ -inline float erff(float __x) { return __ocml_erf_f32(__x); } +float atanhf(float __x) { return __ocml_atanh_f32(__x); } + __DEVICE__ -inline float erfinvf(float __x) { return __ocml_erfinv_f32(__x); } +float cbrtf(float __x) { return __ocml_cbrt_f32(__x); } + __DEVICE__ -inline float exp10f(float __x) { return __ocml_exp10_f32(__x); } +float ceilf(float __x) { return __ocml_ceil_f32(__x); } + __DEVICE__ -inline float exp2f(float __x) { return __ocml_exp2_f32(__x); } +float copysignf(float __x, float __y) { return __ocml_copysign_f32(__x, __y); } + __DEVICE__ -inline float expf(float __x) { return __ocml_exp_f32(__x); } +float cosf(float __x) { return __ocml_cos_f32(__x); } + __DEVICE__ -inline float expm1f(float __x) { return __ocml_expm1_f32(__x); } +float coshf(float __x) { return __ocml_cosh_f32(__x); } + __DEVICE__ -inline float fabsf(float __x) { return __ocml_fabs_f32(__x); } +float cospif(float __x) { return __ocml_cospi_f32(__x); } + __DEVICE__ -inline float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); } +float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); } + __DEVICE__ -inline float fdividef(float __x, float __y) { return __x / __y; } +float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); } + __DEVICE__ -inline float floorf(float __x) { return __ocml_floor_f32(__x); } +float erfcf(float __x) { return __ocml_erfc_f32(__x); } + __DEVICE__ -inline float fmaf(float __x, float __y, float __z) { +float erfcinvf(float __x) { return __ocml_erfcinv_f32(__x); } + +__DEVICE__ +float erfcxf(float __x) { return __ocml_erfcx_f32(__x); } + +__DEVICE__ +float erff(float __x) { return __ocml_erf_f32(__x); } + +__DEVICE__ +float erfinvf(float __x) { return __ocml_erfinv_f32(__x); } + +__DEVICE__ +float exp10f(float __x) { return __ocml_exp10_f32(__x); } + +__DEVICE__ +float exp2f(float __x) { return __ocml_exp2_f32(__x); } + +__DEVICE__ +float expf(float __x) { return __ocml_exp_f32(__x); } + +__DEVICE__ +float expm1f(float __x) { return __ocml_expm1_f32(__x); } + +__DEVICE__ +float fabsf(float __x) { return __ocml_fabs_f32(__x); } + +__DEVICE__ +float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); } + +__DEVICE__ +float fdividef(float __x, float __y) { return __x / __y; } + +__DEVICE__ +float floorf(float __x) { return __ocml_floor_f32(__x); } + +__DEVICE__ +float fmaf(float __x, float __y, float __z) { return __ocml_fma_f32(__x, __y, __z); } + __DEVICE__ -inline float fmaxf(float __x, float __y) { return __ocml_fmax_f32(__x, __y); } +float fmaxf(float __x, float __y) { return __ocml_fmax_f32(__x, __y); } + __DEVICE__ -inline float fminf(float __x, float __y) { return __ocml_fmin_f32(__x, __y); } +float fminf(float __x, float __y) { 
return __ocml_fmin_f32(__x, __y); } + __DEVICE__ -inline float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); } +float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); } + __DEVICE__ -inline float frexpf(float __x, int *__nptr) { +float frexpf(float __x, int *__nptr) { int __tmp; float __r = __ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp); @@ -174,24 +253,31 @@ inline float frexpf(float __x, int *__nptr) { return __r; } + __DEVICE__ -inline float hypotf(float __x, float __y) { return __ocml_hypot_f32(__x, __y); } +float hypotf(float __x, float __y) { return __ocml_hypot_f32(__x, __y); } + __DEVICE__ -inline int ilogbf(float __x) { return __ocml_ilogb_f32(__x); } +int ilogbf(float __x) { return __ocml_ilogb_f32(__x); } + __DEVICE__ -inline __RETURN_TYPE isfinite(float __x) { return __ocml_isfinite_f32(__x); } +__RETURN_TYPE __finitef(float __x) { return __ocml_isfinite_f32(__x); } + __DEVICE__ -inline __RETURN_TYPE isinf(float __x) { return __ocml_isinf_f32(__x); } +__RETURN_TYPE __isinff(float __x) { return __ocml_isinf_f32(__x); } + __DEVICE__ -inline __RETURN_TYPE isnan(float __x) { return __ocml_isnan_f32(__x); } +__RETURN_TYPE __isnanf(float __x) { return __ocml_isnan_f32(__x); } + __DEVICE__ -inline float j0f(float __x) { return __ocml_j0_f32(__x); } +float j0f(float __x) { return __ocml_j0_f32(__x); } + __DEVICE__ -inline float j1f(float __x) { return __ocml_j1_f32(__x); } +float j1f(float __x) { return __ocml_j1_f32(__x); } + __DEVICE__ -inline float jnf(int __n, - float __x) { // TODO: we could use Ahmes multiplication - // and the Miller & Brown algorithm +float jnf(int __n, float __x) { // TODO: we could use Ahmes multiplication + // and the Miller & Brown algorithm // for linear recurrences to get O(log n) steps, but it's unclear if // it'd be beneficial in this case. 
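// The hunk below evaluates J_n(x) with the standard three-term forward
// recurrence, seeded from j0f()/j1f():
//   J_{n+1}(x) = (2*n/x) * J_n(x) - J_{n-1}(x)
// Each loop iteration computes __x2 = (2 * __i) / __x * __x1 - __x0 and
// shifts the (__x0, __x1) window forward, so __x1 holds J_n on exit.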
if (__n == 0) @@ -209,50 +295,61 @@ inline float jnf(int __n, return __x1; } + __DEVICE__ -inline float ldexpf(float __x, int __e) { return __ocml_ldexp_f32(__x, __e); } +float ldexpf(float __x, int __e) { return __ocml_ldexp_f32(__x, __e); } + __DEVICE__ -inline float lgammaf(float __x) { return __ocml_lgamma_f32(__x); } +float lgammaf(float __x) { return __ocml_lgamma_f32(__x); } + __DEVICE__ -inline long long int llrintf(float __x) { return __ocml_rint_f32(__x); } +long long int llrintf(float __x) { return __ocml_rint_f32(__x); } + __DEVICE__ -inline long long int llroundf(float __x) { return __ocml_round_f32(__x); } +long long int llroundf(float __x) { return __ocml_round_f32(__x); } + __DEVICE__ -inline float log10f(float __x) { return __ocml_log10_f32(__x); } +float log10f(float __x) { return __ocml_log10_f32(__x); } + __DEVICE__ -inline float log1pf(float __x) { return __ocml_log1p_f32(__x); } +float log1pf(float __x) { return __ocml_log1p_f32(__x); } + __DEVICE__ -inline float log2f(float __x) { return __ocml_log2_f32(__x); } +float log2f(float __x) { return __ocml_log2_f32(__x); } + __DEVICE__ -inline float logbf(float __x) { return __ocml_logb_f32(__x); } +float logbf(float __x) { return __ocml_logb_f32(__x); } + __DEVICE__ -inline float logf(float __x) { return __ocml_log_f32(__x); } +float logf(float __x) { return __ocml_log_f32(__x); } + __DEVICE__ -inline long int lrintf(float __x) { return __ocml_rint_f32(__x); } +long int lrintf(float __x) { return __ocml_rint_f32(__x); } + __DEVICE__ -inline long int lroundf(float __x) { return __ocml_round_f32(__x); } +long int lroundf(float __x) { return __ocml_round_f32(__x); } + __DEVICE__ -inline float modff(float __x, float *__iptr) { +float modff(float __x, float *__iptr) { float __tmp; float __r = __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); *__iptr = __tmp; - return __r; } + __DEVICE__ -inline float nanf(const char *__tagp) { +float nanf(const char *__tagp) { union { float val; struct ieee_float { - uint32_t mantissa : 22; - uint32_t quiet : 1; - uint32_t exponent : 8; - uint32_t sign : 1; + unsigned int mantissa : 22; + unsigned int quiet : 1; + unsigned int exponent : 8; + unsigned int sign : 1; } bits; - - static_assert(sizeof(float) == sizeof(ieee_float), ""); } __tmp; + __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits)); __tmp.bits.sign = 0u; __tmp.bits.exponent = ~0u; @@ -261,28 +358,34 @@ inline float nanf(const char *__tagp) { return __tmp.val; } + __DEVICE__ -inline float nearbyintf(float __x) { return __ocml_nearbyint_f32(__x); } +float nearbyintf(float __x) { return __ocml_nearbyint_f32(__x); } + __DEVICE__ -inline float nextafterf(float __x, float __y) { +float nextafterf(float __x, float __y) { return __ocml_nextafter_f32(__x, __y); } + __DEVICE__ -inline float norm3df(float __x, float __y, float __z) { +float norm3df(float __x, float __y, float __z) { return __ocml_len3_f32(__x, __y, __z); } + __DEVICE__ -inline float norm4df(float __x, float __y, float __z, float __w) { +float norm4df(float __x, float __y, float __z, float __w) { return __ocml_len4_f32(__x, __y, __z, __w); } + __DEVICE__ -inline float normcdff(float __x) { return __ocml_ncdf_f32(__x); } +float normcdff(float __x) { return __ocml_ncdf_f32(__x); } + __DEVICE__ -inline float normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); } +float normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); } + __DEVICE__ -inline float -normf(int __dim, - const float *__a) { // TODO: placeholder until OCML adds 
support. +float normf(int __dim, + const float *__a) { // TODO: placeholder until OCML adds support. float __r = 0; while (__dim--) { __r += __a[0] * __a[0]; @@ -291,16 +394,23 @@ normf(int __dim, return __ocml_sqrt_f32(__r); } + __DEVICE__ -inline float powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); } +float powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); } + __DEVICE__ -inline float rcbrtf(float __x) { return __ocml_rcbrt_f32(__x); } +float powif(float __x, int __y) { return __ocml_pown_f32(__x, __y); } + __DEVICE__ -inline float remainderf(float __x, float __y) { +float rcbrtf(float __x) { return __ocml_rcbrt_f32(__x); } + +__DEVICE__ +float remainderf(float __x, float __y) { return __ocml_remainder_f32(__x, __y); } + __DEVICE__ -inline float remquof(float __x, float __y, int *__quo) { +float remquof(float __x, float __y, int *__quo) { int __tmp; float __r = __ocml_remquo_f32( __x, __y, (__attribute__((address_space(5))) int *)&__tmp); @@ -308,25 +418,26 @@ inline float remquof(float __x, float __y, int *__quo) { return __r; } + __DEVICE__ -inline float rhypotf(float __x, float __y) { - return __ocml_rhypot_f32(__x, __y); -} +float rhypotf(float __x, float __y) { return __ocml_rhypot_f32(__x, __y); } + __DEVICE__ -inline float rintf(float __x) { return __ocml_rint_f32(__x); } +float rintf(float __x) { return __ocml_rint_f32(__x); } + __DEVICE__ -inline float rnorm3df(float __x, float __y, float __z) { +float rnorm3df(float __x, float __y, float __z) { return __ocml_rlen3_f32(__x, __y, __z); } __DEVICE__ -inline float rnorm4df(float __x, float __y, float __z, float __w) { +float rnorm4df(float __x, float __y, float __z, float __w) { return __ocml_rlen4_f32(__x, __y, __z, __w); } + __DEVICE__ -inline float -rnormf(int __dim, - const float *__a) { // TODO: placeholder until OCML adds support. +float rnormf(int __dim, + const float *__a) { // TODO: placeholder until OCML adds support. float __r = 0; while (__dim--) { __r += __a[0] * __a[0]; @@ -335,59 +446,74 @@ rnormf(int __dim, return __ocml_rsqrt_f32(__r); } + __DEVICE__ -inline float roundf(float __x) { return __ocml_round_f32(__x); } +float roundf(float __x) { return __ocml_round_f32(__x); } + __DEVICE__ -inline float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); } +float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); } + __DEVICE__ -inline float scalblnf(float __x, long int __n) { +float scalblnf(float __x, long int __n) { return (__n < INT_MAX) ? 
__ocml_scalbn_f32(__x, __n) : __ocml_scalb_f32(__x, __n); } -__DEVICE__ -inline float scalbnf(float __x, int __n) { return __ocml_scalbn_f32(__x, __n); } -__DEVICE__ -inline __RETURN_TYPE signbit(float __x) { return __ocml_signbit_f32(__x); } -__DEVICE__ -inline void sincosf(float __x, float *__sinptr, float *__cosptr) { - float __tmp; +__DEVICE__ +float scalbnf(float __x, int __n) { return __ocml_scalbn_f32(__x, __n); } + +__DEVICE__ +__RETURN_TYPE __signbitf(float __x) { return __ocml_signbit_f32(__x); } + +__DEVICE__ +void sincosf(float __x, float *__sinptr, float *__cosptr) { + float __tmp; *__sinptr = __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); *__cosptr = __tmp; } -__DEVICE__ -inline void sincospif(float __x, float *__sinptr, float *__cosptr) { - float __tmp; +__DEVICE__ +void sincospif(float __x, float *__sinptr, float *__cosptr) { + float __tmp; *__sinptr = __ocml_sincospi_f32( __x, (__attribute__((address_space(5))) float *)&__tmp); *__cosptr = __tmp; } + __DEVICE__ -inline float sinf(float __x) { return __ocml_sin_f32(__x); } +float sinf(float __x) { return __ocml_sin_f32(__x); } + __DEVICE__ -inline float sinhf(float __x) { return __ocml_sinh_f32(__x); } +float sinhf(float __x) { return __ocml_sinh_f32(__x); } + __DEVICE__ -inline float sinpif(float __x) { return __ocml_sinpi_f32(__x); } +float sinpif(float __x) { return __ocml_sinpi_f32(__x); } + __DEVICE__ -inline float sqrtf(float __x) { return __ocml_sqrt_f32(__x); } +float sqrtf(float __x) { return __ocml_sqrt_f32(__x); } + __DEVICE__ -inline float tanf(float __x) { return __ocml_tan_f32(__x); } +float tanf(float __x) { return __ocml_tan_f32(__x); } + __DEVICE__ -inline float tanhf(float __x) { return __ocml_tanh_f32(__x); } +float tanhf(float __x) { return __ocml_tanh_f32(__x); } + __DEVICE__ -inline float tgammaf(float __x) { return __ocml_tgamma_f32(__x); } +float tgammaf(float __x) { return __ocml_tgamma_f32(__x); } + __DEVICE__ -inline float truncf(float __x) { return __ocml_trunc_f32(__x); } +float truncf(float __x) { return __ocml_trunc_f32(__x); } + __DEVICE__ -inline float y0f(float __x) { return __ocml_y0_f32(__x); } +float y0f(float __x) { return __ocml_y0_f32(__x); } + __DEVICE__ -inline float y1f(float __x) { return __ocml_y1_f32(__x); } +float y1f(float __x) { return __ocml_y1_f32(__x); } + __DEVICE__ -inline float ynf(int __n, - float __x) { // TODO: we could use Ahmes multiplication - // and the Miller & Brown algorithm +float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication + // and the Miller & Brown algorithm // for linear recurrences to get O(log n) steps, but it's unclear if // it'd be beneficial in this case. Placeholder until OCML adds // support. 
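// The intrinsics hunks that follow are reorganized so that each rounding
// family (_rd/_rn/_ru/_rz) lives under a single OCML_BASIC_ROUNDED_OPERATIONS
// guard, with only the round-to-nearest variant kept as a plain-arithmetic
// fallback when the correctly-rounded OCML entry points are unavailable.
// A minimal sketch of that shape, using hypothetical MY_* and my_add_* names
// in place of the __ocml_* builtins:

#if defined MY_BASIC_ROUNDED_OPERATIONS
static inline float my_fadd_rd(float x, float y) { return my_add_rtn(x, y); }
static inline float my_fadd_rn(float x, float y) { return my_add_rte(x, y); }
static inline float my_fadd_ru(float x, float y) { return my_add_rtp(x, y); }
static inline float my_fadd_rz(float x, float y) { return my_add_rtz(x, y); }
#else
// Without correctly-rounded primitives, only round-to-nearest is defined,
// in terms of the ordinary + operator.
static inline float my_fadd_rn(float x, float y) { return x + y; }
#endif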
@@ -408,290 +534,343 @@ inline float ynf(int __n, } // BEGIN INTRINSICS + __DEVICE__ -inline float __cosf(float __x) { return __ocml_native_cos_f32(__x); } +float __cosf(float __x) { return __ocml_native_cos_f32(__x); } + __DEVICE__ -inline float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); } +float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); } + __DEVICE__ -inline float __expf(float __x) { return __ocml_native_exp_f32(__x); } +float __expf(float __x) { return __ocml_native_exp_f32(__x); } + #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline float __fadd_rd(float __x, float __y) { - return __ocml_add_rtn_f32(__x, __y); -} +float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); } +__DEVICE__ +float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); } +__DEVICE__ +float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); } +__DEVICE__ +float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); } +#else +__DEVICE__ +float __fadd_rn(float __x, float __y) { return __x + __y; } #endif -__DEVICE__ -inline float __fadd_rn(float __x, float __y) { return __x + __y; } + #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline float __fadd_ru(float __x, float __y) { - return __ocml_add_rtp_f32(__x, __y); -} +float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); } __DEVICE__ -inline float __fadd_rz(float __x, float __y) { - return __ocml_add_rtz_f32(__x, __y); -} +float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); } __DEVICE__ -inline float __fdiv_rd(float __x, float __y) { - return __ocml_div_rtn_f32(__x, __y); -} +float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); } +__DEVICE__ +float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); } +#else +__DEVICE__ +float __fdiv_rn(float __x, float __y) { return __x / __y; } #endif + __DEVICE__ -inline float __fdiv_rn(float __x, float __y) { return __x / __y; } +float __fdividef(float __x, float __y) { return __x / __y; } + #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline float __fdiv_ru(float __x, float __y) { - return __ocml_div_rtp_f32(__x, __y); -} -__DEVICE__ -inline float __fdiv_rz(float __x, float __y) { - return __ocml_div_rtz_f32(__x, __y); -} -#endif -__DEVICE__ -inline float __fdividef(float __x, float __y) { return __x / __y; } -#if defined OCML_BASIC_ROUNDED_OPERATIONS -__DEVICE__ -inline float __fmaf_rd(float __x, float __y, float __z) { +float __fmaf_rd(float __x, float __y, float __z) { return __ocml_fma_rtn_f32(__x, __y, __z); } -#endif __DEVICE__ -inline float __fmaf_rn(float __x, float __y, float __z) { - return __ocml_fma_f32(__x, __y, __z); +float __fmaf_rn(float __x, float __y, float __z) { + return __ocml_fma_rte_f32(__x, __y, __z); } -#if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline float __fmaf_ru(float __x, float __y, float __z) { +float __fmaf_ru(float __x, float __y, float __z) { return __ocml_fma_rtp_f32(__x, __y, __z); } __DEVICE__ -inline float __fmaf_rz(float __x, float __y, float __z) { +float __fmaf_rz(float __x, float __y, float __z) { return __ocml_fma_rtz_f32(__x, __y, __z); } +#else __DEVICE__ -inline float __fmul_rd(float __x, float __y) { - return __ocml_mul_rtn_f32(__x, __y); +float __fmaf_rn(float __x, float __y, float __z) { + return __ocml_fma_f32(__x, __y, __z); } #endif -__DEVICE__ -inline float __fmul_rn(float __x, float __y) { return __x * __y; } + #if defined 
OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline float __fmul_ru(float __x, float __y) { - return __ocml_mul_rtp_f32(__x, __y); -} +float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); } __DEVICE__ -inline float __fmul_rz(float __x, float __y) { - return __ocml_mul_rtz_f32(__x, __y); -} +float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); } __DEVICE__ -inline float __frcp_rd(float __x) { return __llvm_amdgcn_rcp_f32(__x); } +float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); } +__DEVICE__ +float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); } +#else +__DEVICE__ +float __fmul_rn(float __x, float __y) { return __x * __y; } #endif -__DEVICE__ -inline float __frcp_rn(float __x) { return __llvm_amdgcn_rcp_f32(__x); } + #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline float __frcp_ru(float __x) { return __llvm_amdgcn_rcp_f32(__x); } +float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); } __DEVICE__ -inline float __frcp_rz(float __x) { return __llvm_amdgcn_rcp_f32(__x); } +float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); } +__DEVICE__ +float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); } +__DEVICE__ +float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); } +#else +__DEVICE__ +float __frcp_rn(float __x) { return 1.0f / __x; } #endif + __DEVICE__ -inline float __frsqrt_rn(float __x) { return __llvm_amdgcn_rsq_f32(__x); } +float __frsqrt_rn(float __x) { return __llvm_amdgcn_rsq_f32(__x); } + #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); } -#endif +float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); } __DEVICE__ -inline float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); } +float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); } +__DEVICE__ +float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); } +__DEVICE__ +float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); } +#else +__DEVICE__ +float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); } +#endif + #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); } +float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); } __DEVICE__ -inline float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); } +float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); } __DEVICE__ -inline float __fsub_rd(float __x, float __y) { - return __ocml_sub_rtn_f32(__x, __y); -} +float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); } +__DEVICE__ +float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); } +#else +__DEVICE__ +float __fsub_rn(float __x, float __y) { return __x - __y; } #endif + __DEVICE__ -inline float __fsub_rn(float __x, float __y) { return __x - __y; } -#if defined OCML_BASIC_ROUNDED_OPERATIONS +float __log10f(float __x) { return __ocml_native_log10_f32(__x); } + __DEVICE__ -inline float __fsub_ru(float __x, float __y) { - return __ocml_sub_rtp_f32(__x, __y); -} +float __log2f(float __x) { return __ocml_native_log2_f32(__x); } + __DEVICE__ -inline float __fsub_rz(float __x, float __y) { - return __ocml_sub_rtz_f32(__x, __y); -} -#endif +float __logf(float __x) { return __ocml_native_log_f32(__x); } + __DEVICE__ -inline float __log10f(float __x) { return __ocml_native_log10_f32(__x); } +float 
__powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); } + __DEVICE__ -inline float __log2f(float __x) { return __ocml_native_log2_f32(__x); } +float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); } + __DEVICE__ -inline float __logf(float __x) { return __ocml_native_log_f32(__x); } -__DEVICE__ -inline float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); } -__DEVICE__ -inline float __saturatef(float __x) { - return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); -} -__DEVICE__ -inline void __sincosf(float __x, float *__sinptr, float *__cosptr) { +void __sincosf(float __x, float *__sinptr, float *__cosptr) { *__sinptr = __ocml_native_sin_f32(__x); *__cosptr = __ocml_native_cos_f32(__x); } + __DEVICE__ -inline float __sinf(float __x) { return __ocml_native_sin_f32(__x); } +float __sinf(float __x) { return __ocml_native_sin_f32(__x); } + __DEVICE__ -inline float __tanf(float __x) { return __ocml_tan_f32(__x); } +float __tanf(float __x) { return __ocml_tan_f32(__x); } // END INTRINSICS // END FLOAT // BEGIN DOUBLE __DEVICE__ -inline double abs(double __x) { return __ocml_fabs_f64(__x); } +double acos(double __x) { return __ocml_acos_f64(__x); } + __DEVICE__ -inline double acos(double __x) { return __ocml_acos_f64(__x); } +double acosh(double __x) { return __ocml_acosh_f64(__x); } + __DEVICE__ -inline double acosh(double __x) { return __ocml_acosh_f64(__x); } +double asin(double __x) { return __ocml_asin_f64(__x); } + __DEVICE__ -inline double asin(double __x) { return __ocml_asin_f64(__x); } +double asinh(double __x) { return __ocml_asinh_f64(__x); } + __DEVICE__ -inline double asinh(double __x) { return __ocml_asinh_f64(__x); } +double atan(double __x) { return __ocml_atan_f64(__x); } + __DEVICE__ -inline double atan(double __x) { return __ocml_atan_f64(__x); } +double atan2(double __x, double __y) { return __ocml_atan2_f64(__x, __y); } + __DEVICE__ -inline double atan2(double __x, double __y) { - return __ocml_atan2_f64(__x, __y); -} +double atanh(double __x) { return __ocml_atanh_f64(__x); } + __DEVICE__ -inline double atanh(double __x) { return __ocml_atanh_f64(__x); } +double cbrt(double __x) { return __ocml_cbrt_f64(__x); } + __DEVICE__ -inline double cbrt(double __x) { return __ocml_cbrt_f64(__x); } +double ceil(double __x) { return __ocml_ceil_f64(__x); } + __DEVICE__ -inline double ceil(double __x) { return __ocml_ceil_f64(__x); } -__DEVICE__ -inline double copysign(double __x, double __y) { +double copysign(double __x, double __y) { return __ocml_copysign_f64(__x, __y); } + __DEVICE__ -inline double cos(double __x) { return __ocml_cos_f64(__x); } +double cos(double __x) { return __ocml_cos_f64(__x); } + __DEVICE__ -inline double cosh(double __x) { return __ocml_cosh_f64(__x); } +double cosh(double __x) { return __ocml_cosh_f64(__x); } + __DEVICE__ -inline double cospi(double __x) { return __ocml_cospi_f64(__x); } +double cospi(double __x) { return __ocml_cospi_f64(__x); } + __DEVICE__ -inline double cyl_bessel_i0(double __x) { return __ocml_i0_f64(__x); } +double cyl_bessel_i0(double __x) { return __ocml_i0_f64(__x); } + __DEVICE__ -inline double cyl_bessel_i1(double __x) { return __ocml_i1_f64(__x); } +double cyl_bessel_i1(double __x) { return __ocml_i1_f64(__x); } + __DEVICE__ -inline double erf(double __x) { return __ocml_erf_f64(__x); } +double erf(double __x) { return __ocml_erf_f64(__x); } + __DEVICE__ -inline double erfc(double __x) { return __ocml_erfc_f64(__x); } +double erfc(double __x) { return __ocml_erfc_f64(__x); } + __DEVICE__ 
-inline double erfcinv(double __x) { return __ocml_erfcinv_f64(__x); } +double erfcinv(double __x) { return __ocml_erfcinv_f64(__x); } + __DEVICE__ -inline double erfcx(double __x) { return __ocml_erfcx_f64(__x); } +double erfcx(double __x) { return __ocml_erfcx_f64(__x); } + __DEVICE__ -inline double erfinv(double __x) { return __ocml_erfinv_f64(__x); } +double erfinv(double __x) { return __ocml_erfinv_f64(__x); } + __DEVICE__ -inline double exp(double __x) { return __ocml_exp_f64(__x); } +double exp(double __x) { return __ocml_exp_f64(__x); } + __DEVICE__ -inline double exp10(double __x) { return __ocml_exp10_f64(__x); } +double exp10(double __x) { return __ocml_exp10_f64(__x); } + __DEVICE__ -inline double exp2(double __x) { return __ocml_exp2_f64(__x); } +double exp2(double __x) { return __ocml_exp2_f64(__x); } + __DEVICE__ -inline double expm1(double __x) { return __ocml_expm1_f64(__x); } +double expm1(double __x) { return __ocml_expm1_f64(__x); } + __DEVICE__ -inline double fabs(double __x) { return __ocml_fabs_f64(__x); } +double fabs(double __x) { return __ocml_fabs_f64(__x); } + __DEVICE__ -inline double fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); } +double fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); } + __DEVICE__ -inline double floor(double __x) { return __ocml_floor_f64(__x); } +double floor(double __x) { return __ocml_floor_f64(__x); } + __DEVICE__ -inline double fma(double __x, double __y, double __z) { +double fma(double __x, double __y, double __z) { return __ocml_fma_f64(__x, __y, __z); } + __DEVICE__ -inline double fmax(double __x, double __y) { return __ocml_fmax_f64(__x, __y); } +double fmax(double __x, double __y) { return __ocml_fmax_f64(__x, __y); } + __DEVICE__ -inline double fmin(double __x, double __y) { return __ocml_fmin_f64(__x, __y); } +double fmin(double __x, double __y) { return __ocml_fmin_f64(__x, __y); } + __DEVICE__ -inline double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); } +double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); } + __DEVICE__ -inline double frexp(double __x, int *__nptr) { +double frexp(double __x, int *__nptr) { int __tmp; double __r = __ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp); *__nptr = __tmp; - return __r; } + __DEVICE__ -inline double hypot(double __x, double __y) { - return __ocml_hypot_f64(__x, __y); -} +double hypot(double __x, double __y) { return __ocml_hypot_f64(__x, __y); } + __DEVICE__ -inline int ilogb(double __x) { return __ocml_ilogb_f64(__x); } +int ilogb(double __x) { return __ocml_ilogb_f64(__x); } + __DEVICE__ -inline __RETURN_TYPE isfinite(double __x) { return __ocml_isfinite_f64(__x); } +__RETURN_TYPE __finite(double __x) { return __ocml_isfinite_f64(__x); } + __DEVICE__ -inline __RETURN_TYPE isinf(double __x) { return __ocml_isinf_f64(__x); } +__RETURN_TYPE __isinf(double __x) { return __ocml_isinf_f64(__x); } + __DEVICE__ -inline __RETURN_TYPE isnan(double __x) { return __ocml_isnan_f64(__x); } +__RETURN_TYPE __isnan(double __x) { return __ocml_isnan_f64(__x); } + __DEVICE__ -inline double j0(double __x) { return __ocml_j0_f64(__x); } +double j0(double __x) { return __ocml_j0_f64(__x); } + __DEVICE__ -inline double j1(double __x) { return __ocml_j1_f64(__x); } +double j1(double __x) { return __ocml_j1_f64(__x); } + __DEVICE__ -inline double jn(int __n, - double __x) { // TODO: we could use Ahmes multiplication - // and the Miller & Brown algorithm +double jn(int __n, double __x) { // TODO: we could use 
Ahmes multiplication + // and the Miller & Brown algorithm // for linear recurrences to get O(log n) steps, but it's unclear if // it'd be beneficial in this case. Placeholder until OCML adds // support. if (__n == 0) - return j0f(__x); + return j0(__x); if (__n == 1) - return j1f(__x); + return j1(__x); - double __x0 = j0f(__x); - double __x1 = j1f(__x); + double __x0 = j0(__x); + double __x1 = j1(__x); for (int __i = 1; __i < __n; ++__i) { double __x2 = (2 * __i) / __x * __x1 - __x0; __x0 = __x1; __x1 = __x2; } - return __x1; } + __DEVICE__ -inline double ldexp(double __x, int __e) { return __ocml_ldexp_f64(__x, __e); } +double ldexp(double __x, int __e) { return __ocml_ldexp_f64(__x, __e); } + __DEVICE__ -inline double lgamma(double __x) { return __ocml_lgamma_f64(__x); } +double lgamma(double __x) { return __ocml_lgamma_f64(__x); } + __DEVICE__ -inline long long int llrint(double __x) { return __ocml_rint_f64(__x); } +long long int llrint(double __x) { return __ocml_rint_f64(__x); } + __DEVICE__ -inline long long int llround(double __x) { return __ocml_round_f64(__x); } +long long int llround(double __x) { return __ocml_round_f64(__x); } + __DEVICE__ -inline double log(double __x) { return __ocml_log_f64(__x); } +double log(double __x) { return __ocml_log_f64(__x); } + __DEVICE__ -inline double log10(double __x) { return __ocml_log10_f64(__x); } +double log10(double __x) { return __ocml_log10_f64(__x); } + __DEVICE__ -inline double log1p(double __x) { return __ocml_log1p_f64(__x); } +double log1p(double __x) { return __ocml_log1p_f64(__x); } + __DEVICE__ -inline double log2(double __x) { return __ocml_log2_f64(__x); } +double log2(double __x) { return __ocml_log2_f64(__x); } + __DEVICE__ -inline double logb(double __x) { return __ocml_logb_f64(__x); } +double logb(double __x) { return __ocml_logb_f64(__x); } + __DEVICE__ -inline long int lrint(double __x) { return __ocml_rint_f64(__x); } +long int lrint(double __x) { return __ocml_rint_f64(__x); } + __DEVICE__ -inline long int lround(double __x) { return __ocml_round_f64(__x); } +long int lround(double __x) { return __ocml_round_f64(__x); } + __DEVICE__ -inline double modf(double __x, double *__iptr) { +double modf(double __x, double *__iptr) { double __tmp; double __r = __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp); @@ -699,8 +878,9 @@ inline double modf(double __x, double *__iptr) { return __r; } + __DEVICE__ -inline double nan(const char *__tagp) { +double nan(const char *__tagp) { #if !_WIN32 union { double val; @@ -710,8 +890,8 @@ inline double nan(const char *__tagp) { uint32_t exponent : 11; uint32_t sign : 1; } bits; - static_assert(sizeof(double) == sizeof(ieee_double), ""); } __tmp; + __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits)); __tmp.bits.sign = 0u; __tmp.bits.exponent = ~0u; @@ -720,22 +900,24 @@ inline double nan(const char *__tagp) { return __tmp.val; #else - static_assert(sizeof(uint64_t) == sizeof(double)); - uint64_t val = __make_mantissa(__tagp); - val |= 0xFFF << 51; - return *reinterpret_cast(&val); + __static_assert_type_size_equal(sizeof(uint64_t), sizeof(double)); + uint64_t __val = __make_mantissa(__tagp); + __val |= 0xFFF << 51; + return *reinterpret_cast(&__val); #endif } + __DEVICE__ -inline double nearbyint(double __x) { return __ocml_nearbyint_f64(__x); } +double nearbyint(double __x) { return __ocml_nearbyint_f64(__x); } + __DEVICE__ -inline double nextafter(double __x, double __y) { +double nextafter(double __x, double __y) { return 
__ocml_nextafter_f64(__x, __y); } + __DEVICE__ -inline double -norm(int __dim, - const double *__a) { // TODO: placeholder until OCML adds support. +double norm(int __dim, + const double *__a) { // TODO: placeholder until OCML adds support. double __r = 0; while (__dim--) { __r += __a[0] * __a[0]; @@ -744,28 +926,39 @@ norm(int __dim, return __ocml_sqrt_f64(__r); } + __DEVICE__ -inline double norm3d(double __x, double __y, double __z) { +double norm3d(double __x, double __y, double __z) { return __ocml_len3_f64(__x, __y, __z); } + __DEVICE__ -inline double norm4d(double __x, double __y, double __z, double __w) { +double norm4d(double __x, double __y, double __z, double __w) { return __ocml_len4_f64(__x, __y, __z, __w); } + __DEVICE__ -inline double normcdf(double __x) { return __ocml_ncdf_f64(__x); } +double normcdf(double __x) { return __ocml_ncdf_f64(__x); } + __DEVICE__ -inline double normcdfinv(double __x) { return __ocml_ncdfinv_f64(__x); } +double normcdfinv(double __x) { return __ocml_ncdfinv_f64(__x); } + __DEVICE__ -inline double pow(double __x, double __y) { return __ocml_pow_f64(__x, __y); } +double pow(double __x, double __y) { return __ocml_pow_f64(__x, __y); } + __DEVICE__ -inline double rcbrt(double __x) { return __ocml_rcbrt_f64(__x); } +double powi(double __x, int __y) { return __ocml_pown_f64(__x, __y); } + __DEVICE__ -inline double remainder(double __x, double __y) { +double rcbrt(double __x) { return __ocml_rcbrt_f64(__x); } + +__DEVICE__ +double remainder(double __x, double __y) { return __ocml_remainder_f64(__x, __y); } + __DEVICE__ -inline double remquo(double __x, double __y, int *__quo) { +double remquo(double __x, double __y, int *__quo) { int __tmp; double __r = __ocml_remquo_f64( __x, __y, (__attribute__((address_space(5))) int *)&__tmp); @@ -773,16 +966,16 @@ inline double remquo(double __x, double __y, int *__quo) { return __r; } + __DEVICE__ -inline double rhypot(double __x, double __y) { - return __ocml_rhypot_f64(__x, __y); -} +double rhypot(double __x, double __y) { return __ocml_rhypot_f64(__x, __y); } + __DEVICE__ -inline double rint(double __x) { return __ocml_rint_f64(__x); } +double rint(double __x) { return __ocml_rint_f64(__x); } + __DEVICE__ -inline double -rnorm(int __dim, - const double *__a) { // TODO: placeholder until OCML adds support. +double rnorm(int __dim, + const double *__a) { // TODO: placeholder until OCML adds support. double __r = 0; while (__dim--) { __r += __a[0] * __a[0]; @@ -791,77 +984,93 @@ rnorm(int __dim, return __ocml_rsqrt_f64(__r); } + __DEVICE__ -inline double rnorm3d(double __x, double __y, double __z) { +double rnorm3d(double __x, double __y, double __z) { return __ocml_rlen3_f64(__x, __y, __z); } + __DEVICE__ -inline double rnorm4d(double __x, double __y, double __z, double __w) { +double rnorm4d(double __x, double __y, double __z, double __w) { return __ocml_rlen4_f64(__x, __y, __z, __w); } + __DEVICE__ -inline double round(double __x) { return __ocml_round_f64(__x); } +double round(double __x) { return __ocml_round_f64(__x); } + __DEVICE__ -inline double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); } +double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); } + __DEVICE__ -inline double scalbln(double __x, long int __n) { +double scalbln(double __x, long int __n) { return (__n < INT_MAX) ? 
__ocml_scalbn_f64(__x, __n) : __ocml_scalb_f64(__x, __n); } __DEVICE__ -inline double scalbn(double __x, int __n) { - return __ocml_scalbn_f64(__x, __n); -} +double scalbn(double __x, int __n) { return __ocml_scalbn_f64(__x, __n); } + __DEVICE__ -inline __RETURN_TYPE signbit(double __x) { return __ocml_signbit_f64(__x); } +__RETURN_TYPE __signbit(double __x) { return __ocml_signbit_f64(__x); } + __DEVICE__ -inline double sin(double __x) { return __ocml_sin_f64(__x); } +double sin(double __x) { return __ocml_sin_f64(__x); } + __DEVICE__ -inline void sincos(double __x, double *__sinptr, double *__cosptr) { +void sincos(double __x, double *__sinptr, double *__cosptr) { double __tmp; *__sinptr = __ocml_sincos_f64( __x, (__attribute__((address_space(5))) double *)&__tmp); *__cosptr = __tmp; } + __DEVICE__ -inline void sincospi(double __x, double *__sinptr, double *__cosptr) { +void sincospi(double __x, double *__sinptr, double *__cosptr) { double __tmp; *__sinptr = __ocml_sincospi_f64( __x, (__attribute__((address_space(5))) double *)&__tmp); *__cosptr = __tmp; } + __DEVICE__ -inline double sinh(double __x) { return __ocml_sinh_f64(__x); } +double sinh(double __x) { return __ocml_sinh_f64(__x); } + __DEVICE__ -inline double sinpi(double __x) { return __ocml_sinpi_f64(__x); } +double sinpi(double __x) { return __ocml_sinpi_f64(__x); } + __DEVICE__ -inline double sqrt(double __x) { return __ocml_sqrt_f64(__x); } +double sqrt(double __x) { return __ocml_sqrt_f64(__x); } + __DEVICE__ -inline double tan(double __x) { return __ocml_tan_f64(__x); } +double tan(double __x) { return __ocml_tan_f64(__x); } + __DEVICE__ -inline double tanh(double __x) { return __ocml_tanh_f64(__x); } +double tanh(double __x) { return __ocml_tanh_f64(__x); } + __DEVICE__ -inline double tgamma(double __x) { return __ocml_tgamma_f64(__x); } +double tgamma(double __x) { return __ocml_tgamma_f64(__x); } + __DEVICE__ -inline double trunc(double __x) { return __ocml_trunc_f64(__x); } +double trunc(double __x) { return __ocml_trunc_f64(__x); } + __DEVICE__ -inline double y0(double __x) { return __ocml_y0_f64(__x); } +double y0(double __x) { return __ocml_y0_f64(__x); } + __DEVICE__ -inline double y1(double __x) { return __ocml_y1_f64(__x); } +double y1(double __x) { return __ocml_y1_f64(__x); } + __DEVICE__ -inline double yn(int __n, - double __x) { // TODO: we could use Ahmes multiplication - // and the Miller & Brown algorithm +double yn(int __n, double __x) { // TODO: we could use Ahmes multiplication + // and the Miller & Brown algorithm // for linear recurrences to get O(log n) steps, but it's unclear if // it'd be beneficial in this case. Placeholder until OCML adds // support. 
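// Note the seed change in the hunk below: the previous double-precision yn()
// started its recurrence from the single-precision Bessel functions of the
// first kind (j0f/j1f); the updated body seeds it with y0()/y1(), so the
// three-term recurrence
//   Y_{n+1}(x) = (2*n/x) * Y_n(x) - Y_{n-1}(x)
// now actually produces Y_n in double precision.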
if (__n == 0) - return j0f(__x); + return y0(__x); if (__n == 1) - return j1f(__x); + return y1(__x); - double __x0 = j0f(__x); - double __x1 = j1f(__x); + double __x0 = y0(__x); + double __x1 = y1(__x); for (int __i = 1; __i < __n; ++__i) { double __x2 = (2 * __i) / __x * __x1 - __x0; __x0 = __x1; @@ -874,296 +1083,182 @@ inline double yn(int __n, // BEGIN INTRINSICS #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline double __dadd_rd(double __x, double __y) { +double __dadd_rd(double __x, double __y) { return __ocml_add_rtn_f64(__x, __y); } -#endif __DEVICE__ -inline double __dadd_rn(double __x, double __y) { return __x + __y; } -#if defined OCML_BASIC_ROUNDED_OPERATIONS +double __dadd_rn(double __x, double __y) { + return __ocml_add_rte_f64(__x, __y); +} __DEVICE__ -inline double __dadd_ru(double __x, double __y) { +double __dadd_ru(double __x, double __y) { return __ocml_add_rtp_f64(__x, __y); } __DEVICE__ -inline double __dadd_rz(double __x, double __y) { +double __dadd_rz(double __x, double __y) { return __ocml_add_rtz_f64(__x, __y); } +#else __DEVICE__ -inline double __ddiv_rd(double __x, double __y) { - return __ocml_div_rtn_f64(__x, __y); -} +double __dadd_rn(double __x, double __y) { return __x + __y; } #endif -__DEVICE__ -inline double __ddiv_rn(double __x, double __y) { return __x / __y; } + #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline double __ddiv_ru(double __x, double __y) { +double __ddiv_rd(double __x, double __y) { + return __ocml_div_rtn_f64(__x, __y); +} +__DEVICE__ +double __ddiv_rn(double __x, double __y) { + return __ocml_div_rte_f64(__x, __y); +} +__DEVICE__ +double __ddiv_ru(double __x, double __y) { return __ocml_div_rtp_f64(__x, __y); } __DEVICE__ -inline double __ddiv_rz(double __x, double __y) { +double __ddiv_rz(double __x, double __y) { return __ocml_div_rtz_f64(__x, __y); } +#else __DEVICE__ -inline double __dmul_rd(double __x, double __y) { - return __ocml_mul_rtn_f64(__x, __y); -} +double __ddiv_rn(double __x, double __y) { return __x / __y; } #endif -__DEVICE__ -inline double __dmul_rn(double __x, double __y) { return __x * __y; } + #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline double __dmul_ru(double __x, double __y) { +double __dmul_rd(double __x, double __y) { + return __ocml_mul_rtn_f64(__x, __y); +} +__DEVICE__ +double __dmul_rn(double __x, double __y) { + return __ocml_mul_rte_f64(__x, __y); +} +__DEVICE__ +double __dmul_ru(double __x, double __y) { return __ocml_mul_rtp_f64(__x, __y); } __DEVICE__ -inline double __dmul_rz(double __x, double __y) { +double __dmul_rz(double __x, double __y) { return __ocml_mul_rtz_f64(__x, __y); } +#else __DEVICE__ -inline double __drcp_rd(double __x) { return __llvm_amdgcn_rcp_f64(__x); } +double __dmul_rn(double __x, double __y) { return __x * __y; } #endif -__DEVICE__ -inline double __drcp_rn(double __x) { return __llvm_amdgcn_rcp_f64(__x); } + #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline double __drcp_ru(double __x) { return __llvm_amdgcn_rcp_f64(__x); } +double __drcp_rd(double __x) { return __ocml_div_rtn_f64(1.0, __x); } __DEVICE__ -inline double __drcp_rz(double __x) { return __llvm_amdgcn_rcp_f64(__x); } +double __drcp_rn(double __x) { return __ocml_div_rte_f64(1.0, __x); } __DEVICE__ -inline double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); } +double __drcp_ru(double __x) { return __ocml_div_rtp_f64(1.0, __x); } +__DEVICE__ +double __drcp_rz(double __x) { return __ocml_div_rtz_f64(1.0, __x); } +#else +__DEVICE__ +double __drcp_rn(double 
__x) { return 1.0 / __x; } #endif -__DEVICE__ -inline double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); } + #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline double __dsqrt_ru(double __x) { return __ocml_sqrt_rtp_f64(__x); } +double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); } __DEVICE__ -inline double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); } +double __dsqrt_rn(double __x) { return __ocml_sqrt_rte_f64(__x); } __DEVICE__ -inline double __dsub_rd(double __x, double __y) { +double __dsqrt_ru(double __x) { return __ocml_sqrt_rtp_f64(__x); } +__DEVICE__ +double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); } +#else +__DEVICE__ +double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); } +#endif + +#if defined OCML_BASIC_ROUNDED_OPERATIONS +__DEVICE__ +double __dsub_rd(double __x, double __y) { return __ocml_sub_rtn_f64(__x, __y); } -#endif __DEVICE__ -inline double __dsub_rn(double __x, double __y) { return __x - __y; } -#if defined OCML_BASIC_ROUNDED_OPERATIONS +double __dsub_rn(double __x, double __y) { + return __ocml_sub_rte_f64(__x, __y); +} __DEVICE__ -inline double __dsub_ru(double __x, double __y) { +double __dsub_ru(double __x, double __y) { return __ocml_sub_rtp_f64(__x, __y); } __DEVICE__ -inline double __dsub_rz(double __x, double __y) { +double __dsub_rz(double __x, double __y) { return __ocml_sub_rtz_f64(__x, __y); } +#else __DEVICE__ -inline double __fma_rd(double __x, double __y, double __z) { - return __ocml_fma_rtn_f64(__x, __y, __z); -} +double __dsub_rn(double __x, double __y) { return __x - __y; } #endif -__DEVICE__ -inline double __fma_rn(double __x, double __y, double __z) { - return __ocml_fma_f64(__x, __y, __z); -} + #if defined OCML_BASIC_ROUNDED_OPERATIONS __DEVICE__ -inline double __fma_ru(double __x, double __y, double __z) { +double __fma_rd(double __x, double __y, double __z) { + return __ocml_fma_rtn_f64(__x, __y, __z); +} +__DEVICE__ +double __fma_rn(double __x, double __y, double __z) { + return __ocml_fma_rte_f64(__x, __y, __z); +} +__DEVICE__ +double __fma_ru(double __x, double __y, double __z) { return __ocml_fma_rtp_f64(__x, __y, __z); } __DEVICE__ -inline double __fma_rz(double __x, double __y, double __z) { +double __fma_rz(double __x, double __y, double __z) { return __ocml_fma_rtz_f64(__x, __y, __z); } +#else +__DEVICE__ +double __fma_rn(double __x, double __y, double __z) { + return __ocml_fma_f64(__x, __y, __z); +} #endif // END INTRINSICS // END DOUBLE -// BEGIN INTEGER -__DEVICE__ -inline int abs(int __x) { - int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1); - return (__x ^ __sgn) - __sgn; -} -__DEVICE__ -inline long labs(long __x) { - long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1); - return (__x ^ __sgn) - __sgn; -} -__DEVICE__ -inline long long llabs(long long __x) { - long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1); - return (__x ^ __sgn) - __sgn; -} +// C only macros +#if !defined(__cplusplus) && __STDC_VERSION__ >= 201112L +#define isfinite(__x) _Generic((__x), float : __finitef, double : __finite)(__x) +#define isinf(__x) _Generic((__x), float : __isinff, double : __isinf)(__x) +#define isnan(__x) _Generic((__x), float : __isnanf, double : __isnan)(__x) +#define signbit(__x) \ + _Generic((__x), float : __signbitf, double : __signbit)(__x) +#endif // !defined(__cplusplus) && __STDC_VERSION__ >= 201112L #if defined(__cplusplus) -__DEVICE__ -inline long abs(long __x) { return labs(__x); } -__DEVICE__ -inline long long abs(long long __x) { return llabs(__x); } 
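// The integer abs()/labs()/llabs() overloads removed here (they now sit near
// the top of the header, guarded by __cplusplus) are branchless: the
// arithmetic right shift smears the sign bit into a full-width mask, and
// (x ^ mask) - mask negates x exactly when it was negative. A small
// self-contained illustration of the same trick, assuming two's complement
// (abs_branchless is an illustrative name, not part of the header):

#include <limits.h> /* CHAR_BIT */

static inline int abs_branchless(int x) {
  /* -1 (all ones) when x is negative, 0 otherwise. */
  int sign = x >> (sizeof(int) * CHAR_BIT - 1);
  /* Negative x: (~x) + 1 == -x; non-negative x: unchanged. */
  /* As with any int abs, INT_MIN has no representable result. */
  return (x ^ sign) - sign;
}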
-#endif -// END INTEGER - -__DEVICE__ -inline _Float16 fma(_Float16 __x, _Float16 __y, _Float16 __z) { - return __ocml_fma_f16(__x, __y, __z); -} - -__DEVICE__ -inline float fma(float __x, float __y, float __z) { - return fmaf(__x, __y, __z); -} - -#pragma push_macro("__DEF_FUN1") -#pragma push_macro("__DEF_FUN2") -#pragma push_macro("__DEF_FUNI") -#pragma push_macro("__DEF_FLOAT_FUN2I") -#pragma push_macro("__HIP_OVERLOAD1") -#pragma push_macro("__HIP_OVERLOAD2") - -// __hip_enable_if::type is a type function which returns __T if __B is true. -template struct __hip_enable_if {}; - -template struct __hip_enable_if { typedef __T type; }; - -// __HIP_OVERLOAD1 is used to resolve function calls with integer argument to -// avoid compilation error due to ambibuity. e.g. floor(5) is resolved with -// floor(double). -#define __HIP_OVERLOAD1(__retty, __fn) \ - template \ - __DEVICE__ typename __hip_enable_if::is_integer, \ - __retty>::type \ - __fn(__T __x) { \ - return ::__fn((double)__x); \ - } - -// __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double -// or integer argument to avoid compilation error due to ambibuity. e.g. -// max(5.0f, 6.0) is resolved with max(double, double). -#define __HIP_OVERLOAD2(__retty, __fn) \ - template \ - __DEVICE__ \ - typename __hip_enable_if::is_specialized && \ - std::numeric_limits<__T2>::is_specialized, \ - __retty>::type \ - __fn(__T1 __x, __T2 __y) { \ - return __fn((double)__x, (double)__y); \ - } - -// Define cmath functions with float argument and returns float. -#define __DEF_FUN1(__retty, __func) \ - __DEVICE__ \ - inline float __func(float __x) { return __func##f(__x); } \ - __HIP_OVERLOAD1(__retty, __func) - -// Define cmath functions with float argument and returns __retty. -#define __DEF_FUNI(__retty, __func) \ - __DEVICE__ \ - inline __retty __func(float __x) { return __func##f(__x); } \ - __HIP_OVERLOAD1(__retty, __func) - -// define cmath functions with two float arguments. 
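// The __hip_enable_if / __HIP_OVERLOAD machinery removed in this hunk is a
// classic enable_if dispatch: a template that only exposes a nested `type`
// when its boolean parameter is true, used so that integer arguments
// (e.g. floor(5)) resolve to the double overload instead of being ambiguous.
// A minimal standalone sketch of the same idea with illustrative names,
// assuming <cmath> and <limits>:

#include <cmath>
#include <limits>

// Exposes `type` only when B is true; substitution fails otherwise (SFINAE).
template <bool B, class T = void> struct hip_enable_if {};
template <class T> struct hip_enable_if<true, T> { typedef T type; };

static inline double floor_impl(double x) { return std::floor(x); }

// Selected only for integer arguments; forwards them to the double overload.
template <class T>
typename hip_enable_if<std::numeric_limits<T>::is_integer, double>::type
floor_impl(T x) {
  return floor_impl((double)x);
}

// e.g. floor_impl(5) and floor_impl(5.25) both resolve without ambiguity.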
-#define __DEF_FUN2(__retty, __func) \ - __DEVICE__ \ - inline float __func(float __x, float __y) { return __func##f(__x, __y); } \ - __HIP_OVERLOAD2(__retty, __func) - -__DEF_FUN1(double, acos) -__DEF_FUN1(double, acosh) -__DEF_FUN1(double, asin) -__DEF_FUN1(double, asinh) -__DEF_FUN1(double, atan) -__DEF_FUN2(double, atan2); -__DEF_FUN1(double, atanh) -__DEF_FUN1(double, cbrt) -__DEF_FUN1(double, ceil) -__DEF_FUN2(double, copysign); -__DEF_FUN1(double, cos) -__DEF_FUN1(double, cosh) -__DEF_FUN1(double, erf) -__DEF_FUN1(double, erfc) -__DEF_FUN1(double, exp) -__DEF_FUN1(double, exp2) -__DEF_FUN1(double, expm1) -__DEF_FUN1(double, fabs) -__DEF_FUN2(double, fdim); -__DEF_FUN1(double, floor) -__DEF_FUN2(double, fmax); -__DEF_FUN2(double, fmin); -__DEF_FUN2(double, fmod); -//__HIP_OVERLOAD1(int, fpclassify) -__DEF_FUN2(double, hypot); -__DEF_FUNI(int, ilogb) -__HIP_OVERLOAD1(bool, isfinite) -__HIP_OVERLOAD2(bool, isgreater); -__HIP_OVERLOAD2(bool, isgreaterequal); -__HIP_OVERLOAD1(bool, isinf); -__HIP_OVERLOAD2(bool, isless); -__HIP_OVERLOAD2(bool, islessequal); -__HIP_OVERLOAD2(bool, islessgreater); -__HIP_OVERLOAD1(bool, isnan); -//__HIP_OVERLOAD1(bool, isnormal) -__HIP_OVERLOAD2(bool, isunordered); -__DEF_FUN1(double, lgamma) -__DEF_FUN1(double, log) -__DEF_FUN1(double, log10) -__DEF_FUN1(double, log1p) -__DEF_FUN1(double, log2) -__DEF_FUN1(double, logb) -__DEF_FUNI(long long, llrint) -__DEF_FUNI(long long, llround) -__DEF_FUNI(long, lrint) -__DEF_FUNI(long, lround) -__DEF_FUN1(double, nearbyint); -__DEF_FUN2(double, nextafter); -__DEF_FUN2(double, pow); -__DEF_FUN2(double, remainder); -__DEF_FUN1(double, rint); -__DEF_FUN1(double, round); -__HIP_OVERLOAD1(bool, signbit) -__DEF_FUN1(double, sin) -__DEF_FUN1(double, sinh) -__DEF_FUN1(double, sqrt) -__DEF_FUN1(double, tan) -__DEF_FUN1(double, tanh) -__DEF_FUN1(double, tgamma) -__DEF_FUN1(double, trunc); - -// define cmath functions with a float and an integer argument. -#define __DEF_FLOAT_FUN2I(__func) \ - __DEVICE__ \ - inline float __func(float __x, int __y) { return __func##f(__x, __y); } -__DEF_FLOAT_FUN2I(scalbn) - -template __DEVICE__ inline T min(T __arg1, T __arg2) { +template __DEVICE__ T min(T __arg1, T __arg2) { return (__arg1 < __arg2) ? __arg1 : __arg2; } -template __DEVICE__ inline T max(T __arg1, T __arg2) { +template __DEVICE__ T max(T __arg1, T __arg2) { return (__arg1 > __arg2) ? __arg1 : __arg2; } -__DEVICE__ inline int min(int __arg1, int __arg2) { +__DEVICE__ int min(int __arg1, int __arg2) { return (__arg1 < __arg2) ? __arg1 : __arg2; } -__DEVICE__ inline int max(int __arg1, int __arg2) { +__DEVICE__ int max(int __arg1, int __arg2) { return (__arg1 > __arg2) ? 
__arg1 : __arg2; } __DEVICE__ -inline float max(float __x, float __y) { return fmaxf(__x, __y); } +float max(float __x, float __y) { return fmaxf(__x, __y); } __DEVICE__ -inline double max(double __x, double __y) { return fmax(__x, __y); } +double max(double __x, double __y) { return fmax(__x, __y); } __DEVICE__ -inline float min(float __x, float __y) { return fminf(__x, __y); } +float min(float __x, float __y) { return fminf(__x, __y); } __DEVICE__ -inline double min(double __x, double __y) { return fmin(__x, __y); } - -__HIP_OVERLOAD2(double, max) -__HIP_OVERLOAD2(double, min) +double min(double __x, double __y) { return fmin(__x, __y); } __host__ inline static int min(int __arg1, int __arg2) { return std::min(__arg1, __arg2); @@ -1172,13 +1267,8 @@ __host__ inline static int min(int __arg1, int __arg2) { __host__ inline static int max(int __arg1, int __arg2) { return std::max(__arg1, __arg2); } +#endif -#pragma pop_macro("__DEF_FUN1") -#pragma pop_macro("__DEF_FUN2") -#pragma pop_macro("__DEF_FUNI") -#pragma pop_macro("__DEF_FLOAT_FUN2I") -#pragma pop_macro("__HIP_OVERLOAD1") -#pragma pop_macro("__HIP_OVERLOAD2") #pragma pop_macro("__DEVICE__") #pragma pop_macro("__RETURN_TYPE") diff --git a/lib/include/__clang_hip_runtime_wrapper.h b/lib/include/__clang_hip_runtime_wrapper.h index addae5605a..81a16a265a 100644 --- a/lib/include/__clang_hip_runtime_wrapper.h +++ b/lib/include/__clang_hip_runtime_wrapper.h @@ -28,6 +28,10 @@ #define __shared__ __attribute__((shared)) #define __constant__ __attribute__((constant)) +#if !defined(__cplusplus) || __cplusplus < 201103L + #define nullptr NULL; +#endif + #if __HIP_ENABLE_DEVICE_MALLOC__ extern "C" __device__ void *__hip_malloc(size_t __size); extern "C" __device__ void *__hip_free(void *__ptr); @@ -51,6 +55,7 @@ static inline __device__ void *free(void *__ptr) { #if !_OPENMP || __HIP_ENABLE_CUDA_WRAPPER_FOR_OPENMP__ #include <__clang_cuda_math_forward_declares.h> +#include <__clang_hip_cmath.h> #include <__clang_cuda_complex_builtins.h> #include diff --git a/lib/include/altivec.h b/lib/include/altivec.h index ac5f438363..4d50d47d51 100644 --- a/lib/include/altivec.h +++ b/lib/include/altivec.h @@ -1709,6 +1709,20 @@ vec_cmpeq(vector double __a, vector double __b) { } #endif +#ifdef __POWER10_VECTOR__ +static __inline__ vector bool __int128 __ATTRS_o_ai +vec_cmpeq(vector signed __int128 __a, vector signed __int128 __b) { + return (vector bool __int128)__builtin_altivec_vcmpequq( + (vector bool __int128)__a, (vector bool __int128)__b); +} + +static __inline__ vector bool __int128 __ATTRS_o_ai +vec_cmpeq(vector unsigned __int128 __a, vector unsigned __int128 __b) { + return (vector bool __int128)__builtin_altivec_vcmpequq( + (vector bool __int128)__a, (vector bool __int128)__b); +} +#endif + #ifdef __POWER9_VECTOR__ /* vec_cmpne */ @@ -1766,36 +1780,26 @@ vec_cmpne(vector unsigned int __a, vector unsigned int __b) { (vector int)__b); } -static __inline__ vector bool long long __ATTRS_o_ai -vec_cmpne(vector bool long long __a, vector bool long long __b) { - return (vector bool long long) - ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); -} - -static __inline__ vector bool long long __ATTRS_o_ai -vec_cmpne(vector signed long long __a, vector signed long long __b) { - return (vector bool long long) - ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); -} - -static __inline__ vector bool long long __ATTRS_o_ai -vec_cmpne(vector unsigned long long __a, vector unsigned long long __b) { - return 
(vector bool long long) - ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); -} - static __inline__ vector bool int __ATTRS_o_ai vec_cmpne(vector float __a, vector float __b) { return (vector bool int)__builtin_altivec_vcmpnew((vector int)__a, (vector int)__b); } -static __inline__ vector bool long long __ATTRS_o_ai -vec_cmpne(vector double __a, vector double __b) { - return (vector bool long long) - ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); +#ifdef __POWER10_VECTOR__ +static __inline__ vector bool __int128 __ATTRS_o_ai +vec_cmpne(vector unsigned __int128 __a, vector unsigned __int128 __b) { + return (vector bool __int128) ~(__builtin_altivec_vcmpequq( + (vector bool __int128)__a, (vector bool __int128)__b)); } +static __inline__ vector bool __int128 __ATTRS_o_ai +vec_cmpne(vector signed __int128 __a, vector signed __int128 __b) { + return (vector bool __int128) ~(__builtin_altivec_vcmpequq( + (vector bool __int128)__a, (vector bool __int128)__b)); +} +#endif + /* vec_cmpnez */ static __inline__ vector bool char __ATTRS_o_ai @@ -1900,6 +1904,86 @@ vec_parity_lsbb(vector signed long long __a) { return __builtin_altivec_vprtybd(__a); } +#else +/* vec_cmpne */ + +static __inline__ vector bool char __ATTRS_o_ai +vec_cmpne(vector bool char __a, vector bool char __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool char __ATTRS_o_ai +vec_cmpne(vector signed char __a, vector signed char __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool char __ATTRS_o_ai +vec_cmpne(vector unsigned char __a, vector unsigned char __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool short __ATTRS_o_ai +vec_cmpne(vector bool short __a, vector bool short __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool short __ATTRS_o_ai +vec_cmpne(vector signed short __a, vector signed short __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool short __ATTRS_o_ai +vec_cmpne(vector unsigned short __a, vector unsigned short __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool int __ATTRS_o_ai +vec_cmpne(vector bool int __a, vector bool int __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool int __ATTRS_o_ai +vec_cmpne(vector signed int __a, vector signed int __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool int __ATTRS_o_ai +vec_cmpne(vector unsigned int __a, vector unsigned int __b) { + return ~(vec_cmpeq(__a, __b)); +} + +static __inline__ vector bool int __ATTRS_o_ai +vec_cmpne(vector float __a, vector float __b) { + return ~(vec_cmpeq(__a, __b)); +} +#endif + +#ifdef __POWER8_VECTOR__ +static __inline__ vector bool long long __ATTRS_o_ai +vec_cmpne(vector bool long long __a, vector bool long long __b) { + return (vector bool long long) + ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); +} + +static __inline__ vector bool long long __ATTRS_o_ai +vec_cmpne(vector signed long long __a, vector signed long long __b) { + return (vector bool long long) + ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); +} + +static __inline__ vector bool long long __ATTRS_o_ai +vec_cmpne(vector unsigned long long __a, vector unsigned long long __b) { + return (vector bool long long) + ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); +} +#endif + +#ifdef __VSX__ +static __inline__ vector bool long long __ATTRS_o_ai 
+vec_cmpne(vector double __a, vector double __b) { + return (vector bool long long) + ~(__builtin_altivec_vcmpequd((vector long long)__a, (vector long long)__b)); +} #endif /* vec_cmpgt */ @@ -1962,6 +2046,20 @@ vec_cmpgt(vector double __a, vector double __b) { } #endif +#ifdef __POWER10_VECTOR__ +static __inline__ vector bool __int128 __ATTRS_o_ai +vec_cmpgt(vector signed __int128 __a, vector signed __int128 __b) { + return (vector bool __int128)__builtin_altivec_vcmpgtsq( + (vector bool __int128)__a, (vector bool __int128)__b); +} + +static __inline__ vector bool __int128 __ATTRS_o_ai +vec_cmpgt(vector unsigned __int128 __a, vector unsigned __int128 __b) { + return (vector bool __int128)__builtin_altivec_vcmpgtuq( + (vector bool __int128)__a, (vector bool __int128)__b); +} +#endif + /* vec_cmpge */ static __inline__ vector bool char __ATTRS_o_ai @@ -2022,6 +2120,18 @@ vec_cmpge(vector unsigned long long __a, vector unsigned long long __b) { } #endif +#ifdef __POWER10_VECTOR__ +static __inline__ vector bool __int128 __ATTRS_o_ai +vec_cmpge(vector signed __int128 __a, vector signed __int128 __b) { + return ~(vec_cmpgt(__b, __a)); +} + +static __inline__ vector bool __int128 __ATTRS_o_ai +vec_cmpge(vector unsigned __int128 __a, vector unsigned __int128 __b) { + return ~(vec_cmpgt(__b, __a)); +} +#endif + /* vec_vcmpgefp */ static __inline__ vector bool int __attribute__((__always_inline__)) @@ -2134,6 +2244,18 @@ vec_cmple(vector unsigned long long __a, vector unsigned long long __b) { } #endif +#ifdef __POWER10_VECTOR__ +static __inline__ vector bool __int128 __ATTRS_o_ai +vec_cmple(vector signed __int128 __a, vector signed __int128 __b) { + return vec_cmpge(__b, __a); +} + +static __inline__ vector bool __int128 __ATTRS_o_ai +vec_cmple(vector unsigned __int128 __a, vector unsigned __int128 __b) { + return vec_cmpge(__b, __a); +} +#endif + /* vec_cmplt */ static __inline__ vector bool char __ATTRS_o_ai @@ -2178,6 +2300,18 @@ vec_cmplt(vector double __a, vector double __b) { } #endif +#ifdef __POWER10_VECTOR__ +static __inline__ vector bool __int128 __ATTRS_o_ai +vec_cmplt(vector signed __int128 __a, vector signed __int128 __b) { + return vec_cmpgt(__b, __a); +} + +static __inline__ vector bool __int128 __ATTRS_o_ai +vec_cmplt(vector unsigned __int128 __a, vector unsigned __int128 __b) { + return vec_cmpgt(__b, __a); +} +#endif + #ifdef __POWER8_VECTOR__ static __inline__ vector bool long long __ATTRS_o_ai vec_cmplt(vector signed long long __a, vector signed long long __b) { @@ -2702,67 +2836,67 @@ vec_insert_exp(vector unsigned int __a, vector unsigned int __b) { } #if defined(__powerpc64__) -static __inline__ vector signed char __ATTRS_o_ai vec_xl_len(signed char *__a, +static __inline__ vector signed char __ATTRS_o_ai vec_xl_len(const signed char *__a, size_t __b) { return (vector signed char)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector unsigned char __ATTRS_o_ai -vec_xl_len(unsigned char *__a, size_t __b) { +vec_xl_len(const unsigned char *__a, size_t __b) { return (vector unsigned char)__builtin_vsx_lxvl(__a, (__b << 56)); } -static __inline__ vector signed short __ATTRS_o_ai vec_xl_len(signed short *__a, +static __inline__ vector signed short __ATTRS_o_ai vec_xl_len(const signed short *__a, size_t __b) { return (vector signed short)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector unsigned short __ATTRS_o_ai -vec_xl_len(unsigned short *__a, size_t __b) { +vec_xl_len(const unsigned short *__a, size_t __b) { return (vector unsigned 
short)__builtin_vsx_lxvl(__a, (__b << 56)); } -static __inline__ vector signed int __ATTRS_o_ai vec_xl_len(signed int *__a, +static __inline__ vector signed int __ATTRS_o_ai vec_xl_len(const signed int *__a, size_t __b) { return (vector signed int)__builtin_vsx_lxvl(__a, (__b << 56)); } -static __inline__ vector unsigned int __ATTRS_o_ai vec_xl_len(unsigned int *__a, +static __inline__ vector unsigned int __ATTRS_o_ai vec_xl_len(const unsigned int *__a, size_t __b) { return (vector unsigned int)__builtin_vsx_lxvl(__a, (__b << 56)); } -static __inline__ vector float __ATTRS_o_ai vec_xl_len(float *__a, size_t __b) { +static __inline__ vector float __ATTRS_o_ai vec_xl_len(const float *__a, size_t __b) { return (vector float)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector signed __int128 __ATTRS_o_ai -vec_xl_len(signed __int128 *__a, size_t __b) { +vec_xl_len(const signed __int128 *__a, size_t __b) { return (vector signed __int128)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector unsigned __int128 __ATTRS_o_ai -vec_xl_len(unsigned __int128 *__a, size_t __b) { +vec_xl_len(const unsigned __int128 *__a, size_t __b) { return (vector unsigned __int128)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector signed long long __ATTRS_o_ai -vec_xl_len(signed long long *__a, size_t __b) { +vec_xl_len(const signed long long *__a, size_t __b) { return (vector signed long long)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector unsigned long long __ATTRS_o_ai -vec_xl_len(unsigned long long *__a, size_t __b) { +vec_xl_len(const unsigned long long *__a, size_t __b) { return (vector unsigned long long)__builtin_vsx_lxvl(__a, (__b << 56)); } -static __inline__ vector double __ATTRS_o_ai vec_xl_len(double *__a, +static __inline__ vector double __ATTRS_o_ai vec_xl_len(const double *__a, size_t __b) { return (vector double)__builtin_vsx_lxvl(__a, (__b << 56)); } static __inline__ vector unsigned char __ATTRS_o_ai -vec_xl_len_r(unsigned char *__a, size_t __b) { +vec_xl_len_r(const unsigned char *__a, size_t __b) { vector unsigned char __res = (vector unsigned char)__builtin_vsx_lxvll(__a, (__b << 56)); #ifdef __LITTLE_ENDIAN__ @@ -2862,12 +2996,12 @@ static __inline__ void __ATTRS_o_ai vec_xst_len_r(vector unsigned char __a, #ifdef __VSX__ static __inline__ vector float __ATTRS_o_ai vec_cpsgn(vector float __a, vector float __b) { - return __builtin_vsx_xvcpsgnsp(__a, __b); + return __builtin_vsx_xvcpsgnsp(__b, __a); } static __inline__ vector double __ATTRS_o_ai vec_cpsgn(vector double __a, vector double __b) { - return __builtin_vsx_xvcpsgndp(__a, __b); + return __builtin_vsx_xvcpsgndp(__b, __a); } #endif @@ -2951,6 +3085,42 @@ static __inline__ vector double __ATTRS_o_ai vec_cpsgn(vector double __a, #define vec_vctuxs __builtin_altivec_vctuxs +/* vec_signext */ + +#ifdef __POWER9_VECTOR__ +static __inline__ vector signed int __ATTRS_o_ai +vec_signexti(vector signed char __a) { + return __builtin_altivec_vextsb2w(__a); +} + +static __inline__ vector signed int __ATTRS_o_ai +vec_signexti(vector signed short __a) { + return __builtin_altivec_vextsh2w(__a); +} + +static __inline__ vector signed long long __ATTRS_o_ai +vec_signextll(vector signed char __a) { + return __builtin_altivec_vextsb2d(__a); +} + +static __inline__ vector signed long long __ATTRS_o_ai +vec_signextll(vector signed short __a) { + return __builtin_altivec_vextsh2d(__a); +} + +static __inline__ vector signed long long __ATTRS_o_ai +vec_signextll(vector signed int __a) { + return 
__builtin_altivec_vextsw2d(__a); +} +#endif + +#ifdef __POWER10_VECTOR__ +static __inline__ vector signed __int128 __ATTRS_o_ai +vec_signextq(vector signed long long __a) { + return __builtin_altivec_vextsd2q(__a); +} +#endif + /* vec_signed */ static __inline__ vector signed int __ATTRS_o_ai @@ -3288,6 +3458,66 @@ static __inline__ vector double __ATTRS_o_ai vec_div(vector double __a, } #endif +/* vec_dive */ + +#ifdef __POWER10_VECTOR__ +static __inline__ vector signed int __ATTRS_o_ai +vec_dive(vector signed int __a, vector signed int __b) { + return __builtin_altivec_vdivesw(__a, __b); +} + +static __inline__ vector unsigned int __ATTRS_o_ai +vec_dive(vector unsigned int __a, vector unsigned int __b) { + return __builtin_altivec_vdiveuw(__a, __b); +} + +static __inline__ vector signed long long __ATTRS_o_ai +vec_dive(vector signed long long __a, vector signed long long __b) { + return __builtin_altivec_vdivesd(__a, __b); +} + +static __inline__ vector unsigned long long __ATTRS_o_ai +vec_dive(vector unsigned long long __a, vector unsigned long long __b) { + return __builtin_altivec_vdiveud(__a, __b); +} + +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_dive(vector unsigned __int128 __a, vector unsigned __int128 __b) { + return __builtin_altivec_vdiveuq(__a, __b); +} + +static __inline__ vector signed __int128 __ATTRS_o_ai +vec_dive(vector signed __int128 __a, vector signed __int128 __b) { + return __builtin_altivec_vdivesq(__a, __b); +} +#endif + +#ifdef __POWER10_VECTOR__ +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_div(vector unsigned __int128 __a, vector unsigned __int128 __b) { + return __a / __b; +} + +static __inline__ vector signed __int128 __ATTRS_o_ai +vec_div(vector signed __int128 __a, vector signed __int128 __b) { + return __a / __b; +} +#endif /* __POWER10_VECTOR__ */ + +/* vec_xvtdiv */ + +#ifdef __VSX__ +static __inline__ int __ATTRS_o_ai vec_test_swdiv(vector double __a, + vector double __b) { + return __builtin_vsx_xvtdivdp(__a, __b); +} + +static __inline__ int __ATTRS_o_ai vec_test_swdivs(vector float __a, + vector float __b) { + return __builtin_vsx_xvtdivsp(__a, __b); +} +#endif + /* vec_dss */ #define vec_dss __builtin_altivec_dss @@ -3300,23 +3530,19 @@ static __inline__ void __attribute__((__always_inline__)) vec_dssall(void) { /* vec_dst */ #define vec_dst(__PTR, __CW, __STR) \ - __extension__( \ - { __builtin_altivec_dst((const void *)(__PTR), (__CW), (__STR)); }) + __builtin_altivec_dst((const void *)(__PTR), (__CW), (__STR)) /* vec_dstst */ #define vec_dstst(__PTR, __CW, __STR) \ - __extension__( \ - { __builtin_altivec_dstst((const void *)(__PTR), (__CW), (__STR)); }) + __builtin_altivec_dstst((const void *)(__PTR), (__CW), (__STR)) /* vec_dststt */ #define vec_dststt(__PTR, __CW, __STR) \ - __extension__( \ - { __builtin_altivec_dststt((const void *)(__PTR), (__CW), (__STR)); }) + __builtin_altivec_dststt((const void *)(__PTR), (__CW), (__STR)) /* vec_dstt */ #define vec_dstt(__PTR, __CW, __STR) \ - __extension__( \ - { __builtin_altivec_dstt((const void *)(__PTR), (__CW), (__STR)); }) + __builtin_altivec_dstt((const void *)(__PTR), (__CW), (__STR)) /* vec_eqv */ @@ -5467,6 +5693,16 @@ vec_msum(vector unsigned short __a, vector unsigned short __b, return __builtin_altivec_vmsumuhm(__a, __b, __c); } +/* vec_msumc */ + +#ifdef __POWER10_VECTOR__ +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_msumc(vector unsigned long long __a, vector unsigned long long __b, + vector unsigned __int128 __c) { + return 
__builtin_altivec_vmsumcud(__a, __b, __c); +} +#endif + /* vec_vmsummbm */ static __inline__ vector int __attribute__((__always_inline__)) @@ -5693,6 +5929,26 @@ vec_mule(vector unsigned int __a, vector unsigned int __b) { } #endif +#ifdef __POWER10_VECTOR__ +static __inline__ vector signed __int128 __ATTRS_o_ai +vec_mule(vector signed long long __a, vector signed long long __b) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vmulosd(__a, __b); +#else + return __builtin_altivec_vmulesd(__a, __b); +#endif +} + +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_mule(vector unsigned long long __a, vector unsigned long long __b) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vmuloud(__a, __b); +#else + return __builtin_altivec_vmuleud(__a, __b); +#endif +} +#endif + /* vec_vmulesb */ static __inline__ vector short __attribute__((__always_inline__)) @@ -5737,6 +5993,30 @@ vec_vmuleuh(vector unsigned short __a, vector unsigned short __b) { #endif } +/* vec_mulh */ + +#ifdef __POWER10_VECTOR__ +static __inline__ vector signed int __ATTRS_o_ai +vec_mulh(vector signed int __a, vector signed int __b) { + return __builtin_altivec_vmulhsw(__a, __b); +} + +static __inline__ vector unsigned int __ATTRS_o_ai +vec_mulh(vector unsigned int __a, vector unsigned int __b) { + return __builtin_altivec_vmulhuw(__a, __b); +} + +static __inline__ vector signed long long __ATTRS_o_ai +vec_mulh(vector signed long long __a, vector signed long long __b) { + return __builtin_altivec_vmulhsd(__a, __b); +} + +static __inline__ vector unsigned long long __ATTRS_o_ai +vec_mulh(vector unsigned long long __a, vector unsigned long long __b) { + return __builtin_altivec_vmulhud(__a, __b); +} +#endif + /* vec_mulo */ static __inline__ vector short __ATTRS_o_ai vec_mulo(vector signed char __a, @@ -5795,6 +6075,26 @@ vec_mulo(vector unsigned int __a, vector unsigned int __b) { } #endif +#ifdef __POWER10_VECTOR__ +static __inline__ vector signed __int128 __ATTRS_o_ai +vec_mulo(vector signed long long __a, vector signed long long __b) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vmulesd(__a, __b); +#else + return __builtin_altivec_vmulosd(__a, __b); +#endif +} + +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_mulo(vector unsigned long long __a, vector unsigned long long __b) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vmuleud(__a, __b); +#else + return __builtin_altivec_vmuloud(__a, __b); +#endif +} +#endif + /* vec_vmulosb */ static __inline__ vector short __attribute__((__always_inline__)) @@ -7627,6 +7927,18 @@ vec_rl(vector unsigned long long __a, vector unsigned long long __b) { } #endif +#ifdef __POWER10_VECTOR__ +static __inline__ vector signed __int128 __ATTRS_o_ai +vec_rl(vector signed __int128 __a, vector unsigned __int128 __b) { + return (__b << __a)|(__b >> ((__CHAR_BIT__ * sizeof(vector signed __int128)) - __a)); +} + +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_rl(vector unsigned __int128 __a, vector unsigned __int128 __b) { + return (__b << __a)|(__b >> ((__CHAR_BIT__ * sizeof(vector unsigned __int128)) - __a)); +} +#endif + /* vec_rlmi */ #ifdef __POWER9_VECTOR__ static __inline__ vector unsigned int __ATTRS_o_ai @@ -7640,8 +7952,24 @@ vec_rlmi(vector unsigned long long __a, vector unsigned long long __b, vector unsigned long long __c) { return __builtin_altivec_vrldmi(__a, __c, __b); } +#endif + +#ifdef __POWER10_VECTOR__ +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_rlmi(vector unsigned __int128 __a, vector unsigned 
__int128 __b, + vector unsigned __int128 __c) { + return __builtin_altivec_vrlqmi(__a, __c, __b); +} + +static __inline__ vector signed __int128 __ATTRS_o_ai +vec_rlmi(vector signed __int128 __a, vector signed __int128 __b, + vector signed __int128 __c) { + return __builtin_altivec_vrlqmi(__a, __c, __b); +} +#endif /* vec_rlnm */ +#ifdef __POWER9_VECTOR__ static __inline__ vector unsigned int __ATTRS_o_ai vec_rlnm(vector unsigned int __a, vector unsigned int __b, vector unsigned int __c) { @@ -7657,6 +7985,42 @@ vec_rlnm(vector unsigned long long __a, vector unsigned long long __b, } #endif +#ifdef __POWER10_VECTOR__ +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_rlnm(vector unsigned __int128 __a, vector unsigned __int128 __b, + vector unsigned __int128 __c) { + // Merge __b and __c using an appropriate shuffle. + vector unsigned char TmpB = (vector unsigned char)__b; + vector unsigned char TmpC = (vector unsigned char)__c; + vector unsigned char MaskAndShift = +#ifdef __LITTLE_ENDIAN__ + __builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, -1, -1, -1, 16, 0, + 1, -1, -1, -1, -1, -1); +#else + __builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, 31, 30, 15, -1, + -1, -1, -1, -1, -1, -1, -1); +#endif + return __builtin_altivec_vrlqnm(__a, (vector unsigned __int128) MaskAndShift); +} + +static __inline__ vector signed __int128 __ATTRS_o_ai +vec_rlnm(vector signed __int128 __a, vector signed __int128 __b, + vector signed __int128 __c) { + // Merge __b and __c using an appropriate shuffle. + vector unsigned char TmpB = (vector unsigned char)__b; + vector unsigned char TmpC = (vector unsigned char)__c; + vector unsigned char MaskAndShift = +#ifdef __LITTLE_ENDIAN__ + __builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, -1, -1, -1, 16, 0, + 1, -1, -1, -1, -1, -1); +#else + __builtin_shufflevector(TmpB, TmpC, -1, -1, -1, -1, -1, 31, 30, 15, -1, + -1, -1, -1, -1, -1, -1, -1); +#endif + return __builtin_altivec_vrlqnm(__a, (vector unsigned __int128) MaskAndShift); +} +#endif + /* vec_vrlb */ static __inline__ vector signed char __ATTRS_o_ai @@ -7771,6 +8135,18 @@ vec_vrsqrtefp(vector float __a) { return __builtin_altivec_vrsqrtefp(__a); } +/* vec_xvtsqrt */ + +#ifdef __VSX__ +static __inline__ int __ATTRS_o_ai vec_test_swsqrt(vector double __a) { + return __builtin_vsx_xvtsqrtdp(__a); +} + +static __inline__ int __ATTRS_o_ai vec_test_swsqrts(vector float __a) { + return __builtin_vsx_xvtsqrtsp(__a); +} +#endif + /* vec_sel */ #define __builtin_altivec_vsel_4si vec_sel @@ -7905,6 +8281,46 @@ vec_sel(vector double __a, vector double __b, vector unsigned long long __c) { ((vector long long)__b & (vector long long)__c); return (vector double)__res; } + +static __inline__ vector bool long long __ATTRS_o_ai +vec_sel(vector bool long long __a, vector bool long long __b, + vector bool long long __c) { + return (__a & ~__c) | (__b & __c); +} + +static __inline__ vector bool long long __ATTRS_o_ai +vec_sel(vector bool long long __a, vector bool long long __b, + vector unsigned long long __c) { + return (__a & ~(vector bool long long)__c) | + (__b & (vector bool long long)__c); +} + +static __inline__ vector signed long long __ATTRS_o_ai +vec_sel(vector signed long long __a, vector signed long long __b, + vector bool long long __c) { + return (__a & ~(vector signed long long)__c) | + (__b & (vector signed long long)__c); +} + +static __inline__ vector signed long long __ATTRS_o_ai +vec_sel(vector signed long long __a, vector signed long long __b, + vector unsigned long long __c) { + 
return (__a & ~(vector signed long long)__c) | + (__b & (vector signed long long)__c); +} + +static __inline__ vector unsigned long long __ATTRS_o_ai +vec_sel(vector unsigned long long __a, vector unsigned long long __b, + vector bool long long __c) { + return (__a & ~(vector unsigned long long)__c) | + (__b & (vector unsigned long long)__c); +} + +static __inline__ vector unsigned long long __ATTRS_o_ai +vec_sel(vector unsigned long long __a, vector unsigned long long __b, + vector unsigned long long __c) { + return (__a & ~__c) | (__b & __c); +} #endif /* vec_vsel */ @@ -13900,6 +14316,18 @@ static __inline__ int __ATTRS_o_ai vec_all_eq(vector double __a, } #endif +#ifdef __POWER10_VECTOR__ +static __inline__ int __ATTRS_o_ai vec_all_eq(vector signed __int128 __a, + vector signed __int128 __b) { + return __builtin_altivec_vcmpequq_p(__CR6_LT, __a, __b); +} + +static __inline__ int __ATTRS_o_ai vec_all_eq(vector unsigned __int128 __a, + vector unsigned __int128 __b) { + return __builtin_altivec_vcmpequq_p(__CR6_LT, __a, __b); +} +#endif + /* vec_all_ge */ static __inline__ int __ATTRS_o_ai vec_all_ge(vector signed char __a, @@ -14071,6 +14499,18 @@ static __inline__ int __ATTRS_o_ai vec_all_ge(vector double __a, } #endif +#ifdef __POWER10_VECTOR__ +static __inline__ int __ATTRS_o_ai vec_all_ge(vector signed __int128 __a, + vector signed __int128 __b) { + return __builtin_altivec_vcmpgtsq_p(__CR6_EQ, __b, __a); +} + +static __inline__ int __ATTRS_o_ai vec_all_ge(vector unsigned __int128 __a, + vector unsigned __int128 __b) { + return __builtin_altivec_vcmpgtuq_p(__CR6_EQ, __b, __a); +} +#endif + /* vec_all_gt */ static __inline__ int __ATTRS_o_ai vec_all_gt(vector signed char __a, @@ -14242,6 +14682,18 @@ static __inline__ int __ATTRS_o_ai vec_all_gt(vector double __a, } #endif +#ifdef __POWER10_VECTOR__ +static __inline__ int __ATTRS_o_ai vec_all_gt(vector signed __int128 __a, + vector signed __int128 __b) { + return __builtin_altivec_vcmpgtsq_p(__CR6_LT, __a, __b); +} + +static __inline__ int __ATTRS_o_ai vec_all_gt(vector unsigned __int128 __a, + vector unsigned __int128 __b) { + return __builtin_altivec_vcmpgtuq_p(__CR6_LT, __a, __b); +} +#endif + /* vec_all_in */ static __inline__ int __attribute__((__always_inline__)) @@ -14421,6 +14873,18 @@ static __inline__ int __ATTRS_o_ai vec_all_le(vector double __a, } #endif +#ifdef __POWER10_VECTOR__ +static __inline__ int __ATTRS_o_ai vec_all_le(vector signed __int128 __a, + vector signed __int128 __b) { + return __builtin_altivec_vcmpgtsq_p(__CR6_EQ, __a, __b); +} + +static __inline__ int __ATTRS_o_ai vec_all_le(vector unsigned __int128 __a, + vector unsigned __int128 __b) { + return __builtin_altivec_vcmpgtuq_p(__CR6_EQ, __a, __b); +} +#endif + /* vec_all_lt */ static __inline__ int __ATTRS_o_ai vec_all_lt(vector signed char __a, @@ -14593,6 +15057,18 @@ static __inline__ int __ATTRS_o_ai vec_all_lt(vector double __a, } #endif +#ifdef __POWER10_VECTOR__ +static __inline__ int __ATTRS_o_ai vec_all_lt(vector signed __int128 __a, + vector signed __int128 __b) { + return __builtin_altivec_vcmpgtsq_p(__CR6_LT, __b, __a); +} + +static __inline__ int __ATTRS_o_ai vec_all_lt(vector unsigned __int128 __a, + vector unsigned __int128 __b) { + return __builtin_altivec_vcmpgtuq_p(__CR6_LT, __b, __a); +} +#endif + /* vec_all_nan */ static __inline__ int __ATTRS_o_ai vec_all_nan(vector float __a) { @@ -14797,6 +15273,18 @@ static __inline__ int __ATTRS_o_ai vec_all_ne(vector double __a, } #endif +#ifdef __POWER10_VECTOR__ +static __inline__ int 
__ATTRS_o_ai vec_all_ne(vector signed __int128 __a, + vector signed __int128 __b) { + return __builtin_altivec_vcmpequq_p(__CR6_EQ, __a, __b); +} + +static __inline__ int __ATTRS_o_ai vec_all_ne(vector unsigned __int128 __a, + vector unsigned __int128 __b) { + return __builtin_altivec_vcmpequq_p(__CR6_EQ, __a, __b); +} +#endif + /* vec_all_nge */ static __inline__ int __ATTRS_o_ai vec_all_nge(vector float __a, @@ -15042,6 +15530,18 @@ static __inline__ int __ATTRS_o_ai vec_any_eq(vector double __a, } #endif +#ifdef __POWER10_VECTOR__ +static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed __int128 __a, + vector signed __int128 __b) { + return __builtin_altivec_vcmpequq_p(__CR6_EQ_REV, __a, __b); +} + +static __inline__ int __ATTRS_o_ai vec_any_eq(vector unsigned __int128 __a, + vector unsigned __int128 __b) { + return __builtin_altivec_vcmpequq_p(__CR6_EQ_REV, __a, __b); +} +#endif + /* vec_any_ge */ static __inline__ int __ATTRS_o_ai vec_any_ge(vector signed char __a, @@ -15221,6 +15721,18 @@ static __inline__ int __ATTRS_o_ai vec_any_ge(vector double __a, } #endif +#ifdef __POWER10_VECTOR__ +static __inline__ int __ATTRS_o_ai vec_any_ge(vector signed __int128 __a, + vector signed __int128 __b) { + return __builtin_altivec_vcmpgtsq_p(__CR6_LT_REV, __b, __a); +} + +static __inline__ int __ATTRS_o_ai vec_any_ge(vector unsigned __int128 __a, + vector unsigned __int128 __b) { + return __builtin_altivec_vcmpgtuq_p(__CR6_LT_REV, __b, __a); +} +#endif + /* vec_any_gt */ static __inline__ int __ATTRS_o_ai vec_any_gt(vector signed char __a, @@ -15400,6 +15912,18 @@ static __inline__ int __ATTRS_o_ai vec_any_gt(vector double __a, } #endif +#ifdef __POWER10_VECTOR__ +static __inline__ int __ATTRS_o_ai vec_any_gt(vector signed __int128 __a, + vector signed __int128 __b) { + return __builtin_altivec_vcmpgtsq_p(__CR6_EQ_REV, __a, __b); +} + +static __inline__ int __ATTRS_o_ai vec_any_gt(vector unsigned __int128 __a, + vector unsigned __int128 __b) { + return __builtin_altivec_vcmpgtuq_p(__CR6_EQ_REV, __a, __b); +} +#endif + /* vec_any_le */ static __inline__ int __ATTRS_o_ai vec_any_le(vector signed char __a, @@ -15579,6 +16103,18 @@ static __inline__ int __ATTRS_o_ai vec_any_le(vector double __a, } #endif +#ifdef __POWER10_VECTOR__ +static __inline__ int __ATTRS_o_ai vec_any_le(vector signed __int128 __a, + vector signed __int128 __b) { + return __builtin_altivec_vcmpgtsq_p(__CR6_LT_REV, __a, __b); +} + +static __inline__ int __ATTRS_o_ai vec_any_le(vector unsigned __int128 __a, + vector unsigned __int128 __b) { + return __builtin_altivec_vcmpgtuq_p(__CR6_LT_REV, __a, __b); +} +#endif + /* vec_any_lt */ static __inline__ int __ATTRS_o_ai vec_any_lt(vector signed char __a, @@ -15758,6 +16294,18 @@ static __inline__ int __ATTRS_o_ai vec_any_lt(vector double __a, } #endif +#ifdef __POWER10_VECTOR__ +static __inline__ int __ATTRS_o_ai vec_any_lt(vector signed __int128 __a, + vector signed __int128 __b) { + return __builtin_altivec_vcmpgtsq_p(__CR6_EQ_REV, __b, __a); +} + +static __inline__ int __ATTRS_o_ai vec_any_lt(vector unsigned __int128 __a, + vector unsigned __int128 __b) { + return __builtin_altivec_vcmpgtuq_p(__CR6_EQ_REV, __b, __a); +} +#endif + /* vec_any_nan */ static __inline__ int __attribute__((__always_inline__)) @@ -15953,6 +16501,18 @@ static __inline__ int __ATTRS_o_ai vec_any_ne(vector double __a, } #endif +#ifdef __POWER10_VECTOR__ +static __inline__ int __ATTRS_o_ai vec_any_ne(vector signed __int128 __a, + vector signed __int128 __b) { + return 
__builtin_altivec_vcmpequq_p(__CR6_LT_REV, __a, __b); +} + +static __inline__ int __ATTRS_o_ai vec_any_ne(vector unsigned __int128 __a, + vector unsigned __int128 __b) { + return __builtin_altivec_vcmpequq_p(__CR6_LT_REV, __a, __b); +} +#endif + /* vec_any_nge */ static __inline__ int __attribute__((__always_inline__)) @@ -16353,41 +16913,41 @@ typedef vector unsigned int unaligned_vec_uint __attribute__((aligned(1))); typedef vector float unaligned_vec_float __attribute__((aligned(1))); static inline __ATTRS_o_ai vector signed char vec_xl(signed long long __offset, - signed char *__ptr) { + const signed char *__ptr) { return *(unaligned_vec_schar *)(__ptr + __offset); } static inline __ATTRS_o_ai vector unsigned char -vec_xl(signed long long __offset, unsigned char *__ptr) { +vec_xl(signed long long __offset, const unsigned char *__ptr) { return *(unaligned_vec_uchar*)(__ptr + __offset); } static inline __ATTRS_o_ai vector signed short vec_xl(signed long long __offset, - signed short *__ptr) { + const signed short *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_sshort *)__addr; } static inline __ATTRS_o_ai vector unsigned short -vec_xl(signed long long __offset, unsigned short *__ptr) { +vec_xl(signed long long __offset, const unsigned short *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_ushort *)__addr; } static inline __ATTRS_o_ai vector signed int vec_xl(signed long long __offset, - signed int *__ptr) { + const signed int *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_sint *)__addr; } static inline __ATTRS_o_ai vector unsigned int vec_xl(signed long long __offset, - unsigned int *__ptr) { + const unsigned int *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_uint *)__addr; } static inline __ATTRS_o_ai vector float vec_xl(signed long long __offset, - float *__ptr) { + const float *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_float *)__addr; } @@ -16398,19 +16958,19 @@ typedef vector unsigned long long unaligned_vec_ull __attribute__((aligned(1))); typedef vector double unaligned_vec_double __attribute__((aligned(1))); static inline __ATTRS_o_ai vector signed long long -vec_xl(signed long long __offset, signed long long *__ptr) { +vec_xl(signed long long __offset, const signed long long *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_sll *)__addr; } static inline __ATTRS_o_ai vector unsigned long long -vec_xl(signed long long __offset, unsigned long long *__ptr) { +vec_xl(signed long long __offset, const unsigned long long *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_ull *)__addr; } static inline __ATTRS_o_ai vector double vec_xl(signed long long __offset, - double *__ptr) { + const double *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_double *)__addr; } @@ -16421,13 +16981,13 @@ typedef vector signed __int128 unaligned_vec_si128 __attribute__((aligned(1))); typedef vector unsigned __int128 unaligned_vec_ui128 __attribute__((aligned(1))); static inline __ATTRS_o_ai vector signed __int128 -vec_xl(signed long long __offset, signed __int128 *__ptr) { +vec_xl(signed long long __offset, const signed __int128 *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_si128 *)__addr; } static inline __ATTRS_o_ai vector unsigned __int128 
-vec_xl(signed long long __offset, unsigned __int128 *__ptr) { +vec_xl(signed long long __offset, const unsigned __int128 *__ptr) { signed char *__addr = (signed char *)__ptr + __offset; return *(unaligned_vec_ui128 *)__addr; } @@ -16437,71 +16997,71 @@ vec_xl(signed long long __offset, unsigned __int128 *__ptr) { #ifdef __LITTLE_ENDIAN__ static __inline__ vector signed char __ATTRS_o_ai -vec_xl_be(signed long long __offset, signed char *__ptr) { +vec_xl_be(signed long long __offset, const signed char *__ptr) { vector signed char __vec = (vector signed char)__builtin_vsx_lxvd2x_be(__offset, __ptr); return __builtin_shufflevector(__vec, __vec, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); } static __inline__ vector unsigned char __ATTRS_o_ai -vec_xl_be(signed long long __offset, unsigned char *__ptr) { +vec_xl_be(signed long long __offset, const unsigned char *__ptr) { vector unsigned char __vec = (vector unsigned char)__builtin_vsx_lxvd2x_be(__offset, __ptr); return __builtin_shufflevector(__vec, __vec, 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); } static __inline__ vector signed short __ATTRS_o_ai -vec_xl_be(signed long long __offset, signed short *__ptr) { +vec_xl_be(signed long long __offset, const signed short *__ptr) { vector signed short __vec = (vector signed short)__builtin_vsx_lxvd2x_be(__offset, __ptr); return __builtin_shufflevector(__vec, __vec, 3, 2, 1, 0, 7, 6, 5, 4); } static __inline__ vector unsigned short __ATTRS_o_ai -vec_xl_be(signed long long __offset, unsigned short *__ptr) { +vec_xl_be(signed long long __offset, const unsigned short *__ptr) { vector unsigned short __vec = (vector unsigned short)__builtin_vsx_lxvd2x_be(__offset, __ptr); return __builtin_shufflevector(__vec, __vec, 3, 2, 1, 0, 7, 6, 5, 4); } static __inline__ vector signed int __ATTRS_o_ai -vec_xl_be(signed long long __offset, signed int *__ptr) { +vec_xl_be(signed long long __offset, const signed int *__ptr) { return (vector signed int)__builtin_vsx_lxvw4x_be(__offset, __ptr); } static __inline__ vector unsigned int __ATTRS_o_ai -vec_xl_be(signed long long __offset, unsigned int *__ptr) { +vec_xl_be(signed long long __offset, const unsigned int *__ptr) { return (vector unsigned int)__builtin_vsx_lxvw4x_be(__offset, __ptr); } static __inline__ vector float __ATTRS_o_ai -vec_xl_be(signed long long __offset, float *__ptr) { +vec_xl_be(signed long long __offset, const float *__ptr) { return (vector float)__builtin_vsx_lxvw4x_be(__offset, __ptr); } #ifdef __VSX__ static __inline__ vector signed long long __ATTRS_o_ai -vec_xl_be(signed long long __offset, signed long long *__ptr) { +vec_xl_be(signed long long __offset, const signed long long *__ptr) { return (vector signed long long)__builtin_vsx_lxvd2x_be(__offset, __ptr); } static __inline__ vector unsigned long long __ATTRS_o_ai -vec_xl_be(signed long long __offset, unsigned long long *__ptr) { +vec_xl_be(signed long long __offset, const unsigned long long *__ptr) { return (vector unsigned long long)__builtin_vsx_lxvd2x_be(__offset, __ptr); } static __inline__ vector double __ATTRS_o_ai -vec_xl_be(signed long long __offset, double *__ptr) { +vec_xl_be(signed long long __offset, const double *__ptr) { return (vector double)__builtin_vsx_lxvd2x_be(__offset, __ptr); } #endif #if defined(__POWER8_VECTOR__) && defined(__powerpc64__) static __inline__ vector signed __int128 __ATTRS_o_ai -vec_xl_be(signed long long __offset, signed __int128 *__ptr) { +vec_xl_be(signed long long __offset, const signed __int128 *__ptr) { return 
vec_xl(__offset, __ptr); } static __inline__ vector unsigned __int128 __ATTRS_o_ai -vec_xl_be(signed long long __offset, unsigned __int128 *__ptr) { +vec_xl_be(signed long long __offset, const unsigned __int128 *__ptr) { return vec_xl(__offset, __ptr); } #endif @@ -16509,6 +17069,54 @@ vec_xl_be(signed long long __offset, unsigned __int128 *__ptr) { #define vec_xl_be vec_xl #endif +#if defined(__POWER10_VECTOR__) && defined(__VSX__) + +/* vect_xl_sext */ + +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_xl_sext(signed long long __offset, const signed char *__pointer) { + return (vector unsigned __int128)*(__pointer + __offset); +} + +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_xl_sext(signed long long __offset, const signed short *__pointer) { + return (vector unsigned __int128)*(__pointer + __offset); +} + +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_xl_sext(signed long long __offset, const signed int *__pointer) { + return (vector unsigned __int128)*(__pointer + __offset); +} + +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_xl_sext(signed long long __offset, const signed long long *__pointer) { + return (vector unsigned __int128)*(__pointer + __offset); +} + +/* vec_xl_zext */ + +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_xl_zext(signed long long __offset, const unsigned char *__pointer) { + return (vector unsigned __int128)*(__pointer + __offset); +} + +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_xl_zext(signed long long __offset, const unsigned short *__pointer) { + return (vector unsigned __int128)*(__pointer + __offset); +} + +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_xl_zext(signed long long __offset, const unsigned int *__pointer) { + return (vector unsigned __int128)*(__pointer + __offset); +} + +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_xl_zext(signed long long __offset, const unsigned long long *__pointer) { + return (vector unsigned __int128)*(__pointer + __offset); +} + +#endif + /* vec_xst */ static inline __ATTRS_o_ai void vec_xst(vector signed char __vec, @@ -16597,6 +17205,58 @@ static inline __ATTRS_o_ai void vec_xst(vector unsigned __int128 __vec, } #endif +/* vec_xst_trunc */ + +#if defined(__POWER10_VECTOR__) && defined(__VSX__) +static inline __ATTRS_o_ai void vec_xst_trunc(vector signed __int128 __vec, + signed long long __offset, + signed char *__ptr) { + *(__ptr + __offset) = (signed char)__vec[0]; +} + +static inline __ATTRS_o_ai void vec_xst_trunc(vector unsigned __int128 __vec, + signed long long __offset, + unsigned char *__ptr) { + *(__ptr + __offset) = (unsigned char)__vec[0]; +} + +static inline __ATTRS_o_ai void vec_xst_trunc(vector signed __int128 __vec, + signed long long __offset, + signed short *__ptr) { + *(__ptr + __offset) = (signed short)__vec[0]; +} + +static inline __ATTRS_o_ai void vec_xst_trunc(vector unsigned __int128 __vec, + signed long long __offset, + unsigned short *__ptr) { + *(__ptr + __offset) = (unsigned short)__vec[0]; +} + +static inline __ATTRS_o_ai void vec_xst_trunc(vector signed __int128 __vec, + signed long long __offset, + signed int *__ptr) { + *(__ptr + __offset) = (signed int)__vec[0]; +} + +static inline __ATTRS_o_ai void vec_xst_trunc(vector unsigned __int128 __vec, + signed long long __offset, + unsigned int *__ptr) { + *(__ptr + __offset) = (unsigned int)__vec[0]; +} + +static inline __ATTRS_o_ai void vec_xst_trunc(vector signed __int128 __vec, + signed long long __offset, + signed long 
long *__ptr) { + *(__ptr + __offset) = (signed long long)__vec[0]; +} + +static inline __ATTRS_o_ai void vec_xst_trunc(vector unsigned __int128 __vec, + signed long long __offset, + unsigned long long *__ptr) { + *(__ptr + __offset) = (unsigned long long)__vec[0]; +} +#endif + /* vec_xst_be */ #ifdef __LITTLE_ENDIAN__ @@ -16763,6 +17423,100 @@ static vector signed char __ATTRS_o_ai vec_nabs(vector signed char __a) { } #ifdef __POWER10_VECTOR__ + +/* vec_extractm */ + +static __inline__ unsigned int __ATTRS_o_ai +vec_extractm(vector unsigned char __a) { + return __builtin_altivec_vextractbm(__a); +} + +static __inline__ unsigned int __ATTRS_o_ai +vec_extractm(vector unsigned short __a) { + return __builtin_altivec_vextracthm(__a); +} + +static __inline__ unsigned int __ATTRS_o_ai +vec_extractm(vector unsigned int __a) { + return __builtin_altivec_vextractwm(__a); +} + +static __inline__ unsigned int __ATTRS_o_ai +vec_extractm(vector unsigned long long __a) { + return __builtin_altivec_vextractdm(__a); +} + +static __inline__ unsigned int __ATTRS_o_ai +vec_extractm(vector unsigned __int128 __a) { + return __builtin_altivec_vextractqm(__a); +} + +/* vec_expandm */ + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_expandm(vector unsigned char __a) { + return __builtin_altivec_vexpandbm(__a); +} + +static __inline__ vector unsigned short __ATTRS_o_ai +vec_expandm(vector unsigned short __a) { + return __builtin_altivec_vexpandhm(__a); +} + +static __inline__ vector unsigned int __ATTRS_o_ai +vec_expandm(vector unsigned int __a) { + return __builtin_altivec_vexpandwm(__a); +} + +static __inline__ vector unsigned long long __ATTRS_o_ai +vec_expandm(vector unsigned long long __a) { + return __builtin_altivec_vexpanddm(__a); +} + +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_expandm(vector unsigned __int128 __a) { + return __builtin_altivec_vexpandqm(__a); +} + +/* vec_cntm */ + +#define vec_cntm(__a, __mp) \ + _Generic((__a), vector unsigned char \ + : __builtin_altivec_vcntmbb((__a), (unsigned int)(__mp)), \ + vector unsigned short \ + : __builtin_altivec_vcntmbh((__a), (unsigned int)(__mp)), \ + vector unsigned int \ + : __builtin_altivec_vcntmbw((__a), (unsigned int)(__mp)), \ + vector unsigned long long \ + : __builtin_altivec_vcntmbd((__a), (unsigned int)(__mp))) + +/* vec_gen[b|h|w|d|q]m */ + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_genbm(unsigned long long __bm) { + return __builtin_altivec_mtvsrbm(__bm); +} + +static __inline__ vector unsigned short __ATTRS_o_ai +vec_genhm(unsigned long long __bm) { + return __builtin_altivec_mtvsrhm(__bm); +} + +static __inline__ vector unsigned int __ATTRS_o_ai +vec_genwm(unsigned long long __bm) { + return __builtin_altivec_mtvsrwm(__bm); +} + +static __inline__ vector unsigned long long __ATTRS_o_ai +vec_gendm(unsigned long long __bm) { + return __builtin_altivec_mtvsrdm(__bm); +} + +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_genqm(unsigned long long __bm) { + return __builtin_altivec_mtvsrqm(__bm); +} + /* vec_pdep */ static __inline__ vector unsigned long long __ATTRS_o_ai @@ -16881,6 +17635,38 @@ vec_cnttzm(vector unsigned long long __a, vector unsigned long long __b) { return __builtin_altivec_vctzdm(__a, __b); } +/* vec_mod */ + +static __inline__ vector signed int __ATTRS_o_ai +vec_mod(vector signed int __a, vector signed int __b) { + return __a % __b; +} + +static __inline__ vector unsigned int __ATTRS_o_ai +vec_mod(vector unsigned int __a, vector unsigned int __b) { + return __a % __b; 
+} + +static __inline__ vector signed long long __ATTRS_o_ai +vec_mod(vector signed long long __a, vector signed long long __b) { + return __a % __b; +} + +static __inline__ vector unsigned long long __ATTRS_o_ai +vec_mod(vector unsigned long long __a, vector unsigned long long __b) { + return __a % __b; +} + +static __inline__ vector signed __int128 __ATTRS_o_ai +vec_mod(vector signed __int128 __a, vector signed __int128 __b) { + return __a % __b; +} + +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_mod(vector unsigned __int128 __a, vector unsigned __int128 __b) { + return __a % __b; +} + /* vec_sldbi */ #define vec_sldb(__a, __b, __c) __builtin_altivec_vsldbi(__a, __b, (__c & 0x7)) @@ -17027,6 +17813,92 @@ vec_inserth(vector unsigned int __a, vector unsigned int __b, #endif } +/* vec_extractl */ + +static __inline__ vector unsigned long long __ATTRS_o_ai vec_extractl( + vector unsigned char __a, vector unsigned char __b, unsigned int __c) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vextdubvrx(__a, __b, __c); +#else + vector unsigned long long __ret = __builtin_altivec_vextdubvlx(__a, __b, __c); + return vec_sld(__ret, __ret, 8); +#endif +} + +static __inline__ vector unsigned long long __ATTRS_o_ai vec_extractl( + vector unsigned short __a, vector unsigned short __b, unsigned int __c) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vextduhvrx(__a, __b, __c); +#else + vector unsigned long long __ret = __builtin_altivec_vextduhvlx(__a, __b, __c); + return vec_sld(__ret, __ret, 8); +#endif +} + +static __inline__ vector unsigned long long __ATTRS_o_ai vec_extractl( + vector unsigned int __a, vector unsigned int __b, unsigned int __c) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vextduwvrx(__a, __b, __c); +#else + vector unsigned long long __ret = __builtin_altivec_vextduwvlx(__a, __b, __c); + return vec_sld(__ret, __ret, 8); +#endif +} + +static __inline__ vector unsigned long long __ATTRS_o_ai +vec_extractl(vector unsigned long long __a, vector unsigned long long __b, + unsigned int __c) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vextddvrx(__a, __b, __c); +#else + vector unsigned long long __ret = __builtin_altivec_vextddvlx(__a, __b, __c); + return vec_sld(__ret, __ret, 8); +#endif +} + +/* vec_extracth */ + +static __inline__ vector unsigned long long __ATTRS_o_ai vec_extracth( + vector unsigned char __a, vector unsigned char __b, unsigned int __c) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vextdubvlx(__a, __b, __c); +#else + vector unsigned long long __ret = __builtin_altivec_vextdubvrx(__a, __b, __c); + return vec_sld(__ret, __ret, 8); +#endif +} + +static __inline__ vector unsigned long long __ATTRS_o_ai vec_extracth( + vector unsigned short __a, vector unsigned short __b, unsigned int __c) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vextduhvlx(__a, __b, __c); +#else + vector unsigned long long __ret = __builtin_altivec_vextduhvrx(__a, __b, __c); + return vec_sld(__ret, __ret, 8); +#endif +} + +static __inline__ vector unsigned long long __ATTRS_o_ai vec_extracth( + vector unsigned int __a, vector unsigned int __b, unsigned int __c) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vextduwvlx(__a, __b, __c); +#else + vector unsigned long long __ret = __builtin_altivec_vextduwvrx(__a, __b, __c); + return vec_sld(__ret, __ret, 8); +#endif +} + +static __inline__ vector unsigned long long __ATTRS_o_ai +vec_extracth(vector unsigned long long __a, vector unsigned long long __b, + unsigned int __c) { +#ifdef 
__LITTLE_ENDIAN__ + return __builtin_altivec_vextddvlx(__a, __b, __c); +#else + vector unsigned long long __ret = __builtin_altivec_vextddvrx(__a, __b, __c); + return vec_sld(__ret, __ret, 8); +#endif +} + #ifdef __VSX__ /* vec_permx */ @@ -17095,6 +17967,14 @@ vec_blendv(vector double __a, vector double __b, return __builtin_vsx_xxblendvd(__a, __b, __c); } +/* vec_replace_elt */ + +#define vec_replace_elt __builtin_altivec_vec_replace_elt + +/* vec_replace_unaligned */ + +#define vec_replace_unaligned __builtin_altivec_vec_replace_unaligned + /* vec_splati */ #define vec_splati(__a) \ @@ -17161,6 +18041,197 @@ vec_test_lsbb_all_zeros(vector unsigned char __a) { return __builtin_vsx_xvtlsbb(__a, 0); } #endif /* __VSX__ */ + +/* vec_stril */ + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_stril(vector unsigned char __a) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vstribr((vector signed char)__a); +#else + return __builtin_altivec_vstribl((vector signed char)__a); +#endif +} + +static __inline__ vector signed char __ATTRS_o_ai +vec_stril(vector signed char __a) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vstribr(__a); +#else + return __builtin_altivec_vstribl(__a); +#endif +} + +static __inline__ vector unsigned short __ATTRS_o_ai +vec_stril(vector unsigned short __a) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vstrihr((vector signed short)__a); +#else + return __builtin_altivec_vstrihl((vector signed short)__a); +#endif +} + +static __inline__ vector signed short __ATTRS_o_ai +vec_stril(vector signed short __a) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vstrihr(__a); +#else + return __builtin_altivec_vstrihl(__a); +#endif +} + +/* vec_stril_p */ + +static __inline__ int __ATTRS_o_ai vec_stril_p(vector unsigned char __a) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vstribr_p(__CR6_EQ, (vector signed char)__a); +#else + return __builtin_altivec_vstribl_p(__CR6_EQ, (vector signed char)__a); +#endif +} + +static __inline__ int __ATTRS_o_ai vec_stril_p(vector signed char __a) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vstribr_p(__CR6_EQ, __a); +#else + return __builtin_altivec_vstribl_p(__CR6_EQ, __a); +#endif +} + +static __inline__ int __ATTRS_o_ai vec_stril_p(vector unsigned short __a) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vstrihr_p(__CR6_EQ, (vector signed short)__a); +#else + return __builtin_altivec_vstrihl_p(__CR6_EQ, (vector signed short)__a); +#endif +} + +static __inline__ int __ATTRS_o_ai vec_stril_p(vector signed short __a) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vstrihr_p(__CR6_EQ, __a); +#else + return __builtin_altivec_vstrihl_p(__CR6_EQ, __a); +#endif +} + +/* vec_strir */ + +static __inline__ vector unsigned char __ATTRS_o_ai +vec_strir(vector unsigned char __a) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vstribl((vector signed char)__a); +#else + return __builtin_altivec_vstribr((vector signed char)__a); +#endif +} + +static __inline__ vector signed char __ATTRS_o_ai +vec_strir(vector signed char __a) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vstribl(__a); +#else + return __builtin_altivec_vstribr(__a); +#endif +} + +static __inline__ vector unsigned short __ATTRS_o_ai +vec_strir(vector unsigned short __a) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vstrihl((vector signed short)__a); +#else + return __builtin_altivec_vstrihr((vector signed short)__a); +#endif +} + +static __inline__ vector signed short __ATTRS_o_ai +vec_strir(vector 
signed short __a) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vstrihl(__a); +#else + return __builtin_altivec_vstrihr(__a); +#endif +} + +/* vec_strir_p */ + +static __inline__ int __ATTRS_o_ai vec_strir_p(vector unsigned char __a) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vstribl_p(__CR6_EQ, (vector signed char)__a); +#else + return __builtin_altivec_vstribr_p(__CR6_EQ, (vector signed char)__a); +#endif +} + +static __inline__ int __ATTRS_o_ai vec_strir_p(vector signed char __a) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vstribl_p(__CR6_EQ, __a); +#else + return __builtin_altivec_vstribr_p(__CR6_EQ, __a); +#endif +} + +static __inline__ int __ATTRS_o_ai vec_strir_p(vector unsigned short __a) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vstrihl_p(__CR6_EQ, (vector signed short)__a); +#else + return __builtin_altivec_vstrihr_p(__CR6_EQ, (vector signed short)__a); +#endif +} + +static __inline__ int __ATTRS_o_ai vec_strir_p(vector signed short __a) { +#ifdef __LITTLE_ENDIAN__ + return __builtin_altivec_vstrihl_p(__CR6_EQ, __a); +#else + return __builtin_altivec_vstrihr_p(__CR6_EQ, __a); +#endif +} + +/* vs[l | r | ra] */ + +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_sl(vector unsigned __int128 __a, vector unsigned __int128 __b) { + return __a << (__b % (vector unsigned __int128)(sizeof(unsigned __int128) * + __CHAR_BIT__)); +} + +static __inline__ vector signed __int128 __ATTRS_o_ai +vec_sl(vector signed __int128 __a, vector unsigned __int128 __b) { + return __a << (__b % (vector unsigned __int128)(sizeof(unsigned __int128) * + __CHAR_BIT__)); +} + +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_sr(vector unsigned __int128 __a, vector unsigned __int128 __b) { + return __a >> (__b % (vector unsigned __int128)(sizeof(unsigned __int128) * + __CHAR_BIT__)); +} + +static __inline__ vector signed __int128 __ATTRS_o_ai +vec_sr(vector signed __int128 __a, vector unsigned __int128 __b) { + return ( + vector signed __int128)(((vector unsigned __int128)__a) >> + (__b % + (vector unsigned __int128)(sizeof( + unsigned __int128) * + __CHAR_BIT__))); +} + +static __inline__ vector unsigned __int128 __ATTRS_o_ai +vec_sra(vector unsigned __int128 __a, vector unsigned __int128 __b) { + return ( + vector unsigned __int128)(((vector signed __int128)__a) >> + (__b % + (vector unsigned __int128)(sizeof( + unsigned __int128) * + __CHAR_BIT__))); +} + +static __inline__ vector signed __int128 __ATTRS_o_ai +vec_sra(vector signed __int128 __a, vector unsigned __int128 __b) { + return __a >> (__b % (vector unsigned __int128)(sizeof(unsigned __int128) * + __CHAR_BIT__)); +} + #endif /* __POWER10_VECTOR__ */ #undef __ATTRS_o_ai diff --git a/lib/include/amxintrin.h b/lib/include/amxintrin.h index 58254e21c8..823c7ca1f0 100644 --- a/lib/include/amxintrin.h +++ b/lib/include/amxintrin.h @@ -15,8 +15,8 @@ #define __AMXINTRIN_H #ifdef __x86_64__ -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, __target__("amx-tile"))) +#define __DEFAULT_FN_ATTRS_TILE \ + __attribute__((__always_inline__, __nodebug__, __target__("amx-tile"))) /// Load tile configuration from a 64-byte memory location specified by /// "mem_addr". 
The tile configuration includes the tile type palette, the @@ -31,9 +31,8 @@ /// /// \param __config /// A pointer to 512-bits configuration -static __inline__ void __DEFAULT_FN_ATTRS -_tile_loadconfig(const void *__config) -{ +static __inline__ void __DEFAULT_FN_ATTRS_TILE +_tile_loadconfig(const void *__config) { __builtin_ia32_tile_loadconfig(__config); } @@ -48,9 +47,8 @@ _tile_loadconfig(const void *__config) /// /// \param __config /// A pointer to 512-bits configuration -static __inline__ void __DEFAULT_FN_ATTRS -_tile_storeconfig(void *__config) -{ +static __inline__ void __DEFAULT_FN_ATTRS_TILE +_tile_storeconfig(void *__config) { __builtin_ia32_tile_storeconfig(__config); } @@ -60,9 +58,7 @@ _tile_storeconfig(void *__config) /// \headerfile /// /// This intrinsic corresponds to the TILERELEASE instruction. -static __inline__ void __DEFAULT_FN_ATTRS -_tile_release(void) -{ +static __inline__ void __DEFAULT_FN_ATTRS_TILE _tile_release(void) { __builtin_ia32_tilerelease(); } @@ -80,8 +76,9 @@ _tile_release(void) /// A pointer to base address. /// \param stride /// The stride between the rows' data to be loaded in memory. -#define _tile_loadd(dst, base, stride) \ - __builtin_ia32_tileloadd64((dst), ((const void *)(base)), (__SIZE_TYPE__)(stride)) +#define _tile_loadd(dst, base, stride) \ + __builtin_ia32_tileloadd64((dst), ((const void *)(base)), \ + (__SIZE_TYPE__)(stride)) /// Load tile rows from memory specifieid by "base" address and "stride" into /// destination tile "dst" using the tile configuration previously configured @@ -99,8 +96,9 @@ _tile_release(void) /// A pointer to base address. /// \param stride /// The stride between the rows' data to be loaded in memory. -#define _tile_stream_loadd(dst, base, stride) \ - __builtin_ia32_tileloaddt164((dst), ((const void *)(base)), (__SIZE_TYPE__)(stride)) +#define _tile_stream_loadd(dst, base, stride) \ + __builtin_ia32_tileloaddt164((dst), ((const void *)(base)), \ + (__SIZE_TYPE__)(stride)) /// Store the tile specified by "src" to memory specifieid by "base" address and /// "stride" using the tile configuration previously configured via @@ -116,7 +114,7 @@ _tile_release(void) /// A pointer to base address. /// \param stride /// The stride between the rows' data to be stored in memory. -#define _tile_stored(dst, base, stride) \ +#define _tile_stored(dst, base, stride) \ __builtin_ia32_tilestored64((dst), ((void *)(base)), (__SIZE_TYPE__)(stride)) /// Zero the tile specified by "tdest". @@ -145,7 +143,8 @@ _tile_release(void) /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. -#define _tile_dpbssd(dst, src0, src1) __builtin_ia32_tdpbssd((dst), (src0), (src1)) +#define _tile_dpbssd(dst, src0, src1) \ + __builtin_ia32_tdpbssd((dst), (src0), (src1)) /// Compute dot-product of bytes in tiles with a source/destination accumulator. /// Multiply groups of 4 adjacent pairs of signed 8-bit integers in src0 with @@ -163,7 +162,8 @@ _tile_release(void) /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. -#define _tile_dpbsud(dst, src0, src1) __builtin_ia32_tdpbsud((dst), (src0), (src1)) +#define _tile_dpbsud(dst, src0, src1) \ + __builtin_ia32_tdpbsud((dst), (src0), (src1)) /// Compute dot-product of bytes in tiles with a source/destination accumulator. /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in src0 with @@ -181,7 +181,8 @@ _tile_release(void) /// The 1st source tile. Max size is 1024 Bytes. 
/// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. -#define _tile_dpbusd(dst, src0, src1) __builtin_ia32_tdpbusd((dst), (src0), (src1)) +#define _tile_dpbusd(dst, src0, src1) \ + __builtin_ia32_tdpbusd((dst), (src0), (src1)) /// Compute dot-product of bytes in tiles with a source/destination accumulator. /// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in src0 with @@ -199,7 +200,8 @@ _tile_release(void) /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. -#define _tile_dpbuud(dst, src0, src1) __builtin_ia32_tdpbuud((dst), (src0), (src1)) +#define _tile_dpbuud(dst, src0, src1) \ + __builtin_ia32_tdpbuud((dst), (src0), (src1)) /// Compute dot-product of BF16 (16-bit) floating-point pairs in tiles src0 and /// src1, accumulating the intermediate single-precision (32-bit) floating-point @@ -216,10 +218,61 @@ _tile_release(void) /// The 1st source tile. Max size is 1024 Bytes. /// \param src1 /// The 2nd source tile. Max size is 1024 Bytes. -#define _tile_dpbf16ps(dst, src0, src1) \ +#define _tile_dpbf16ps(dst, src0, src1) \ __builtin_ia32_tdpbf16ps((dst), (src0), (src1)) -#undef __DEFAULT_FN_ATTRS +#define __DEFAULT_FN_ATTRS_INT8 \ + __attribute__((__always_inline__, __nodebug__, __target__("amx-int8"))) + +typedef int _tile1024i __attribute__((__vector_size__(1024), __aligned__(64))); +static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8 +_tile_loadd_internal(unsigned short m, unsigned short n, const void *base, + __SIZE_TYPE__ stride) { + return __builtin_ia32_tileloadd64_internal(m, n, base, + (__SIZE_TYPE__)(stride)); +} + +static __inline__ _tile1024i __DEFAULT_FN_ATTRS_INT8 +_tile_dpbssd_internal(unsigned short m, unsigned short n, unsigned short k, + _tile1024i dst, _tile1024i src1, _tile1024i src2) { + return __builtin_ia32_tdpbssd_internal(m, n, k, dst, src1, src2); +} + +static __inline__ void __DEFAULT_FN_ATTRS_INT8 +_tile_stored_internal(unsigned short m, unsigned short n, void *base, + __SIZE_TYPE__ stride, _tile1024i tile) { + return __builtin_ia32_tilestored64_internal(m, n, base, + (__SIZE_TYPE__)(stride), tile); +} + +typedef struct __tile1024i_str { + const unsigned short row; + const unsigned short col; + _tile1024i tile; +} __tile1024i; + +__DEFAULT_FN_ATTRS_TILE +static void __tile_loadd(__tile1024i *dst, const void *base, + __SIZE_TYPE__ stride) { + dst->tile = _tile_loadd_internal(dst->row, dst->col, base, stride); +} + +__DEFAULT_FN_ATTRS_INT8 +static void __tile_dpbssd(__tile1024i *dst, __tile1024i src1, + __tile1024i src2) { + dst->tile = _tile_dpbssd_internal(src1.row, src2.col, src1.col, dst->tile, + src1.tile, src2.tile); +} + +__DEFAULT_FN_ATTRS_TILE +static void __tile_stored(void *base, __SIZE_TYPE__ stride, __tile1024i src) { + _tile_stored_internal(src.row, src.col, base, stride, src.tile); +} + +__DEFAULT_FN_ATTRS_TILE +static void __tile_zero(__tile1024i *dst) { + dst->tile = __builtin_ia32_tilezero_internal(dst->row, dst->col); +} #endif /* __x86_64__ */ #endif /* __AMXINTRIN_H */ diff --git a/lib/include/arm_acle.h b/lib/include/arm_acle.h index de568b4ff9..c156d89c1f 100644 --- a/lib/include/arm_acle.h +++ b/lib/include/arm_acle.h @@ -639,6 +639,32 @@ __jcvt(double __a) { } #endif +/* Armv8.7-A load/store 64-byte intrinsics */ +#if __ARM_64BIT_STATE && defined(__ARM_FEATURE_LS64) +typedef struct { + uint64_t val[8]; +} data512_t; + +static __inline__ data512_t __attribute__((__always_inline__, __nodebug__)) +__arm_ld64b(const void *__addr) { + data512_t __value; 
+ __builtin_arm_ld64b(__addr, __value.val); + return __value; +} +static __inline__ void __attribute__((__always_inline__, __nodebug__)) +__arm_st64b(void *__addr, data512_t __value) { + __builtin_arm_st64b(__addr, __value.val); +} +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) +__arm_st64bv(void *__addr, data512_t __value) { + return __builtin_arm_st64bv(__addr, __value.val); +} +static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__)) +__arm_st64bv0(void *__addr, data512_t __value) { + return __builtin_arm_st64bv0(__addr, __value.val); +} +#endif + /* 10.1 Special register intrinsics */ #define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg) #define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg) diff --git a/lib/include/arm_neon.h b/lib/include/arm_neon.h index da1e17cc00..dbb65b8a49 100644 --- a/lib/include/arm_neon.h +++ b/lib/include/arm_neon.h @@ -40429,6 +40429,638 @@ __ai float32x4_t vcaddq_rot90_f32(float32x4_t __p0, float32x4_t __p1) { } #endif +#ifdef __LITTLE_ENDIAN__ +__ai float32x4_t vcmlaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vcmlaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + return __ret; +} +#else +__ai float32x4_t vcmlaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vcmlaq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +__ai float32x4_t __noswap_vcmlaq_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vcmlaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai float32x2_t vcmla_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { + float32x2_t __ret; + __ret = (float32x2_t) __builtin_neon_vcmla_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + return __ret; +} +#else +__ai float32x2_t vcmla_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + float32x2_t __ret; + __ret = (float32x2_t) __builtin_neon_vcmla_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +__ai float32x2_t __noswap_vcmla_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { + float32x2_t __ret; + __ret = (float32x2_t) __builtin_neon_vcmla_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmla_lane_f32(__p0_169, __p1_169, __p2_169, __p3_169) __extension__ ({ \ + float32x2_t __s0_169 = __p0_169; \ + float32x2_t __s1_169 = __p1_169; \ + float32x2_t __s2_169 = __p2_169; \ + float32x2_t __ret_169; \ +float32x2_t __reint_169 = __s2_169; \ +uint64x1_t __reint1_169 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_169, __p3_169)}; \ + __ret_169 = vcmla_f32(__s0_169, __s1_169, *(float32x2_t *) &__reint1_169); \ + __ret_169; \ +}) +#else 
+#define vcmla_lane_f32(__p0_170, __p1_170, __p2_170, __p3_170) __extension__ ({ \ + float32x2_t __s0_170 = __p0_170; \ + float32x2_t __s1_170 = __p1_170; \ + float32x2_t __s2_170 = __p2_170; \ + float32x2_t __rev0_170; __rev0_170 = __builtin_shufflevector(__s0_170, __s0_170, 1, 0); \ + float32x2_t __rev1_170; __rev1_170 = __builtin_shufflevector(__s1_170, __s1_170, 1, 0); \ + float32x2_t __rev2_170; __rev2_170 = __builtin_shufflevector(__s2_170, __s2_170, 1, 0); \ + float32x2_t __ret_170; \ +float32x2_t __reint_170 = __rev2_170; \ +uint64x1_t __reint1_170 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_170, __p3_170)}; \ + __ret_170 = __noswap_vcmla_f32(__rev0_170, __rev1_170, *(float32x2_t *) &__reint1_170); \ + __ret_170 = __builtin_shufflevector(__ret_170, __ret_170, 1, 0); \ + __ret_170; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmlaq_lane_f32(__p0_171, __p1_171, __p2_171, __p3_171) __extension__ ({ \ + float32x4_t __s0_171 = __p0_171; \ + float32x4_t __s1_171 = __p1_171; \ + float32x2_t __s2_171 = __p2_171; \ + float32x4_t __ret_171; \ +float32x2_t __reint_171 = __s2_171; \ +uint64x2_t __reint1_171 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_171, __p3_171), vget_lane_u64(*(uint64x1_t *) &__reint_171, __p3_171)}; \ + __ret_171 = vcmlaq_f32(__s0_171, __s1_171, *(float32x4_t *) &__reint1_171); \ + __ret_171; \ +}) +#else +#define vcmlaq_lane_f32(__p0_172, __p1_172, __p2_172, __p3_172) __extension__ ({ \ + float32x4_t __s0_172 = __p0_172; \ + float32x4_t __s1_172 = __p1_172; \ + float32x2_t __s2_172 = __p2_172; \ + float32x4_t __rev0_172; __rev0_172 = __builtin_shufflevector(__s0_172, __s0_172, 3, 2, 1, 0); \ + float32x4_t __rev1_172; __rev1_172 = __builtin_shufflevector(__s1_172, __s1_172, 3, 2, 1, 0); \ + float32x2_t __rev2_172; __rev2_172 = __builtin_shufflevector(__s2_172, __s2_172, 1, 0); \ + float32x4_t __ret_172; \ +float32x2_t __reint_172 = __rev2_172; \ +uint64x2_t __reint1_172 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_172, __p3_172), vget_lane_u64(*(uint64x1_t *) &__reint_172, __p3_172)}; \ + __ret_172 = __noswap_vcmlaq_f32(__rev0_172, __rev1_172, *(float32x4_t *) &__reint1_172); \ + __ret_172 = __builtin_shufflevector(__ret_172, __ret_172, 3, 2, 1, 0); \ + __ret_172; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmla_laneq_f32(__p0_173, __p1_173, __p2_173, __p3_173) __extension__ ({ \ + float32x2_t __s0_173 = __p0_173; \ + float32x2_t __s1_173 = __p1_173; \ + float32x4_t __s2_173 = __p2_173; \ + float32x2_t __ret_173; \ +float32x4_t __reint_173 = __s2_173; \ +uint64x1_t __reint1_173 = (uint64x1_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_173, __p3_173)}; \ + __ret_173 = vcmla_f32(__s0_173, __s1_173, *(float32x2_t *) &__reint1_173); \ + __ret_173; \ +}) +#else +#define vcmla_laneq_f32(__p0_174, __p1_174, __p2_174, __p3_174) __extension__ ({ \ + float32x2_t __s0_174 = __p0_174; \ + float32x2_t __s1_174 = __p1_174; \ + float32x4_t __s2_174 = __p2_174; \ + float32x2_t __rev0_174; __rev0_174 = __builtin_shufflevector(__s0_174, __s0_174, 1, 0); \ + float32x2_t __rev1_174; __rev1_174 = __builtin_shufflevector(__s1_174, __s1_174, 1, 0); \ + float32x4_t __rev2_174; __rev2_174 = __builtin_shufflevector(__s2_174, __s2_174, 3, 2, 1, 0); \ + float32x2_t __ret_174; \ +float32x4_t __reint_174 = __rev2_174; \ +uint64x1_t __reint1_174 = (uint64x1_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_174, __p3_174)}; \ + __ret_174 = __noswap_vcmla_f32(__rev0_174, __rev1_174, *(float32x2_t *) &__reint1_174); \ + __ret_174 = 
__builtin_shufflevector(__ret_174, __ret_174, 1, 0); \ + __ret_174; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmlaq_laneq_f32(__p0_175, __p1_175, __p2_175, __p3_175) __extension__ ({ \ + float32x4_t __s0_175 = __p0_175; \ + float32x4_t __s1_175 = __p1_175; \ + float32x4_t __s2_175 = __p2_175; \ + float32x4_t __ret_175; \ +float32x4_t __reint_175 = __s2_175; \ +uint64x2_t __reint1_175 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_175, __p3_175), vgetq_lane_u64(*(uint64x2_t *) &__reint_175, __p3_175)}; \ + __ret_175 = vcmlaq_f32(__s0_175, __s1_175, *(float32x4_t *) &__reint1_175); \ + __ret_175; \ +}) +#else +#define vcmlaq_laneq_f32(__p0_176, __p1_176, __p2_176, __p3_176) __extension__ ({ \ + float32x4_t __s0_176 = __p0_176; \ + float32x4_t __s1_176 = __p1_176; \ + float32x4_t __s2_176 = __p2_176; \ + float32x4_t __rev0_176; __rev0_176 = __builtin_shufflevector(__s0_176, __s0_176, 3, 2, 1, 0); \ + float32x4_t __rev1_176; __rev1_176 = __builtin_shufflevector(__s1_176, __s1_176, 3, 2, 1, 0); \ + float32x4_t __rev2_176; __rev2_176 = __builtin_shufflevector(__s2_176, __s2_176, 3, 2, 1, 0); \ + float32x4_t __ret_176; \ +float32x4_t __reint_176 = __rev2_176; \ +uint64x2_t __reint1_176 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_176, __p3_176), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_176, __p3_176)}; \ + __ret_176 = __noswap_vcmlaq_f32(__rev0_176, __rev1_176, *(float32x4_t *) &__reint1_176); \ + __ret_176 = __builtin_shufflevector(__ret_176, __ret_176, 3, 2, 1, 0); \ + __ret_176; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai float32x4_t vcmlaq_rot180_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vcmlaq_rot180_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + return __ret; +} +#else +__ai float32x4_t vcmlaq_rot180_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vcmlaq_rot180_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +__ai float32x4_t __noswap_vcmlaq_rot180_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vcmlaq_rot180_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai float32x2_t vcmla_rot180_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { + float32x2_t __ret; + __ret = (float32x2_t) __builtin_neon_vcmla_rot180_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + return __ret; +} +#else +__ai float32x2_t vcmla_rot180_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + float32x2_t __ret; + __ret = (float32x2_t) __builtin_neon_vcmla_rot180_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +__ai float32x2_t __noswap_vcmla_rot180_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { + float32x2_t 
__ret; + __ret = (float32x2_t) __builtin_neon_vcmla_rot180_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmla_rot180_lane_f32(__p0_177, __p1_177, __p2_177, __p3_177) __extension__ ({ \ + float32x2_t __s0_177 = __p0_177; \ + float32x2_t __s1_177 = __p1_177; \ + float32x2_t __s2_177 = __p2_177; \ + float32x2_t __ret_177; \ +float32x2_t __reint_177 = __s2_177; \ +uint64x1_t __reint1_177 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_177, __p3_177)}; \ + __ret_177 = vcmla_rot180_f32(__s0_177, __s1_177, *(float32x2_t *) &__reint1_177); \ + __ret_177; \ +}) +#else +#define vcmla_rot180_lane_f32(__p0_178, __p1_178, __p2_178, __p3_178) __extension__ ({ \ + float32x2_t __s0_178 = __p0_178; \ + float32x2_t __s1_178 = __p1_178; \ + float32x2_t __s2_178 = __p2_178; \ + float32x2_t __rev0_178; __rev0_178 = __builtin_shufflevector(__s0_178, __s0_178, 1, 0); \ + float32x2_t __rev1_178; __rev1_178 = __builtin_shufflevector(__s1_178, __s1_178, 1, 0); \ + float32x2_t __rev2_178; __rev2_178 = __builtin_shufflevector(__s2_178, __s2_178, 1, 0); \ + float32x2_t __ret_178; \ +float32x2_t __reint_178 = __rev2_178; \ +uint64x1_t __reint1_178 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_178, __p3_178)}; \ + __ret_178 = __noswap_vcmla_rot180_f32(__rev0_178, __rev1_178, *(float32x2_t *) &__reint1_178); \ + __ret_178 = __builtin_shufflevector(__ret_178, __ret_178, 1, 0); \ + __ret_178; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmlaq_rot180_lane_f32(__p0_179, __p1_179, __p2_179, __p3_179) __extension__ ({ \ + float32x4_t __s0_179 = __p0_179; \ + float32x4_t __s1_179 = __p1_179; \ + float32x2_t __s2_179 = __p2_179; \ + float32x4_t __ret_179; \ +float32x2_t __reint_179 = __s2_179; \ +uint64x2_t __reint1_179 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_179, __p3_179), vget_lane_u64(*(uint64x1_t *) &__reint_179, __p3_179)}; \ + __ret_179 = vcmlaq_rot180_f32(__s0_179, __s1_179, *(float32x4_t *) &__reint1_179); \ + __ret_179; \ +}) +#else +#define vcmlaq_rot180_lane_f32(__p0_180, __p1_180, __p2_180, __p3_180) __extension__ ({ \ + float32x4_t __s0_180 = __p0_180; \ + float32x4_t __s1_180 = __p1_180; \ + float32x2_t __s2_180 = __p2_180; \ + float32x4_t __rev0_180; __rev0_180 = __builtin_shufflevector(__s0_180, __s0_180, 3, 2, 1, 0); \ + float32x4_t __rev1_180; __rev1_180 = __builtin_shufflevector(__s1_180, __s1_180, 3, 2, 1, 0); \ + float32x2_t __rev2_180; __rev2_180 = __builtin_shufflevector(__s2_180, __s2_180, 1, 0); \ + float32x4_t __ret_180; \ +float32x2_t __reint_180 = __rev2_180; \ +uint64x2_t __reint1_180 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_180, __p3_180), vget_lane_u64(*(uint64x1_t *) &__reint_180, __p3_180)}; \ + __ret_180 = __noswap_vcmlaq_rot180_f32(__rev0_180, __rev1_180, *(float32x4_t *) &__reint1_180); \ + __ret_180 = __builtin_shufflevector(__ret_180, __ret_180, 3, 2, 1, 0); \ + __ret_180; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmla_rot180_laneq_f32(__p0_181, __p1_181, __p2_181, __p3_181) __extension__ ({ \ + float32x2_t __s0_181 = __p0_181; \ + float32x2_t __s1_181 = __p1_181; \ + float32x4_t __s2_181 = __p2_181; \ + float32x2_t __ret_181; \ +float32x4_t __reint_181 = __s2_181; \ +uint64x1_t __reint1_181 = (uint64x1_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_181, __p3_181)}; \ + __ret_181 = vcmla_rot180_f32(__s0_181, __s1_181, *(float32x2_t *) &__reint1_181); \ + __ret_181; \ +}) +#else +#define vcmla_rot180_laneq_f32(__p0_182, __p1_182, __p2_182, __p3_182) 
__extension__ ({ \ + float32x2_t __s0_182 = __p0_182; \ + float32x2_t __s1_182 = __p1_182; \ + float32x4_t __s2_182 = __p2_182; \ + float32x2_t __rev0_182; __rev0_182 = __builtin_shufflevector(__s0_182, __s0_182, 1, 0); \ + float32x2_t __rev1_182; __rev1_182 = __builtin_shufflevector(__s1_182, __s1_182, 1, 0); \ + float32x4_t __rev2_182; __rev2_182 = __builtin_shufflevector(__s2_182, __s2_182, 3, 2, 1, 0); \ + float32x2_t __ret_182; \ +float32x4_t __reint_182 = __rev2_182; \ +uint64x1_t __reint1_182 = (uint64x1_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_182, __p3_182)}; \ + __ret_182 = __noswap_vcmla_rot180_f32(__rev0_182, __rev1_182, *(float32x2_t *) &__reint1_182); \ + __ret_182 = __builtin_shufflevector(__ret_182, __ret_182, 1, 0); \ + __ret_182; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmlaq_rot180_laneq_f32(__p0_183, __p1_183, __p2_183, __p3_183) __extension__ ({ \ + float32x4_t __s0_183 = __p0_183; \ + float32x4_t __s1_183 = __p1_183; \ + float32x4_t __s2_183 = __p2_183; \ + float32x4_t __ret_183; \ +float32x4_t __reint_183 = __s2_183; \ +uint64x2_t __reint1_183 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_183, __p3_183), vgetq_lane_u64(*(uint64x2_t *) &__reint_183, __p3_183)}; \ + __ret_183 = vcmlaq_rot180_f32(__s0_183, __s1_183, *(float32x4_t *) &__reint1_183); \ + __ret_183; \ +}) +#else +#define vcmlaq_rot180_laneq_f32(__p0_184, __p1_184, __p2_184, __p3_184) __extension__ ({ \ + float32x4_t __s0_184 = __p0_184; \ + float32x4_t __s1_184 = __p1_184; \ + float32x4_t __s2_184 = __p2_184; \ + float32x4_t __rev0_184; __rev0_184 = __builtin_shufflevector(__s0_184, __s0_184, 3, 2, 1, 0); \ + float32x4_t __rev1_184; __rev1_184 = __builtin_shufflevector(__s1_184, __s1_184, 3, 2, 1, 0); \ + float32x4_t __rev2_184; __rev2_184 = __builtin_shufflevector(__s2_184, __s2_184, 3, 2, 1, 0); \ + float32x4_t __ret_184; \ +float32x4_t __reint_184 = __rev2_184; \ +uint64x2_t __reint1_184 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_184, __p3_184), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_184, __p3_184)}; \ + __ret_184 = __noswap_vcmlaq_rot180_f32(__rev0_184, __rev1_184, *(float32x4_t *) &__reint1_184); \ + __ret_184 = __builtin_shufflevector(__ret_184, __ret_184, 3, 2, 1, 0); \ + __ret_184; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai float32x4_t vcmlaq_rot270_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vcmlaq_rot270_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + return __ret; +} +#else +__ai float32x4_t vcmlaq_rot270_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vcmlaq_rot270_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +__ai float32x4_t __noswap_vcmlaq_rot270_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vcmlaq_rot270_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai float32x2_t vcmla_rot270_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { + float32x2_t __ret; + __ret = (float32x2_t) 
__builtin_neon_vcmla_rot270_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + return __ret; +} +#else +__ai float32x2_t vcmla_rot270_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + float32x2_t __ret; + __ret = (float32x2_t) __builtin_neon_vcmla_rot270_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +__ai float32x2_t __noswap_vcmla_rot270_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { + float32x2_t __ret; + __ret = (float32x2_t) __builtin_neon_vcmla_rot270_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmla_rot270_lane_f32(__p0_185, __p1_185, __p2_185, __p3_185) __extension__ ({ \ + float32x2_t __s0_185 = __p0_185; \ + float32x2_t __s1_185 = __p1_185; \ + float32x2_t __s2_185 = __p2_185; \ + float32x2_t __ret_185; \ +float32x2_t __reint_185 = __s2_185; \ +uint64x1_t __reint1_185 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_185, __p3_185)}; \ + __ret_185 = vcmla_rot270_f32(__s0_185, __s1_185, *(float32x2_t *) &__reint1_185); \ + __ret_185; \ +}) +#else +#define vcmla_rot270_lane_f32(__p0_186, __p1_186, __p2_186, __p3_186) __extension__ ({ \ + float32x2_t __s0_186 = __p0_186; \ + float32x2_t __s1_186 = __p1_186; \ + float32x2_t __s2_186 = __p2_186; \ + float32x2_t __rev0_186; __rev0_186 = __builtin_shufflevector(__s0_186, __s0_186, 1, 0); \ + float32x2_t __rev1_186; __rev1_186 = __builtin_shufflevector(__s1_186, __s1_186, 1, 0); \ + float32x2_t __rev2_186; __rev2_186 = __builtin_shufflevector(__s2_186, __s2_186, 1, 0); \ + float32x2_t __ret_186; \ +float32x2_t __reint_186 = __rev2_186; \ +uint64x1_t __reint1_186 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_186, __p3_186)}; \ + __ret_186 = __noswap_vcmla_rot270_f32(__rev0_186, __rev1_186, *(float32x2_t *) &__reint1_186); \ + __ret_186 = __builtin_shufflevector(__ret_186, __ret_186, 1, 0); \ + __ret_186; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmlaq_rot270_lane_f32(__p0_187, __p1_187, __p2_187, __p3_187) __extension__ ({ \ + float32x4_t __s0_187 = __p0_187; \ + float32x4_t __s1_187 = __p1_187; \ + float32x2_t __s2_187 = __p2_187; \ + float32x4_t __ret_187; \ +float32x2_t __reint_187 = __s2_187; \ +uint64x2_t __reint1_187 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_187, __p3_187), vget_lane_u64(*(uint64x1_t *) &__reint_187, __p3_187)}; \ + __ret_187 = vcmlaq_rot270_f32(__s0_187, __s1_187, *(float32x4_t *) &__reint1_187); \ + __ret_187; \ +}) +#else +#define vcmlaq_rot270_lane_f32(__p0_188, __p1_188, __p2_188, __p3_188) __extension__ ({ \ + float32x4_t __s0_188 = __p0_188; \ + float32x4_t __s1_188 = __p1_188; \ + float32x2_t __s2_188 = __p2_188; \ + float32x4_t __rev0_188; __rev0_188 = __builtin_shufflevector(__s0_188, __s0_188, 3, 2, 1, 0); \ + float32x4_t __rev1_188; __rev1_188 = __builtin_shufflevector(__s1_188, __s1_188, 3, 2, 1, 0); \ + float32x2_t __rev2_188; __rev2_188 = __builtin_shufflevector(__s2_188, __s2_188, 1, 0); \ + float32x4_t __ret_188; \ +float32x2_t __reint_188 = __rev2_188; \ +uint64x2_t __reint1_188 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_188, __p3_188), vget_lane_u64(*(uint64x1_t *) &__reint_188, __p3_188)}; \ + __ret_188 = __noswap_vcmlaq_rot270_f32(__rev0_188, 
__rev1_188, *(float32x4_t *) &__reint1_188); \ + __ret_188 = __builtin_shufflevector(__ret_188, __ret_188, 3, 2, 1, 0); \ + __ret_188; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmla_rot270_laneq_f32(__p0_189, __p1_189, __p2_189, __p3_189) __extension__ ({ \ + float32x2_t __s0_189 = __p0_189; \ + float32x2_t __s1_189 = __p1_189; \ + float32x4_t __s2_189 = __p2_189; \ + float32x2_t __ret_189; \ +float32x4_t __reint_189 = __s2_189; \ +uint64x1_t __reint1_189 = (uint64x1_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_189, __p3_189)}; \ + __ret_189 = vcmla_rot270_f32(__s0_189, __s1_189, *(float32x2_t *) &__reint1_189); \ + __ret_189; \ +}) +#else +#define vcmla_rot270_laneq_f32(__p0_190, __p1_190, __p2_190, __p3_190) __extension__ ({ \ + float32x2_t __s0_190 = __p0_190; \ + float32x2_t __s1_190 = __p1_190; \ + float32x4_t __s2_190 = __p2_190; \ + float32x2_t __rev0_190; __rev0_190 = __builtin_shufflevector(__s0_190, __s0_190, 1, 0); \ + float32x2_t __rev1_190; __rev1_190 = __builtin_shufflevector(__s1_190, __s1_190, 1, 0); \ + float32x4_t __rev2_190; __rev2_190 = __builtin_shufflevector(__s2_190, __s2_190, 3, 2, 1, 0); \ + float32x2_t __ret_190; \ +float32x4_t __reint_190 = __rev2_190; \ +uint64x1_t __reint1_190 = (uint64x1_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_190, __p3_190)}; \ + __ret_190 = __noswap_vcmla_rot270_f32(__rev0_190, __rev1_190, *(float32x2_t *) &__reint1_190); \ + __ret_190 = __builtin_shufflevector(__ret_190, __ret_190, 1, 0); \ + __ret_190; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmlaq_rot270_laneq_f32(__p0_191, __p1_191, __p2_191, __p3_191) __extension__ ({ \ + float32x4_t __s0_191 = __p0_191; \ + float32x4_t __s1_191 = __p1_191; \ + float32x4_t __s2_191 = __p2_191; \ + float32x4_t __ret_191; \ +float32x4_t __reint_191 = __s2_191; \ +uint64x2_t __reint1_191 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_191, __p3_191), vgetq_lane_u64(*(uint64x2_t *) &__reint_191, __p3_191)}; \ + __ret_191 = vcmlaq_rot270_f32(__s0_191, __s1_191, *(float32x4_t *) &__reint1_191); \ + __ret_191; \ +}) +#else +#define vcmlaq_rot270_laneq_f32(__p0_192, __p1_192, __p2_192, __p3_192) __extension__ ({ \ + float32x4_t __s0_192 = __p0_192; \ + float32x4_t __s1_192 = __p1_192; \ + float32x4_t __s2_192 = __p2_192; \ + float32x4_t __rev0_192; __rev0_192 = __builtin_shufflevector(__s0_192, __s0_192, 3, 2, 1, 0); \ + float32x4_t __rev1_192; __rev1_192 = __builtin_shufflevector(__s1_192, __s1_192, 3, 2, 1, 0); \ + float32x4_t __rev2_192; __rev2_192 = __builtin_shufflevector(__s2_192, __s2_192, 3, 2, 1, 0); \ + float32x4_t __ret_192; \ +float32x4_t __reint_192 = __rev2_192; \ +uint64x2_t __reint1_192 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_192, __p3_192), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_192, __p3_192)}; \ + __ret_192 = __noswap_vcmlaq_rot270_f32(__rev0_192, __rev1_192, *(float32x4_t *) &__reint1_192); \ + __ret_192 = __builtin_shufflevector(__ret_192, __ret_192, 3, 2, 1, 0); \ + __ret_192; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai float32x4_t vcmlaq_rot90_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vcmlaq_rot90_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + return __ret; +} +#else +__ai float32x4_t vcmlaq_rot90_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { + float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + 
float32x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vcmlaq_rot90_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 41); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +__ai float32x4_t __noswap_vcmlaq_rot90_f32(float32x4_t __p0, float32x4_t __p1, float32x4_t __p2) { + float32x4_t __ret; + __ret = (float32x4_t) __builtin_neon_vcmlaq_rot90_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai float32x2_t vcmla_rot90_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { + float32x2_t __ret; + __ret = (float32x2_t) __builtin_neon_vcmla_rot90_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + return __ret; +} +#else +__ai float32x2_t vcmla_rot90_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { + float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float32x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + float32x2_t __ret; + __ret = (float32x2_t) __builtin_neon_vcmla_rot90_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 9); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +__ai float32x2_t __noswap_vcmla_rot90_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) { + float32x2_t __ret; + __ret = (float32x2_t) __builtin_neon_vcmla_rot90_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmla_rot90_lane_f32(__p0_193, __p1_193, __p2_193, __p3_193) __extension__ ({ \ + float32x2_t __s0_193 = __p0_193; \ + float32x2_t __s1_193 = __p1_193; \ + float32x2_t __s2_193 = __p2_193; \ + float32x2_t __ret_193; \ +float32x2_t __reint_193 = __s2_193; \ +uint64x1_t __reint1_193 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_193, __p3_193)}; \ + __ret_193 = vcmla_rot90_f32(__s0_193, __s1_193, *(float32x2_t *) &__reint1_193); \ + __ret_193; \ +}) +#else +#define vcmla_rot90_lane_f32(__p0_194, __p1_194, __p2_194, __p3_194) __extension__ ({ \ + float32x2_t __s0_194 = __p0_194; \ + float32x2_t __s1_194 = __p1_194; \ + float32x2_t __s2_194 = __p2_194; \ + float32x2_t __rev0_194; __rev0_194 = __builtin_shufflevector(__s0_194, __s0_194, 1, 0); \ + float32x2_t __rev1_194; __rev1_194 = __builtin_shufflevector(__s1_194, __s1_194, 1, 0); \ + float32x2_t __rev2_194; __rev2_194 = __builtin_shufflevector(__s2_194, __s2_194, 1, 0); \ + float32x2_t __ret_194; \ +float32x2_t __reint_194 = __rev2_194; \ +uint64x1_t __reint1_194 = (uint64x1_t) {vget_lane_u64(*(uint64x1_t *) &__reint_194, __p3_194)}; \ + __ret_194 = __noswap_vcmla_rot90_f32(__rev0_194, __rev1_194, *(float32x2_t *) &__reint1_194); \ + __ret_194 = __builtin_shufflevector(__ret_194, __ret_194, 1, 0); \ + __ret_194; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmlaq_rot90_lane_f32(__p0_195, __p1_195, __p2_195, __p3_195) __extension__ ({ \ + float32x4_t __s0_195 = __p0_195; \ + float32x4_t __s1_195 = __p1_195; \ + float32x2_t __s2_195 = __p2_195; \ + float32x4_t __ret_195; \ +float32x2_t __reint_195 = __s2_195; \ +uint64x2_t __reint1_195 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_195, __p3_195), vget_lane_u64(*(uint64x1_t *) &__reint_195, __p3_195)}; \ + __ret_195 = vcmlaq_rot90_f32(__s0_195, __s1_195, *(float32x4_t *) &__reint1_195); \ + __ret_195; \ +}) +#else +#define vcmlaq_rot90_lane_f32(__p0_196, __p1_196, 
__p2_196, __p3_196) __extension__ ({ \ + float32x4_t __s0_196 = __p0_196; \ + float32x4_t __s1_196 = __p1_196; \ + float32x2_t __s2_196 = __p2_196; \ + float32x4_t __rev0_196; __rev0_196 = __builtin_shufflevector(__s0_196, __s0_196, 3, 2, 1, 0); \ + float32x4_t __rev1_196; __rev1_196 = __builtin_shufflevector(__s1_196, __s1_196, 3, 2, 1, 0); \ + float32x2_t __rev2_196; __rev2_196 = __builtin_shufflevector(__s2_196, __s2_196, 1, 0); \ + float32x4_t __ret_196; \ +float32x2_t __reint_196 = __rev2_196; \ +uint64x2_t __reint1_196 = (uint64x2_t) {vget_lane_u64(*(uint64x1_t *) &__reint_196, __p3_196), vget_lane_u64(*(uint64x1_t *) &__reint_196, __p3_196)}; \ + __ret_196 = __noswap_vcmlaq_rot90_f32(__rev0_196, __rev1_196, *(float32x4_t *) &__reint1_196); \ + __ret_196 = __builtin_shufflevector(__ret_196, __ret_196, 3, 2, 1, 0); \ + __ret_196; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmla_rot90_laneq_f32(__p0_197, __p1_197, __p2_197, __p3_197) __extension__ ({ \ + float32x2_t __s0_197 = __p0_197; \ + float32x2_t __s1_197 = __p1_197; \ + float32x4_t __s2_197 = __p2_197; \ + float32x2_t __ret_197; \ +float32x4_t __reint_197 = __s2_197; \ +uint64x1_t __reint1_197 = (uint64x1_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_197, __p3_197)}; \ + __ret_197 = vcmla_rot90_f32(__s0_197, __s1_197, *(float32x2_t *) &__reint1_197); \ + __ret_197; \ +}) +#else +#define vcmla_rot90_laneq_f32(__p0_198, __p1_198, __p2_198, __p3_198) __extension__ ({ \ + float32x2_t __s0_198 = __p0_198; \ + float32x2_t __s1_198 = __p1_198; \ + float32x4_t __s2_198 = __p2_198; \ + float32x2_t __rev0_198; __rev0_198 = __builtin_shufflevector(__s0_198, __s0_198, 1, 0); \ + float32x2_t __rev1_198; __rev1_198 = __builtin_shufflevector(__s1_198, __s1_198, 1, 0); \ + float32x4_t __rev2_198; __rev2_198 = __builtin_shufflevector(__s2_198, __s2_198, 3, 2, 1, 0); \ + float32x2_t __ret_198; \ +float32x4_t __reint_198 = __rev2_198; \ +uint64x1_t __reint1_198 = (uint64x1_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_198, __p3_198)}; \ + __ret_198 = __noswap_vcmla_rot90_f32(__rev0_198, __rev1_198, *(float32x2_t *) &__reint1_198); \ + __ret_198 = __builtin_shufflevector(__ret_198, __ret_198, 1, 0); \ + __ret_198; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmlaq_rot90_laneq_f32(__p0_199, __p1_199, __p2_199, __p3_199) __extension__ ({ \ + float32x4_t __s0_199 = __p0_199; \ + float32x4_t __s1_199 = __p1_199; \ + float32x4_t __s2_199 = __p2_199; \ + float32x4_t __ret_199; \ +float32x4_t __reint_199 = __s2_199; \ +uint64x2_t __reint1_199 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_199, __p3_199), vgetq_lane_u64(*(uint64x2_t *) &__reint_199, __p3_199)}; \ + __ret_199 = vcmlaq_rot90_f32(__s0_199, __s1_199, *(float32x4_t *) &__reint1_199); \ + __ret_199; \ +}) +#else +#define vcmlaq_rot90_laneq_f32(__p0_200, __p1_200, __p2_200, __p3_200) __extension__ ({ \ + float32x4_t __s0_200 = __p0_200; \ + float32x4_t __s1_200 = __p1_200; \ + float32x4_t __s2_200 = __p2_200; \ + float32x4_t __rev0_200; __rev0_200 = __builtin_shufflevector(__s0_200, __s0_200, 3, 2, 1, 0); \ + float32x4_t __rev1_200; __rev1_200 = __builtin_shufflevector(__s1_200, __s1_200, 3, 2, 1, 0); \ + float32x4_t __rev2_200; __rev2_200 = __builtin_shufflevector(__s2_200, __s2_200, 3, 2, 1, 0); \ + float32x4_t __ret_200; \ +float32x4_t __reint_200 = __rev2_200; \ +uint64x2_t __reint1_200 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_200, __p3_200), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_200, __p3_200)}; \ + __ret_200 = 
__noswap_vcmlaq_rot90_f32(__rev0_200, __rev1_200, *(float32x4_t *) &__reint1_200); \ + __ret_200 = __builtin_shufflevector(__ret_200, __ret_200, 3, 2, 1, 0); \ + __ret_200; \ +}) +#endif + #endif #if defined(__ARM_FEATURE_COMPLEX) && defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) #ifdef __LITTLE_ENDIAN__ @@ -40499,6 +41131,638 @@ __ai float16x8_t vcaddq_rot90_f16(float16x8_t __p0, float16x8_t __p1) { } #endif +#ifdef __LITTLE_ENDIAN__ +__ai float16x8_t vcmlaq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vcmlaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); + return __ret; +} +#else +__ai float16x8_t vcmlaq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vcmlaq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 40); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +__ai float16x8_t __noswap_vcmlaq_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vcmlaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai float16x4_t vcmla_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vcmla_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); + return __ret; +} +#else +__ai float16x4_t vcmla_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vcmla_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 8); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +__ai float16x4_t __noswap_vcmla_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vcmla_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmla_lane_f16(__p0_201, __p1_201, __p2_201, __p3_201) __extension__ ({ \ + float16x4_t __s0_201 = __p0_201; \ + float16x4_t __s1_201 = __p1_201; \ + float16x4_t __s2_201 = __p2_201; \ + float16x4_t __ret_201; \ +float16x4_t __reint_201 = __s2_201; \ +uint32x2_t __reint1_201 = (uint32x2_t) {vget_lane_u32(*(uint32x2_t *) &__reint_201, __p3_201), vget_lane_u32(*(uint32x2_t *) &__reint_201, __p3_201)}; \ + __ret_201 = vcmla_f16(__s0_201, __s1_201, *(float16x4_t *) &__reint1_201); \ + __ret_201; \ +}) +#else +#define vcmla_lane_f16(__p0_202, __p1_202, __p2_202, __p3_202) __extension__ ({ \ + float16x4_t __s0_202 = __p0_202; \ + float16x4_t __s1_202 = __p1_202; \ + float16x4_t __s2_202 = __p2_202; \ + float16x4_t __rev0_202; __rev0_202 = __builtin_shufflevector(__s0_202, __s0_202, 3, 2, 1, 0); \ + float16x4_t __rev1_202; __rev1_202 = __builtin_shufflevector(__s1_202, __s1_202, 3, 2, 1, 0); \ + float16x4_t __rev2_202; __rev2_202 = __builtin_shufflevector(__s2_202, __s2_202, 3, 
2, 1, 0); \ + float16x4_t __ret_202; \ +float16x4_t __reint_202 = __rev2_202; \ +uint32x2_t __reint1_202 = (uint32x2_t) {__noswap_vget_lane_u32(*(uint32x2_t *) &__reint_202, __p3_202), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_202, __p3_202)}; \ + __ret_202 = __noswap_vcmla_f16(__rev0_202, __rev1_202, *(float16x4_t *) &__reint1_202); \ + __ret_202 = __builtin_shufflevector(__ret_202, __ret_202, 3, 2, 1, 0); \ + __ret_202; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmlaq_lane_f16(__p0_203, __p1_203, __p2_203, __p3_203) __extension__ ({ \ + float16x8_t __s0_203 = __p0_203; \ + float16x8_t __s1_203 = __p1_203; \ + float16x4_t __s2_203 = __p2_203; \ + float16x8_t __ret_203; \ +float16x4_t __reint_203 = __s2_203; \ +uint32x4_t __reint1_203 = (uint32x4_t) {vget_lane_u32(*(uint32x2_t *) &__reint_203, __p3_203), vget_lane_u32(*(uint32x2_t *) &__reint_203, __p3_203), vget_lane_u32(*(uint32x2_t *) &__reint_203, __p3_203), vget_lane_u32(*(uint32x2_t *) &__reint_203, __p3_203)}; \ + __ret_203 = vcmlaq_f16(__s0_203, __s1_203, *(float16x8_t *) &__reint1_203); \ + __ret_203; \ +}) +#else +#define vcmlaq_lane_f16(__p0_204, __p1_204, __p2_204, __p3_204) __extension__ ({ \ + float16x8_t __s0_204 = __p0_204; \ + float16x8_t __s1_204 = __p1_204; \ + float16x4_t __s2_204 = __p2_204; \ + float16x8_t __rev0_204; __rev0_204 = __builtin_shufflevector(__s0_204, __s0_204, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev1_204; __rev1_204 = __builtin_shufflevector(__s1_204, __s1_204, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev2_204; __rev2_204 = __builtin_shufflevector(__s2_204, __s2_204, 3, 2, 1, 0); \ + float16x8_t __ret_204; \ +float16x4_t __reint_204 = __rev2_204; \ +uint32x4_t __reint1_204 = (uint32x4_t) {__noswap_vget_lane_u32(*(uint32x2_t *) &__reint_204, __p3_204), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_204, __p3_204), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_204, __p3_204), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_204, __p3_204)}; \ + __ret_204 = __noswap_vcmlaq_f16(__rev0_204, __rev1_204, *(float16x8_t *) &__reint1_204); \ + __ret_204 = __builtin_shufflevector(__ret_204, __ret_204, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_204; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmla_laneq_f16(__p0_205, __p1_205, __p2_205, __p3_205) __extension__ ({ \ + float16x4_t __s0_205 = __p0_205; \ + float16x4_t __s1_205 = __p1_205; \ + float16x8_t __s2_205 = __p2_205; \ + float16x4_t __ret_205; \ +float16x8_t __reint_205 = __s2_205; \ +uint32x2_t __reint1_205 = (uint32x2_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_205, __p3_205), vgetq_lane_u32(*(uint32x4_t *) &__reint_205, __p3_205)}; \ + __ret_205 = vcmla_f16(__s0_205, __s1_205, *(float16x4_t *) &__reint1_205); \ + __ret_205; \ +}) +#else +#define vcmla_laneq_f16(__p0_206, __p1_206, __p2_206, __p3_206) __extension__ ({ \ + float16x4_t __s0_206 = __p0_206; \ + float16x4_t __s1_206 = __p1_206; \ + float16x8_t __s2_206 = __p2_206; \ + float16x4_t __rev0_206; __rev0_206 = __builtin_shufflevector(__s0_206, __s0_206, 3, 2, 1, 0); \ + float16x4_t __rev1_206; __rev1_206 = __builtin_shufflevector(__s1_206, __s1_206, 3, 2, 1, 0); \ + float16x8_t __rev2_206; __rev2_206 = __builtin_shufflevector(__s2_206, __s2_206, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __ret_206; \ +float16x8_t __reint_206 = __rev2_206; \ +uint32x2_t __reint1_206 = (uint32x2_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_206, __p3_206), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_206, __p3_206)}; \ + __ret_206 = __noswap_vcmla_f16(__rev0_206, __rev1_206, 
*(float16x4_t *) &__reint1_206); \ + __ret_206 = __builtin_shufflevector(__ret_206, __ret_206, 3, 2, 1, 0); \ + __ret_206; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmlaq_laneq_f16(__p0_207, __p1_207, __p2_207, __p3_207) __extension__ ({ \ + float16x8_t __s0_207 = __p0_207; \ + float16x8_t __s1_207 = __p1_207; \ + float16x8_t __s2_207 = __p2_207; \ + float16x8_t __ret_207; \ +float16x8_t __reint_207 = __s2_207; \ +uint32x4_t __reint1_207 = (uint32x4_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_207, __p3_207), vgetq_lane_u32(*(uint32x4_t *) &__reint_207, __p3_207), vgetq_lane_u32(*(uint32x4_t *) &__reint_207, __p3_207), vgetq_lane_u32(*(uint32x4_t *) &__reint_207, __p3_207)}; \ + __ret_207 = vcmlaq_f16(__s0_207, __s1_207, *(float16x8_t *) &__reint1_207); \ + __ret_207; \ +}) +#else +#define vcmlaq_laneq_f16(__p0_208, __p1_208, __p2_208, __p3_208) __extension__ ({ \ + float16x8_t __s0_208 = __p0_208; \ + float16x8_t __s1_208 = __p1_208; \ + float16x8_t __s2_208 = __p2_208; \ + float16x8_t __rev0_208; __rev0_208 = __builtin_shufflevector(__s0_208, __s0_208, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev1_208; __rev1_208 = __builtin_shufflevector(__s1_208, __s1_208, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev2_208; __rev2_208 = __builtin_shufflevector(__s2_208, __s2_208, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __ret_208; \ +float16x8_t __reint_208 = __rev2_208; \ +uint32x4_t __reint1_208 = (uint32x4_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_208, __p3_208), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_208, __p3_208), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_208, __p3_208), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_208, __p3_208)}; \ + __ret_208 = __noswap_vcmlaq_f16(__rev0_208, __rev1_208, *(float16x8_t *) &__reint1_208); \ + __ret_208 = __builtin_shufflevector(__ret_208, __ret_208, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_208; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai float16x8_t vcmlaq_rot180_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vcmlaq_rot180_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); + return __ret; +} +#else +__ai float16x8_t vcmlaq_rot180_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vcmlaq_rot180_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 40); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +__ai float16x8_t __noswap_vcmlaq_rot180_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vcmlaq_rot180_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai float16x4_t vcmla_rot180_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vcmla_rot180_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); + return __ret; +} +#else +__ai float16x4_t vcmla_rot180_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 
2, 1, 0); + float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vcmla_rot180_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 8); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +__ai float16x4_t __noswap_vcmla_rot180_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vcmla_rot180_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmla_rot180_lane_f16(__p0_209, __p1_209, __p2_209, __p3_209) __extension__ ({ \ + float16x4_t __s0_209 = __p0_209; \ + float16x4_t __s1_209 = __p1_209; \ + float16x4_t __s2_209 = __p2_209; \ + float16x4_t __ret_209; \ +float16x4_t __reint_209 = __s2_209; \ +uint32x2_t __reint1_209 = (uint32x2_t) {vget_lane_u32(*(uint32x2_t *) &__reint_209, __p3_209), vget_lane_u32(*(uint32x2_t *) &__reint_209, __p3_209)}; \ + __ret_209 = vcmla_rot180_f16(__s0_209, __s1_209, *(float16x4_t *) &__reint1_209); \ + __ret_209; \ +}) +#else +#define vcmla_rot180_lane_f16(__p0_210, __p1_210, __p2_210, __p3_210) __extension__ ({ \ + float16x4_t __s0_210 = __p0_210; \ + float16x4_t __s1_210 = __p1_210; \ + float16x4_t __s2_210 = __p2_210; \ + float16x4_t __rev0_210; __rev0_210 = __builtin_shufflevector(__s0_210, __s0_210, 3, 2, 1, 0); \ + float16x4_t __rev1_210; __rev1_210 = __builtin_shufflevector(__s1_210, __s1_210, 3, 2, 1, 0); \ + float16x4_t __rev2_210; __rev2_210 = __builtin_shufflevector(__s2_210, __s2_210, 3, 2, 1, 0); \ + float16x4_t __ret_210; \ +float16x4_t __reint_210 = __rev2_210; \ +uint32x2_t __reint1_210 = (uint32x2_t) {__noswap_vget_lane_u32(*(uint32x2_t *) &__reint_210, __p3_210), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_210, __p3_210)}; \ + __ret_210 = __noswap_vcmla_rot180_f16(__rev0_210, __rev1_210, *(float16x4_t *) &__reint1_210); \ + __ret_210 = __builtin_shufflevector(__ret_210, __ret_210, 3, 2, 1, 0); \ + __ret_210; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmlaq_rot180_lane_f16(__p0_211, __p1_211, __p2_211, __p3_211) __extension__ ({ \ + float16x8_t __s0_211 = __p0_211; \ + float16x8_t __s1_211 = __p1_211; \ + float16x4_t __s2_211 = __p2_211; \ + float16x8_t __ret_211; \ +float16x4_t __reint_211 = __s2_211; \ +uint32x4_t __reint1_211 = (uint32x4_t) {vget_lane_u32(*(uint32x2_t *) &__reint_211, __p3_211), vget_lane_u32(*(uint32x2_t *) &__reint_211, __p3_211), vget_lane_u32(*(uint32x2_t *) &__reint_211, __p3_211), vget_lane_u32(*(uint32x2_t *) &__reint_211, __p3_211)}; \ + __ret_211 = vcmlaq_rot180_f16(__s0_211, __s1_211, *(float16x8_t *) &__reint1_211); \ + __ret_211; \ +}) +#else +#define vcmlaq_rot180_lane_f16(__p0_212, __p1_212, __p2_212, __p3_212) __extension__ ({ \ + float16x8_t __s0_212 = __p0_212; \ + float16x8_t __s1_212 = __p1_212; \ + float16x4_t __s2_212 = __p2_212; \ + float16x8_t __rev0_212; __rev0_212 = __builtin_shufflevector(__s0_212, __s0_212, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev1_212; __rev1_212 = __builtin_shufflevector(__s1_212, __s1_212, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev2_212; __rev2_212 = __builtin_shufflevector(__s2_212, __s2_212, 3, 2, 1, 0); \ + float16x8_t __ret_212; \ +float16x4_t __reint_212 = __rev2_212; \ +uint32x4_t __reint1_212 = (uint32x4_t) {__noswap_vget_lane_u32(*(uint32x2_t *) &__reint_212, __p3_212), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_212, __p3_212), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_212, __p3_212), 
__noswap_vget_lane_u32(*(uint32x2_t *) &__reint_212, __p3_212)}; \ + __ret_212 = __noswap_vcmlaq_rot180_f16(__rev0_212, __rev1_212, *(float16x8_t *) &__reint1_212); \ + __ret_212 = __builtin_shufflevector(__ret_212, __ret_212, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_212; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmla_rot180_laneq_f16(__p0_213, __p1_213, __p2_213, __p3_213) __extension__ ({ \ + float16x4_t __s0_213 = __p0_213; \ + float16x4_t __s1_213 = __p1_213; \ + float16x8_t __s2_213 = __p2_213; \ + float16x4_t __ret_213; \ +float16x8_t __reint_213 = __s2_213; \ +uint32x2_t __reint1_213 = (uint32x2_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_213, __p3_213), vgetq_lane_u32(*(uint32x4_t *) &__reint_213, __p3_213)}; \ + __ret_213 = vcmla_rot180_f16(__s0_213, __s1_213, *(float16x4_t *) &__reint1_213); \ + __ret_213; \ +}) +#else +#define vcmla_rot180_laneq_f16(__p0_214, __p1_214, __p2_214, __p3_214) __extension__ ({ \ + float16x4_t __s0_214 = __p0_214; \ + float16x4_t __s1_214 = __p1_214; \ + float16x8_t __s2_214 = __p2_214; \ + float16x4_t __rev0_214; __rev0_214 = __builtin_shufflevector(__s0_214, __s0_214, 3, 2, 1, 0); \ + float16x4_t __rev1_214; __rev1_214 = __builtin_shufflevector(__s1_214, __s1_214, 3, 2, 1, 0); \ + float16x8_t __rev2_214; __rev2_214 = __builtin_shufflevector(__s2_214, __s2_214, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __ret_214; \ +float16x8_t __reint_214 = __rev2_214; \ +uint32x2_t __reint1_214 = (uint32x2_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_214, __p3_214), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_214, __p3_214)}; \ + __ret_214 = __noswap_vcmla_rot180_f16(__rev0_214, __rev1_214, *(float16x4_t *) &__reint1_214); \ + __ret_214 = __builtin_shufflevector(__ret_214, __ret_214, 3, 2, 1, 0); \ + __ret_214; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmlaq_rot180_laneq_f16(__p0_215, __p1_215, __p2_215, __p3_215) __extension__ ({ \ + float16x8_t __s0_215 = __p0_215; \ + float16x8_t __s1_215 = __p1_215; \ + float16x8_t __s2_215 = __p2_215; \ + float16x8_t __ret_215; \ +float16x8_t __reint_215 = __s2_215; \ +uint32x4_t __reint1_215 = (uint32x4_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_215, __p3_215), vgetq_lane_u32(*(uint32x4_t *) &__reint_215, __p3_215), vgetq_lane_u32(*(uint32x4_t *) &__reint_215, __p3_215), vgetq_lane_u32(*(uint32x4_t *) &__reint_215, __p3_215)}; \ + __ret_215 = vcmlaq_rot180_f16(__s0_215, __s1_215, *(float16x8_t *) &__reint1_215); \ + __ret_215; \ +}) +#else +#define vcmlaq_rot180_laneq_f16(__p0_216, __p1_216, __p2_216, __p3_216) __extension__ ({ \ + float16x8_t __s0_216 = __p0_216; \ + float16x8_t __s1_216 = __p1_216; \ + float16x8_t __s2_216 = __p2_216; \ + float16x8_t __rev0_216; __rev0_216 = __builtin_shufflevector(__s0_216, __s0_216, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev1_216; __rev1_216 = __builtin_shufflevector(__s1_216, __s1_216, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev2_216; __rev2_216 = __builtin_shufflevector(__s2_216, __s2_216, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __ret_216; \ +float16x8_t __reint_216 = __rev2_216; \ +uint32x4_t __reint1_216 = (uint32x4_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_216, __p3_216), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_216, __p3_216), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_216, __p3_216), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_216, __p3_216)}; \ + __ret_216 = __noswap_vcmlaq_rot180_f16(__rev0_216, __rev1_216, *(float16x8_t *) &__reint1_216); \ + __ret_216 = __builtin_shufflevector(__ret_216, __ret_216, 7, 6, 5, 4, 3, 
2, 1, 0); \ + __ret_216; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai float16x8_t vcmlaq_rot270_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vcmlaq_rot270_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); + return __ret; +} +#else +__ai float16x8_t vcmlaq_rot270_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vcmlaq_rot270_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 40); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +__ai float16x8_t __noswap_vcmlaq_rot270_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vcmlaq_rot270_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai float16x4_t vcmla_rot270_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vcmla_rot270_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); + return __ret; +} +#else +__ai float16x4_t vcmla_rot270_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vcmla_rot270_v((int8x8_t)__rev0, (int8x8_t)__rev1, (int8x8_t)__rev2, 8); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +__ai float16x4_t __noswap_vcmla_rot270_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vcmla_rot270_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmla_rot270_lane_f16(__p0_217, __p1_217, __p2_217, __p3_217) __extension__ ({ \ + float16x4_t __s0_217 = __p0_217; \ + float16x4_t __s1_217 = __p1_217; \ + float16x4_t __s2_217 = __p2_217; \ + float16x4_t __ret_217; \ +float16x4_t __reint_217 = __s2_217; \ +uint32x2_t __reint1_217 = (uint32x2_t) {vget_lane_u32(*(uint32x2_t *) &__reint_217, __p3_217), vget_lane_u32(*(uint32x2_t *) &__reint_217, __p3_217)}; \ + __ret_217 = vcmla_rot270_f16(__s0_217, __s1_217, *(float16x4_t *) &__reint1_217); \ + __ret_217; \ +}) +#else +#define vcmla_rot270_lane_f16(__p0_218, __p1_218, __p2_218, __p3_218) __extension__ ({ \ + float16x4_t __s0_218 = __p0_218; \ + float16x4_t __s1_218 = __p1_218; \ + float16x4_t __s2_218 = __p2_218; \ + float16x4_t __rev0_218; __rev0_218 = __builtin_shufflevector(__s0_218, __s0_218, 3, 2, 1, 0); \ + float16x4_t __rev1_218; __rev1_218 = __builtin_shufflevector(__s1_218, __s1_218, 3, 2, 1, 0); \ + float16x4_t __rev2_218; __rev2_218 = __builtin_shufflevector(__s2_218, __s2_218, 3, 2, 1, 0); \ + float16x4_t __ret_218; \ +float16x4_t __reint_218 = __rev2_218; \ +uint32x2_t __reint1_218 = (uint32x2_t) {__noswap_vget_lane_u32(*(uint32x2_t *) &__reint_218, __p3_218), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_218, __p3_218)}; \ + __ret_218 = 
__noswap_vcmla_rot270_f16(__rev0_218, __rev1_218, *(float16x4_t *) &__reint1_218); \ + __ret_218 = __builtin_shufflevector(__ret_218, __ret_218, 3, 2, 1, 0); \ + __ret_218; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmlaq_rot270_lane_f16(__p0_219, __p1_219, __p2_219, __p3_219) __extension__ ({ \ + float16x8_t __s0_219 = __p0_219; \ + float16x8_t __s1_219 = __p1_219; \ + float16x4_t __s2_219 = __p2_219; \ + float16x8_t __ret_219; \ +float16x4_t __reint_219 = __s2_219; \ +uint32x4_t __reint1_219 = (uint32x4_t) {vget_lane_u32(*(uint32x2_t *) &__reint_219, __p3_219), vget_lane_u32(*(uint32x2_t *) &__reint_219, __p3_219), vget_lane_u32(*(uint32x2_t *) &__reint_219, __p3_219), vget_lane_u32(*(uint32x2_t *) &__reint_219, __p3_219)}; \ + __ret_219 = vcmlaq_rot270_f16(__s0_219, __s1_219, *(float16x8_t *) &__reint1_219); \ + __ret_219; \ +}) +#else +#define vcmlaq_rot270_lane_f16(__p0_220, __p1_220, __p2_220, __p3_220) __extension__ ({ \ + float16x8_t __s0_220 = __p0_220; \ + float16x8_t __s1_220 = __p1_220; \ + float16x4_t __s2_220 = __p2_220; \ + float16x8_t __rev0_220; __rev0_220 = __builtin_shufflevector(__s0_220, __s0_220, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev1_220; __rev1_220 = __builtin_shufflevector(__s1_220, __s1_220, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev2_220; __rev2_220 = __builtin_shufflevector(__s2_220, __s2_220, 3, 2, 1, 0); \ + float16x8_t __ret_220; \ +float16x4_t __reint_220 = __rev2_220; \ +uint32x4_t __reint1_220 = (uint32x4_t) {__noswap_vget_lane_u32(*(uint32x2_t *) &__reint_220, __p3_220), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_220, __p3_220), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_220, __p3_220), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_220, __p3_220)}; \ + __ret_220 = __noswap_vcmlaq_rot270_f16(__rev0_220, __rev1_220, *(float16x8_t *) &__reint1_220); \ + __ret_220 = __builtin_shufflevector(__ret_220, __ret_220, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_220; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmla_rot270_laneq_f16(__p0_221, __p1_221, __p2_221, __p3_221) __extension__ ({ \ + float16x4_t __s0_221 = __p0_221; \ + float16x4_t __s1_221 = __p1_221; \ + float16x8_t __s2_221 = __p2_221; \ + float16x4_t __ret_221; \ +float16x8_t __reint_221 = __s2_221; \ +uint32x2_t __reint1_221 = (uint32x2_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_221, __p3_221), vgetq_lane_u32(*(uint32x4_t *) &__reint_221, __p3_221)}; \ + __ret_221 = vcmla_rot270_f16(__s0_221, __s1_221, *(float16x4_t *) &__reint1_221); \ + __ret_221; \ +}) +#else +#define vcmla_rot270_laneq_f16(__p0_222, __p1_222, __p2_222, __p3_222) __extension__ ({ \ + float16x4_t __s0_222 = __p0_222; \ + float16x4_t __s1_222 = __p1_222; \ + float16x8_t __s2_222 = __p2_222; \ + float16x4_t __rev0_222; __rev0_222 = __builtin_shufflevector(__s0_222, __s0_222, 3, 2, 1, 0); \ + float16x4_t __rev1_222; __rev1_222 = __builtin_shufflevector(__s1_222, __s1_222, 3, 2, 1, 0); \ + float16x8_t __rev2_222; __rev2_222 = __builtin_shufflevector(__s2_222, __s2_222, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __ret_222; \ +float16x8_t __reint_222 = __rev2_222; \ +uint32x2_t __reint1_222 = (uint32x2_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_222, __p3_222), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_222, __p3_222)}; \ + __ret_222 = __noswap_vcmla_rot270_f16(__rev0_222, __rev1_222, *(float16x4_t *) &__reint1_222); \ + __ret_222 = __builtin_shufflevector(__ret_222, __ret_222, 3, 2, 1, 0); \ + __ret_222; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmlaq_rot270_laneq_f16(__p0_223, 
__p1_223, __p2_223, __p3_223) __extension__ ({ \ + float16x8_t __s0_223 = __p0_223; \ + float16x8_t __s1_223 = __p1_223; \ + float16x8_t __s2_223 = __p2_223; \ + float16x8_t __ret_223; \ +float16x8_t __reint_223 = __s2_223; \ +uint32x4_t __reint1_223 = (uint32x4_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_223, __p3_223), vgetq_lane_u32(*(uint32x4_t *) &__reint_223, __p3_223), vgetq_lane_u32(*(uint32x4_t *) &__reint_223, __p3_223), vgetq_lane_u32(*(uint32x4_t *) &__reint_223, __p3_223)}; \ + __ret_223 = vcmlaq_rot270_f16(__s0_223, __s1_223, *(float16x8_t *) &__reint1_223); \ + __ret_223; \ +}) +#else +#define vcmlaq_rot270_laneq_f16(__p0_224, __p1_224, __p2_224, __p3_224) __extension__ ({ \ + float16x8_t __s0_224 = __p0_224; \ + float16x8_t __s1_224 = __p1_224; \ + float16x8_t __s2_224 = __p2_224; \ + float16x8_t __rev0_224; __rev0_224 = __builtin_shufflevector(__s0_224, __s0_224, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev1_224; __rev1_224 = __builtin_shufflevector(__s1_224, __s1_224, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev2_224; __rev2_224 = __builtin_shufflevector(__s2_224, __s2_224, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __ret_224; \ +float16x8_t __reint_224 = __rev2_224; \ +uint32x4_t __reint1_224 = (uint32x4_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_224, __p3_224), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_224, __p3_224), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_224, __p3_224), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_224, __p3_224)}; \ + __ret_224 = __noswap_vcmlaq_rot270_f16(__rev0_224, __rev1_224, *(float16x8_t *) &__reint1_224); \ + __ret_224 = __builtin_shufflevector(__ret_224, __ret_224, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_224; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai float16x8_t vcmlaq_rot90_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vcmlaq_rot90_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); + return __ret; +} +#else +__ai float16x8_t vcmlaq_rot90_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { + float16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vcmlaq_rot90_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 40); + __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret; +} +__ai float16x8_t __noswap_vcmlaq_rot90_f16(float16x8_t __p0, float16x8_t __p1, float16x8_t __p2) { + float16x8_t __ret; + __ret = (float16x8_t) __builtin_neon_vcmlaq_rot90_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 40); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai float16x4_t vcmla_rot90_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vcmla_rot90_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); + return __ret; +} +#else +__ai float16x4_t vcmla_rot90_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { + float16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); + float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vcmla_rot90_v((int8x8_t)__rev0, 
(int8x8_t)__rev1, (int8x8_t)__rev2, 8); + __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); + return __ret; +} +__ai float16x4_t __noswap_vcmla_rot90_f16(float16x4_t __p0, float16x4_t __p1, float16x4_t __p2) { + float16x4_t __ret; + __ret = (float16x4_t) __builtin_neon_vcmla_rot90_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 8); + return __ret; +} +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmla_rot90_lane_f16(__p0_225, __p1_225, __p2_225, __p3_225) __extension__ ({ \ + float16x4_t __s0_225 = __p0_225; \ + float16x4_t __s1_225 = __p1_225; \ + float16x4_t __s2_225 = __p2_225; \ + float16x4_t __ret_225; \ +float16x4_t __reint_225 = __s2_225; \ +uint32x2_t __reint1_225 = (uint32x2_t) {vget_lane_u32(*(uint32x2_t *) &__reint_225, __p3_225), vget_lane_u32(*(uint32x2_t *) &__reint_225, __p3_225)}; \ + __ret_225 = vcmla_rot90_f16(__s0_225, __s1_225, *(float16x4_t *) &__reint1_225); \ + __ret_225; \ +}) +#else +#define vcmla_rot90_lane_f16(__p0_226, __p1_226, __p2_226, __p3_226) __extension__ ({ \ + float16x4_t __s0_226 = __p0_226; \ + float16x4_t __s1_226 = __p1_226; \ + float16x4_t __s2_226 = __p2_226; \ + float16x4_t __rev0_226; __rev0_226 = __builtin_shufflevector(__s0_226, __s0_226, 3, 2, 1, 0); \ + float16x4_t __rev1_226; __rev1_226 = __builtin_shufflevector(__s1_226, __s1_226, 3, 2, 1, 0); \ + float16x4_t __rev2_226; __rev2_226 = __builtin_shufflevector(__s2_226, __s2_226, 3, 2, 1, 0); \ + float16x4_t __ret_226; \ +float16x4_t __reint_226 = __rev2_226; \ +uint32x2_t __reint1_226 = (uint32x2_t) {__noswap_vget_lane_u32(*(uint32x2_t *) &__reint_226, __p3_226), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_226, __p3_226)}; \ + __ret_226 = __noswap_vcmla_rot90_f16(__rev0_226, __rev1_226, *(float16x4_t *) &__reint1_226); \ + __ret_226 = __builtin_shufflevector(__ret_226, __ret_226, 3, 2, 1, 0); \ + __ret_226; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmlaq_rot90_lane_f16(__p0_227, __p1_227, __p2_227, __p3_227) __extension__ ({ \ + float16x8_t __s0_227 = __p0_227; \ + float16x8_t __s1_227 = __p1_227; \ + float16x4_t __s2_227 = __p2_227; \ + float16x8_t __ret_227; \ +float16x4_t __reint_227 = __s2_227; \ +uint32x4_t __reint1_227 = (uint32x4_t) {vget_lane_u32(*(uint32x2_t *) &__reint_227, __p3_227), vget_lane_u32(*(uint32x2_t *) &__reint_227, __p3_227), vget_lane_u32(*(uint32x2_t *) &__reint_227, __p3_227), vget_lane_u32(*(uint32x2_t *) &__reint_227, __p3_227)}; \ + __ret_227 = vcmlaq_rot90_f16(__s0_227, __s1_227, *(float16x8_t *) &__reint1_227); \ + __ret_227; \ +}) +#else +#define vcmlaq_rot90_lane_f16(__p0_228, __p1_228, __p2_228, __p3_228) __extension__ ({ \ + float16x8_t __s0_228 = __p0_228; \ + float16x8_t __s1_228 = __p1_228; \ + float16x4_t __s2_228 = __p2_228; \ + float16x8_t __rev0_228; __rev0_228 = __builtin_shufflevector(__s0_228, __s0_228, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev1_228; __rev1_228 = __builtin_shufflevector(__s1_228, __s1_228, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev2_228; __rev2_228 = __builtin_shufflevector(__s2_228, __s2_228, 3, 2, 1, 0); \ + float16x8_t __ret_228; \ +float16x4_t __reint_228 = __rev2_228; \ +uint32x4_t __reint1_228 = (uint32x4_t) {__noswap_vget_lane_u32(*(uint32x2_t *) &__reint_228, __p3_228), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_228, __p3_228), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_228, __p3_228), __noswap_vget_lane_u32(*(uint32x2_t *) &__reint_228, __p3_228)}; \ + __ret_228 = __noswap_vcmlaq_rot90_f16(__rev0_228, __rev1_228, *(float16x8_t *) &__reint1_228); \ + __ret_228 = 
__builtin_shufflevector(__ret_228, __ret_228, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_228; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmla_rot90_laneq_f16(__p0_229, __p1_229, __p2_229, __p3_229) __extension__ ({ \ + float16x4_t __s0_229 = __p0_229; \ + float16x4_t __s1_229 = __p1_229; \ + float16x8_t __s2_229 = __p2_229; \ + float16x4_t __ret_229; \ +float16x8_t __reint_229 = __s2_229; \ +uint32x2_t __reint1_229 = (uint32x2_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_229, __p3_229), vgetq_lane_u32(*(uint32x4_t *) &__reint_229, __p3_229)}; \ + __ret_229 = vcmla_rot90_f16(__s0_229, __s1_229, *(float16x4_t *) &__reint1_229); \ + __ret_229; \ +}) +#else +#define vcmla_rot90_laneq_f16(__p0_230, __p1_230, __p2_230, __p3_230) __extension__ ({ \ + float16x4_t __s0_230 = __p0_230; \ + float16x4_t __s1_230 = __p1_230; \ + float16x8_t __s2_230 = __p2_230; \ + float16x4_t __rev0_230; __rev0_230 = __builtin_shufflevector(__s0_230, __s0_230, 3, 2, 1, 0); \ + float16x4_t __rev1_230; __rev1_230 = __builtin_shufflevector(__s1_230, __s1_230, 3, 2, 1, 0); \ + float16x8_t __rev2_230; __rev2_230 = __builtin_shufflevector(__s2_230, __s2_230, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __ret_230; \ +float16x8_t __reint_230 = __rev2_230; \ +uint32x2_t __reint1_230 = (uint32x2_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_230, __p3_230), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_230, __p3_230)}; \ + __ret_230 = __noswap_vcmla_rot90_f16(__rev0_230, __rev1_230, *(float16x4_t *) &__reint1_230); \ + __ret_230 = __builtin_shufflevector(__ret_230, __ret_230, 3, 2, 1, 0); \ + __ret_230; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmlaq_rot90_laneq_f16(__p0_231, __p1_231, __p2_231, __p3_231) __extension__ ({ \ + float16x8_t __s0_231 = __p0_231; \ + float16x8_t __s1_231 = __p1_231; \ + float16x8_t __s2_231 = __p2_231; \ + float16x8_t __ret_231; \ +float16x8_t __reint_231 = __s2_231; \ +uint32x4_t __reint1_231 = (uint32x4_t) {vgetq_lane_u32(*(uint32x4_t *) &__reint_231, __p3_231), vgetq_lane_u32(*(uint32x4_t *) &__reint_231, __p3_231), vgetq_lane_u32(*(uint32x4_t *) &__reint_231, __p3_231), vgetq_lane_u32(*(uint32x4_t *) &__reint_231, __p3_231)}; \ + __ret_231 = vcmlaq_rot90_f16(__s0_231, __s1_231, *(float16x8_t *) &__reint1_231); \ + __ret_231; \ +}) +#else +#define vcmlaq_rot90_laneq_f16(__p0_232, __p1_232, __p2_232, __p3_232) __extension__ ({ \ + float16x8_t __s0_232 = __p0_232; \ + float16x8_t __s1_232 = __p1_232; \ + float16x8_t __s2_232 = __p2_232; \ + float16x8_t __rev0_232; __rev0_232 = __builtin_shufflevector(__s0_232, __s0_232, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev1_232; __rev1_232 = __builtin_shufflevector(__s1_232, __s1_232, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev2_232; __rev2_232 = __builtin_shufflevector(__s2_232, __s2_232, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __ret_232; \ +float16x8_t __reint_232 = __rev2_232; \ +uint32x4_t __reint1_232 = (uint32x4_t) {__noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_232, __p3_232), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_232, __p3_232), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_232, __p3_232), __noswap_vgetq_lane_u32(*(uint32x4_t *) &__reint_232, __p3_232)}; \ + __ret_232 = __noswap_vcmlaq_rot90_f16(__rev0_232, __rev1_232, *(float16x8_t *) &__reint1_232); \ + __ret_232 = __builtin_shufflevector(__ret_232, __ret_232, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_232; \ +}) +#endif + #endif #if defined(__ARM_FEATURE_COMPLEX) && defined(__aarch64__) #ifdef __LITTLE_ENDIAN__ @@ -40535,6 +41799,478 @@ __ai float64x2_t 
vcaddq_rot90_f64(float64x2_t __p0, float64x2_t __p1) { } #endif +#ifdef __LITTLE_ENDIAN__ +__ai float64x2_t vcmlaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vcmlaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); + return __ret; +} +#else +__ai float64x2_t vcmlaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vcmlaq_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 42); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +__ai float64x2_t __noswap_vcmlaq_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vcmlaq_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); + return __ret; +} +#endif + +__ai float64x1_t vcmla_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { + float64x1_t __ret; + __ret = (float64x1_t) __builtin_neon_vcmla_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); + return __ret; +} +#define vcmla_lane_f64(__p0_233, __p1_233, __p2_233, __p3_233) __extension__ ({ \ + float64x1_t __s0_233 = __p0_233; \ + float64x1_t __s1_233 = __p1_233; \ + float64x1_t __s2_233 = __p2_233; \ + float64x1_t __ret_233; \ +float64x1_t __reint_233 = __s2_233; \ +uint64x2_t __reint1_233 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_233, __p3_233), vgetq_lane_u64(*(uint64x2_t *) &__reint_233, __p3_233)}; \ + __ret_233 = vcmla_f64(__s0_233, __s1_233, *(float64x1_t *) &__reint1_233); \ + __ret_233; \ +}) +#ifdef __LITTLE_ENDIAN__ +#define vcmlaq_lane_f64(__p0_234, __p1_234, __p2_234, __p3_234) __extension__ ({ \ + float64x2_t __s0_234 = __p0_234; \ + float64x2_t __s1_234 = __p1_234; \ + float64x1_t __s2_234 = __p2_234; \ + float64x2_t __ret_234; \ +float64x1_t __reint_234 = __s2_234; \ +uint64x2_t __reint1_234 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_234, __p3_234), vgetq_lane_u64(*(uint64x2_t *) &__reint_234, __p3_234)}; \ + __ret_234 = vcmlaq_f64(__s0_234, __s1_234, *(float64x2_t *) &__reint1_234); \ + __ret_234; \ +}) +#else +#define vcmlaq_lane_f64(__p0_235, __p1_235, __p2_235, __p3_235) __extension__ ({ \ + float64x2_t __s0_235 = __p0_235; \ + float64x2_t __s1_235 = __p1_235; \ + float64x1_t __s2_235 = __p2_235; \ + float64x2_t __rev0_235; __rev0_235 = __builtin_shufflevector(__s0_235, __s0_235, 1, 0); \ + float64x2_t __rev1_235; __rev1_235 = __builtin_shufflevector(__s1_235, __s1_235, 1, 0); \ + float64x2_t __ret_235; \ +float64x1_t __reint_235 = __s2_235; \ +uint64x2_t __reint1_235 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_235, __p3_235), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_235, __p3_235)}; \ + __ret_235 = __noswap_vcmlaq_f64(__rev0_235, __rev1_235, *(float64x2_t *) &__reint1_235); \ + __ret_235 = __builtin_shufflevector(__ret_235, __ret_235, 1, 0); \ + __ret_235; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmla_laneq_f64(__p0_236, __p1_236, __p2_236, __p3_236) __extension__ ({ \ + float64x1_t __s0_236 = __p0_236; \ + float64x1_t __s1_236 = __p1_236; \ + float64x2_t __s2_236 = __p2_236; \ + float64x1_t __ret_236; \ +float64x2_t __reint_236 = __s2_236; \ +uint64x2_t __reint1_236 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) 
&__reint_236, __p3_236), vgetq_lane_u64(*(uint64x2_t *) &__reint_236, __p3_236)}; \ + __ret_236 = vcmla_f64(__s0_236, __s1_236, *(float64x1_t *) &__reint1_236); \ + __ret_236; \ +}) +#else +#define vcmla_laneq_f64(__p0_237, __p1_237, __p2_237, __p3_237) __extension__ ({ \ + float64x1_t __s0_237 = __p0_237; \ + float64x1_t __s1_237 = __p1_237; \ + float64x2_t __s2_237 = __p2_237; \ + float64x2_t __rev2_237; __rev2_237 = __builtin_shufflevector(__s2_237, __s2_237, 1, 0); \ + float64x1_t __ret_237; \ +float64x2_t __reint_237 = __rev2_237; \ +uint64x2_t __reint1_237 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_237, __p3_237), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_237, __p3_237)}; \ + __ret_237 = vcmla_f64(__s0_237, __s1_237, *(float64x1_t *) &__reint1_237); \ + __ret_237; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmlaq_laneq_f64(__p0_238, __p1_238, __p2_238, __p3_238) __extension__ ({ \ + float64x2_t __s0_238 = __p0_238; \ + float64x2_t __s1_238 = __p1_238; \ + float64x2_t __s2_238 = __p2_238; \ + float64x2_t __ret_238; \ +float64x2_t __reint_238 = __s2_238; \ +uint64x2_t __reint1_238 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_238, __p3_238), vgetq_lane_u64(*(uint64x2_t *) &__reint_238, __p3_238)}; \ + __ret_238 = vcmlaq_f64(__s0_238, __s1_238, *(float64x2_t *) &__reint1_238); \ + __ret_238; \ +}) +#else +#define vcmlaq_laneq_f64(__p0_239, __p1_239, __p2_239, __p3_239) __extension__ ({ \ + float64x2_t __s0_239 = __p0_239; \ + float64x2_t __s1_239 = __p1_239; \ + float64x2_t __s2_239 = __p2_239; \ + float64x2_t __rev0_239; __rev0_239 = __builtin_shufflevector(__s0_239, __s0_239, 1, 0); \ + float64x2_t __rev1_239; __rev1_239 = __builtin_shufflevector(__s1_239, __s1_239, 1, 0); \ + float64x2_t __rev2_239; __rev2_239 = __builtin_shufflevector(__s2_239, __s2_239, 1, 0); \ + float64x2_t __ret_239; \ +float64x2_t __reint_239 = __rev2_239; \ +uint64x2_t __reint1_239 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_239, __p3_239), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_239, __p3_239)}; \ + __ret_239 = __noswap_vcmlaq_f64(__rev0_239, __rev1_239, *(float64x2_t *) &__reint1_239); \ + __ret_239 = __builtin_shufflevector(__ret_239, __ret_239, 1, 0); \ + __ret_239; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai float64x2_t vcmlaq_rot180_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vcmlaq_rot180_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); + return __ret; +} +#else +__ai float64x2_t vcmlaq_rot180_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vcmlaq_rot180_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 42); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +__ai float64x2_t __noswap_vcmlaq_rot180_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vcmlaq_rot180_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); + return __ret; +} +#endif + +__ai float64x1_t vcmla_rot180_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { + float64x1_t __ret; + __ret = (float64x1_t) __builtin_neon_vcmla_rot180_v((int8x8_t)__p0, (int8x8_t)__p1, 
(int8x8_t)__p2, 10); + return __ret; +} +#define vcmla_rot180_lane_f64(__p0_240, __p1_240, __p2_240, __p3_240) __extension__ ({ \ + float64x1_t __s0_240 = __p0_240; \ + float64x1_t __s1_240 = __p1_240; \ + float64x1_t __s2_240 = __p2_240; \ + float64x1_t __ret_240; \ +float64x1_t __reint_240 = __s2_240; \ +uint64x2_t __reint1_240 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_240, __p3_240), vgetq_lane_u64(*(uint64x2_t *) &__reint_240, __p3_240)}; \ + __ret_240 = vcmla_rot180_f64(__s0_240, __s1_240, *(float64x1_t *) &__reint1_240); \ + __ret_240; \ +}) +#ifdef __LITTLE_ENDIAN__ +#define vcmlaq_rot180_lane_f64(__p0_241, __p1_241, __p2_241, __p3_241) __extension__ ({ \ + float64x2_t __s0_241 = __p0_241; \ + float64x2_t __s1_241 = __p1_241; \ + float64x1_t __s2_241 = __p2_241; \ + float64x2_t __ret_241; \ +float64x1_t __reint_241 = __s2_241; \ +uint64x2_t __reint1_241 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_241, __p3_241), vgetq_lane_u64(*(uint64x2_t *) &__reint_241, __p3_241)}; \ + __ret_241 = vcmlaq_rot180_f64(__s0_241, __s1_241, *(float64x2_t *) &__reint1_241); \ + __ret_241; \ +}) +#else +#define vcmlaq_rot180_lane_f64(__p0_242, __p1_242, __p2_242, __p3_242) __extension__ ({ \ + float64x2_t __s0_242 = __p0_242; \ + float64x2_t __s1_242 = __p1_242; \ + float64x1_t __s2_242 = __p2_242; \ + float64x2_t __rev0_242; __rev0_242 = __builtin_shufflevector(__s0_242, __s0_242, 1, 0); \ + float64x2_t __rev1_242; __rev1_242 = __builtin_shufflevector(__s1_242, __s1_242, 1, 0); \ + float64x2_t __ret_242; \ +float64x1_t __reint_242 = __s2_242; \ +uint64x2_t __reint1_242 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_242, __p3_242), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_242, __p3_242)}; \ + __ret_242 = __noswap_vcmlaq_rot180_f64(__rev0_242, __rev1_242, *(float64x2_t *) &__reint1_242); \ + __ret_242 = __builtin_shufflevector(__ret_242, __ret_242, 1, 0); \ + __ret_242; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmla_rot180_laneq_f64(__p0_243, __p1_243, __p2_243, __p3_243) __extension__ ({ \ + float64x1_t __s0_243 = __p0_243; \ + float64x1_t __s1_243 = __p1_243; \ + float64x2_t __s2_243 = __p2_243; \ + float64x1_t __ret_243; \ +float64x2_t __reint_243 = __s2_243; \ +uint64x2_t __reint1_243 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_243, __p3_243), vgetq_lane_u64(*(uint64x2_t *) &__reint_243, __p3_243)}; \ + __ret_243 = vcmla_rot180_f64(__s0_243, __s1_243, *(float64x1_t *) &__reint1_243); \ + __ret_243; \ +}) +#else +#define vcmla_rot180_laneq_f64(__p0_244, __p1_244, __p2_244, __p3_244) __extension__ ({ \ + float64x1_t __s0_244 = __p0_244; \ + float64x1_t __s1_244 = __p1_244; \ + float64x2_t __s2_244 = __p2_244; \ + float64x2_t __rev2_244; __rev2_244 = __builtin_shufflevector(__s2_244, __s2_244, 1, 0); \ + float64x1_t __ret_244; \ +float64x2_t __reint_244 = __rev2_244; \ +uint64x2_t __reint1_244 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_244, __p3_244), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_244, __p3_244)}; \ + __ret_244 = vcmla_rot180_f64(__s0_244, __s1_244, *(float64x1_t *) &__reint1_244); \ + __ret_244; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmlaq_rot180_laneq_f64(__p0_245, __p1_245, __p2_245, __p3_245) __extension__ ({ \ + float64x2_t __s0_245 = __p0_245; \ + float64x2_t __s1_245 = __p1_245; \ + float64x2_t __s2_245 = __p2_245; \ + float64x2_t __ret_245; \ +float64x2_t __reint_245 = __s2_245; \ +uint64x2_t __reint1_245 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_245, 
__p3_245), vgetq_lane_u64(*(uint64x2_t *) &__reint_245, __p3_245)}; \ + __ret_245 = vcmlaq_rot180_f64(__s0_245, __s1_245, *(float64x2_t *) &__reint1_245); \ + __ret_245; \ +}) +#else +#define vcmlaq_rot180_laneq_f64(__p0_246, __p1_246, __p2_246, __p3_246) __extension__ ({ \ + float64x2_t __s0_246 = __p0_246; \ + float64x2_t __s1_246 = __p1_246; \ + float64x2_t __s2_246 = __p2_246; \ + float64x2_t __rev0_246; __rev0_246 = __builtin_shufflevector(__s0_246, __s0_246, 1, 0); \ + float64x2_t __rev1_246; __rev1_246 = __builtin_shufflevector(__s1_246, __s1_246, 1, 0); \ + float64x2_t __rev2_246; __rev2_246 = __builtin_shufflevector(__s2_246, __s2_246, 1, 0); \ + float64x2_t __ret_246; \ +float64x2_t __reint_246 = __rev2_246; \ +uint64x2_t __reint1_246 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_246, __p3_246), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_246, __p3_246)}; \ + __ret_246 = __noswap_vcmlaq_rot180_f64(__rev0_246, __rev1_246, *(float64x2_t *) &__reint1_246); \ + __ret_246 = __builtin_shufflevector(__ret_246, __ret_246, 1, 0); \ + __ret_246; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai float64x2_t vcmlaq_rot270_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vcmlaq_rot270_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); + return __ret; +} +#else +__ai float64x2_t vcmlaq_rot270_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vcmlaq_rot270_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 42); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +__ai float64x2_t __noswap_vcmlaq_rot270_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vcmlaq_rot270_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); + return __ret; +} +#endif + +__ai float64x1_t vcmla_rot270_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { + float64x1_t __ret; + __ret = (float64x1_t) __builtin_neon_vcmla_rot270_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); + return __ret; +} +#define vcmla_rot270_lane_f64(__p0_247, __p1_247, __p2_247, __p3_247) __extension__ ({ \ + float64x1_t __s0_247 = __p0_247; \ + float64x1_t __s1_247 = __p1_247; \ + float64x1_t __s2_247 = __p2_247; \ + float64x1_t __ret_247; \ +float64x1_t __reint_247 = __s2_247; \ +uint64x2_t __reint1_247 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_247, __p3_247), vgetq_lane_u64(*(uint64x2_t *) &__reint_247, __p3_247)}; \ + __ret_247 = vcmla_rot270_f64(__s0_247, __s1_247, *(float64x1_t *) &__reint1_247); \ + __ret_247; \ +}) +#ifdef __LITTLE_ENDIAN__ +#define vcmlaq_rot270_lane_f64(__p0_248, __p1_248, __p2_248, __p3_248) __extension__ ({ \ + float64x2_t __s0_248 = __p0_248; \ + float64x2_t __s1_248 = __p1_248; \ + float64x1_t __s2_248 = __p2_248; \ + float64x2_t __ret_248; \ +float64x1_t __reint_248 = __s2_248; \ +uint64x2_t __reint1_248 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_248, __p3_248), vgetq_lane_u64(*(uint64x2_t *) &__reint_248, __p3_248)}; \ + __ret_248 = vcmlaq_rot270_f64(__s0_248, __s1_248, *(float64x2_t *) &__reint1_248); \ + __ret_248; \ +}) +#else +#define vcmlaq_rot270_lane_f64(__p0_249, __p1_249, 
__p2_249, __p3_249) __extension__ ({ \ + float64x2_t __s0_249 = __p0_249; \ + float64x2_t __s1_249 = __p1_249; \ + float64x1_t __s2_249 = __p2_249; \ + float64x2_t __rev0_249; __rev0_249 = __builtin_shufflevector(__s0_249, __s0_249, 1, 0); \ + float64x2_t __rev1_249; __rev1_249 = __builtin_shufflevector(__s1_249, __s1_249, 1, 0); \ + float64x2_t __ret_249; \ +float64x1_t __reint_249 = __s2_249; \ +uint64x2_t __reint1_249 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_249, __p3_249), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_249, __p3_249)}; \ + __ret_249 = __noswap_vcmlaq_rot270_f64(__rev0_249, __rev1_249, *(float64x2_t *) &__reint1_249); \ + __ret_249 = __builtin_shufflevector(__ret_249, __ret_249, 1, 0); \ + __ret_249; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmla_rot270_laneq_f64(__p0_250, __p1_250, __p2_250, __p3_250) __extension__ ({ \ + float64x1_t __s0_250 = __p0_250; \ + float64x1_t __s1_250 = __p1_250; \ + float64x2_t __s2_250 = __p2_250; \ + float64x1_t __ret_250; \ +float64x2_t __reint_250 = __s2_250; \ +uint64x2_t __reint1_250 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_250, __p3_250), vgetq_lane_u64(*(uint64x2_t *) &__reint_250, __p3_250)}; \ + __ret_250 = vcmla_rot270_f64(__s0_250, __s1_250, *(float64x1_t *) &__reint1_250); \ + __ret_250; \ +}) +#else +#define vcmla_rot270_laneq_f64(__p0_251, __p1_251, __p2_251, __p3_251) __extension__ ({ \ + float64x1_t __s0_251 = __p0_251; \ + float64x1_t __s1_251 = __p1_251; \ + float64x2_t __s2_251 = __p2_251; \ + float64x2_t __rev2_251; __rev2_251 = __builtin_shufflevector(__s2_251, __s2_251, 1, 0); \ + float64x1_t __ret_251; \ +float64x2_t __reint_251 = __rev2_251; \ +uint64x2_t __reint1_251 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_251, __p3_251), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_251, __p3_251)}; \ + __ret_251 = vcmla_rot270_f64(__s0_251, __s1_251, *(float64x1_t *) &__reint1_251); \ + __ret_251; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmlaq_rot270_laneq_f64(__p0_252, __p1_252, __p2_252, __p3_252) __extension__ ({ \ + float64x2_t __s0_252 = __p0_252; \ + float64x2_t __s1_252 = __p1_252; \ + float64x2_t __s2_252 = __p2_252; \ + float64x2_t __ret_252; \ +float64x2_t __reint_252 = __s2_252; \ +uint64x2_t __reint1_252 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_252, __p3_252), vgetq_lane_u64(*(uint64x2_t *) &__reint_252, __p3_252)}; \ + __ret_252 = vcmlaq_rot270_f64(__s0_252, __s1_252, *(float64x2_t *) &__reint1_252); \ + __ret_252; \ +}) +#else +#define vcmlaq_rot270_laneq_f64(__p0_253, __p1_253, __p2_253, __p3_253) __extension__ ({ \ + float64x2_t __s0_253 = __p0_253; \ + float64x2_t __s1_253 = __p1_253; \ + float64x2_t __s2_253 = __p2_253; \ + float64x2_t __rev0_253; __rev0_253 = __builtin_shufflevector(__s0_253, __s0_253, 1, 0); \ + float64x2_t __rev1_253; __rev1_253 = __builtin_shufflevector(__s1_253, __s1_253, 1, 0); \ + float64x2_t __rev2_253; __rev2_253 = __builtin_shufflevector(__s2_253, __s2_253, 1, 0); \ + float64x2_t __ret_253; \ +float64x2_t __reint_253 = __rev2_253; \ +uint64x2_t __reint1_253 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_253, __p3_253), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_253, __p3_253)}; \ + __ret_253 = __noswap_vcmlaq_rot270_f64(__rev0_253, __rev1_253, *(float64x2_t *) &__reint1_253); \ + __ret_253 = __builtin_shufflevector(__ret_253, __ret_253, 1, 0); \ + __ret_253; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +__ai float64x2_t vcmlaq_rot90_f64(float64x2_t __p0, 
float64x2_t __p1, float64x2_t __p2) { + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vcmlaq_rot90_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); + return __ret; +} +#else +__ai float64x2_t vcmlaq_rot90_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { + float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); + float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + float64x2_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 1, 0); + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vcmlaq_rot90_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 42); + __ret = __builtin_shufflevector(__ret, __ret, 1, 0); + return __ret; +} +__ai float64x2_t __noswap_vcmlaq_rot90_f64(float64x2_t __p0, float64x2_t __p1, float64x2_t __p2) { + float64x2_t __ret; + __ret = (float64x2_t) __builtin_neon_vcmlaq_rot90_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 42); + return __ret; +} +#endif + +__ai float64x1_t vcmla_rot90_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) { + float64x1_t __ret; + __ret = (float64x1_t) __builtin_neon_vcmla_rot90_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 10); + return __ret; +} +#define vcmla_rot90_lane_f64(__p0_254, __p1_254, __p2_254, __p3_254) __extension__ ({ \ + float64x1_t __s0_254 = __p0_254; \ + float64x1_t __s1_254 = __p1_254; \ + float64x1_t __s2_254 = __p2_254; \ + float64x1_t __ret_254; \ +float64x1_t __reint_254 = __s2_254; \ +uint64x2_t __reint1_254 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_254, __p3_254), vgetq_lane_u64(*(uint64x2_t *) &__reint_254, __p3_254)}; \ + __ret_254 = vcmla_rot90_f64(__s0_254, __s1_254, *(float64x1_t *) &__reint1_254); \ + __ret_254; \ +}) +#ifdef __LITTLE_ENDIAN__ +#define vcmlaq_rot90_lane_f64(__p0_255, __p1_255, __p2_255, __p3_255) __extension__ ({ \ + float64x2_t __s0_255 = __p0_255; \ + float64x2_t __s1_255 = __p1_255; \ + float64x1_t __s2_255 = __p2_255; \ + float64x2_t __ret_255; \ +float64x1_t __reint_255 = __s2_255; \ +uint64x2_t __reint1_255 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_255, __p3_255), vgetq_lane_u64(*(uint64x2_t *) &__reint_255, __p3_255)}; \ + __ret_255 = vcmlaq_rot90_f64(__s0_255, __s1_255, *(float64x2_t *) &__reint1_255); \ + __ret_255; \ +}) +#else +#define vcmlaq_rot90_lane_f64(__p0_256, __p1_256, __p2_256, __p3_256) __extension__ ({ \ + float64x2_t __s0_256 = __p0_256; \ + float64x2_t __s1_256 = __p1_256; \ + float64x1_t __s2_256 = __p2_256; \ + float64x2_t __rev0_256; __rev0_256 = __builtin_shufflevector(__s0_256, __s0_256, 1, 0); \ + float64x2_t __rev1_256; __rev1_256 = __builtin_shufflevector(__s1_256, __s1_256, 1, 0); \ + float64x2_t __ret_256; \ +float64x1_t __reint_256 = __s2_256; \ +uint64x2_t __reint1_256 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_256, __p3_256), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_256, __p3_256)}; \ + __ret_256 = __noswap_vcmlaq_rot90_f64(__rev0_256, __rev1_256, *(float64x2_t *) &__reint1_256); \ + __ret_256 = __builtin_shufflevector(__ret_256, __ret_256, 1, 0); \ + __ret_256; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmla_rot90_laneq_f64(__p0_257, __p1_257, __p2_257, __p3_257) __extension__ ({ \ + float64x1_t __s0_257 = __p0_257; \ + float64x1_t __s1_257 = __p1_257; \ + float64x2_t __s2_257 = __p2_257; \ + float64x1_t __ret_257; \ +float64x2_t __reint_257 = __s2_257; \ +uint64x2_t __reint1_257 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_257, __p3_257), vgetq_lane_u64(*(uint64x2_t *) 
&__reint_257, __p3_257)}; \ + __ret_257 = vcmla_rot90_f64(__s0_257, __s1_257, *(float64x1_t *) &__reint1_257); \ + __ret_257; \ +}) +#else +#define vcmla_rot90_laneq_f64(__p0_258, __p1_258, __p2_258, __p3_258) __extension__ ({ \ + float64x1_t __s0_258 = __p0_258; \ + float64x1_t __s1_258 = __p1_258; \ + float64x2_t __s2_258 = __p2_258; \ + float64x2_t __rev2_258; __rev2_258 = __builtin_shufflevector(__s2_258, __s2_258, 1, 0); \ + float64x1_t __ret_258; \ +float64x2_t __reint_258 = __rev2_258; \ +uint64x2_t __reint1_258 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_258, __p3_258), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_258, __p3_258)}; \ + __ret_258 = vcmla_rot90_f64(__s0_258, __s1_258, *(float64x1_t *) &__reint1_258); \ + __ret_258; \ +}) +#endif + +#ifdef __LITTLE_ENDIAN__ +#define vcmlaq_rot90_laneq_f64(__p0_259, __p1_259, __p2_259, __p3_259) __extension__ ({ \ + float64x2_t __s0_259 = __p0_259; \ + float64x2_t __s1_259 = __p1_259; \ + float64x2_t __s2_259 = __p2_259; \ + float64x2_t __ret_259; \ +float64x2_t __reint_259 = __s2_259; \ +uint64x2_t __reint1_259 = (uint64x2_t) {vgetq_lane_u64(*(uint64x2_t *) &__reint_259, __p3_259), vgetq_lane_u64(*(uint64x2_t *) &__reint_259, __p3_259)}; \ + __ret_259 = vcmlaq_rot90_f64(__s0_259, __s1_259, *(float64x2_t *) &__reint1_259); \ + __ret_259; \ +}) +#else +#define vcmlaq_rot90_laneq_f64(__p0_260, __p1_260, __p2_260, __p3_260) __extension__ ({ \ + float64x2_t __s0_260 = __p0_260; \ + float64x2_t __s1_260 = __p1_260; \ + float64x2_t __s2_260 = __p2_260; \ + float64x2_t __rev0_260; __rev0_260 = __builtin_shufflevector(__s0_260, __s0_260, 1, 0); \ + float64x2_t __rev1_260; __rev1_260 = __builtin_shufflevector(__s1_260, __s1_260, 1, 0); \ + float64x2_t __rev2_260; __rev2_260 = __builtin_shufflevector(__s2_260, __s2_260, 1, 0); \ + float64x2_t __ret_260; \ +float64x2_t __reint_260 = __rev2_260; \ +uint64x2_t __reint1_260 = (uint64x2_t) {__noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_260, __p3_260), __noswap_vgetq_lane_u64(*(uint64x2_t *) &__reint_260, __p3_260)}; \ + __ret_260 = __noswap_vcmlaq_rot90_f64(__rev0_260, __rev1_260, *(float64x2_t *) &__reint1_260); \ + __ret_260 = __builtin_shufflevector(__ret_260, __ret_260, 1, 0); \ + __ret_260; \ +}) +#endif + #endif #if defined(__ARM_FEATURE_DOTPROD) #ifdef __LITTLE_ENDIAN__ @@ -40630,228 +42366,228 @@ __ai int32x2_t __noswap_vdot_s32(int32x2_t __p0, int8x8_t __p1, int8x8_t __p2) { #endif #ifdef __LITTLE_ENDIAN__ -#define vdotq_lane_u32(__p0_169, __p1_169, __p2_169, __p3_169) __extension__ ({ \ - uint32x4_t __s0_169 = __p0_169; \ - uint8x16_t __s1_169 = __p1_169; \ - uint8x8_t __s2_169 = __p2_169; \ - uint32x4_t __ret_169; \ -uint8x8_t __reint_169 = __s2_169; \ -uint32x4_t __reint1_169 = splatq_lane_u32(*(uint32x2_t *) &__reint_169, __p3_169); \ - __ret_169 = vdotq_u32(__s0_169, __s1_169, *(uint8x16_t *) &__reint1_169); \ - __ret_169; \ +#define vdotq_lane_u32(__p0_261, __p1_261, __p2_261, __p3_261) __extension__ ({ \ + uint32x4_t __s0_261 = __p0_261; \ + uint8x16_t __s1_261 = __p1_261; \ + uint8x8_t __s2_261 = __p2_261; \ + uint32x4_t __ret_261; \ +uint8x8_t __reint_261 = __s2_261; \ +uint32x4_t __reint1_261 = splatq_lane_u32(*(uint32x2_t *) &__reint_261, __p3_261); \ + __ret_261 = vdotq_u32(__s0_261, __s1_261, *(uint8x16_t *) &__reint1_261); \ + __ret_261; \ }) #else -#define vdotq_lane_u32(__p0_170, __p1_170, __p2_170, __p3_170) __extension__ ({ \ - uint32x4_t __s0_170 = __p0_170; \ - uint8x16_t __s1_170 = __p1_170; \ - uint8x8_t __s2_170 = __p2_170; \ - uint32x4_t 
__rev0_170; __rev0_170 = __builtin_shufflevector(__s0_170, __s0_170, 3, 2, 1, 0); \ - uint8x16_t __rev1_170; __rev1_170 = __builtin_shufflevector(__s1_170, __s1_170, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev2_170; __rev2_170 = __builtin_shufflevector(__s2_170, __s2_170, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint32x4_t __ret_170; \ -uint8x8_t __reint_170 = __rev2_170; \ -uint32x4_t __reint1_170 = __noswap_splatq_lane_u32(*(uint32x2_t *) &__reint_170, __p3_170); \ - __ret_170 = __noswap_vdotq_u32(__rev0_170, __rev1_170, *(uint8x16_t *) &__reint1_170); \ - __ret_170 = __builtin_shufflevector(__ret_170, __ret_170, 3, 2, 1, 0); \ - __ret_170; \ +#define vdotq_lane_u32(__p0_262, __p1_262, __p2_262, __p3_262) __extension__ ({ \ + uint32x4_t __s0_262 = __p0_262; \ + uint8x16_t __s1_262 = __p1_262; \ + uint8x8_t __s2_262 = __p2_262; \ + uint32x4_t __rev0_262; __rev0_262 = __builtin_shufflevector(__s0_262, __s0_262, 3, 2, 1, 0); \ + uint8x16_t __rev1_262; __rev1_262 = __builtin_shufflevector(__s1_262, __s1_262, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev2_262; __rev2_262 = __builtin_shufflevector(__s2_262, __s2_262, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint32x4_t __ret_262; \ +uint8x8_t __reint_262 = __rev2_262; \ +uint32x4_t __reint1_262 = __noswap_splatq_lane_u32(*(uint32x2_t *) &__reint_262, __p3_262); \ + __ret_262 = __noswap_vdotq_u32(__rev0_262, __rev1_262, *(uint8x16_t *) &__reint1_262); \ + __ret_262 = __builtin_shufflevector(__ret_262, __ret_262, 3, 2, 1, 0); \ + __ret_262; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdotq_lane_s32(__p0_171, __p1_171, __p2_171, __p3_171) __extension__ ({ \ - int32x4_t __s0_171 = __p0_171; \ - int8x16_t __s1_171 = __p1_171; \ - int8x8_t __s2_171 = __p2_171; \ - int32x4_t __ret_171; \ -int8x8_t __reint_171 = __s2_171; \ -int32x4_t __reint1_171 = splatq_lane_s32(*(int32x2_t *) &__reint_171, __p3_171); \ - __ret_171 = vdotq_s32(__s0_171, __s1_171, *(int8x16_t *) &__reint1_171); \ - __ret_171; \ +#define vdotq_lane_s32(__p0_263, __p1_263, __p2_263, __p3_263) __extension__ ({ \ + int32x4_t __s0_263 = __p0_263; \ + int8x16_t __s1_263 = __p1_263; \ + int8x8_t __s2_263 = __p2_263; \ + int32x4_t __ret_263; \ +int8x8_t __reint_263 = __s2_263; \ +int32x4_t __reint1_263 = splatq_lane_s32(*(int32x2_t *) &__reint_263, __p3_263); \ + __ret_263 = vdotq_s32(__s0_263, __s1_263, *(int8x16_t *) &__reint1_263); \ + __ret_263; \ }) #else -#define vdotq_lane_s32(__p0_172, __p1_172, __p2_172, __p3_172) __extension__ ({ \ - int32x4_t __s0_172 = __p0_172; \ - int8x16_t __s1_172 = __p1_172; \ - int8x8_t __s2_172 = __p2_172; \ - int32x4_t __rev0_172; __rev0_172 = __builtin_shufflevector(__s0_172, __s0_172, 3, 2, 1, 0); \ - int8x16_t __rev1_172; __rev1_172 = __builtin_shufflevector(__s1_172, __s1_172, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __rev2_172; __rev2_172 = __builtin_shufflevector(__s2_172, __s2_172, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x4_t __ret_172; \ -int8x8_t __reint_172 = __rev2_172; \ -int32x4_t __reint1_172 = __noswap_splatq_lane_s32(*(int32x2_t *) &__reint_172, __p3_172); \ - __ret_172 = __noswap_vdotq_s32(__rev0_172, __rev1_172, *(int8x16_t *) &__reint1_172); \ - __ret_172 = __builtin_shufflevector(__ret_172, __ret_172, 3, 2, 1, 0); \ - __ret_172; \ +#define vdotq_lane_s32(__p0_264, __p1_264, __p2_264, __p3_264) __extension__ ({ \ + int32x4_t __s0_264 = __p0_264; \ + int8x16_t __s1_264 = __p1_264; \ + int8x8_t __s2_264 = __p2_264; \ + int32x4_t __rev0_264; __rev0_264 = 
__builtin_shufflevector(__s0_264, __s0_264, 3, 2, 1, 0); \ + int8x16_t __rev1_264; __rev1_264 = __builtin_shufflevector(__s1_264, __s1_264, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev2_264; __rev2_264 = __builtin_shufflevector(__s2_264, __s2_264, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x4_t __ret_264; \ +int8x8_t __reint_264 = __rev2_264; \ +int32x4_t __reint1_264 = __noswap_splatq_lane_s32(*(int32x2_t *) &__reint_264, __p3_264); \ + __ret_264 = __noswap_vdotq_s32(__rev0_264, __rev1_264, *(int8x16_t *) &__reint1_264); \ + __ret_264 = __builtin_shufflevector(__ret_264, __ret_264, 3, 2, 1, 0); \ + __ret_264; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdot_lane_u32(__p0_173, __p1_173, __p2_173, __p3_173) __extension__ ({ \ - uint32x2_t __s0_173 = __p0_173; \ - uint8x8_t __s1_173 = __p1_173; \ - uint8x8_t __s2_173 = __p2_173; \ - uint32x2_t __ret_173; \ -uint8x8_t __reint_173 = __s2_173; \ -uint32x2_t __reint1_173 = splat_lane_u32(*(uint32x2_t *) &__reint_173, __p3_173); \ - __ret_173 = vdot_u32(__s0_173, __s1_173, *(uint8x8_t *) &__reint1_173); \ - __ret_173; \ +#define vdot_lane_u32(__p0_265, __p1_265, __p2_265, __p3_265) __extension__ ({ \ + uint32x2_t __s0_265 = __p0_265; \ + uint8x8_t __s1_265 = __p1_265; \ + uint8x8_t __s2_265 = __p2_265; \ + uint32x2_t __ret_265; \ +uint8x8_t __reint_265 = __s2_265; \ +uint32x2_t __reint1_265 = splat_lane_u32(*(uint32x2_t *) &__reint_265, __p3_265); \ + __ret_265 = vdot_u32(__s0_265, __s1_265, *(uint8x8_t *) &__reint1_265); \ + __ret_265; \ }) #else -#define vdot_lane_u32(__p0_174, __p1_174, __p2_174, __p3_174) __extension__ ({ \ - uint32x2_t __s0_174 = __p0_174; \ - uint8x8_t __s1_174 = __p1_174; \ - uint8x8_t __s2_174 = __p2_174; \ - uint32x2_t __rev0_174; __rev0_174 = __builtin_shufflevector(__s0_174, __s0_174, 1, 0); \ - uint8x8_t __rev1_174; __rev1_174 = __builtin_shufflevector(__s1_174, __s1_174, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev2_174; __rev2_174 = __builtin_shufflevector(__s2_174, __s2_174, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint32x2_t __ret_174; \ -uint8x8_t __reint_174 = __rev2_174; \ -uint32x2_t __reint1_174 = __noswap_splat_lane_u32(*(uint32x2_t *) &__reint_174, __p3_174); \ - __ret_174 = __noswap_vdot_u32(__rev0_174, __rev1_174, *(uint8x8_t *) &__reint1_174); \ - __ret_174 = __builtin_shufflevector(__ret_174, __ret_174, 1, 0); \ - __ret_174; \ +#define vdot_lane_u32(__p0_266, __p1_266, __p2_266, __p3_266) __extension__ ({ \ + uint32x2_t __s0_266 = __p0_266; \ + uint8x8_t __s1_266 = __p1_266; \ + uint8x8_t __s2_266 = __p2_266; \ + uint32x2_t __rev0_266; __rev0_266 = __builtin_shufflevector(__s0_266, __s0_266, 1, 0); \ + uint8x8_t __rev1_266; __rev1_266 = __builtin_shufflevector(__s1_266, __s1_266, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev2_266; __rev2_266 = __builtin_shufflevector(__s2_266, __s2_266, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint32x2_t __ret_266; \ +uint8x8_t __reint_266 = __rev2_266; \ +uint32x2_t __reint1_266 = __noswap_splat_lane_u32(*(uint32x2_t *) &__reint_266, __p3_266); \ + __ret_266 = __noswap_vdot_u32(__rev0_266, __rev1_266, *(uint8x8_t *) &__reint1_266); \ + __ret_266 = __builtin_shufflevector(__ret_266, __ret_266, 1, 0); \ + __ret_266; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdot_lane_s32(__p0_175, __p1_175, __p2_175, __p3_175) __extension__ ({ \ - int32x2_t __s0_175 = __p0_175; \ - int8x8_t __s1_175 = __p1_175; \ - int8x8_t __s2_175 = __p2_175; \ - int32x2_t __ret_175; \ -int8x8_t __reint_175 = __s2_175; \ -int32x2_t __reint1_175 = splat_lane_s32(*(int32x2_t *) &__reint_175, __p3_175); 
\ - __ret_175 = vdot_s32(__s0_175, __s1_175, *(int8x8_t *) &__reint1_175); \ - __ret_175; \ +#define vdot_lane_s32(__p0_267, __p1_267, __p2_267, __p3_267) __extension__ ({ \ + int32x2_t __s0_267 = __p0_267; \ + int8x8_t __s1_267 = __p1_267; \ + int8x8_t __s2_267 = __p2_267; \ + int32x2_t __ret_267; \ +int8x8_t __reint_267 = __s2_267; \ +int32x2_t __reint1_267 = splat_lane_s32(*(int32x2_t *) &__reint_267, __p3_267); \ + __ret_267 = vdot_s32(__s0_267, __s1_267, *(int8x8_t *) &__reint1_267); \ + __ret_267; \ }) #else -#define vdot_lane_s32(__p0_176, __p1_176, __p2_176, __p3_176) __extension__ ({ \ - int32x2_t __s0_176 = __p0_176; \ - int8x8_t __s1_176 = __p1_176; \ - int8x8_t __s2_176 = __p2_176; \ - int32x2_t __rev0_176; __rev0_176 = __builtin_shufflevector(__s0_176, __s0_176, 1, 0); \ - int8x8_t __rev1_176; __rev1_176 = __builtin_shufflevector(__s1_176, __s1_176, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __rev2_176; __rev2_176 = __builtin_shufflevector(__s2_176, __s2_176, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x2_t __ret_176; \ -int8x8_t __reint_176 = __rev2_176; \ -int32x2_t __reint1_176 = __noswap_splat_lane_s32(*(int32x2_t *) &__reint_176, __p3_176); \ - __ret_176 = __noswap_vdot_s32(__rev0_176, __rev1_176, *(int8x8_t *) &__reint1_176); \ - __ret_176 = __builtin_shufflevector(__ret_176, __ret_176, 1, 0); \ - __ret_176; \ +#define vdot_lane_s32(__p0_268, __p1_268, __p2_268, __p3_268) __extension__ ({ \ + int32x2_t __s0_268 = __p0_268; \ + int8x8_t __s1_268 = __p1_268; \ + int8x8_t __s2_268 = __p2_268; \ + int32x2_t __rev0_268; __rev0_268 = __builtin_shufflevector(__s0_268, __s0_268, 1, 0); \ + int8x8_t __rev1_268; __rev1_268 = __builtin_shufflevector(__s1_268, __s1_268, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev2_268; __rev2_268 = __builtin_shufflevector(__s2_268, __s2_268, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x2_t __ret_268; \ +int8x8_t __reint_268 = __rev2_268; \ +int32x2_t __reint1_268 = __noswap_splat_lane_s32(*(int32x2_t *) &__reint_268, __p3_268); \ + __ret_268 = __noswap_vdot_s32(__rev0_268, __rev1_268, *(int8x8_t *) &__reint1_268); \ + __ret_268 = __builtin_shufflevector(__ret_268, __ret_268, 1, 0); \ + __ret_268; \ }) #endif #endif #if defined(__ARM_FEATURE_DOTPROD) && defined(__aarch64__) #ifdef __LITTLE_ENDIAN__ -#define vdotq_laneq_u32(__p0_177, __p1_177, __p2_177, __p3_177) __extension__ ({ \ - uint32x4_t __s0_177 = __p0_177; \ - uint8x16_t __s1_177 = __p1_177; \ - uint8x16_t __s2_177 = __p2_177; \ - uint32x4_t __ret_177; \ -uint8x16_t __reint_177 = __s2_177; \ -uint32x4_t __reint1_177 = splatq_laneq_u32(*(uint32x4_t *) &__reint_177, __p3_177); \ - __ret_177 = vdotq_u32(__s0_177, __s1_177, *(uint8x16_t *) &__reint1_177); \ - __ret_177; \ +#define vdotq_laneq_u32(__p0_269, __p1_269, __p2_269, __p3_269) __extension__ ({ \ + uint32x4_t __s0_269 = __p0_269; \ + uint8x16_t __s1_269 = __p1_269; \ + uint8x16_t __s2_269 = __p2_269; \ + uint32x4_t __ret_269; \ +uint8x16_t __reint_269 = __s2_269; \ +uint32x4_t __reint1_269 = splatq_laneq_u32(*(uint32x4_t *) &__reint_269, __p3_269); \ + __ret_269 = vdotq_u32(__s0_269, __s1_269, *(uint8x16_t *) &__reint1_269); \ + __ret_269; \ }) #else -#define vdotq_laneq_u32(__p0_178, __p1_178, __p2_178, __p3_178) __extension__ ({ \ - uint32x4_t __s0_178 = __p0_178; \ - uint8x16_t __s1_178 = __p1_178; \ - uint8x16_t __s2_178 = __p2_178; \ - uint32x4_t __rev0_178; __rev0_178 = __builtin_shufflevector(__s0_178, __s0_178, 3, 2, 1, 0); \ - uint8x16_t __rev1_178; __rev1_178 = __builtin_shufflevector(__s1_178, __s1_178, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 
1, 0); \ - uint8x16_t __rev2_178; __rev2_178 = __builtin_shufflevector(__s2_178, __s2_178, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint32x4_t __ret_178; \ -uint8x16_t __reint_178 = __rev2_178; \ -uint32x4_t __reint1_178 = __noswap_splatq_laneq_u32(*(uint32x4_t *) &__reint_178, __p3_178); \ - __ret_178 = __noswap_vdotq_u32(__rev0_178, __rev1_178, *(uint8x16_t *) &__reint1_178); \ - __ret_178 = __builtin_shufflevector(__ret_178, __ret_178, 3, 2, 1, 0); \ - __ret_178; \ +#define vdotq_laneq_u32(__p0_270, __p1_270, __p2_270, __p3_270) __extension__ ({ \ + uint32x4_t __s0_270 = __p0_270; \ + uint8x16_t __s1_270 = __p1_270; \ + uint8x16_t __s2_270 = __p2_270; \ + uint32x4_t __rev0_270; __rev0_270 = __builtin_shufflevector(__s0_270, __s0_270, 3, 2, 1, 0); \ + uint8x16_t __rev1_270; __rev1_270 = __builtin_shufflevector(__s1_270, __s1_270, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev2_270; __rev2_270 = __builtin_shufflevector(__s2_270, __s2_270, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint32x4_t __ret_270; \ +uint8x16_t __reint_270 = __rev2_270; \ +uint32x4_t __reint1_270 = __noswap_splatq_laneq_u32(*(uint32x4_t *) &__reint_270, __p3_270); \ + __ret_270 = __noswap_vdotq_u32(__rev0_270, __rev1_270, *(uint8x16_t *) &__reint1_270); \ + __ret_270 = __builtin_shufflevector(__ret_270, __ret_270, 3, 2, 1, 0); \ + __ret_270; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdotq_laneq_s32(__p0_179, __p1_179, __p2_179, __p3_179) __extension__ ({ \ - int32x4_t __s0_179 = __p0_179; \ - int8x16_t __s1_179 = __p1_179; \ - int8x16_t __s2_179 = __p2_179; \ - int32x4_t __ret_179; \ -int8x16_t __reint_179 = __s2_179; \ -int32x4_t __reint1_179 = splatq_laneq_s32(*(int32x4_t *) &__reint_179, __p3_179); \ - __ret_179 = vdotq_s32(__s0_179, __s1_179, *(int8x16_t *) &__reint1_179); \ - __ret_179; \ +#define vdotq_laneq_s32(__p0_271, __p1_271, __p2_271, __p3_271) __extension__ ({ \ + int32x4_t __s0_271 = __p0_271; \ + int8x16_t __s1_271 = __p1_271; \ + int8x16_t __s2_271 = __p2_271; \ + int32x4_t __ret_271; \ +int8x16_t __reint_271 = __s2_271; \ +int32x4_t __reint1_271 = splatq_laneq_s32(*(int32x4_t *) &__reint_271, __p3_271); \ + __ret_271 = vdotq_s32(__s0_271, __s1_271, *(int8x16_t *) &__reint1_271); \ + __ret_271; \ }) #else -#define vdotq_laneq_s32(__p0_180, __p1_180, __p2_180, __p3_180) __extension__ ({ \ - int32x4_t __s0_180 = __p0_180; \ - int8x16_t __s1_180 = __p1_180; \ - int8x16_t __s2_180 = __p2_180; \ - int32x4_t __rev0_180; __rev0_180 = __builtin_shufflevector(__s0_180, __s0_180, 3, 2, 1, 0); \ - int8x16_t __rev1_180; __rev1_180 = __builtin_shufflevector(__s1_180, __s1_180, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __rev2_180; __rev2_180 = __builtin_shufflevector(__s2_180, __s2_180, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x4_t __ret_180; \ -int8x16_t __reint_180 = __rev2_180; \ -int32x4_t __reint1_180 = __noswap_splatq_laneq_s32(*(int32x4_t *) &__reint_180, __p3_180); \ - __ret_180 = __noswap_vdotq_s32(__rev0_180, __rev1_180, *(int8x16_t *) &__reint1_180); \ - __ret_180 = __builtin_shufflevector(__ret_180, __ret_180, 3, 2, 1, 0); \ - __ret_180; \ +#define vdotq_laneq_s32(__p0_272, __p1_272, __p2_272, __p3_272) __extension__ ({ \ + int32x4_t __s0_272 = __p0_272; \ + int8x16_t __s1_272 = __p1_272; \ + int8x16_t __s2_272 = __p2_272; \ + int32x4_t __rev0_272; __rev0_272 = __builtin_shufflevector(__s0_272, __s0_272, 3, 2, 1, 0); \ + int8x16_t __rev1_272; __rev1_272 = 
__builtin_shufflevector(__s1_272, __s1_272, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev2_272; __rev2_272 = __builtin_shufflevector(__s2_272, __s2_272, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x4_t __ret_272; \ +int8x16_t __reint_272 = __rev2_272; \ +int32x4_t __reint1_272 = __noswap_splatq_laneq_s32(*(int32x4_t *) &__reint_272, __p3_272); \ + __ret_272 = __noswap_vdotq_s32(__rev0_272, __rev1_272, *(int8x16_t *) &__reint1_272); \ + __ret_272 = __builtin_shufflevector(__ret_272, __ret_272, 3, 2, 1, 0); \ + __ret_272; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdot_laneq_u32(__p0_181, __p1_181, __p2_181, __p3_181) __extension__ ({ \ - uint32x2_t __s0_181 = __p0_181; \ - uint8x8_t __s1_181 = __p1_181; \ - uint8x16_t __s2_181 = __p2_181; \ - uint32x2_t __ret_181; \ -uint8x16_t __reint_181 = __s2_181; \ -uint32x2_t __reint1_181 = splat_laneq_u32(*(uint32x4_t *) &__reint_181, __p3_181); \ - __ret_181 = vdot_u32(__s0_181, __s1_181, *(uint8x8_t *) &__reint1_181); \ - __ret_181; \ +#define vdot_laneq_u32(__p0_273, __p1_273, __p2_273, __p3_273) __extension__ ({ \ + uint32x2_t __s0_273 = __p0_273; \ + uint8x8_t __s1_273 = __p1_273; \ + uint8x16_t __s2_273 = __p2_273; \ + uint32x2_t __ret_273; \ +uint8x16_t __reint_273 = __s2_273; \ +uint32x2_t __reint1_273 = splat_laneq_u32(*(uint32x4_t *) &__reint_273, __p3_273); \ + __ret_273 = vdot_u32(__s0_273, __s1_273, *(uint8x8_t *) &__reint1_273); \ + __ret_273; \ }) #else -#define vdot_laneq_u32(__p0_182, __p1_182, __p2_182, __p3_182) __extension__ ({ \ - uint32x2_t __s0_182 = __p0_182; \ - uint8x8_t __s1_182 = __p1_182; \ - uint8x16_t __s2_182 = __p2_182; \ - uint32x2_t __rev0_182; __rev0_182 = __builtin_shufflevector(__s0_182, __s0_182, 1, 0); \ - uint8x8_t __rev1_182; __rev1_182 = __builtin_shufflevector(__s1_182, __s1_182, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev2_182; __rev2_182 = __builtin_shufflevector(__s2_182, __s2_182, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint32x2_t __ret_182; \ -uint8x16_t __reint_182 = __rev2_182; \ -uint32x2_t __reint1_182 = __noswap_splat_laneq_u32(*(uint32x4_t *) &__reint_182, __p3_182); \ - __ret_182 = __noswap_vdot_u32(__rev0_182, __rev1_182, *(uint8x8_t *) &__reint1_182); \ - __ret_182 = __builtin_shufflevector(__ret_182, __ret_182, 1, 0); \ - __ret_182; \ +#define vdot_laneq_u32(__p0_274, __p1_274, __p2_274, __p3_274) __extension__ ({ \ + uint32x2_t __s0_274 = __p0_274; \ + uint8x8_t __s1_274 = __p1_274; \ + uint8x16_t __s2_274 = __p2_274; \ + uint32x2_t __rev0_274; __rev0_274 = __builtin_shufflevector(__s0_274, __s0_274, 1, 0); \ + uint8x8_t __rev1_274; __rev1_274 = __builtin_shufflevector(__s1_274, __s1_274, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev2_274; __rev2_274 = __builtin_shufflevector(__s2_274, __s2_274, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint32x2_t __ret_274; \ +uint8x16_t __reint_274 = __rev2_274; \ +uint32x2_t __reint1_274 = __noswap_splat_laneq_u32(*(uint32x4_t *) &__reint_274, __p3_274); \ + __ret_274 = __noswap_vdot_u32(__rev0_274, __rev1_274, *(uint8x8_t *) &__reint1_274); \ + __ret_274 = __builtin_shufflevector(__ret_274, __ret_274, 1, 0); \ + __ret_274; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdot_laneq_s32(__p0_183, __p1_183, __p2_183, __p3_183) __extension__ ({ \ - int32x2_t __s0_183 = __p0_183; \ - int8x8_t __s1_183 = __p1_183; \ - int8x16_t __s2_183 = __p2_183; \ - int32x2_t __ret_183; \ -int8x16_t __reint_183 = __s2_183; \ -int32x2_t __reint1_183 = splat_laneq_s32(*(int32x4_t *) 
&__reint_183, __p3_183); \ - __ret_183 = vdot_s32(__s0_183, __s1_183, *(int8x8_t *) &__reint1_183); \ - __ret_183; \ +#define vdot_laneq_s32(__p0_275, __p1_275, __p2_275, __p3_275) __extension__ ({ \ + int32x2_t __s0_275 = __p0_275; \ + int8x8_t __s1_275 = __p1_275; \ + int8x16_t __s2_275 = __p2_275; \ + int32x2_t __ret_275; \ +int8x16_t __reint_275 = __s2_275; \ +int32x2_t __reint1_275 = splat_laneq_s32(*(int32x4_t *) &__reint_275, __p3_275); \ + __ret_275 = vdot_s32(__s0_275, __s1_275, *(int8x8_t *) &__reint1_275); \ + __ret_275; \ }) #else -#define vdot_laneq_s32(__p0_184, __p1_184, __p2_184, __p3_184) __extension__ ({ \ - int32x2_t __s0_184 = __p0_184; \ - int8x8_t __s1_184 = __p1_184; \ - int8x16_t __s2_184 = __p2_184; \ - int32x2_t __rev0_184; __rev0_184 = __builtin_shufflevector(__s0_184, __s0_184, 1, 0); \ - int8x8_t __rev1_184; __rev1_184 = __builtin_shufflevector(__s1_184, __s1_184, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __rev2_184; __rev2_184 = __builtin_shufflevector(__s2_184, __s2_184, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x2_t __ret_184; \ -int8x16_t __reint_184 = __rev2_184; \ -int32x2_t __reint1_184 = __noswap_splat_laneq_s32(*(int32x4_t *) &__reint_184, __p3_184); \ - __ret_184 = __noswap_vdot_s32(__rev0_184, __rev1_184, *(int8x8_t *) &__reint1_184); \ - __ret_184 = __builtin_shufflevector(__ret_184, __ret_184, 1, 0); \ - __ret_184; \ +#define vdot_laneq_s32(__p0_276, __p1_276, __p2_276, __p3_276) __extension__ ({ \ + int32x2_t __s0_276 = __p0_276; \ + int8x8_t __s1_276 = __p1_276; \ + int8x16_t __s2_276 = __p2_276; \ + int32x2_t __rev0_276; __rev0_276 = __builtin_shufflevector(__s0_276, __s0_276, 1, 0); \ + int8x8_t __rev1_276; __rev1_276 = __builtin_shufflevector(__s1_276, __s1_276, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev2_276; __rev2_276 = __builtin_shufflevector(__s2_276, __s2_276, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x2_t __ret_276; \ +int8x16_t __reint_276 = __rev2_276; \ +int32x2_t __reint1_276 = __noswap_splat_laneq_s32(*(int32x4_t *) &__reint_276, __p3_276); \ + __ret_276 = __noswap_vdot_s32(__rev0_276, __rev1_276, *(int8x8_t *) &__reint1_276); \ + __ret_276 = __builtin_shufflevector(__ret_276, __ret_276, 1, 0); \ + __ret_276; \ }) #endif @@ -42518,44 +44254,44 @@ __ai float16x4_t vmul_f16(float16x4_t __p0, float16x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vmulq_lane_f16(__p0_185, __p1_185, __p2_185) __extension__ ({ \ - float16x8_t __s0_185 = __p0_185; \ - float16x4_t __s1_185 = __p1_185; \ - float16x8_t __ret_185; \ - __ret_185 = __s0_185 * splatq_lane_f16(__s1_185, __p2_185); \ - __ret_185; \ +#define vmulq_lane_f16(__p0_277, __p1_277, __p2_277) __extension__ ({ \ + float16x8_t __s0_277 = __p0_277; \ + float16x4_t __s1_277 = __p1_277; \ + float16x8_t __ret_277; \ + __ret_277 = __s0_277 * splatq_lane_f16(__s1_277, __p2_277); \ + __ret_277; \ }) #else -#define vmulq_lane_f16(__p0_186, __p1_186, __p2_186) __extension__ ({ \ - float16x8_t __s0_186 = __p0_186; \ - float16x4_t __s1_186 = __p1_186; \ - float16x8_t __rev0_186; __rev0_186 = __builtin_shufflevector(__s0_186, __s0_186, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev1_186; __rev1_186 = __builtin_shufflevector(__s1_186, __s1_186, 3, 2, 1, 0); \ - float16x8_t __ret_186; \ - __ret_186 = __rev0_186 * __noswap_splatq_lane_f16(__rev1_186, __p2_186); \ - __ret_186 = __builtin_shufflevector(__ret_186, __ret_186, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_186; \ +#define vmulq_lane_f16(__p0_278, __p1_278, __p2_278) __extension__ ({ \ + float16x8_t __s0_278 
= __p0_278; \ + float16x4_t __s1_278 = __p1_278; \ + float16x8_t __rev0_278; __rev0_278 = __builtin_shufflevector(__s0_278, __s0_278, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev1_278; __rev1_278 = __builtin_shufflevector(__s1_278, __s1_278, 3, 2, 1, 0); \ + float16x8_t __ret_278; \ + __ret_278 = __rev0_278 * __noswap_splatq_lane_f16(__rev1_278, __p2_278); \ + __ret_278 = __builtin_shufflevector(__ret_278, __ret_278, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_278; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmul_lane_f16(__p0_187, __p1_187, __p2_187) __extension__ ({ \ - float16x4_t __s0_187 = __p0_187; \ - float16x4_t __s1_187 = __p1_187; \ - float16x4_t __ret_187; \ - __ret_187 = __s0_187 * splat_lane_f16(__s1_187, __p2_187); \ - __ret_187; \ +#define vmul_lane_f16(__p0_279, __p1_279, __p2_279) __extension__ ({ \ + float16x4_t __s0_279 = __p0_279; \ + float16x4_t __s1_279 = __p1_279; \ + float16x4_t __ret_279; \ + __ret_279 = __s0_279 * splat_lane_f16(__s1_279, __p2_279); \ + __ret_279; \ }) #else -#define vmul_lane_f16(__p0_188, __p1_188, __p2_188) __extension__ ({ \ - float16x4_t __s0_188 = __p0_188; \ - float16x4_t __s1_188 = __p1_188; \ - float16x4_t __rev0_188; __rev0_188 = __builtin_shufflevector(__s0_188, __s0_188, 3, 2, 1, 0); \ - float16x4_t __rev1_188; __rev1_188 = __builtin_shufflevector(__s1_188, __s1_188, 3, 2, 1, 0); \ - float16x4_t __ret_188; \ - __ret_188 = __rev0_188 * __noswap_splat_lane_f16(__rev1_188, __p2_188); \ - __ret_188 = __builtin_shufflevector(__ret_188, __ret_188, 3, 2, 1, 0); \ - __ret_188; \ +#define vmul_lane_f16(__p0_280, __p1_280, __p2_280) __extension__ ({ \ + float16x4_t __s0_280 = __p0_280; \ + float16x4_t __s1_280 = __p1_280; \ + float16x4_t __rev0_280; __rev0_280 = __builtin_shufflevector(__s0_280, __s0_280, 3, 2, 1, 0); \ + float16x4_t __rev1_280; __rev1_280 = __builtin_shufflevector(__s1_280, __s1_280, 3, 2, 1, 0); \ + float16x4_t __ret_280; \ + __ret_280 = __rev0_280 * __noswap_splat_lane_f16(__rev1_280, __p2_280); \ + __ret_280 = __builtin_shufflevector(__ret_280, __ret_280, 3, 2, 1, 0); \ + __ret_280; \ }) #endif @@ -43297,140 +45033,140 @@ __ai float16x4_t vdiv_f16(float16x4_t __p0, float16x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vfmsh_lane_f16(__p0_189, __p1_189, __p2_189, __p3_189) __extension__ ({ \ - float16_t __s0_189 = __p0_189; \ - float16_t __s1_189 = __p1_189; \ - float16x4_t __s2_189 = __p2_189; \ - float16_t __ret_189; \ - __ret_189 = vfmah_lane_f16(__s0_189, -__s1_189, __s2_189, __p3_189); \ - __ret_189; \ +#define vfmsh_lane_f16(__p0_281, __p1_281, __p2_281, __p3_281) __extension__ ({ \ + float16_t __s0_281 = __p0_281; \ + float16_t __s1_281 = __p1_281; \ + float16x4_t __s2_281 = __p2_281; \ + float16_t __ret_281; \ + __ret_281 = vfmah_lane_f16(__s0_281, -__s1_281, __s2_281, __p3_281); \ + __ret_281; \ }) #else -#define vfmsh_lane_f16(__p0_190, __p1_190, __p2_190, __p3_190) __extension__ ({ \ - float16_t __s0_190 = __p0_190; \ - float16_t __s1_190 = __p1_190; \ - float16x4_t __s2_190 = __p2_190; \ - float16x4_t __rev2_190; __rev2_190 = __builtin_shufflevector(__s2_190, __s2_190, 3, 2, 1, 0); \ - float16_t __ret_190; \ - __ret_190 = __noswap_vfmah_lane_f16(__s0_190, -__s1_190, __rev2_190, __p3_190); \ - __ret_190; \ +#define vfmsh_lane_f16(__p0_282, __p1_282, __p2_282, __p3_282) __extension__ ({ \ + float16_t __s0_282 = __p0_282; \ + float16_t __s1_282 = __p1_282; \ + float16x4_t __s2_282 = __p2_282; \ + float16x4_t __rev2_282; __rev2_282 = __builtin_shufflevector(__s2_282, __s2_282, 3, 2, 1, 0); \ + float16_t __ret_282; \ 
+ __ret_282 = __noswap_vfmah_lane_f16(__s0_282, -__s1_282, __rev2_282, __p3_282); \ + __ret_282; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmsq_lane_f16(__p0_191, __p1_191, __p2_191, __p3_191) __extension__ ({ \ - float16x8_t __s0_191 = __p0_191; \ - float16x8_t __s1_191 = __p1_191; \ - float16x4_t __s2_191 = __p2_191; \ - float16x8_t __ret_191; \ - __ret_191 = vfmaq_lane_f16(__s0_191, -__s1_191, __s2_191, __p3_191); \ - __ret_191; \ +#define vfmsq_lane_f16(__p0_283, __p1_283, __p2_283, __p3_283) __extension__ ({ \ + float16x8_t __s0_283 = __p0_283; \ + float16x8_t __s1_283 = __p1_283; \ + float16x4_t __s2_283 = __p2_283; \ + float16x8_t __ret_283; \ + __ret_283 = vfmaq_lane_f16(__s0_283, -__s1_283, __s2_283, __p3_283); \ + __ret_283; \ }) #else -#define vfmsq_lane_f16(__p0_192, __p1_192, __p2_192, __p3_192) __extension__ ({ \ - float16x8_t __s0_192 = __p0_192; \ - float16x8_t __s1_192 = __p1_192; \ - float16x4_t __s2_192 = __p2_192; \ - float16x8_t __rev0_192; __rev0_192 = __builtin_shufflevector(__s0_192, __s0_192, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev1_192; __rev1_192 = __builtin_shufflevector(__s1_192, __s1_192, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev2_192; __rev2_192 = __builtin_shufflevector(__s2_192, __s2_192, 3, 2, 1, 0); \ - float16x8_t __ret_192; \ - __ret_192 = __noswap_vfmaq_lane_f16(__rev0_192, -__rev1_192, __rev2_192, __p3_192); \ - __ret_192 = __builtin_shufflevector(__ret_192, __ret_192, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_192; \ +#define vfmsq_lane_f16(__p0_284, __p1_284, __p2_284, __p3_284) __extension__ ({ \ + float16x8_t __s0_284 = __p0_284; \ + float16x8_t __s1_284 = __p1_284; \ + float16x4_t __s2_284 = __p2_284; \ + float16x8_t __rev0_284; __rev0_284 = __builtin_shufflevector(__s0_284, __s0_284, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev1_284; __rev1_284 = __builtin_shufflevector(__s1_284, __s1_284, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev2_284; __rev2_284 = __builtin_shufflevector(__s2_284, __s2_284, 3, 2, 1, 0); \ + float16x8_t __ret_284; \ + __ret_284 = __noswap_vfmaq_lane_f16(__rev0_284, -__rev1_284, __rev2_284, __p3_284); \ + __ret_284 = __builtin_shufflevector(__ret_284, __ret_284, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_284; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfms_lane_f16(__p0_193, __p1_193, __p2_193, __p3_193) __extension__ ({ \ - float16x4_t __s0_193 = __p0_193; \ - float16x4_t __s1_193 = __p1_193; \ - float16x4_t __s2_193 = __p2_193; \ - float16x4_t __ret_193; \ - __ret_193 = vfma_lane_f16(__s0_193, -__s1_193, __s2_193, __p3_193); \ - __ret_193; \ +#define vfms_lane_f16(__p0_285, __p1_285, __p2_285, __p3_285) __extension__ ({ \ + float16x4_t __s0_285 = __p0_285; \ + float16x4_t __s1_285 = __p1_285; \ + float16x4_t __s2_285 = __p2_285; \ + float16x4_t __ret_285; \ + __ret_285 = vfma_lane_f16(__s0_285, -__s1_285, __s2_285, __p3_285); \ + __ret_285; \ }) #else -#define vfms_lane_f16(__p0_194, __p1_194, __p2_194, __p3_194) __extension__ ({ \ - float16x4_t __s0_194 = __p0_194; \ - float16x4_t __s1_194 = __p1_194; \ - float16x4_t __s2_194 = __p2_194; \ - float16x4_t __rev0_194; __rev0_194 = __builtin_shufflevector(__s0_194, __s0_194, 3, 2, 1, 0); \ - float16x4_t __rev1_194; __rev1_194 = __builtin_shufflevector(__s1_194, __s1_194, 3, 2, 1, 0); \ - float16x4_t __rev2_194; __rev2_194 = __builtin_shufflevector(__s2_194, __s2_194, 3, 2, 1, 0); \ - float16x4_t __ret_194; \ - __ret_194 = __noswap_vfma_lane_f16(__rev0_194, -__rev1_194, __rev2_194, __p3_194); \ - __ret_194 = __builtin_shufflevector(__ret_194, __ret_194, 3, 2, 1, 0); \ - 
__ret_194; \ +#define vfms_lane_f16(__p0_286, __p1_286, __p2_286, __p3_286) __extension__ ({ \ + float16x4_t __s0_286 = __p0_286; \ + float16x4_t __s1_286 = __p1_286; \ + float16x4_t __s2_286 = __p2_286; \ + float16x4_t __rev0_286; __rev0_286 = __builtin_shufflevector(__s0_286, __s0_286, 3, 2, 1, 0); \ + float16x4_t __rev1_286; __rev1_286 = __builtin_shufflevector(__s1_286, __s1_286, 3, 2, 1, 0); \ + float16x4_t __rev2_286; __rev2_286 = __builtin_shufflevector(__s2_286, __s2_286, 3, 2, 1, 0); \ + float16x4_t __ret_286; \ + __ret_286 = __noswap_vfma_lane_f16(__rev0_286, -__rev1_286, __rev2_286, __p3_286); \ + __ret_286 = __builtin_shufflevector(__ret_286, __ret_286, 3, 2, 1, 0); \ + __ret_286; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmsh_laneq_f16(__p0_195, __p1_195, __p2_195, __p3_195) __extension__ ({ \ - float16_t __s0_195 = __p0_195; \ - float16_t __s1_195 = __p1_195; \ - float16x8_t __s2_195 = __p2_195; \ - float16_t __ret_195; \ - __ret_195 = vfmah_laneq_f16(__s0_195, -__s1_195, __s2_195, __p3_195); \ - __ret_195; \ +#define vfmsh_laneq_f16(__p0_287, __p1_287, __p2_287, __p3_287) __extension__ ({ \ + float16_t __s0_287 = __p0_287; \ + float16_t __s1_287 = __p1_287; \ + float16x8_t __s2_287 = __p2_287; \ + float16_t __ret_287; \ + __ret_287 = vfmah_laneq_f16(__s0_287, -__s1_287, __s2_287, __p3_287); \ + __ret_287; \ }) #else -#define vfmsh_laneq_f16(__p0_196, __p1_196, __p2_196, __p3_196) __extension__ ({ \ - float16_t __s0_196 = __p0_196; \ - float16_t __s1_196 = __p1_196; \ - float16x8_t __s2_196 = __p2_196; \ - float16x8_t __rev2_196; __rev2_196 = __builtin_shufflevector(__s2_196, __s2_196, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16_t __ret_196; \ - __ret_196 = __noswap_vfmah_laneq_f16(__s0_196, -__s1_196, __rev2_196, __p3_196); \ - __ret_196; \ +#define vfmsh_laneq_f16(__p0_288, __p1_288, __p2_288, __p3_288) __extension__ ({ \ + float16_t __s0_288 = __p0_288; \ + float16_t __s1_288 = __p1_288; \ + float16x8_t __s2_288 = __p2_288; \ + float16x8_t __rev2_288; __rev2_288 = __builtin_shufflevector(__s2_288, __s2_288, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16_t __ret_288; \ + __ret_288 = __noswap_vfmah_laneq_f16(__s0_288, -__s1_288, __rev2_288, __p3_288); \ + __ret_288; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmsq_laneq_f16(__p0_197, __p1_197, __p2_197, __p3_197) __extension__ ({ \ - float16x8_t __s0_197 = __p0_197; \ - float16x8_t __s1_197 = __p1_197; \ - float16x8_t __s2_197 = __p2_197; \ - float16x8_t __ret_197; \ - __ret_197 = vfmaq_laneq_f16(__s0_197, -__s1_197, __s2_197, __p3_197); \ - __ret_197; \ +#define vfmsq_laneq_f16(__p0_289, __p1_289, __p2_289, __p3_289) __extension__ ({ \ + float16x8_t __s0_289 = __p0_289; \ + float16x8_t __s1_289 = __p1_289; \ + float16x8_t __s2_289 = __p2_289; \ + float16x8_t __ret_289; \ + __ret_289 = vfmaq_laneq_f16(__s0_289, -__s1_289, __s2_289, __p3_289); \ + __ret_289; \ }) #else -#define vfmsq_laneq_f16(__p0_198, __p1_198, __p2_198, __p3_198) __extension__ ({ \ - float16x8_t __s0_198 = __p0_198; \ - float16x8_t __s1_198 = __p1_198; \ - float16x8_t __s2_198 = __p2_198; \ - float16x8_t __rev0_198; __rev0_198 = __builtin_shufflevector(__s0_198, __s0_198, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev1_198; __rev1_198 = __builtin_shufflevector(__s1_198, __s1_198, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev2_198; __rev2_198 = __builtin_shufflevector(__s2_198, __s2_198, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __ret_198; \ - __ret_198 = __noswap_vfmaq_laneq_f16(__rev0_198, -__rev1_198, __rev2_198, __p3_198); \ - __ret_198 = 
__builtin_shufflevector(__ret_198, __ret_198, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_198; \ +#define vfmsq_laneq_f16(__p0_290, __p1_290, __p2_290, __p3_290) __extension__ ({ \ + float16x8_t __s0_290 = __p0_290; \ + float16x8_t __s1_290 = __p1_290; \ + float16x8_t __s2_290 = __p2_290; \ + float16x8_t __rev0_290; __rev0_290 = __builtin_shufflevector(__s0_290, __s0_290, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev1_290; __rev1_290 = __builtin_shufflevector(__s1_290, __s1_290, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev2_290; __rev2_290 = __builtin_shufflevector(__s2_290, __s2_290, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __ret_290; \ + __ret_290 = __noswap_vfmaq_laneq_f16(__rev0_290, -__rev1_290, __rev2_290, __p3_290); \ + __ret_290 = __builtin_shufflevector(__ret_290, __ret_290, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_290; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfms_laneq_f16(__p0_199, __p1_199, __p2_199, __p3_199) __extension__ ({ \ - float16x4_t __s0_199 = __p0_199; \ - float16x4_t __s1_199 = __p1_199; \ - float16x8_t __s2_199 = __p2_199; \ - float16x4_t __ret_199; \ - __ret_199 = vfma_laneq_f16(__s0_199, -__s1_199, __s2_199, __p3_199); \ - __ret_199; \ +#define vfms_laneq_f16(__p0_291, __p1_291, __p2_291, __p3_291) __extension__ ({ \ + float16x4_t __s0_291 = __p0_291; \ + float16x4_t __s1_291 = __p1_291; \ + float16x8_t __s2_291 = __p2_291; \ + float16x4_t __ret_291; \ + __ret_291 = vfma_laneq_f16(__s0_291, -__s1_291, __s2_291, __p3_291); \ + __ret_291; \ }) #else -#define vfms_laneq_f16(__p0_200, __p1_200, __p2_200, __p3_200) __extension__ ({ \ - float16x4_t __s0_200 = __p0_200; \ - float16x4_t __s1_200 = __p1_200; \ - float16x8_t __s2_200 = __p2_200; \ - float16x4_t __rev0_200; __rev0_200 = __builtin_shufflevector(__s0_200, __s0_200, 3, 2, 1, 0); \ - float16x4_t __rev1_200; __rev1_200 = __builtin_shufflevector(__s1_200, __s1_200, 3, 2, 1, 0); \ - float16x8_t __rev2_200; __rev2_200 = __builtin_shufflevector(__s2_200, __s2_200, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __ret_200; \ - __ret_200 = __noswap_vfma_laneq_f16(__rev0_200, -__rev1_200, __rev2_200, __p3_200); \ - __ret_200 = __builtin_shufflevector(__ret_200, __ret_200, 3, 2, 1, 0); \ - __ret_200; \ +#define vfms_laneq_f16(__p0_292, __p1_292, __p2_292, __p3_292) __extension__ ({ \ + float16x4_t __s0_292 = __p0_292; \ + float16x4_t __s1_292 = __p1_292; \ + float16x8_t __s2_292 = __p2_292; \ + float16x4_t __rev0_292; __rev0_292 = __builtin_shufflevector(__s0_292, __s0_292, 3, 2, 1, 0); \ + float16x4_t __rev1_292; __rev1_292 = __builtin_shufflevector(__s1_292, __s1_292, 3, 2, 1, 0); \ + float16x8_t __rev2_292; __rev2_292 = __builtin_shufflevector(__s2_292, __s2_292, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __ret_292; \ + __ret_292 = __noswap_vfma_laneq_f16(__rev0_292, -__rev1_292, __rev2_292, __p3_292); \ + __ret_292 = __builtin_shufflevector(__ret_292, __ret_292, 3, 2, 1, 0); \ + __ret_292; \ }) #endif @@ -43617,44 +45353,44 @@ __ai float16x4_t vdiv_f16(float16x4_t __p0, float16x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vmulq_laneq_f16(__p0_201, __p1_201, __p2_201) __extension__ ({ \ - float16x8_t __s0_201 = __p0_201; \ - float16x8_t __s1_201 = __p1_201; \ - float16x8_t __ret_201; \ - __ret_201 = __s0_201 * splatq_laneq_f16(__s1_201, __p2_201); \ - __ret_201; \ +#define vmulq_laneq_f16(__p0_293, __p1_293, __p2_293) __extension__ ({ \ + float16x8_t __s0_293 = __p0_293; \ + float16x8_t __s1_293 = __p1_293; \ + float16x8_t __ret_293; \ + __ret_293 = __s0_293 * splatq_laneq_f16(__s1_293, __p2_293); \ + __ret_293; \ }) #else 
-#define vmulq_laneq_f16(__p0_202, __p1_202, __p2_202) __extension__ ({ \ - float16x8_t __s0_202 = __p0_202; \ - float16x8_t __s1_202 = __p1_202; \ - float16x8_t __rev0_202; __rev0_202 = __builtin_shufflevector(__s0_202, __s0_202, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev1_202; __rev1_202 = __builtin_shufflevector(__s1_202, __s1_202, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __ret_202; \ - __ret_202 = __rev0_202 * __noswap_splatq_laneq_f16(__rev1_202, __p2_202); \ - __ret_202 = __builtin_shufflevector(__ret_202, __ret_202, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_202; \ +#define vmulq_laneq_f16(__p0_294, __p1_294, __p2_294) __extension__ ({ \ + float16x8_t __s0_294 = __p0_294; \ + float16x8_t __s1_294 = __p1_294; \ + float16x8_t __rev0_294; __rev0_294 = __builtin_shufflevector(__s0_294, __s0_294, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev1_294; __rev1_294 = __builtin_shufflevector(__s1_294, __s1_294, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __ret_294; \ + __ret_294 = __rev0_294 * __noswap_splatq_laneq_f16(__rev1_294, __p2_294); \ + __ret_294 = __builtin_shufflevector(__ret_294, __ret_294, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_294; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmul_laneq_f16(__p0_203, __p1_203, __p2_203) __extension__ ({ \ - float16x4_t __s0_203 = __p0_203; \ - float16x8_t __s1_203 = __p1_203; \ - float16x4_t __ret_203; \ - __ret_203 = __s0_203 * splat_laneq_f16(__s1_203, __p2_203); \ - __ret_203; \ +#define vmul_laneq_f16(__p0_295, __p1_295, __p2_295) __extension__ ({ \ + float16x4_t __s0_295 = __p0_295; \ + float16x8_t __s1_295 = __p1_295; \ + float16x4_t __ret_295; \ + __ret_295 = __s0_295 * splat_laneq_f16(__s1_295, __p2_295); \ + __ret_295; \ }) #else -#define vmul_laneq_f16(__p0_204, __p1_204, __p2_204) __extension__ ({ \ - float16x4_t __s0_204 = __p0_204; \ - float16x8_t __s1_204 = __p1_204; \ - float16x4_t __rev0_204; __rev0_204 = __builtin_shufflevector(__s0_204, __s0_204, 3, 2, 1, 0); \ - float16x8_t __rev1_204; __rev1_204 = __builtin_shufflevector(__s1_204, __s1_204, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __ret_204; \ - __ret_204 = __rev0_204 * __noswap_splat_laneq_f16(__rev1_204, __p2_204); \ - __ret_204 = __builtin_shufflevector(__ret_204, __ret_204, 3, 2, 1, 0); \ - __ret_204; \ +#define vmul_laneq_f16(__p0_296, __p1_296, __p2_296) __extension__ ({ \ + float16x4_t __s0_296 = __p0_296; \ + float16x8_t __s1_296 = __p1_296; \ + float16x4_t __rev0_296; __rev0_296 = __builtin_shufflevector(__s0_296, __s0_296, 3, 2, 1, 0); \ + float16x8_t __rev1_296; __rev1_296 = __builtin_shufflevector(__s1_296, __s1_296, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __ret_296; \ + __ret_296 = __rev0_296 * __noswap_splat_laneq_f16(__rev1_296, __p2_296); \ + __ret_296 = __builtin_shufflevector(__ret_296, __ret_296, 3, 2, 1, 0); \ + __ret_296; \ }) #endif @@ -43722,44 +45458,44 @@ __ai float16x4_t __noswap_vmulx_f16(float16x4_t __p0, float16x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vmulxq_lane_f16(__p0_205, __p1_205, __p2_205) __extension__ ({ \ - float16x8_t __s0_205 = __p0_205; \ - float16x4_t __s1_205 = __p1_205; \ - float16x8_t __ret_205; \ - __ret_205 = vmulxq_f16(__s0_205, splatq_lane_f16(__s1_205, __p2_205)); \ - __ret_205; \ +#define vmulxq_lane_f16(__p0_297, __p1_297, __p2_297) __extension__ ({ \ + float16x8_t __s0_297 = __p0_297; \ + float16x4_t __s1_297 = __p1_297; \ + float16x8_t __ret_297; \ + __ret_297 = vmulxq_f16(__s0_297, splatq_lane_f16(__s1_297, __p2_297)); \ + __ret_297; \ }) #else -#define vmulxq_lane_f16(__p0_206, __p1_206, __p2_206) __extension__ ({ \ - 
float16x8_t __s0_206 = __p0_206; \ - float16x4_t __s1_206 = __p1_206; \ - float16x8_t __rev0_206; __rev0_206 = __builtin_shufflevector(__s0_206, __s0_206, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev1_206; __rev1_206 = __builtin_shufflevector(__s1_206, __s1_206, 3, 2, 1, 0); \ - float16x8_t __ret_206; \ - __ret_206 = __noswap_vmulxq_f16(__rev0_206, __noswap_splatq_lane_f16(__rev1_206, __p2_206)); \ - __ret_206 = __builtin_shufflevector(__ret_206, __ret_206, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_206; \ +#define vmulxq_lane_f16(__p0_298, __p1_298, __p2_298) __extension__ ({ \ + float16x8_t __s0_298 = __p0_298; \ + float16x4_t __s1_298 = __p1_298; \ + float16x8_t __rev0_298; __rev0_298 = __builtin_shufflevector(__s0_298, __s0_298, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev1_298; __rev1_298 = __builtin_shufflevector(__s1_298, __s1_298, 3, 2, 1, 0); \ + float16x8_t __ret_298; \ + __ret_298 = __noswap_vmulxq_f16(__rev0_298, __noswap_splatq_lane_f16(__rev1_298, __p2_298)); \ + __ret_298 = __builtin_shufflevector(__ret_298, __ret_298, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_298; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulx_lane_f16(__p0_207, __p1_207, __p2_207) __extension__ ({ \ - float16x4_t __s0_207 = __p0_207; \ - float16x4_t __s1_207 = __p1_207; \ - float16x4_t __ret_207; \ - __ret_207 = vmulx_f16(__s0_207, splat_lane_f16(__s1_207, __p2_207)); \ - __ret_207; \ +#define vmulx_lane_f16(__p0_299, __p1_299, __p2_299) __extension__ ({ \ + float16x4_t __s0_299 = __p0_299; \ + float16x4_t __s1_299 = __p1_299; \ + float16x4_t __ret_299; \ + __ret_299 = vmulx_f16(__s0_299, splat_lane_f16(__s1_299, __p2_299)); \ + __ret_299; \ }) #else -#define vmulx_lane_f16(__p0_208, __p1_208, __p2_208) __extension__ ({ \ - float16x4_t __s0_208 = __p0_208; \ - float16x4_t __s1_208 = __p1_208; \ - float16x4_t __rev0_208; __rev0_208 = __builtin_shufflevector(__s0_208, __s0_208, 3, 2, 1, 0); \ - float16x4_t __rev1_208; __rev1_208 = __builtin_shufflevector(__s1_208, __s1_208, 3, 2, 1, 0); \ - float16x4_t __ret_208; \ - __ret_208 = __noswap_vmulx_f16(__rev0_208, __noswap_splat_lane_f16(__rev1_208, __p2_208)); \ - __ret_208 = __builtin_shufflevector(__ret_208, __ret_208, 3, 2, 1, 0); \ - __ret_208; \ +#define vmulx_lane_f16(__p0_300, __p1_300, __p2_300) __extension__ ({ \ + float16x4_t __s0_300 = __p0_300; \ + float16x4_t __s1_300 = __p1_300; \ + float16x4_t __rev0_300; __rev0_300 = __builtin_shufflevector(__s0_300, __s0_300, 3, 2, 1, 0); \ + float16x4_t __rev1_300; __rev1_300 = __builtin_shufflevector(__s1_300, __s1_300, 3, 2, 1, 0); \ + float16x4_t __ret_300; \ + __ret_300 = __noswap_vmulx_f16(__rev0_300, __noswap_splat_lane_f16(__rev1_300, __p2_300)); \ + __ret_300 = __builtin_shufflevector(__ret_300, __ret_300, 3, 2, 1, 0); \ + __ret_300; \ }) #endif @@ -43783,44 +45519,44 @@ __ai float16x4_t __noswap_vmulx_f16(float16x4_t __p0, float16x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vmulxq_laneq_f16(__p0_209, __p1_209, __p2_209) __extension__ ({ \ - float16x8_t __s0_209 = __p0_209; \ - float16x8_t __s1_209 = __p1_209; \ - float16x8_t __ret_209; \ - __ret_209 = vmulxq_f16(__s0_209, splatq_laneq_f16(__s1_209, __p2_209)); \ - __ret_209; \ +#define vmulxq_laneq_f16(__p0_301, __p1_301, __p2_301) __extension__ ({ \ + float16x8_t __s0_301 = __p0_301; \ + float16x8_t __s1_301 = __p1_301; \ + float16x8_t __ret_301; \ + __ret_301 = vmulxq_f16(__s0_301, splatq_laneq_f16(__s1_301, __p2_301)); \ + __ret_301; \ }) #else -#define vmulxq_laneq_f16(__p0_210, __p1_210, __p2_210) __extension__ ({ \ - float16x8_t __s0_210 = 
__p0_210; \ - float16x8_t __s1_210 = __p1_210; \ - float16x8_t __rev0_210; __rev0_210 = __builtin_shufflevector(__s0_210, __s0_210, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev1_210; __rev1_210 = __builtin_shufflevector(__s1_210, __s1_210, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __ret_210; \ - __ret_210 = __noswap_vmulxq_f16(__rev0_210, __noswap_splatq_laneq_f16(__rev1_210, __p2_210)); \ - __ret_210 = __builtin_shufflevector(__ret_210, __ret_210, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_210; \ +#define vmulxq_laneq_f16(__p0_302, __p1_302, __p2_302) __extension__ ({ \ + float16x8_t __s0_302 = __p0_302; \ + float16x8_t __s1_302 = __p1_302; \ + float16x8_t __rev0_302; __rev0_302 = __builtin_shufflevector(__s0_302, __s0_302, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev1_302; __rev1_302 = __builtin_shufflevector(__s1_302, __s1_302, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __ret_302; \ + __ret_302 = __noswap_vmulxq_f16(__rev0_302, __noswap_splatq_laneq_f16(__rev1_302, __p2_302)); \ + __ret_302 = __builtin_shufflevector(__ret_302, __ret_302, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_302; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulx_laneq_f16(__p0_211, __p1_211, __p2_211) __extension__ ({ \ - float16x4_t __s0_211 = __p0_211; \ - float16x8_t __s1_211 = __p1_211; \ - float16x4_t __ret_211; \ - __ret_211 = vmulx_f16(__s0_211, splat_laneq_f16(__s1_211, __p2_211)); \ - __ret_211; \ +#define vmulx_laneq_f16(__p0_303, __p1_303, __p2_303) __extension__ ({ \ + float16x4_t __s0_303 = __p0_303; \ + float16x8_t __s1_303 = __p1_303; \ + float16x4_t __ret_303; \ + __ret_303 = vmulx_f16(__s0_303, splat_laneq_f16(__s1_303, __p2_303)); \ + __ret_303; \ }) #else -#define vmulx_laneq_f16(__p0_212, __p1_212, __p2_212) __extension__ ({ \ - float16x4_t __s0_212 = __p0_212; \ - float16x8_t __s1_212 = __p1_212; \ - float16x4_t __rev0_212; __rev0_212 = __builtin_shufflevector(__s0_212, __s0_212, 3, 2, 1, 0); \ - float16x8_t __rev1_212; __rev1_212 = __builtin_shufflevector(__s1_212, __s1_212, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __ret_212; \ - __ret_212 = __noswap_vmulx_f16(__rev0_212, __noswap_splat_laneq_f16(__rev1_212, __p2_212)); \ - __ret_212 = __builtin_shufflevector(__ret_212, __ret_212, 3, 2, 1, 0); \ - __ret_212; \ +#define vmulx_laneq_f16(__p0_304, __p1_304, __p2_304) __extension__ ({ \ + float16x4_t __s0_304 = __p0_304; \ + float16x8_t __s1_304 = __p1_304; \ + float16x4_t __rev0_304; __rev0_304 = __builtin_shufflevector(__s0_304, __s0_304, 3, 2, 1, 0); \ + float16x8_t __rev1_304; __rev1_304 = __builtin_shufflevector(__s1_304, __s1_304, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __ret_304; \ + __ret_304 = __noswap_vmulx_f16(__rev0_304, __noswap_splat_laneq_f16(__rev1_304, __p2_304)); \ + __ret_304 = __builtin_shufflevector(__ret_304, __ret_304, 3, 2, 1, 0); \ + __ret_304; \ }) #endif @@ -44336,54 +46072,54 @@ __ai int32x2_t __noswap_vusdot_s32(int32x2_t __p0, uint8x8_t __p1, int8x8_t __p2 #endif #ifdef __LITTLE_ENDIAN__ -#define vusdotq_lane_s32(__p0_213, __p1_213, __p2_213, __p3_213) __extension__ ({ \ - int32x4_t __s0_213 = __p0_213; \ - uint8x16_t __s1_213 = __p1_213; \ - int8x8_t __s2_213 = __p2_213; \ - int32x4_t __ret_213; \ -int8x8_t __reint_213 = __s2_213; \ - __ret_213 = vusdotq_s32(__s0_213, __s1_213, (int8x16_t)(splatq_lane_s32(*(int32x2_t *) &__reint_213, __p3_213))); \ - __ret_213; \ +#define vusdotq_lane_s32(__p0_305, __p1_305, __p2_305, __p3_305) __extension__ ({ \ + int32x4_t __s0_305 = __p0_305; \ + uint8x16_t __s1_305 = __p1_305; \ + int8x8_t __s2_305 = __p2_305; \ + int32x4_t __ret_305; \ 
+int8x8_t __reint_305 = __s2_305; \ + __ret_305 = vusdotq_s32(__s0_305, __s1_305, (int8x16_t)(splatq_lane_s32(*(int32x2_t *) &__reint_305, __p3_305))); \ + __ret_305; \ }) #else -#define vusdotq_lane_s32(__p0_214, __p1_214, __p2_214, __p3_214) __extension__ ({ \ - int32x4_t __s0_214 = __p0_214; \ - uint8x16_t __s1_214 = __p1_214; \ - int8x8_t __s2_214 = __p2_214; \ - int32x4_t __rev0_214; __rev0_214 = __builtin_shufflevector(__s0_214, __s0_214, 3, 2, 1, 0); \ - uint8x16_t __rev1_214; __rev1_214 = __builtin_shufflevector(__s1_214, __s1_214, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __rev2_214; __rev2_214 = __builtin_shufflevector(__s2_214, __s2_214, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x4_t __ret_214; \ -int8x8_t __reint_214 = __rev2_214; \ - __ret_214 = __noswap_vusdotq_s32(__rev0_214, __rev1_214, (int8x16_t)(__noswap_splatq_lane_s32(*(int32x2_t *) &__reint_214, __p3_214))); \ - __ret_214 = __builtin_shufflevector(__ret_214, __ret_214, 3, 2, 1, 0); \ - __ret_214; \ +#define vusdotq_lane_s32(__p0_306, __p1_306, __p2_306, __p3_306) __extension__ ({ \ + int32x4_t __s0_306 = __p0_306; \ + uint8x16_t __s1_306 = __p1_306; \ + int8x8_t __s2_306 = __p2_306; \ + int32x4_t __rev0_306; __rev0_306 = __builtin_shufflevector(__s0_306, __s0_306, 3, 2, 1, 0); \ + uint8x16_t __rev1_306; __rev1_306 = __builtin_shufflevector(__s1_306, __s1_306, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev2_306; __rev2_306 = __builtin_shufflevector(__s2_306, __s2_306, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x4_t __ret_306; \ +int8x8_t __reint_306 = __rev2_306; \ + __ret_306 = __noswap_vusdotq_s32(__rev0_306, __rev1_306, (int8x16_t)(__noswap_splatq_lane_s32(*(int32x2_t *) &__reint_306, __p3_306))); \ + __ret_306 = __builtin_shufflevector(__ret_306, __ret_306, 3, 2, 1, 0); \ + __ret_306; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vusdot_lane_s32(__p0_215, __p1_215, __p2_215, __p3_215) __extension__ ({ \ - int32x2_t __s0_215 = __p0_215; \ - uint8x8_t __s1_215 = __p1_215; \ - int8x8_t __s2_215 = __p2_215; \ - int32x2_t __ret_215; \ -int8x8_t __reint_215 = __s2_215; \ - __ret_215 = vusdot_s32(__s0_215, __s1_215, (int8x8_t)(splat_lane_s32(*(int32x2_t *) &__reint_215, __p3_215))); \ - __ret_215; \ +#define vusdot_lane_s32(__p0_307, __p1_307, __p2_307, __p3_307) __extension__ ({ \ + int32x2_t __s0_307 = __p0_307; \ + uint8x8_t __s1_307 = __p1_307; \ + int8x8_t __s2_307 = __p2_307; \ + int32x2_t __ret_307; \ +int8x8_t __reint_307 = __s2_307; \ + __ret_307 = vusdot_s32(__s0_307, __s1_307, (int8x8_t)(splat_lane_s32(*(int32x2_t *) &__reint_307, __p3_307))); \ + __ret_307; \ }) #else -#define vusdot_lane_s32(__p0_216, __p1_216, __p2_216, __p3_216) __extension__ ({ \ - int32x2_t __s0_216 = __p0_216; \ - uint8x8_t __s1_216 = __p1_216; \ - int8x8_t __s2_216 = __p2_216; \ - int32x2_t __rev0_216; __rev0_216 = __builtin_shufflevector(__s0_216, __s0_216, 1, 0); \ - uint8x8_t __rev1_216; __rev1_216 = __builtin_shufflevector(__s1_216, __s1_216, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __rev2_216; __rev2_216 = __builtin_shufflevector(__s2_216, __s2_216, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x2_t __ret_216; \ -int8x8_t __reint_216 = __rev2_216; \ - __ret_216 = __noswap_vusdot_s32(__rev0_216, __rev1_216, (int8x8_t)(__noswap_splat_lane_s32(*(int32x2_t *) &__reint_216, __p3_216))); \ - __ret_216 = __builtin_shufflevector(__ret_216, __ret_216, 1, 0); \ - __ret_216; \ +#define vusdot_lane_s32(__p0_308, __p1_308, __p2_308, __p3_308) __extension__ ({ \ + int32x2_t __s0_308 = __p0_308; \ + uint8x8_t __s1_308 = 
__p1_308; \ + int8x8_t __s2_308 = __p2_308; \ + int32x2_t __rev0_308; __rev0_308 = __builtin_shufflevector(__s0_308, __s0_308, 1, 0); \ + uint8x8_t __rev1_308; __rev1_308 = __builtin_shufflevector(__s1_308, __s1_308, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev2_308; __rev2_308 = __builtin_shufflevector(__s2_308, __s2_308, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x2_t __ret_308; \ +int8x8_t __reint_308 = __rev2_308; \ + __ret_308 = __noswap_vusdot_s32(__rev0_308, __rev1_308, (int8x8_t)(__noswap_splat_lane_s32(*(int32x2_t *) &__reint_308, __p3_308))); \ + __ret_308 = __builtin_shufflevector(__ret_308, __ret_308, 1, 0); \ + __ret_308; \ }) #endif @@ -44480,98 +46216,98 @@ __ai int16x4_t vqrdmlah_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlahq_lane_s32(__p0_217, __p1_217, __p2_217, __p3_217) __extension__ ({ \ - int32x4_t __s0_217 = __p0_217; \ - int32x4_t __s1_217 = __p1_217; \ - int32x2_t __s2_217 = __p2_217; \ - int32x4_t __ret_217; \ - __ret_217 = vqaddq_s32(__s0_217, vqrdmulhq_s32(__s1_217, splatq_lane_s32(__s2_217, __p3_217))); \ - __ret_217; \ +#define vqrdmlahq_lane_s32(__p0_309, __p1_309, __p2_309, __p3_309) __extension__ ({ \ + int32x4_t __s0_309 = __p0_309; \ + int32x4_t __s1_309 = __p1_309; \ + int32x2_t __s2_309 = __p2_309; \ + int32x4_t __ret_309; \ + __ret_309 = vqaddq_s32(__s0_309, vqrdmulhq_s32(__s1_309, splatq_lane_s32(__s2_309, __p3_309))); \ + __ret_309; \ }) #else -#define vqrdmlahq_lane_s32(__p0_218, __p1_218, __p2_218, __p3_218) __extension__ ({ \ - int32x4_t __s0_218 = __p0_218; \ - int32x4_t __s1_218 = __p1_218; \ - int32x2_t __s2_218 = __p2_218; \ - int32x4_t __rev0_218; __rev0_218 = __builtin_shufflevector(__s0_218, __s0_218, 3, 2, 1, 0); \ - int32x4_t __rev1_218; __rev1_218 = __builtin_shufflevector(__s1_218, __s1_218, 3, 2, 1, 0); \ - int32x2_t __rev2_218; __rev2_218 = __builtin_shufflevector(__s2_218, __s2_218, 1, 0); \ - int32x4_t __ret_218; \ - __ret_218 = __noswap_vqaddq_s32(__rev0_218, __noswap_vqrdmulhq_s32(__rev1_218, __noswap_splatq_lane_s32(__rev2_218, __p3_218))); \ - __ret_218 = __builtin_shufflevector(__ret_218, __ret_218, 3, 2, 1, 0); \ - __ret_218; \ +#define vqrdmlahq_lane_s32(__p0_310, __p1_310, __p2_310, __p3_310) __extension__ ({ \ + int32x4_t __s0_310 = __p0_310; \ + int32x4_t __s1_310 = __p1_310; \ + int32x2_t __s2_310 = __p2_310; \ + int32x4_t __rev0_310; __rev0_310 = __builtin_shufflevector(__s0_310, __s0_310, 3, 2, 1, 0); \ + int32x4_t __rev1_310; __rev1_310 = __builtin_shufflevector(__s1_310, __s1_310, 3, 2, 1, 0); \ + int32x2_t __rev2_310; __rev2_310 = __builtin_shufflevector(__s2_310, __s2_310, 1, 0); \ + int32x4_t __ret_310; \ + __ret_310 = __noswap_vqaddq_s32(__rev0_310, __noswap_vqrdmulhq_s32(__rev1_310, __noswap_splatq_lane_s32(__rev2_310, __p3_310))); \ + __ret_310 = __builtin_shufflevector(__ret_310, __ret_310, 3, 2, 1, 0); \ + __ret_310; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlahq_lane_s16(__p0_219, __p1_219, __p2_219, __p3_219) __extension__ ({ \ - int16x8_t __s0_219 = __p0_219; \ - int16x8_t __s1_219 = __p1_219; \ - int16x4_t __s2_219 = __p2_219; \ - int16x8_t __ret_219; \ - __ret_219 = vqaddq_s16(__s0_219, vqrdmulhq_s16(__s1_219, splatq_lane_s16(__s2_219, __p3_219))); \ - __ret_219; \ +#define vqrdmlahq_lane_s16(__p0_311, __p1_311, __p2_311, __p3_311) __extension__ ({ \ + int16x8_t __s0_311 = __p0_311; \ + int16x8_t __s1_311 = __p1_311; \ + int16x4_t __s2_311 = __p2_311; \ + int16x8_t __ret_311; \ + __ret_311 = vqaddq_s16(__s0_311, vqrdmulhq_s16(__s1_311, 
splatq_lane_s16(__s2_311, __p3_311))); \ + __ret_311; \ }) #else -#define vqrdmlahq_lane_s16(__p0_220, __p1_220, __p2_220, __p3_220) __extension__ ({ \ - int16x8_t __s0_220 = __p0_220; \ - int16x8_t __s1_220 = __p1_220; \ - int16x4_t __s2_220 = __p2_220; \ - int16x8_t __rev0_220; __rev0_220 = __builtin_shufflevector(__s0_220, __s0_220, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_220; __rev1_220 = __builtin_shufflevector(__s1_220, __s1_220, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev2_220; __rev2_220 = __builtin_shufflevector(__s2_220, __s2_220, 3, 2, 1, 0); \ - int16x8_t __ret_220; \ - __ret_220 = __noswap_vqaddq_s16(__rev0_220, __noswap_vqrdmulhq_s16(__rev1_220, __noswap_splatq_lane_s16(__rev2_220, __p3_220))); \ - __ret_220 = __builtin_shufflevector(__ret_220, __ret_220, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_220; \ +#define vqrdmlahq_lane_s16(__p0_312, __p1_312, __p2_312, __p3_312) __extension__ ({ \ + int16x8_t __s0_312 = __p0_312; \ + int16x8_t __s1_312 = __p1_312; \ + int16x4_t __s2_312 = __p2_312; \ + int16x8_t __rev0_312; __rev0_312 = __builtin_shufflevector(__s0_312, __s0_312, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1_312; __rev1_312 = __builtin_shufflevector(__s1_312, __s1_312, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __rev2_312; __rev2_312 = __builtin_shufflevector(__s2_312, __s2_312, 3, 2, 1, 0); \ + int16x8_t __ret_312; \ + __ret_312 = __noswap_vqaddq_s16(__rev0_312, __noswap_vqrdmulhq_s16(__rev1_312, __noswap_splatq_lane_s16(__rev2_312, __p3_312))); \ + __ret_312 = __builtin_shufflevector(__ret_312, __ret_312, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_312; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlah_lane_s32(__p0_221, __p1_221, __p2_221, __p3_221) __extension__ ({ \ - int32x2_t __s0_221 = __p0_221; \ - int32x2_t __s1_221 = __p1_221; \ - int32x2_t __s2_221 = __p2_221; \ - int32x2_t __ret_221; \ - __ret_221 = vqadd_s32(__s0_221, vqrdmulh_s32(__s1_221, splat_lane_s32(__s2_221, __p3_221))); \ - __ret_221; \ +#define vqrdmlah_lane_s32(__p0_313, __p1_313, __p2_313, __p3_313) __extension__ ({ \ + int32x2_t __s0_313 = __p0_313; \ + int32x2_t __s1_313 = __p1_313; \ + int32x2_t __s2_313 = __p2_313; \ + int32x2_t __ret_313; \ + __ret_313 = vqadd_s32(__s0_313, vqrdmulh_s32(__s1_313, splat_lane_s32(__s2_313, __p3_313))); \ + __ret_313; \ }) #else -#define vqrdmlah_lane_s32(__p0_222, __p1_222, __p2_222, __p3_222) __extension__ ({ \ - int32x2_t __s0_222 = __p0_222; \ - int32x2_t __s1_222 = __p1_222; \ - int32x2_t __s2_222 = __p2_222; \ - int32x2_t __rev0_222; __rev0_222 = __builtin_shufflevector(__s0_222, __s0_222, 1, 0); \ - int32x2_t __rev1_222; __rev1_222 = __builtin_shufflevector(__s1_222, __s1_222, 1, 0); \ - int32x2_t __rev2_222; __rev2_222 = __builtin_shufflevector(__s2_222, __s2_222, 1, 0); \ - int32x2_t __ret_222; \ - __ret_222 = __noswap_vqadd_s32(__rev0_222, __noswap_vqrdmulh_s32(__rev1_222, __noswap_splat_lane_s32(__rev2_222, __p3_222))); \ - __ret_222 = __builtin_shufflevector(__ret_222, __ret_222, 1, 0); \ - __ret_222; \ +#define vqrdmlah_lane_s32(__p0_314, __p1_314, __p2_314, __p3_314) __extension__ ({ \ + int32x2_t __s0_314 = __p0_314; \ + int32x2_t __s1_314 = __p1_314; \ + int32x2_t __s2_314 = __p2_314; \ + int32x2_t __rev0_314; __rev0_314 = __builtin_shufflevector(__s0_314, __s0_314, 1, 0); \ + int32x2_t __rev1_314; __rev1_314 = __builtin_shufflevector(__s1_314, __s1_314, 1, 0); \ + int32x2_t __rev2_314; __rev2_314 = __builtin_shufflevector(__s2_314, __s2_314, 1, 0); \ + int32x2_t __ret_314; \ + __ret_314 = __noswap_vqadd_s32(__rev0_314, 
__noswap_vqrdmulh_s32(__rev1_314, __noswap_splat_lane_s32(__rev2_314, __p3_314))); \ + __ret_314 = __builtin_shufflevector(__ret_314, __ret_314, 1, 0); \ + __ret_314; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlah_lane_s16(__p0_223, __p1_223, __p2_223, __p3_223) __extension__ ({ \ - int16x4_t __s0_223 = __p0_223; \ - int16x4_t __s1_223 = __p1_223; \ - int16x4_t __s2_223 = __p2_223; \ - int16x4_t __ret_223; \ - __ret_223 = vqadd_s16(__s0_223, vqrdmulh_s16(__s1_223, splat_lane_s16(__s2_223, __p3_223))); \ - __ret_223; \ +#define vqrdmlah_lane_s16(__p0_315, __p1_315, __p2_315, __p3_315) __extension__ ({ \ + int16x4_t __s0_315 = __p0_315; \ + int16x4_t __s1_315 = __p1_315; \ + int16x4_t __s2_315 = __p2_315; \ + int16x4_t __ret_315; \ + __ret_315 = vqadd_s16(__s0_315, vqrdmulh_s16(__s1_315, splat_lane_s16(__s2_315, __p3_315))); \ + __ret_315; \ }) #else -#define vqrdmlah_lane_s16(__p0_224, __p1_224, __p2_224, __p3_224) __extension__ ({ \ - int16x4_t __s0_224 = __p0_224; \ - int16x4_t __s1_224 = __p1_224; \ - int16x4_t __s2_224 = __p2_224; \ - int16x4_t __rev0_224; __rev0_224 = __builtin_shufflevector(__s0_224, __s0_224, 3, 2, 1, 0); \ - int16x4_t __rev1_224; __rev1_224 = __builtin_shufflevector(__s1_224, __s1_224, 3, 2, 1, 0); \ - int16x4_t __rev2_224; __rev2_224 = __builtin_shufflevector(__s2_224, __s2_224, 3, 2, 1, 0); \ - int16x4_t __ret_224; \ - __ret_224 = __noswap_vqadd_s16(__rev0_224, __noswap_vqrdmulh_s16(__rev1_224, __noswap_splat_lane_s16(__rev2_224, __p3_224))); \ - __ret_224 = __builtin_shufflevector(__ret_224, __ret_224, 3, 2, 1, 0); \ - __ret_224; \ +#define vqrdmlah_lane_s16(__p0_316, __p1_316, __p2_316, __p3_316) __extension__ ({ \ + int16x4_t __s0_316 = __p0_316; \ + int16x4_t __s1_316 = __p1_316; \ + int16x4_t __s2_316 = __p2_316; \ + int16x4_t __rev0_316; __rev0_316 = __builtin_shufflevector(__s0_316, __s0_316, 3, 2, 1, 0); \ + int16x4_t __rev1_316; __rev1_316 = __builtin_shufflevector(__s1_316, __s1_316, 3, 2, 1, 0); \ + int16x4_t __rev2_316; __rev2_316 = __builtin_shufflevector(__s2_316, __s2_316, 3, 2, 1, 0); \ + int16x4_t __ret_316; \ + __ret_316 = __noswap_vqadd_s16(__rev0_316, __noswap_vqrdmulh_s16(__rev1_316, __noswap_splat_lane_s16(__rev2_316, __p3_316))); \ + __ret_316 = __builtin_shufflevector(__ret_316, __ret_316, 3, 2, 1, 0); \ + __ret_316; \ }) #endif @@ -44648,292 +46384,292 @@ __ai int16x4_t vqrdmlsh_s16(int16x4_t __p0, int16x4_t __p1, int16x4_t __p2) { #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlshq_lane_s32(__p0_225, __p1_225, __p2_225, __p3_225) __extension__ ({ \ - int32x4_t __s0_225 = __p0_225; \ - int32x4_t __s1_225 = __p1_225; \ - int32x2_t __s2_225 = __p2_225; \ - int32x4_t __ret_225; \ - __ret_225 = vqsubq_s32(__s0_225, vqrdmulhq_s32(__s1_225, splatq_lane_s32(__s2_225, __p3_225))); \ - __ret_225; \ +#define vqrdmlshq_lane_s32(__p0_317, __p1_317, __p2_317, __p3_317) __extension__ ({ \ + int32x4_t __s0_317 = __p0_317; \ + int32x4_t __s1_317 = __p1_317; \ + int32x2_t __s2_317 = __p2_317; \ + int32x4_t __ret_317; \ + __ret_317 = vqsubq_s32(__s0_317, vqrdmulhq_s32(__s1_317, splatq_lane_s32(__s2_317, __p3_317))); \ + __ret_317; \ }) #else -#define vqrdmlshq_lane_s32(__p0_226, __p1_226, __p2_226, __p3_226) __extension__ ({ \ - int32x4_t __s0_226 = __p0_226; \ - int32x4_t __s1_226 = __p1_226; \ - int32x2_t __s2_226 = __p2_226; \ - int32x4_t __rev0_226; __rev0_226 = __builtin_shufflevector(__s0_226, __s0_226, 3, 2, 1, 0); \ - int32x4_t __rev1_226; __rev1_226 = __builtin_shufflevector(__s1_226, __s1_226, 3, 2, 1, 0); \ - int32x2_t __rev2_226; 
__rev2_226 = __builtin_shufflevector(__s2_226, __s2_226, 1, 0); \ - int32x4_t __ret_226; \ - __ret_226 = __noswap_vqsubq_s32(__rev0_226, __noswap_vqrdmulhq_s32(__rev1_226, __noswap_splatq_lane_s32(__rev2_226, __p3_226))); \ - __ret_226 = __builtin_shufflevector(__ret_226, __ret_226, 3, 2, 1, 0); \ - __ret_226; \ +#define vqrdmlshq_lane_s32(__p0_318, __p1_318, __p2_318, __p3_318) __extension__ ({ \ + int32x4_t __s0_318 = __p0_318; \ + int32x4_t __s1_318 = __p1_318; \ + int32x2_t __s2_318 = __p2_318; \ + int32x4_t __rev0_318; __rev0_318 = __builtin_shufflevector(__s0_318, __s0_318, 3, 2, 1, 0); \ + int32x4_t __rev1_318; __rev1_318 = __builtin_shufflevector(__s1_318, __s1_318, 3, 2, 1, 0); \ + int32x2_t __rev2_318; __rev2_318 = __builtin_shufflevector(__s2_318, __s2_318, 1, 0); \ + int32x4_t __ret_318; \ + __ret_318 = __noswap_vqsubq_s32(__rev0_318, __noswap_vqrdmulhq_s32(__rev1_318, __noswap_splatq_lane_s32(__rev2_318, __p3_318))); \ + __ret_318 = __builtin_shufflevector(__ret_318, __ret_318, 3, 2, 1, 0); \ + __ret_318; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlshq_lane_s16(__p0_227, __p1_227, __p2_227, __p3_227) __extension__ ({ \ - int16x8_t __s0_227 = __p0_227; \ - int16x8_t __s1_227 = __p1_227; \ - int16x4_t __s2_227 = __p2_227; \ - int16x8_t __ret_227; \ - __ret_227 = vqsubq_s16(__s0_227, vqrdmulhq_s16(__s1_227, splatq_lane_s16(__s2_227, __p3_227))); \ - __ret_227; \ +#define vqrdmlshq_lane_s16(__p0_319, __p1_319, __p2_319, __p3_319) __extension__ ({ \ + int16x8_t __s0_319 = __p0_319; \ + int16x8_t __s1_319 = __p1_319; \ + int16x4_t __s2_319 = __p2_319; \ + int16x8_t __ret_319; \ + __ret_319 = vqsubq_s16(__s0_319, vqrdmulhq_s16(__s1_319, splatq_lane_s16(__s2_319, __p3_319))); \ + __ret_319; \ }) #else -#define vqrdmlshq_lane_s16(__p0_228, __p1_228, __p2_228, __p3_228) __extension__ ({ \ - int16x8_t __s0_228 = __p0_228; \ - int16x8_t __s1_228 = __p1_228; \ - int16x4_t __s2_228 = __p2_228; \ - int16x8_t __rev0_228; __rev0_228 = __builtin_shufflevector(__s0_228, __s0_228, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_228; __rev1_228 = __builtin_shufflevector(__s1_228, __s1_228, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev2_228; __rev2_228 = __builtin_shufflevector(__s2_228, __s2_228, 3, 2, 1, 0); \ - int16x8_t __ret_228; \ - __ret_228 = __noswap_vqsubq_s16(__rev0_228, __noswap_vqrdmulhq_s16(__rev1_228, __noswap_splatq_lane_s16(__rev2_228, __p3_228))); \ - __ret_228 = __builtin_shufflevector(__ret_228, __ret_228, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_228; \ +#define vqrdmlshq_lane_s16(__p0_320, __p1_320, __p2_320, __p3_320) __extension__ ({ \ + int16x8_t __s0_320 = __p0_320; \ + int16x8_t __s1_320 = __p1_320; \ + int16x4_t __s2_320 = __p2_320; \ + int16x8_t __rev0_320; __rev0_320 = __builtin_shufflevector(__s0_320, __s0_320, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1_320; __rev1_320 = __builtin_shufflevector(__s1_320, __s1_320, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __rev2_320; __rev2_320 = __builtin_shufflevector(__s2_320, __s2_320, 3, 2, 1, 0); \ + int16x8_t __ret_320; \ + __ret_320 = __noswap_vqsubq_s16(__rev0_320, __noswap_vqrdmulhq_s16(__rev1_320, __noswap_splatq_lane_s16(__rev2_320, __p3_320))); \ + __ret_320 = __builtin_shufflevector(__ret_320, __ret_320, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_320; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlsh_lane_s32(__p0_229, __p1_229, __p2_229, __p3_229) __extension__ ({ \ - int32x2_t __s0_229 = __p0_229; \ - int32x2_t __s1_229 = __p1_229; \ - int32x2_t __s2_229 = __p2_229; \ - int32x2_t __ret_229; \ - __ret_229 = 
vqsub_s32(__s0_229, vqrdmulh_s32(__s1_229, splat_lane_s32(__s2_229, __p3_229))); \ - __ret_229; \ +#define vqrdmlsh_lane_s32(__p0_321, __p1_321, __p2_321, __p3_321) __extension__ ({ \ + int32x2_t __s0_321 = __p0_321; \ + int32x2_t __s1_321 = __p1_321; \ + int32x2_t __s2_321 = __p2_321; \ + int32x2_t __ret_321; \ + __ret_321 = vqsub_s32(__s0_321, vqrdmulh_s32(__s1_321, splat_lane_s32(__s2_321, __p3_321))); \ + __ret_321; \ }) #else -#define vqrdmlsh_lane_s32(__p0_230, __p1_230, __p2_230, __p3_230) __extension__ ({ \ - int32x2_t __s0_230 = __p0_230; \ - int32x2_t __s1_230 = __p1_230; \ - int32x2_t __s2_230 = __p2_230; \ - int32x2_t __rev0_230; __rev0_230 = __builtin_shufflevector(__s0_230, __s0_230, 1, 0); \ - int32x2_t __rev1_230; __rev1_230 = __builtin_shufflevector(__s1_230, __s1_230, 1, 0); \ - int32x2_t __rev2_230; __rev2_230 = __builtin_shufflevector(__s2_230, __s2_230, 1, 0); \ - int32x2_t __ret_230; \ - __ret_230 = __noswap_vqsub_s32(__rev0_230, __noswap_vqrdmulh_s32(__rev1_230, __noswap_splat_lane_s32(__rev2_230, __p3_230))); \ - __ret_230 = __builtin_shufflevector(__ret_230, __ret_230, 1, 0); \ - __ret_230; \ +#define vqrdmlsh_lane_s32(__p0_322, __p1_322, __p2_322, __p3_322) __extension__ ({ \ + int32x2_t __s0_322 = __p0_322; \ + int32x2_t __s1_322 = __p1_322; \ + int32x2_t __s2_322 = __p2_322; \ + int32x2_t __rev0_322; __rev0_322 = __builtin_shufflevector(__s0_322, __s0_322, 1, 0); \ + int32x2_t __rev1_322; __rev1_322 = __builtin_shufflevector(__s1_322, __s1_322, 1, 0); \ + int32x2_t __rev2_322; __rev2_322 = __builtin_shufflevector(__s2_322, __s2_322, 1, 0); \ + int32x2_t __ret_322; \ + __ret_322 = __noswap_vqsub_s32(__rev0_322, __noswap_vqrdmulh_s32(__rev1_322, __noswap_splat_lane_s32(__rev2_322, __p3_322))); \ + __ret_322 = __builtin_shufflevector(__ret_322, __ret_322, 1, 0); \ + __ret_322; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlsh_lane_s16(__p0_231, __p1_231, __p2_231, __p3_231) __extension__ ({ \ - int16x4_t __s0_231 = __p0_231; \ - int16x4_t __s1_231 = __p1_231; \ - int16x4_t __s2_231 = __p2_231; \ - int16x4_t __ret_231; \ - __ret_231 = vqsub_s16(__s0_231, vqrdmulh_s16(__s1_231, splat_lane_s16(__s2_231, __p3_231))); \ - __ret_231; \ +#define vqrdmlsh_lane_s16(__p0_323, __p1_323, __p2_323, __p3_323) __extension__ ({ \ + int16x4_t __s0_323 = __p0_323; \ + int16x4_t __s1_323 = __p1_323; \ + int16x4_t __s2_323 = __p2_323; \ + int16x4_t __ret_323; \ + __ret_323 = vqsub_s16(__s0_323, vqrdmulh_s16(__s1_323, splat_lane_s16(__s2_323, __p3_323))); \ + __ret_323; \ }) #else -#define vqrdmlsh_lane_s16(__p0_232, __p1_232, __p2_232, __p3_232) __extension__ ({ \ - int16x4_t __s0_232 = __p0_232; \ - int16x4_t __s1_232 = __p1_232; \ - int16x4_t __s2_232 = __p2_232; \ - int16x4_t __rev0_232; __rev0_232 = __builtin_shufflevector(__s0_232, __s0_232, 3, 2, 1, 0); \ - int16x4_t __rev1_232; __rev1_232 = __builtin_shufflevector(__s1_232, __s1_232, 3, 2, 1, 0); \ - int16x4_t __rev2_232; __rev2_232 = __builtin_shufflevector(__s2_232, __s2_232, 3, 2, 1, 0); \ - int16x4_t __ret_232; \ - __ret_232 = __noswap_vqsub_s16(__rev0_232, __noswap_vqrdmulh_s16(__rev1_232, __noswap_splat_lane_s16(__rev2_232, __p3_232))); \ - __ret_232 = __builtin_shufflevector(__ret_232, __ret_232, 3, 2, 1, 0); \ - __ret_232; \ +#define vqrdmlsh_lane_s16(__p0_324, __p1_324, __p2_324, __p3_324) __extension__ ({ \ + int16x4_t __s0_324 = __p0_324; \ + int16x4_t __s1_324 = __p1_324; \ + int16x4_t __s2_324 = __p2_324; \ + int16x4_t __rev0_324; __rev0_324 = __builtin_shufflevector(__s0_324, __s0_324, 3, 2, 1, 0); \ + 
int16x4_t __rev1_324; __rev1_324 = __builtin_shufflevector(__s1_324, __s1_324, 3, 2, 1, 0); \ + int16x4_t __rev2_324; __rev2_324 = __builtin_shufflevector(__s2_324, __s2_324, 3, 2, 1, 0); \ + int16x4_t __ret_324; \ + __ret_324 = __noswap_vqsub_s16(__rev0_324, __noswap_vqrdmulh_s16(__rev1_324, __noswap_splat_lane_s16(__rev2_324, __p3_324))); \ + __ret_324 = __builtin_shufflevector(__ret_324, __ret_324, 3, 2, 1, 0); \ + __ret_324; \ }) #endif #endif #if defined(__ARM_FEATURE_QRDMX) && defined(__aarch64__) #ifdef __LITTLE_ENDIAN__ -#define vqrdmlahq_laneq_s32(__p0_233, __p1_233, __p2_233, __p3_233) __extension__ ({ \ - int32x4_t __s0_233 = __p0_233; \ - int32x4_t __s1_233 = __p1_233; \ - int32x4_t __s2_233 = __p2_233; \ - int32x4_t __ret_233; \ - __ret_233 = vqaddq_s32(__s0_233, vqrdmulhq_s32(__s1_233, splatq_laneq_s32(__s2_233, __p3_233))); \ - __ret_233; \ +#define vqrdmlahq_laneq_s32(__p0_325, __p1_325, __p2_325, __p3_325) __extension__ ({ \ + int32x4_t __s0_325 = __p0_325; \ + int32x4_t __s1_325 = __p1_325; \ + int32x4_t __s2_325 = __p2_325; \ + int32x4_t __ret_325; \ + __ret_325 = vqaddq_s32(__s0_325, vqrdmulhq_s32(__s1_325, splatq_laneq_s32(__s2_325, __p3_325))); \ + __ret_325; \ }) #else -#define vqrdmlahq_laneq_s32(__p0_234, __p1_234, __p2_234, __p3_234) __extension__ ({ \ - int32x4_t __s0_234 = __p0_234; \ - int32x4_t __s1_234 = __p1_234; \ - int32x4_t __s2_234 = __p2_234; \ - int32x4_t __rev0_234; __rev0_234 = __builtin_shufflevector(__s0_234, __s0_234, 3, 2, 1, 0); \ - int32x4_t __rev1_234; __rev1_234 = __builtin_shufflevector(__s1_234, __s1_234, 3, 2, 1, 0); \ - int32x4_t __rev2_234; __rev2_234 = __builtin_shufflevector(__s2_234, __s2_234, 3, 2, 1, 0); \ - int32x4_t __ret_234; \ - __ret_234 = __noswap_vqaddq_s32(__rev0_234, __noswap_vqrdmulhq_s32(__rev1_234, __noswap_splatq_laneq_s32(__rev2_234, __p3_234))); \ - __ret_234 = __builtin_shufflevector(__ret_234, __ret_234, 3, 2, 1, 0); \ - __ret_234; \ +#define vqrdmlahq_laneq_s32(__p0_326, __p1_326, __p2_326, __p3_326) __extension__ ({ \ + int32x4_t __s0_326 = __p0_326; \ + int32x4_t __s1_326 = __p1_326; \ + int32x4_t __s2_326 = __p2_326; \ + int32x4_t __rev0_326; __rev0_326 = __builtin_shufflevector(__s0_326, __s0_326, 3, 2, 1, 0); \ + int32x4_t __rev1_326; __rev1_326 = __builtin_shufflevector(__s1_326, __s1_326, 3, 2, 1, 0); \ + int32x4_t __rev2_326; __rev2_326 = __builtin_shufflevector(__s2_326, __s2_326, 3, 2, 1, 0); \ + int32x4_t __ret_326; \ + __ret_326 = __noswap_vqaddq_s32(__rev0_326, __noswap_vqrdmulhq_s32(__rev1_326, __noswap_splatq_laneq_s32(__rev2_326, __p3_326))); \ + __ret_326 = __builtin_shufflevector(__ret_326, __ret_326, 3, 2, 1, 0); \ + __ret_326; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlahq_laneq_s16(__p0_235, __p1_235, __p2_235, __p3_235) __extension__ ({ \ - int16x8_t __s0_235 = __p0_235; \ - int16x8_t __s1_235 = __p1_235; \ - int16x8_t __s2_235 = __p2_235; \ - int16x8_t __ret_235; \ - __ret_235 = vqaddq_s16(__s0_235, vqrdmulhq_s16(__s1_235, splatq_laneq_s16(__s2_235, __p3_235))); \ - __ret_235; \ +#define vqrdmlahq_laneq_s16(__p0_327, __p1_327, __p2_327, __p3_327) __extension__ ({ \ + int16x8_t __s0_327 = __p0_327; \ + int16x8_t __s1_327 = __p1_327; \ + int16x8_t __s2_327 = __p2_327; \ + int16x8_t __ret_327; \ + __ret_327 = vqaddq_s16(__s0_327, vqrdmulhq_s16(__s1_327, splatq_laneq_s16(__s2_327, __p3_327))); \ + __ret_327; \ }) #else -#define vqrdmlahq_laneq_s16(__p0_236, __p1_236, __p2_236, __p3_236) __extension__ ({ \ - int16x8_t __s0_236 = __p0_236; \ - int16x8_t __s1_236 = __p1_236; \ - int16x8_t 
__s2_236 = __p2_236; \ - int16x8_t __rev0_236; __rev0_236 = __builtin_shufflevector(__s0_236, __s0_236, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_236; __rev1_236 = __builtin_shufflevector(__s1_236, __s1_236, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev2_236; __rev2_236 = __builtin_shufflevector(__s2_236, __s2_236, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __ret_236; \ - __ret_236 = __noswap_vqaddq_s16(__rev0_236, __noswap_vqrdmulhq_s16(__rev1_236, __noswap_splatq_laneq_s16(__rev2_236, __p3_236))); \ - __ret_236 = __builtin_shufflevector(__ret_236, __ret_236, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_236; \ +#define vqrdmlahq_laneq_s16(__p0_328, __p1_328, __p2_328, __p3_328) __extension__ ({ \ + int16x8_t __s0_328 = __p0_328; \ + int16x8_t __s1_328 = __p1_328; \ + int16x8_t __s2_328 = __p2_328; \ + int16x8_t __rev0_328; __rev0_328 = __builtin_shufflevector(__s0_328, __s0_328, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1_328; __rev1_328 = __builtin_shufflevector(__s1_328, __s1_328, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev2_328; __rev2_328 = __builtin_shufflevector(__s2_328, __s2_328, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __ret_328; \ + __ret_328 = __noswap_vqaddq_s16(__rev0_328, __noswap_vqrdmulhq_s16(__rev1_328, __noswap_splatq_laneq_s16(__rev2_328, __p3_328))); \ + __ret_328 = __builtin_shufflevector(__ret_328, __ret_328, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_328; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlah_laneq_s32(__p0_237, __p1_237, __p2_237, __p3_237) __extension__ ({ \ - int32x2_t __s0_237 = __p0_237; \ - int32x2_t __s1_237 = __p1_237; \ - int32x4_t __s2_237 = __p2_237; \ - int32x2_t __ret_237; \ - __ret_237 = vqadd_s32(__s0_237, vqrdmulh_s32(__s1_237, splat_laneq_s32(__s2_237, __p3_237))); \ - __ret_237; \ +#define vqrdmlah_laneq_s32(__p0_329, __p1_329, __p2_329, __p3_329) __extension__ ({ \ + int32x2_t __s0_329 = __p0_329; \ + int32x2_t __s1_329 = __p1_329; \ + int32x4_t __s2_329 = __p2_329; \ + int32x2_t __ret_329; \ + __ret_329 = vqadd_s32(__s0_329, vqrdmulh_s32(__s1_329, splat_laneq_s32(__s2_329, __p3_329))); \ + __ret_329; \ }) #else -#define vqrdmlah_laneq_s32(__p0_238, __p1_238, __p2_238, __p3_238) __extension__ ({ \ - int32x2_t __s0_238 = __p0_238; \ - int32x2_t __s1_238 = __p1_238; \ - int32x4_t __s2_238 = __p2_238; \ - int32x2_t __rev0_238; __rev0_238 = __builtin_shufflevector(__s0_238, __s0_238, 1, 0); \ - int32x2_t __rev1_238; __rev1_238 = __builtin_shufflevector(__s1_238, __s1_238, 1, 0); \ - int32x4_t __rev2_238; __rev2_238 = __builtin_shufflevector(__s2_238, __s2_238, 3, 2, 1, 0); \ - int32x2_t __ret_238; \ - __ret_238 = __noswap_vqadd_s32(__rev0_238, __noswap_vqrdmulh_s32(__rev1_238, __noswap_splat_laneq_s32(__rev2_238, __p3_238))); \ - __ret_238 = __builtin_shufflevector(__ret_238, __ret_238, 1, 0); \ - __ret_238; \ +#define vqrdmlah_laneq_s32(__p0_330, __p1_330, __p2_330, __p3_330) __extension__ ({ \ + int32x2_t __s0_330 = __p0_330; \ + int32x2_t __s1_330 = __p1_330; \ + int32x4_t __s2_330 = __p2_330; \ + int32x2_t __rev0_330; __rev0_330 = __builtin_shufflevector(__s0_330, __s0_330, 1, 0); \ + int32x2_t __rev1_330; __rev1_330 = __builtin_shufflevector(__s1_330, __s1_330, 1, 0); \ + int32x4_t __rev2_330; __rev2_330 = __builtin_shufflevector(__s2_330, __s2_330, 3, 2, 1, 0); \ + int32x2_t __ret_330; \ + __ret_330 = __noswap_vqadd_s32(__rev0_330, __noswap_vqrdmulh_s32(__rev1_330, __noswap_splat_laneq_s32(__rev2_330, __p3_330))); \ + __ret_330 = __builtin_shufflevector(__ret_330, __ret_330, 1, 0); \ + __ret_330; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define 
vqrdmlah_laneq_s16(__p0_239, __p1_239, __p2_239, __p3_239) __extension__ ({ \ - int16x4_t __s0_239 = __p0_239; \ - int16x4_t __s1_239 = __p1_239; \ - int16x8_t __s2_239 = __p2_239; \ - int16x4_t __ret_239; \ - __ret_239 = vqadd_s16(__s0_239, vqrdmulh_s16(__s1_239, splat_laneq_s16(__s2_239, __p3_239))); \ - __ret_239; \ +#define vqrdmlah_laneq_s16(__p0_331, __p1_331, __p2_331, __p3_331) __extension__ ({ \ + int16x4_t __s0_331 = __p0_331; \ + int16x4_t __s1_331 = __p1_331; \ + int16x8_t __s2_331 = __p2_331; \ + int16x4_t __ret_331; \ + __ret_331 = vqadd_s16(__s0_331, vqrdmulh_s16(__s1_331, splat_laneq_s16(__s2_331, __p3_331))); \ + __ret_331; \ }) #else -#define vqrdmlah_laneq_s16(__p0_240, __p1_240, __p2_240, __p3_240) __extension__ ({ \ - int16x4_t __s0_240 = __p0_240; \ - int16x4_t __s1_240 = __p1_240; \ - int16x8_t __s2_240 = __p2_240; \ - int16x4_t __rev0_240; __rev0_240 = __builtin_shufflevector(__s0_240, __s0_240, 3, 2, 1, 0); \ - int16x4_t __rev1_240; __rev1_240 = __builtin_shufflevector(__s1_240, __s1_240, 3, 2, 1, 0); \ - int16x8_t __rev2_240; __rev2_240 = __builtin_shufflevector(__s2_240, __s2_240, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __ret_240; \ - __ret_240 = __noswap_vqadd_s16(__rev0_240, __noswap_vqrdmulh_s16(__rev1_240, __noswap_splat_laneq_s16(__rev2_240, __p3_240))); \ - __ret_240 = __builtin_shufflevector(__ret_240, __ret_240, 3, 2, 1, 0); \ - __ret_240; \ +#define vqrdmlah_laneq_s16(__p0_332, __p1_332, __p2_332, __p3_332) __extension__ ({ \ + int16x4_t __s0_332 = __p0_332; \ + int16x4_t __s1_332 = __p1_332; \ + int16x8_t __s2_332 = __p2_332; \ + int16x4_t __rev0_332; __rev0_332 = __builtin_shufflevector(__s0_332, __s0_332, 3, 2, 1, 0); \ + int16x4_t __rev1_332; __rev1_332 = __builtin_shufflevector(__s1_332, __s1_332, 3, 2, 1, 0); \ + int16x8_t __rev2_332; __rev2_332 = __builtin_shufflevector(__s2_332, __s2_332, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __ret_332; \ + __ret_332 = __noswap_vqadd_s16(__rev0_332, __noswap_vqrdmulh_s16(__rev1_332, __noswap_splat_laneq_s16(__rev2_332, __p3_332))); \ + __ret_332 = __builtin_shufflevector(__ret_332, __ret_332, 3, 2, 1, 0); \ + __ret_332; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlshq_laneq_s32(__p0_241, __p1_241, __p2_241, __p3_241) __extension__ ({ \ - int32x4_t __s0_241 = __p0_241; \ - int32x4_t __s1_241 = __p1_241; \ - int32x4_t __s2_241 = __p2_241; \ - int32x4_t __ret_241; \ - __ret_241 = vqsubq_s32(__s0_241, vqrdmulhq_s32(__s1_241, splatq_laneq_s32(__s2_241, __p3_241))); \ - __ret_241; \ +#define vqrdmlshq_laneq_s32(__p0_333, __p1_333, __p2_333, __p3_333) __extension__ ({ \ + int32x4_t __s0_333 = __p0_333; \ + int32x4_t __s1_333 = __p1_333; \ + int32x4_t __s2_333 = __p2_333; \ + int32x4_t __ret_333; \ + __ret_333 = vqsubq_s32(__s0_333, vqrdmulhq_s32(__s1_333, splatq_laneq_s32(__s2_333, __p3_333))); \ + __ret_333; \ }) #else -#define vqrdmlshq_laneq_s32(__p0_242, __p1_242, __p2_242, __p3_242) __extension__ ({ \ - int32x4_t __s0_242 = __p0_242; \ - int32x4_t __s1_242 = __p1_242; \ - int32x4_t __s2_242 = __p2_242; \ - int32x4_t __rev0_242; __rev0_242 = __builtin_shufflevector(__s0_242, __s0_242, 3, 2, 1, 0); \ - int32x4_t __rev1_242; __rev1_242 = __builtin_shufflevector(__s1_242, __s1_242, 3, 2, 1, 0); \ - int32x4_t __rev2_242; __rev2_242 = __builtin_shufflevector(__s2_242, __s2_242, 3, 2, 1, 0); \ - int32x4_t __ret_242; \ - __ret_242 = __noswap_vqsubq_s32(__rev0_242, __noswap_vqrdmulhq_s32(__rev1_242, __noswap_splatq_laneq_s32(__rev2_242, __p3_242))); \ - __ret_242 = __builtin_shufflevector(__ret_242, __ret_242, 3, 2, 
1, 0); \ - __ret_242; \ +#define vqrdmlshq_laneq_s32(__p0_334, __p1_334, __p2_334, __p3_334) __extension__ ({ \ + int32x4_t __s0_334 = __p0_334; \ + int32x4_t __s1_334 = __p1_334; \ + int32x4_t __s2_334 = __p2_334; \ + int32x4_t __rev0_334; __rev0_334 = __builtin_shufflevector(__s0_334, __s0_334, 3, 2, 1, 0); \ + int32x4_t __rev1_334; __rev1_334 = __builtin_shufflevector(__s1_334, __s1_334, 3, 2, 1, 0); \ + int32x4_t __rev2_334; __rev2_334 = __builtin_shufflevector(__s2_334, __s2_334, 3, 2, 1, 0); \ + int32x4_t __ret_334; \ + __ret_334 = __noswap_vqsubq_s32(__rev0_334, __noswap_vqrdmulhq_s32(__rev1_334, __noswap_splatq_laneq_s32(__rev2_334, __p3_334))); \ + __ret_334 = __builtin_shufflevector(__ret_334, __ret_334, 3, 2, 1, 0); \ + __ret_334; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlshq_laneq_s16(__p0_243, __p1_243, __p2_243, __p3_243) __extension__ ({ \ - int16x8_t __s0_243 = __p0_243; \ - int16x8_t __s1_243 = __p1_243; \ - int16x8_t __s2_243 = __p2_243; \ - int16x8_t __ret_243; \ - __ret_243 = vqsubq_s16(__s0_243, vqrdmulhq_s16(__s1_243, splatq_laneq_s16(__s2_243, __p3_243))); \ - __ret_243; \ +#define vqrdmlshq_laneq_s16(__p0_335, __p1_335, __p2_335, __p3_335) __extension__ ({ \ + int16x8_t __s0_335 = __p0_335; \ + int16x8_t __s1_335 = __p1_335; \ + int16x8_t __s2_335 = __p2_335; \ + int16x8_t __ret_335; \ + __ret_335 = vqsubq_s16(__s0_335, vqrdmulhq_s16(__s1_335, splatq_laneq_s16(__s2_335, __p3_335))); \ + __ret_335; \ }) #else -#define vqrdmlshq_laneq_s16(__p0_244, __p1_244, __p2_244, __p3_244) __extension__ ({ \ - int16x8_t __s0_244 = __p0_244; \ - int16x8_t __s1_244 = __p1_244; \ - int16x8_t __s2_244 = __p2_244; \ - int16x8_t __rev0_244; __rev0_244 = __builtin_shufflevector(__s0_244, __s0_244, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_244; __rev1_244 = __builtin_shufflevector(__s1_244, __s1_244, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev2_244; __rev2_244 = __builtin_shufflevector(__s2_244, __s2_244, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __ret_244; \ - __ret_244 = __noswap_vqsubq_s16(__rev0_244, __noswap_vqrdmulhq_s16(__rev1_244, __noswap_splatq_laneq_s16(__rev2_244, __p3_244))); \ - __ret_244 = __builtin_shufflevector(__ret_244, __ret_244, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_244; \ +#define vqrdmlshq_laneq_s16(__p0_336, __p1_336, __p2_336, __p3_336) __extension__ ({ \ + int16x8_t __s0_336 = __p0_336; \ + int16x8_t __s1_336 = __p1_336; \ + int16x8_t __s2_336 = __p2_336; \ + int16x8_t __rev0_336; __rev0_336 = __builtin_shufflevector(__s0_336, __s0_336, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1_336; __rev1_336 = __builtin_shufflevector(__s1_336, __s1_336, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev2_336; __rev2_336 = __builtin_shufflevector(__s2_336, __s2_336, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __ret_336; \ + __ret_336 = __noswap_vqsubq_s16(__rev0_336, __noswap_vqrdmulhq_s16(__rev1_336, __noswap_splatq_laneq_s16(__rev2_336, __p3_336))); \ + __ret_336 = __builtin_shufflevector(__ret_336, __ret_336, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_336; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlsh_laneq_s32(__p0_245, __p1_245, __p2_245, __p3_245) __extension__ ({ \ - int32x2_t __s0_245 = __p0_245; \ - int32x2_t __s1_245 = __p1_245; \ - int32x4_t __s2_245 = __p2_245; \ - int32x2_t __ret_245; \ - __ret_245 = vqsub_s32(__s0_245, vqrdmulh_s32(__s1_245, splat_laneq_s32(__s2_245, __p3_245))); \ - __ret_245; \ +#define vqrdmlsh_laneq_s32(__p0_337, __p1_337, __p2_337, __p3_337) __extension__ ({ \ + int32x2_t __s0_337 = __p0_337; \ + int32x2_t __s1_337 = __p1_337; \ + 
int32x4_t __s2_337 = __p2_337; \ + int32x2_t __ret_337; \ + __ret_337 = vqsub_s32(__s0_337, vqrdmulh_s32(__s1_337, splat_laneq_s32(__s2_337, __p3_337))); \ + __ret_337; \ }) #else -#define vqrdmlsh_laneq_s32(__p0_246, __p1_246, __p2_246, __p3_246) __extension__ ({ \ - int32x2_t __s0_246 = __p0_246; \ - int32x2_t __s1_246 = __p1_246; \ - int32x4_t __s2_246 = __p2_246; \ - int32x2_t __rev0_246; __rev0_246 = __builtin_shufflevector(__s0_246, __s0_246, 1, 0); \ - int32x2_t __rev1_246; __rev1_246 = __builtin_shufflevector(__s1_246, __s1_246, 1, 0); \ - int32x4_t __rev2_246; __rev2_246 = __builtin_shufflevector(__s2_246, __s2_246, 3, 2, 1, 0); \ - int32x2_t __ret_246; \ - __ret_246 = __noswap_vqsub_s32(__rev0_246, __noswap_vqrdmulh_s32(__rev1_246, __noswap_splat_laneq_s32(__rev2_246, __p3_246))); \ - __ret_246 = __builtin_shufflevector(__ret_246, __ret_246, 1, 0); \ - __ret_246; \ +#define vqrdmlsh_laneq_s32(__p0_338, __p1_338, __p2_338, __p3_338) __extension__ ({ \ + int32x2_t __s0_338 = __p0_338; \ + int32x2_t __s1_338 = __p1_338; \ + int32x4_t __s2_338 = __p2_338; \ + int32x2_t __rev0_338; __rev0_338 = __builtin_shufflevector(__s0_338, __s0_338, 1, 0); \ + int32x2_t __rev1_338; __rev1_338 = __builtin_shufflevector(__s1_338, __s1_338, 1, 0); \ + int32x4_t __rev2_338; __rev2_338 = __builtin_shufflevector(__s2_338, __s2_338, 3, 2, 1, 0); \ + int32x2_t __ret_338; \ + __ret_338 = __noswap_vqsub_s32(__rev0_338, __noswap_vqrdmulh_s32(__rev1_338, __noswap_splat_laneq_s32(__rev2_338, __p3_338))); \ + __ret_338 = __builtin_shufflevector(__ret_338, __ret_338, 1, 0); \ + __ret_338; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlsh_laneq_s16(__p0_247, __p1_247, __p2_247, __p3_247) __extension__ ({ \ - int16x4_t __s0_247 = __p0_247; \ - int16x4_t __s1_247 = __p1_247; \ - int16x8_t __s2_247 = __p2_247; \ - int16x4_t __ret_247; \ - __ret_247 = vqsub_s16(__s0_247, vqrdmulh_s16(__s1_247, splat_laneq_s16(__s2_247, __p3_247))); \ - __ret_247; \ +#define vqrdmlsh_laneq_s16(__p0_339, __p1_339, __p2_339, __p3_339) __extension__ ({ \ + int16x4_t __s0_339 = __p0_339; \ + int16x4_t __s1_339 = __p1_339; \ + int16x8_t __s2_339 = __p2_339; \ + int16x4_t __ret_339; \ + __ret_339 = vqsub_s16(__s0_339, vqrdmulh_s16(__s1_339, splat_laneq_s16(__s2_339, __p3_339))); \ + __ret_339; \ }) #else -#define vqrdmlsh_laneq_s16(__p0_248, __p1_248, __p2_248, __p3_248) __extension__ ({ \ - int16x4_t __s0_248 = __p0_248; \ - int16x4_t __s1_248 = __p1_248; \ - int16x8_t __s2_248 = __p2_248; \ - int16x4_t __rev0_248; __rev0_248 = __builtin_shufflevector(__s0_248, __s0_248, 3, 2, 1, 0); \ - int16x4_t __rev1_248; __rev1_248 = __builtin_shufflevector(__s1_248, __s1_248, 3, 2, 1, 0); \ - int16x8_t __rev2_248; __rev2_248 = __builtin_shufflevector(__s2_248, __s2_248, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __ret_248; \ - __ret_248 = __noswap_vqsub_s16(__rev0_248, __noswap_vqrdmulh_s16(__rev1_248, __noswap_splat_laneq_s16(__rev2_248, __p3_248))); \ - __ret_248 = __builtin_shufflevector(__ret_248, __ret_248, 3, 2, 1, 0); \ - __ret_248; \ +#define vqrdmlsh_laneq_s16(__p0_340, __p1_340, __p2_340, __p3_340) __extension__ ({ \ + int16x4_t __s0_340 = __p0_340; \ + int16x4_t __s1_340 = __p1_340; \ + int16x8_t __s2_340 = __p2_340; \ + int16x4_t __rev0_340; __rev0_340 = __builtin_shufflevector(__s0_340, __s0_340, 3, 2, 1, 0); \ + int16x4_t __rev1_340; __rev1_340 = __builtin_shufflevector(__s1_340, __s1_340, 3, 2, 1, 0); \ + int16x8_t __rev2_340; __rev2_340 = __builtin_shufflevector(__s2_340, __s2_340, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __ret_340; \ 
+ __ret_340 = __noswap_vqsub_s16(__rev0_340, __noswap_vqrdmulh_s16(__rev1_340, __noswap_splat_laneq_s16(__rev2_340, __p3_340))); \ + __ret_340 = __builtin_shufflevector(__ret_340, __ret_340, 3, 2, 1, 0); \ + __ret_340; \ }) #endif @@ -45860,9 +47596,9 @@ __ai uint64_t vceqd_u64(uint64_t __p0, uint64_t __p1) { __ret = (uint64_t) __builtin_neon_vceqd_u64(__p0, __p1); return __ret; } -__ai int64_t vceqd_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vceqd_s64(__p0, __p1); +__ai uint64_t vceqd_s64(int64_t __p0, int64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vceqd_s64(__p0, __p1); return __ret; } __ai uint64_t vceqd_f64(float64_t __p0, float64_t __p1) { @@ -45896,22 +47632,6 @@ __ai uint64x1_t vceqz_p64(poly64x1_t __p0) { __ret = (uint64x1_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 19); return __ret; } -#ifdef __LITTLE_ENDIAN__ -__ai uint16x4_t vceqz_p16(poly16x4_t __p0) { - uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vceqz_v((int8x8_t)__p0, 17); - return __ret; -} -#else -__ai uint16x4_t vceqz_p16(poly16x4_t __p0) { - poly16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __ret; - __ret = (uint16x4_t) __builtin_neon_vceqz_v((int8x8_t)__rev0, 17); - __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vceqzq_p8(poly8x16_t __p0) { uint8x16_t __ret; @@ -45944,22 +47664,6 @@ __ai uint64x2_t vceqzq_p64(poly64x2_t __p0) { } #endif -#ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vceqzq_p16(poly16x8_t __p0) { - uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vceqzq_v((int8x16_t)__p0, 49); - return __ret; -} -#else -__ai uint16x8_t vceqzq_p16(poly16x8_t __p0) { - poly16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __ret; - __ret = (uint16x8_t) __builtin_neon_vceqzq_v((int8x16_t)__rev0, 49); - __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret; -} -#endif - #ifdef __LITTLE_ENDIAN__ __ai uint8x16_t vceqzq_u8(uint8x16_t __p0) { uint8x16_t __ret; @@ -46252,9 +47956,9 @@ __ai uint64_t vceqzd_u64(uint64_t __p0) { __ret = (uint64_t) __builtin_neon_vceqzd_u64(__p0); return __ret; } -__ai int64_t vceqzd_s64(int64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vceqzd_s64(__p0); +__ai uint64_t vceqzd_s64(int64_t __p0) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vceqzd_s64(__p0); return __ret; } __ai uint64_t vceqzd_f64(float64_t __p0) { @@ -46333,9 +48037,9 @@ __ai uint64x1_t vcge_s64(int64x1_t __p0, int64x1_t __p1) { __ret = (uint64x1_t)(__p0 >= __p1); return __ret; } -__ai int64_t vcged_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcged_s64(__p0, __p1); +__ai uint64_t vcged_s64(int64_t __p0, int64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcged_s64(__p0, __p1); return __ret; } __ai uint64_t vcged_u64(uint64_t __p0, uint64_t __p1) { @@ -46523,9 +48227,9 @@ __ai uint16x4_t vcgez_s16(int16x4_t __p0) { } #endif -__ai int64_t vcgezd_s64(int64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcgezd_s64(__p0); +__ai uint64_t vcgezd_s64(int64_t __p0) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcgezd_s64(__p0); return __ret; } __ai uint64_t vcgezd_f64(float64_t __p0) { @@ -46604,9 +48308,9 @@ __ai uint64x1_t vcgt_s64(int64x1_t __p0, int64x1_t __p1) { __ret = (uint64x1_t)(__p0 > __p1); return __ret; } -__ai int64_t vcgtd_s64(int64_t __p0, 
int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcgtd_s64(__p0, __p1); +__ai uint64_t vcgtd_s64(int64_t __p0, int64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcgtd_s64(__p0, __p1); return __ret; } __ai uint64_t vcgtd_u64(uint64_t __p0, uint64_t __p1) { @@ -46794,9 +48498,9 @@ __ai uint16x4_t vcgtz_s16(int16x4_t __p0) { } #endif -__ai int64_t vcgtzd_s64(int64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcgtzd_s64(__p0); +__ai uint64_t vcgtzd_s64(int64_t __p0) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcgtzd_s64(__p0); return __ret; } __ai uint64_t vcgtzd_f64(float64_t __p0) { @@ -46880,9 +48584,9 @@ __ai uint64_t vcled_u64(uint64_t __p0, uint64_t __p1) { __ret = (uint64_t) __builtin_neon_vcled_u64(__p0, __p1); return __ret; } -__ai int64_t vcled_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcled_s64(__p0, __p1); +__ai uint64_t vcled_s64(int64_t __p0, int64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcled_s64(__p0, __p1); return __ret; } __ai uint64_t vcled_f64(float64_t __p0, float64_t __p1) { @@ -47065,9 +48769,9 @@ __ai uint16x4_t vclez_s16(int16x4_t __p0) { } #endif -__ai int64_t vclezd_s64(int64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vclezd_s64(__p0); +__ai uint64_t vclezd_s64(int64_t __p0) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vclezd_s64(__p0); return __ret; } __ai uint64_t vclezd_f64(float64_t __p0) { @@ -47151,9 +48855,9 @@ __ai uint64_t vcltd_u64(uint64_t __p0, uint64_t __p1) { __ret = (uint64_t) __builtin_neon_vcltd_u64(__p0, __p1); return __ret; } -__ai int64_t vcltd_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcltd_s64(__p0, __p1); +__ai uint64_t vcltd_s64(int64_t __p0, int64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcltd_s64(__p0, __p1); return __ret; } __ai uint64_t vcltd_f64(float64_t __p0, float64_t __p1) { @@ -47336,9 +49040,9 @@ __ai uint16x4_t vcltz_s16(int16x4_t __p0) { } #endif -__ai int64_t vcltzd_s64(int64_t __p0) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vcltzd_s64(__p0); +__ai uint64_t vcltzd_s64(int64_t __p0) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vcltzd_s64(__p0); return __ret; } __ai uint64_t vcltzd_f64(float64_t __p0) { @@ -47382,892 +49086,892 @@ __ai float64x2_t vcombine_f64(float64x1_t __p0, float64x1_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_p8(__p0_249, __p1_249, __p2_249, __p3_249) __extension__ ({ \ - poly8x16_t __s0_249 = __p0_249; \ - poly8x8_t __s2_249 = __p2_249; \ - poly8x16_t __ret_249; \ - __ret_249 = vsetq_lane_p8(vget_lane_p8(__s2_249, __p3_249), __s0_249, __p1_249); \ - __ret_249; \ +#define vcopyq_lane_p8(__p0_341, __p1_341, __p2_341, __p3_341) __extension__ ({ \ + poly8x16_t __s0_341 = __p0_341; \ + poly8x8_t __s2_341 = __p2_341; \ + poly8x16_t __ret_341; \ + __ret_341 = vsetq_lane_p8(vget_lane_p8(__s2_341, __p3_341), __s0_341, __p1_341); \ + __ret_341; \ }) #else -#define vcopyq_lane_p8(__p0_250, __p1_250, __p2_250, __p3_250) __extension__ ({ \ - poly8x16_t __s0_250 = __p0_250; \ - poly8x8_t __s2_250 = __p2_250; \ - poly8x16_t __rev0_250; __rev0_250 = __builtin_shufflevector(__s0_250, __s0_250, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x8_t __rev2_250; __rev2_250 = __builtin_shufflevector(__s2_250, __s2_250, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x16_t __ret_250; \ - __ret_250 = __noswap_vsetq_lane_p8(__noswap_vget_lane_p8(__rev2_250, 
__p3_250), __rev0_250, __p1_250); \ - __ret_250 = __builtin_shufflevector(__ret_250, __ret_250, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_250; \ +#define vcopyq_lane_p8(__p0_342, __p1_342, __p2_342, __p3_342) __extension__ ({ \ + poly8x16_t __s0_342 = __p0_342; \ + poly8x8_t __s2_342 = __p2_342; \ + poly8x16_t __rev0_342; __rev0_342 = __builtin_shufflevector(__s0_342, __s0_342, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x8_t __rev2_342; __rev2_342 = __builtin_shufflevector(__s2_342, __s2_342, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x16_t __ret_342; \ + __ret_342 = __noswap_vsetq_lane_p8(__noswap_vget_lane_p8(__rev2_342, __p3_342), __rev0_342, __p1_342); \ + __ret_342 = __builtin_shufflevector(__ret_342, __ret_342, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_342; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_p16(__p0_251, __p1_251, __p2_251, __p3_251) __extension__ ({ \ - poly16x8_t __s0_251 = __p0_251; \ - poly16x4_t __s2_251 = __p2_251; \ - poly16x8_t __ret_251; \ - __ret_251 = vsetq_lane_p16(vget_lane_p16(__s2_251, __p3_251), __s0_251, __p1_251); \ - __ret_251; \ +#define vcopyq_lane_p16(__p0_343, __p1_343, __p2_343, __p3_343) __extension__ ({ \ + poly16x8_t __s0_343 = __p0_343; \ + poly16x4_t __s2_343 = __p2_343; \ + poly16x8_t __ret_343; \ + __ret_343 = vsetq_lane_p16(vget_lane_p16(__s2_343, __p3_343), __s0_343, __p1_343); \ + __ret_343; \ }) #else -#define vcopyq_lane_p16(__p0_252, __p1_252, __p2_252, __p3_252) __extension__ ({ \ - poly16x8_t __s0_252 = __p0_252; \ - poly16x4_t __s2_252 = __p2_252; \ - poly16x8_t __rev0_252; __rev0_252 = __builtin_shufflevector(__s0_252, __s0_252, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly16x4_t __rev2_252; __rev2_252 = __builtin_shufflevector(__s2_252, __s2_252, 3, 2, 1, 0); \ - poly16x8_t __ret_252; \ - __ret_252 = __noswap_vsetq_lane_p16(__noswap_vget_lane_p16(__rev2_252, __p3_252), __rev0_252, __p1_252); \ - __ret_252 = __builtin_shufflevector(__ret_252, __ret_252, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_252; \ +#define vcopyq_lane_p16(__p0_344, __p1_344, __p2_344, __p3_344) __extension__ ({ \ + poly16x8_t __s0_344 = __p0_344; \ + poly16x4_t __s2_344 = __p2_344; \ + poly16x8_t __rev0_344; __rev0_344 = __builtin_shufflevector(__s0_344, __s0_344, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x4_t __rev2_344; __rev2_344 = __builtin_shufflevector(__s2_344, __s2_344, 3, 2, 1, 0); \ + poly16x8_t __ret_344; \ + __ret_344 = __noswap_vsetq_lane_p16(__noswap_vget_lane_p16(__rev2_344, __p3_344), __rev0_344, __p1_344); \ + __ret_344 = __builtin_shufflevector(__ret_344, __ret_344, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_344; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_u8(__p0_253, __p1_253, __p2_253, __p3_253) __extension__ ({ \ - uint8x16_t __s0_253 = __p0_253; \ - uint8x8_t __s2_253 = __p2_253; \ - uint8x16_t __ret_253; \ - __ret_253 = vsetq_lane_u8(vget_lane_u8(__s2_253, __p3_253), __s0_253, __p1_253); \ - __ret_253; \ +#define vcopyq_lane_u8(__p0_345, __p1_345, __p2_345, __p3_345) __extension__ ({ \ + uint8x16_t __s0_345 = __p0_345; \ + uint8x8_t __s2_345 = __p2_345; \ + uint8x16_t __ret_345; \ + __ret_345 = vsetq_lane_u8(vget_lane_u8(__s2_345, __p3_345), __s0_345, __p1_345); \ + __ret_345; \ }) #else -#define vcopyq_lane_u8(__p0_254, __p1_254, __p2_254, __p3_254) __extension__ ({ \ - uint8x16_t __s0_254 = __p0_254; \ - uint8x8_t __s2_254 = __p2_254; \ - uint8x16_t __rev0_254; __rev0_254 = __builtin_shufflevector(__s0_254, __s0_254, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t 
__rev2_254; __rev2_254 = __builtin_shufflevector(__s2_254, __s2_254, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __ret_254; \ - __ret_254 = __noswap_vsetq_lane_u8(__noswap_vget_lane_u8(__rev2_254, __p3_254), __rev0_254, __p1_254); \ - __ret_254 = __builtin_shufflevector(__ret_254, __ret_254, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_254; \ +#define vcopyq_lane_u8(__p0_346, __p1_346, __p2_346, __p3_346) __extension__ ({ \ + uint8x16_t __s0_346 = __p0_346; \ + uint8x8_t __s2_346 = __p2_346; \ + uint8x16_t __rev0_346; __rev0_346 = __builtin_shufflevector(__s0_346, __s0_346, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev2_346; __rev2_346 = __builtin_shufflevector(__s2_346, __s2_346, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __ret_346; \ + __ret_346 = __noswap_vsetq_lane_u8(__noswap_vget_lane_u8(__rev2_346, __p3_346), __rev0_346, __p1_346); \ + __ret_346 = __builtin_shufflevector(__ret_346, __ret_346, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_346; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_u32(__p0_255, __p1_255, __p2_255, __p3_255) __extension__ ({ \ - uint32x4_t __s0_255 = __p0_255; \ - uint32x2_t __s2_255 = __p2_255; \ - uint32x4_t __ret_255; \ - __ret_255 = vsetq_lane_u32(vget_lane_u32(__s2_255, __p3_255), __s0_255, __p1_255); \ - __ret_255; \ +#define vcopyq_lane_u32(__p0_347, __p1_347, __p2_347, __p3_347) __extension__ ({ \ + uint32x4_t __s0_347 = __p0_347; \ + uint32x2_t __s2_347 = __p2_347; \ + uint32x4_t __ret_347; \ + __ret_347 = vsetq_lane_u32(vget_lane_u32(__s2_347, __p3_347), __s0_347, __p1_347); \ + __ret_347; \ }) #else -#define vcopyq_lane_u32(__p0_256, __p1_256, __p2_256, __p3_256) __extension__ ({ \ - uint32x4_t __s0_256 = __p0_256; \ - uint32x2_t __s2_256 = __p2_256; \ - uint32x4_t __rev0_256; __rev0_256 = __builtin_shufflevector(__s0_256, __s0_256, 3, 2, 1, 0); \ - uint32x2_t __rev2_256; __rev2_256 = __builtin_shufflevector(__s2_256, __s2_256, 1, 0); \ - uint32x4_t __ret_256; \ - __ret_256 = __noswap_vsetq_lane_u32(__noswap_vget_lane_u32(__rev2_256, __p3_256), __rev0_256, __p1_256); \ - __ret_256 = __builtin_shufflevector(__ret_256, __ret_256, 3, 2, 1, 0); \ - __ret_256; \ +#define vcopyq_lane_u32(__p0_348, __p1_348, __p2_348, __p3_348) __extension__ ({ \ + uint32x4_t __s0_348 = __p0_348; \ + uint32x2_t __s2_348 = __p2_348; \ + uint32x4_t __rev0_348; __rev0_348 = __builtin_shufflevector(__s0_348, __s0_348, 3, 2, 1, 0); \ + uint32x2_t __rev2_348; __rev2_348 = __builtin_shufflevector(__s2_348, __s2_348, 1, 0); \ + uint32x4_t __ret_348; \ + __ret_348 = __noswap_vsetq_lane_u32(__noswap_vget_lane_u32(__rev2_348, __p3_348), __rev0_348, __p1_348); \ + __ret_348 = __builtin_shufflevector(__ret_348, __ret_348, 3, 2, 1, 0); \ + __ret_348; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_u64(__p0_257, __p1_257, __p2_257, __p3_257) __extension__ ({ \ - uint64x2_t __s0_257 = __p0_257; \ - uint64x1_t __s2_257 = __p2_257; \ - uint64x2_t __ret_257; \ - __ret_257 = vsetq_lane_u64(vget_lane_u64(__s2_257, __p3_257), __s0_257, __p1_257); \ - __ret_257; \ +#define vcopyq_lane_u64(__p0_349, __p1_349, __p2_349, __p3_349) __extension__ ({ \ + uint64x2_t __s0_349 = __p0_349; \ + uint64x1_t __s2_349 = __p2_349; \ + uint64x2_t __ret_349; \ + __ret_349 = vsetq_lane_u64(vget_lane_u64(__s2_349, __p3_349), __s0_349, __p1_349); \ + __ret_349; \ }) #else -#define vcopyq_lane_u64(__p0_258, __p1_258, __p2_258, __p3_258) __extension__ ({ \ - uint64x2_t __s0_258 = __p0_258; \ - uint64x1_t __s2_258 = __p2_258; \ - 
uint64x2_t __rev0_258; __rev0_258 = __builtin_shufflevector(__s0_258, __s0_258, 1, 0); \ - uint64x2_t __ret_258; \ - __ret_258 = __noswap_vsetq_lane_u64(vget_lane_u64(__s2_258, __p3_258), __rev0_258, __p1_258); \ - __ret_258 = __builtin_shufflevector(__ret_258, __ret_258, 1, 0); \ - __ret_258; \ +#define vcopyq_lane_u64(__p0_350, __p1_350, __p2_350, __p3_350) __extension__ ({ \ + uint64x2_t __s0_350 = __p0_350; \ + uint64x1_t __s2_350 = __p2_350; \ + uint64x2_t __rev0_350; __rev0_350 = __builtin_shufflevector(__s0_350, __s0_350, 1, 0); \ + uint64x2_t __ret_350; \ + __ret_350 = __noswap_vsetq_lane_u64(vget_lane_u64(__s2_350, __p3_350), __rev0_350, __p1_350); \ + __ret_350 = __builtin_shufflevector(__ret_350, __ret_350, 1, 0); \ + __ret_350; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_u16(__p0_259, __p1_259, __p2_259, __p3_259) __extension__ ({ \ - uint16x8_t __s0_259 = __p0_259; \ - uint16x4_t __s2_259 = __p2_259; \ - uint16x8_t __ret_259; \ - __ret_259 = vsetq_lane_u16(vget_lane_u16(__s2_259, __p3_259), __s0_259, __p1_259); \ - __ret_259; \ +#define vcopyq_lane_u16(__p0_351, __p1_351, __p2_351, __p3_351) __extension__ ({ \ + uint16x8_t __s0_351 = __p0_351; \ + uint16x4_t __s2_351 = __p2_351; \ + uint16x8_t __ret_351; \ + __ret_351 = vsetq_lane_u16(vget_lane_u16(__s2_351, __p3_351), __s0_351, __p1_351); \ + __ret_351; \ }) #else -#define vcopyq_lane_u16(__p0_260, __p1_260, __p2_260, __p3_260) __extension__ ({ \ - uint16x8_t __s0_260 = __p0_260; \ - uint16x4_t __s2_260 = __p2_260; \ - uint16x8_t __rev0_260; __rev0_260 = __builtin_shufflevector(__s0_260, __s0_260, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x4_t __rev2_260; __rev2_260 = __builtin_shufflevector(__s2_260, __s2_260, 3, 2, 1, 0); \ - uint16x8_t __ret_260; \ - __ret_260 = __noswap_vsetq_lane_u16(__noswap_vget_lane_u16(__rev2_260, __p3_260), __rev0_260, __p1_260); \ - __ret_260 = __builtin_shufflevector(__ret_260, __ret_260, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_260; \ +#define vcopyq_lane_u16(__p0_352, __p1_352, __p2_352, __p3_352) __extension__ ({ \ + uint16x8_t __s0_352 = __p0_352; \ + uint16x4_t __s2_352 = __p2_352; \ + uint16x8_t __rev0_352; __rev0_352 = __builtin_shufflevector(__s0_352, __s0_352, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x4_t __rev2_352; __rev2_352 = __builtin_shufflevector(__s2_352, __s2_352, 3, 2, 1, 0); \ + uint16x8_t __ret_352; \ + __ret_352 = __noswap_vsetq_lane_u16(__noswap_vget_lane_u16(__rev2_352, __p3_352), __rev0_352, __p1_352); \ + __ret_352 = __builtin_shufflevector(__ret_352, __ret_352, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_352; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_s8(__p0_261, __p1_261, __p2_261, __p3_261) __extension__ ({ \ - int8x16_t __s0_261 = __p0_261; \ - int8x8_t __s2_261 = __p2_261; \ - int8x16_t __ret_261; \ - __ret_261 = vsetq_lane_s8(vget_lane_s8(__s2_261, __p3_261), __s0_261, __p1_261); \ - __ret_261; \ +#define vcopyq_lane_s8(__p0_353, __p1_353, __p2_353, __p3_353) __extension__ ({ \ + int8x16_t __s0_353 = __p0_353; \ + int8x8_t __s2_353 = __p2_353; \ + int8x16_t __ret_353; \ + __ret_353 = vsetq_lane_s8(vget_lane_s8(__s2_353, __p3_353), __s0_353, __p1_353); \ + __ret_353; \ }) #else -#define vcopyq_lane_s8(__p0_262, __p1_262, __p2_262, __p3_262) __extension__ ({ \ - int8x16_t __s0_262 = __p0_262; \ - int8x8_t __s2_262 = __p2_262; \ - int8x16_t __rev0_262; __rev0_262 = __builtin_shufflevector(__s0_262, __s0_262, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __rev2_262; __rev2_262 = __builtin_shufflevector(__s2_262, __s2_262, 7, 6, 5, 4, 3, 2, 1, 0); 
\ - int8x16_t __ret_262; \ - __ret_262 = __noswap_vsetq_lane_s8(__noswap_vget_lane_s8(__rev2_262, __p3_262), __rev0_262, __p1_262); \ - __ret_262 = __builtin_shufflevector(__ret_262, __ret_262, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_262; \ +#define vcopyq_lane_s8(__p0_354, __p1_354, __p2_354, __p3_354) __extension__ ({ \ + int8x16_t __s0_354 = __p0_354; \ + int8x8_t __s2_354 = __p2_354; \ + int8x16_t __rev0_354; __rev0_354 = __builtin_shufflevector(__s0_354, __s0_354, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev2_354; __rev2_354 = __builtin_shufflevector(__s2_354, __s2_354, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __ret_354; \ + __ret_354 = __noswap_vsetq_lane_s8(__noswap_vget_lane_s8(__rev2_354, __p3_354), __rev0_354, __p1_354); \ + __ret_354 = __builtin_shufflevector(__ret_354, __ret_354, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_354; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_f32(__p0_263, __p1_263, __p2_263, __p3_263) __extension__ ({ \ - float32x4_t __s0_263 = __p0_263; \ - float32x2_t __s2_263 = __p2_263; \ - float32x4_t __ret_263; \ - __ret_263 = vsetq_lane_f32(vget_lane_f32(__s2_263, __p3_263), __s0_263, __p1_263); \ - __ret_263; \ +#define vcopyq_lane_f32(__p0_355, __p1_355, __p2_355, __p3_355) __extension__ ({ \ + float32x4_t __s0_355 = __p0_355; \ + float32x2_t __s2_355 = __p2_355; \ + float32x4_t __ret_355; \ + __ret_355 = vsetq_lane_f32(vget_lane_f32(__s2_355, __p3_355), __s0_355, __p1_355); \ + __ret_355; \ }) #else -#define vcopyq_lane_f32(__p0_264, __p1_264, __p2_264, __p3_264) __extension__ ({ \ - float32x4_t __s0_264 = __p0_264; \ - float32x2_t __s2_264 = __p2_264; \ - float32x4_t __rev0_264; __rev0_264 = __builtin_shufflevector(__s0_264, __s0_264, 3, 2, 1, 0); \ - float32x2_t __rev2_264; __rev2_264 = __builtin_shufflevector(__s2_264, __s2_264, 1, 0); \ - float32x4_t __ret_264; \ - __ret_264 = __noswap_vsetq_lane_f32(__noswap_vget_lane_f32(__rev2_264, __p3_264), __rev0_264, __p1_264); \ - __ret_264 = __builtin_shufflevector(__ret_264, __ret_264, 3, 2, 1, 0); \ - __ret_264; \ +#define vcopyq_lane_f32(__p0_356, __p1_356, __p2_356, __p3_356) __extension__ ({ \ + float32x4_t __s0_356 = __p0_356; \ + float32x2_t __s2_356 = __p2_356; \ + float32x4_t __rev0_356; __rev0_356 = __builtin_shufflevector(__s0_356, __s0_356, 3, 2, 1, 0); \ + float32x2_t __rev2_356; __rev2_356 = __builtin_shufflevector(__s2_356, __s2_356, 1, 0); \ + float32x4_t __ret_356; \ + __ret_356 = __noswap_vsetq_lane_f32(__noswap_vget_lane_f32(__rev2_356, __p3_356), __rev0_356, __p1_356); \ + __ret_356 = __builtin_shufflevector(__ret_356, __ret_356, 3, 2, 1, 0); \ + __ret_356; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_s32(__p0_265, __p1_265, __p2_265, __p3_265) __extension__ ({ \ - int32x4_t __s0_265 = __p0_265; \ - int32x2_t __s2_265 = __p2_265; \ - int32x4_t __ret_265; \ - __ret_265 = vsetq_lane_s32(vget_lane_s32(__s2_265, __p3_265), __s0_265, __p1_265); \ - __ret_265; \ +#define vcopyq_lane_s32(__p0_357, __p1_357, __p2_357, __p3_357) __extension__ ({ \ + int32x4_t __s0_357 = __p0_357; \ + int32x2_t __s2_357 = __p2_357; \ + int32x4_t __ret_357; \ + __ret_357 = vsetq_lane_s32(vget_lane_s32(__s2_357, __p3_357), __s0_357, __p1_357); \ + __ret_357; \ }) #else -#define vcopyq_lane_s32(__p0_266, __p1_266, __p2_266, __p3_266) __extension__ ({ \ - int32x4_t __s0_266 = __p0_266; \ - int32x2_t __s2_266 = __p2_266; \ - int32x4_t __rev0_266; __rev0_266 = __builtin_shufflevector(__s0_266, __s0_266, 3, 2, 1, 0); \ - 
int32x2_t __rev2_266; __rev2_266 = __builtin_shufflevector(__s2_266, __s2_266, 1, 0); \ - int32x4_t __ret_266; \ - __ret_266 = __noswap_vsetq_lane_s32(__noswap_vget_lane_s32(__rev2_266, __p3_266), __rev0_266, __p1_266); \ - __ret_266 = __builtin_shufflevector(__ret_266, __ret_266, 3, 2, 1, 0); \ - __ret_266; \ +#define vcopyq_lane_s32(__p0_358, __p1_358, __p2_358, __p3_358) __extension__ ({ \ + int32x4_t __s0_358 = __p0_358; \ + int32x2_t __s2_358 = __p2_358; \ + int32x4_t __rev0_358; __rev0_358 = __builtin_shufflevector(__s0_358, __s0_358, 3, 2, 1, 0); \ + int32x2_t __rev2_358; __rev2_358 = __builtin_shufflevector(__s2_358, __s2_358, 1, 0); \ + int32x4_t __ret_358; \ + __ret_358 = __noswap_vsetq_lane_s32(__noswap_vget_lane_s32(__rev2_358, __p3_358), __rev0_358, __p1_358); \ + __ret_358 = __builtin_shufflevector(__ret_358, __ret_358, 3, 2, 1, 0); \ + __ret_358; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_s64(__p0_267, __p1_267, __p2_267, __p3_267) __extension__ ({ \ - int64x2_t __s0_267 = __p0_267; \ - int64x1_t __s2_267 = __p2_267; \ - int64x2_t __ret_267; \ - __ret_267 = vsetq_lane_s64(vget_lane_s64(__s2_267, __p3_267), __s0_267, __p1_267); \ - __ret_267; \ +#define vcopyq_lane_s64(__p0_359, __p1_359, __p2_359, __p3_359) __extension__ ({ \ + int64x2_t __s0_359 = __p0_359; \ + int64x1_t __s2_359 = __p2_359; \ + int64x2_t __ret_359; \ + __ret_359 = vsetq_lane_s64(vget_lane_s64(__s2_359, __p3_359), __s0_359, __p1_359); \ + __ret_359; \ }) #else -#define vcopyq_lane_s64(__p0_268, __p1_268, __p2_268, __p3_268) __extension__ ({ \ - int64x2_t __s0_268 = __p0_268; \ - int64x1_t __s2_268 = __p2_268; \ - int64x2_t __rev0_268; __rev0_268 = __builtin_shufflevector(__s0_268, __s0_268, 1, 0); \ - int64x2_t __ret_268; \ - __ret_268 = __noswap_vsetq_lane_s64(vget_lane_s64(__s2_268, __p3_268), __rev0_268, __p1_268); \ - __ret_268 = __builtin_shufflevector(__ret_268, __ret_268, 1, 0); \ - __ret_268; \ +#define vcopyq_lane_s64(__p0_360, __p1_360, __p2_360, __p3_360) __extension__ ({ \ + int64x2_t __s0_360 = __p0_360; \ + int64x1_t __s2_360 = __p2_360; \ + int64x2_t __rev0_360; __rev0_360 = __builtin_shufflevector(__s0_360, __s0_360, 1, 0); \ + int64x2_t __ret_360; \ + __ret_360 = __noswap_vsetq_lane_s64(vget_lane_s64(__s2_360, __p3_360), __rev0_360, __p1_360); \ + __ret_360 = __builtin_shufflevector(__ret_360, __ret_360, 1, 0); \ + __ret_360; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_s16(__p0_269, __p1_269, __p2_269, __p3_269) __extension__ ({ \ - int16x8_t __s0_269 = __p0_269; \ - int16x4_t __s2_269 = __p2_269; \ - int16x8_t __ret_269; \ - __ret_269 = vsetq_lane_s16(vget_lane_s16(__s2_269, __p3_269), __s0_269, __p1_269); \ - __ret_269; \ +#define vcopyq_lane_s16(__p0_361, __p1_361, __p2_361, __p3_361) __extension__ ({ \ + int16x8_t __s0_361 = __p0_361; \ + int16x4_t __s2_361 = __p2_361; \ + int16x8_t __ret_361; \ + __ret_361 = vsetq_lane_s16(vget_lane_s16(__s2_361, __p3_361), __s0_361, __p1_361); \ + __ret_361; \ }) #else -#define vcopyq_lane_s16(__p0_270, __p1_270, __p2_270, __p3_270) __extension__ ({ \ - int16x8_t __s0_270 = __p0_270; \ - int16x4_t __s2_270 = __p2_270; \ - int16x8_t __rev0_270; __rev0_270 = __builtin_shufflevector(__s0_270, __s0_270, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev2_270; __rev2_270 = __builtin_shufflevector(__s2_270, __s2_270, 3, 2, 1, 0); \ - int16x8_t __ret_270; \ - __ret_270 = __noswap_vsetq_lane_s16(__noswap_vget_lane_s16(__rev2_270, __p3_270), __rev0_270, __p1_270); \ - __ret_270 = __builtin_shufflevector(__ret_270, __ret_270, 7, 6, 5, 4, 
3, 2, 1, 0); \ - __ret_270; \ +#define vcopyq_lane_s16(__p0_362, __p1_362, __p2_362, __p3_362) __extension__ ({ \ + int16x8_t __s0_362 = __p0_362; \ + int16x4_t __s2_362 = __p2_362; \ + int16x8_t __rev0_362; __rev0_362 = __builtin_shufflevector(__s0_362, __s0_362, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __rev2_362; __rev2_362 = __builtin_shufflevector(__s2_362, __s2_362, 3, 2, 1, 0); \ + int16x8_t __ret_362; \ + __ret_362 = __noswap_vsetq_lane_s16(__noswap_vget_lane_s16(__rev2_362, __p3_362), __rev0_362, __p1_362); \ + __ret_362 = __builtin_shufflevector(__ret_362, __ret_362, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_362; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_p8(__p0_271, __p1_271, __p2_271, __p3_271) __extension__ ({ \ - poly8x8_t __s0_271 = __p0_271; \ - poly8x8_t __s2_271 = __p2_271; \ - poly8x8_t __ret_271; \ - __ret_271 = vset_lane_p8(vget_lane_p8(__s2_271, __p3_271), __s0_271, __p1_271); \ - __ret_271; \ +#define vcopy_lane_p8(__p0_363, __p1_363, __p2_363, __p3_363) __extension__ ({ \ + poly8x8_t __s0_363 = __p0_363; \ + poly8x8_t __s2_363 = __p2_363; \ + poly8x8_t __ret_363; \ + __ret_363 = vset_lane_p8(vget_lane_p8(__s2_363, __p3_363), __s0_363, __p1_363); \ + __ret_363; \ }) #else -#define vcopy_lane_p8(__p0_272, __p1_272, __p2_272, __p3_272) __extension__ ({ \ - poly8x8_t __s0_272 = __p0_272; \ - poly8x8_t __s2_272 = __p2_272; \ - poly8x8_t __rev0_272; __rev0_272 = __builtin_shufflevector(__s0_272, __s0_272, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x8_t __rev2_272; __rev2_272 = __builtin_shufflevector(__s2_272, __s2_272, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x8_t __ret_272; \ - __ret_272 = __noswap_vset_lane_p8(__noswap_vget_lane_p8(__rev2_272, __p3_272), __rev0_272, __p1_272); \ - __ret_272 = __builtin_shufflevector(__ret_272, __ret_272, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_272; \ +#define vcopy_lane_p8(__p0_364, __p1_364, __p2_364, __p3_364) __extension__ ({ \ + poly8x8_t __s0_364 = __p0_364; \ + poly8x8_t __s2_364 = __p2_364; \ + poly8x8_t __rev0_364; __rev0_364 = __builtin_shufflevector(__s0_364, __s0_364, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x8_t __rev2_364; __rev2_364 = __builtin_shufflevector(__s2_364, __s2_364, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x8_t __ret_364; \ + __ret_364 = __noswap_vset_lane_p8(__noswap_vget_lane_p8(__rev2_364, __p3_364), __rev0_364, __p1_364); \ + __ret_364 = __builtin_shufflevector(__ret_364, __ret_364, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_364; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_p16(__p0_273, __p1_273, __p2_273, __p3_273) __extension__ ({ \ - poly16x4_t __s0_273 = __p0_273; \ - poly16x4_t __s2_273 = __p2_273; \ - poly16x4_t __ret_273; \ - __ret_273 = vset_lane_p16(vget_lane_p16(__s2_273, __p3_273), __s0_273, __p1_273); \ - __ret_273; \ +#define vcopy_lane_p16(__p0_365, __p1_365, __p2_365, __p3_365) __extension__ ({ \ + poly16x4_t __s0_365 = __p0_365; \ + poly16x4_t __s2_365 = __p2_365; \ + poly16x4_t __ret_365; \ + __ret_365 = vset_lane_p16(vget_lane_p16(__s2_365, __p3_365), __s0_365, __p1_365); \ + __ret_365; \ }) #else -#define vcopy_lane_p16(__p0_274, __p1_274, __p2_274, __p3_274) __extension__ ({ \ - poly16x4_t __s0_274 = __p0_274; \ - poly16x4_t __s2_274 = __p2_274; \ - poly16x4_t __rev0_274; __rev0_274 = __builtin_shufflevector(__s0_274, __s0_274, 3, 2, 1, 0); \ - poly16x4_t __rev2_274; __rev2_274 = __builtin_shufflevector(__s2_274, __s2_274, 3, 2, 1, 0); \ - poly16x4_t __ret_274; \ - __ret_274 = __noswap_vset_lane_p16(__noswap_vget_lane_p16(__rev2_274, __p3_274), __rev0_274, __p1_274); \ - __ret_274 = 
__builtin_shufflevector(__ret_274, __ret_274, 3, 2, 1, 0); \ - __ret_274; \ +#define vcopy_lane_p16(__p0_366, __p1_366, __p2_366, __p3_366) __extension__ ({ \ + poly16x4_t __s0_366 = __p0_366; \ + poly16x4_t __s2_366 = __p2_366; \ + poly16x4_t __rev0_366; __rev0_366 = __builtin_shufflevector(__s0_366, __s0_366, 3, 2, 1, 0); \ + poly16x4_t __rev2_366; __rev2_366 = __builtin_shufflevector(__s2_366, __s2_366, 3, 2, 1, 0); \ + poly16x4_t __ret_366; \ + __ret_366 = __noswap_vset_lane_p16(__noswap_vget_lane_p16(__rev2_366, __p3_366), __rev0_366, __p1_366); \ + __ret_366 = __builtin_shufflevector(__ret_366, __ret_366, 3, 2, 1, 0); \ + __ret_366; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_u8(__p0_275, __p1_275, __p2_275, __p3_275) __extension__ ({ \ - uint8x8_t __s0_275 = __p0_275; \ - uint8x8_t __s2_275 = __p2_275; \ - uint8x8_t __ret_275; \ - __ret_275 = vset_lane_u8(vget_lane_u8(__s2_275, __p3_275), __s0_275, __p1_275); \ - __ret_275; \ +#define vcopy_lane_u8(__p0_367, __p1_367, __p2_367, __p3_367) __extension__ ({ \ + uint8x8_t __s0_367 = __p0_367; \ + uint8x8_t __s2_367 = __p2_367; \ + uint8x8_t __ret_367; \ + __ret_367 = vset_lane_u8(vget_lane_u8(__s2_367, __p3_367), __s0_367, __p1_367); \ + __ret_367; \ }) #else -#define vcopy_lane_u8(__p0_276, __p1_276, __p2_276, __p3_276) __extension__ ({ \ - uint8x8_t __s0_276 = __p0_276; \ - uint8x8_t __s2_276 = __p2_276; \ - uint8x8_t __rev0_276; __rev0_276 = __builtin_shufflevector(__s0_276, __s0_276, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev2_276; __rev2_276 = __builtin_shufflevector(__s2_276, __s2_276, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __ret_276; \ - __ret_276 = __noswap_vset_lane_u8(__noswap_vget_lane_u8(__rev2_276, __p3_276), __rev0_276, __p1_276); \ - __ret_276 = __builtin_shufflevector(__ret_276, __ret_276, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_276; \ +#define vcopy_lane_u8(__p0_368, __p1_368, __p2_368, __p3_368) __extension__ ({ \ + uint8x8_t __s0_368 = __p0_368; \ + uint8x8_t __s2_368 = __p2_368; \ + uint8x8_t __rev0_368; __rev0_368 = __builtin_shufflevector(__s0_368, __s0_368, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev2_368; __rev2_368 = __builtin_shufflevector(__s2_368, __s2_368, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __ret_368; \ + __ret_368 = __noswap_vset_lane_u8(__noswap_vget_lane_u8(__rev2_368, __p3_368), __rev0_368, __p1_368); \ + __ret_368 = __builtin_shufflevector(__ret_368, __ret_368, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_368; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_u32(__p0_277, __p1_277, __p2_277, __p3_277) __extension__ ({ \ - uint32x2_t __s0_277 = __p0_277; \ - uint32x2_t __s2_277 = __p2_277; \ - uint32x2_t __ret_277; \ - __ret_277 = vset_lane_u32(vget_lane_u32(__s2_277, __p3_277), __s0_277, __p1_277); \ - __ret_277; \ +#define vcopy_lane_u32(__p0_369, __p1_369, __p2_369, __p3_369) __extension__ ({ \ + uint32x2_t __s0_369 = __p0_369; \ + uint32x2_t __s2_369 = __p2_369; \ + uint32x2_t __ret_369; \ + __ret_369 = vset_lane_u32(vget_lane_u32(__s2_369, __p3_369), __s0_369, __p1_369); \ + __ret_369; \ }) #else -#define vcopy_lane_u32(__p0_278, __p1_278, __p2_278, __p3_278) __extension__ ({ \ - uint32x2_t __s0_278 = __p0_278; \ - uint32x2_t __s2_278 = __p2_278; \ - uint32x2_t __rev0_278; __rev0_278 = __builtin_shufflevector(__s0_278, __s0_278, 1, 0); \ - uint32x2_t __rev2_278; __rev2_278 = __builtin_shufflevector(__s2_278, __s2_278, 1, 0); \ - uint32x2_t __ret_278; \ - __ret_278 = __noswap_vset_lane_u32(__noswap_vget_lane_u32(__rev2_278, __p3_278), __rev0_278, __p1_278); \ - __ret_278 = 
__builtin_shufflevector(__ret_278, __ret_278, 1, 0); \ - __ret_278; \ +#define vcopy_lane_u32(__p0_370, __p1_370, __p2_370, __p3_370) __extension__ ({ \ + uint32x2_t __s0_370 = __p0_370; \ + uint32x2_t __s2_370 = __p2_370; \ + uint32x2_t __rev0_370; __rev0_370 = __builtin_shufflevector(__s0_370, __s0_370, 1, 0); \ + uint32x2_t __rev2_370; __rev2_370 = __builtin_shufflevector(__s2_370, __s2_370, 1, 0); \ + uint32x2_t __ret_370; \ + __ret_370 = __noswap_vset_lane_u32(__noswap_vget_lane_u32(__rev2_370, __p3_370), __rev0_370, __p1_370); \ + __ret_370 = __builtin_shufflevector(__ret_370, __ret_370, 1, 0); \ + __ret_370; \ }) #endif -#define vcopy_lane_u64(__p0_279, __p1_279, __p2_279, __p3_279) __extension__ ({ \ - uint64x1_t __s0_279 = __p0_279; \ - uint64x1_t __s2_279 = __p2_279; \ - uint64x1_t __ret_279; \ - __ret_279 = vset_lane_u64(vget_lane_u64(__s2_279, __p3_279), __s0_279, __p1_279); \ - __ret_279; \ +#define vcopy_lane_u64(__p0_371, __p1_371, __p2_371, __p3_371) __extension__ ({ \ + uint64x1_t __s0_371 = __p0_371; \ + uint64x1_t __s2_371 = __p2_371; \ + uint64x1_t __ret_371; \ + __ret_371 = vset_lane_u64(vget_lane_u64(__s2_371, __p3_371), __s0_371, __p1_371); \ + __ret_371; \ }) #ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_u16(__p0_280, __p1_280, __p2_280, __p3_280) __extension__ ({ \ - uint16x4_t __s0_280 = __p0_280; \ - uint16x4_t __s2_280 = __p2_280; \ - uint16x4_t __ret_280; \ - __ret_280 = vset_lane_u16(vget_lane_u16(__s2_280, __p3_280), __s0_280, __p1_280); \ - __ret_280; \ +#define vcopy_lane_u16(__p0_372, __p1_372, __p2_372, __p3_372) __extension__ ({ \ + uint16x4_t __s0_372 = __p0_372; \ + uint16x4_t __s2_372 = __p2_372; \ + uint16x4_t __ret_372; \ + __ret_372 = vset_lane_u16(vget_lane_u16(__s2_372, __p3_372), __s0_372, __p1_372); \ + __ret_372; \ }) #else -#define vcopy_lane_u16(__p0_281, __p1_281, __p2_281, __p3_281) __extension__ ({ \ - uint16x4_t __s0_281 = __p0_281; \ - uint16x4_t __s2_281 = __p2_281; \ - uint16x4_t __rev0_281; __rev0_281 = __builtin_shufflevector(__s0_281, __s0_281, 3, 2, 1, 0); \ - uint16x4_t __rev2_281; __rev2_281 = __builtin_shufflevector(__s2_281, __s2_281, 3, 2, 1, 0); \ - uint16x4_t __ret_281; \ - __ret_281 = __noswap_vset_lane_u16(__noswap_vget_lane_u16(__rev2_281, __p3_281), __rev0_281, __p1_281); \ - __ret_281 = __builtin_shufflevector(__ret_281, __ret_281, 3, 2, 1, 0); \ - __ret_281; \ +#define vcopy_lane_u16(__p0_373, __p1_373, __p2_373, __p3_373) __extension__ ({ \ + uint16x4_t __s0_373 = __p0_373; \ + uint16x4_t __s2_373 = __p2_373; \ + uint16x4_t __rev0_373; __rev0_373 = __builtin_shufflevector(__s0_373, __s0_373, 3, 2, 1, 0); \ + uint16x4_t __rev2_373; __rev2_373 = __builtin_shufflevector(__s2_373, __s2_373, 3, 2, 1, 0); \ + uint16x4_t __ret_373; \ + __ret_373 = __noswap_vset_lane_u16(__noswap_vget_lane_u16(__rev2_373, __p3_373), __rev0_373, __p1_373); \ + __ret_373 = __builtin_shufflevector(__ret_373, __ret_373, 3, 2, 1, 0); \ + __ret_373; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_s8(__p0_282, __p1_282, __p2_282, __p3_282) __extension__ ({ \ - int8x8_t __s0_282 = __p0_282; \ - int8x8_t __s2_282 = __p2_282; \ - int8x8_t __ret_282; \ - __ret_282 = vset_lane_s8(vget_lane_s8(__s2_282, __p3_282), __s0_282, __p1_282); \ - __ret_282; \ +#define vcopy_lane_s8(__p0_374, __p1_374, __p2_374, __p3_374) __extension__ ({ \ + int8x8_t __s0_374 = __p0_374; \ + int8x8_t __s2_374 = __p2_374; \ + int8x8_t __ret_374; \ + __ret_374 = vset_lane_s8(vget_lane_s8(__s2_374, __p3_374), __s0_374, __p1_374); \ + __ret_374; \ }) #else -#define 
vcopy_lane_s8(__p0_283, __p1_283, __p2_283, __p3_283) __extension__ ({ \ - int8x8_t __s0_283 = __p0_283; \ - int8x8_t __s2_283 = __p2_283; \ - int8x8_t __rev0_283; __rev0_283 = __builtin_shufflevector(__s0_283, __s0_283, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __rev2_283; __rev2_283 = __builtin_shufflevector(__s2_283, __s2_283, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __ret_283; \ - __ret_283 = __noswap_vset_lane_s8(__noswap_vget_lane_s8(__rev2_283, __p3_283), __rev0_283, __p1_283); \ - __ret_283 = __builtin_shufflevector(__ret_283, __ret_283, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_283; \ +#define vcopy_lane_s8(__p0_375, __p1_375, __p2_375, __p3_375) __extension__ ({ \ + int8x8_t __s0_375 = __p0_375; \ + int8x8_t __s2_375 = __p2_375; \ + int8x8_t __rev0_375; __rev0_375 = __builtin_shufflevector(__s0_375, __s0_375, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __rev2_375; __rev2_375 = __builtin_shufflevector(__s2_375, __s2_375, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __ret_375; \ + __ret_375 = __noswap_vset_lane_s8(__noswap_vget_lane_s8(__rev2_375, __p3_375), __rev0_375, __p1_375); \ + __ret_375 = __builtin_shufflevector(__ret_375, __ret_375, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_375; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_f32(__p0_284, __p1_284, __p2_284, __p3_284) __extension__ ({ \ - float32x2_t __s0_284 = __p0_284; \ - float32x2_t __s2_284 = __p2_284; \ - float32x2_t __ret_284; \ - __ret_284 = vset_lane_f32(vget_lane_f32(__s2_284, __p3_284), __s0_284, __p1_284); \ - __ret_284; \ +#define vcopy_lane_f32(__p0_376, __p1_376, __p2_376, __p3_376) __extension__ ({ \ + float32x2_t __s0_376 = __p0_376; \ + float32x2_t __s2_376 = __p2_376; \ + float32x2_t __ret_376; \ + __ret_376 = vset_lane_f32(vget_lane_f32(__s2_376, __p3_376), __s0_376, __p1_376); \ + __ret_376; \ }) #else -#define vcopy_lane_f32(__p0_285, __p1_285, __p2_285, __p3_285) __extension__ ({ \ - float32x2_t __s0_285 = __p0_285; \ - float32x2_t __s2_285 = __p2_285; \ - float32x2_t __rev0_285; __rev0_285 = __builtin_shufflevector(__s0_285, __s0_285, 1, 0); \ - float32x2_t __rev2_285; __rev2_285 = __builtin_shufflevector(__s2_285, __s2_285, 1, 0); \ - float32x2_t __ret_285; \ - __ret_285 = __noswap_vset_lane_f32(__noswap_vget_lane_f32(__rev2_285, __p3_285), __rev0_285, __p1_285); \ - __ret_285 = __builtin_shufflevector(__ret_285, __ret_285, 1, 0); \ - __ret_285; \ +#define vcopy_lane_f32(__p0_377, __p1_377, __p2_377, __p3_377) __extension__ ({ \ + float32x2_t __s0_377 = __p0_377; \ + float32x2_t __s2_377 = __p2_377; \ + float32x2_t __rev0_377; __rev0_377 = __builtin_shufflevector(__s0_377, __s0_377, 1, 0); \ + float32x2_t __rev2_377; __rev2_377 = __builtin_shufflevector(__s2_377, __s2_377, 1, 0); \ + float32x2_t __ret_377; \ + __ret_377 = __noswap_vset_lane_f32(__noswap_vget_lane_f32(__rev2_377, __p3_377), __rev0_377, __p1_377); \ + __ret_377 = __builtin_shufflevector(__ret_377, __ret_377, 1, 0); \ + __ret_377; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_s32(__p0_286, __p1_286, __p2_286, __p3_286) __extension__ ({ \ - int32x2_t __s0_286 = __p0_286; \ - int32x2_t __s2_286 = __p2_286; \ - int32x2_t __ret_286; \ - __ret_286 = vset_lane_s32(vget_lane_s32(__s2_286, __p3_286), __s0_286, __p1_286); \ - __ret_286; \ +#define vcopy_lane_s32(__p0_378, __p1_378, __p2_378, __p3_378) __extension__ ({ \ + int32x2_t __s0_378 = __p0_378; \ + int32x2_t __s2_378 = __p2_378; \ + int32x2_t __ret_378; \ + __ret_378 = vset_lane_s32(vget_lane_s32(__s2_378, __p3_378), __s0_378, __p1_378); \ + __ret_378; \ }) #else -#define vcopy_lane_s32(__p0_287, 
__p1_287, __p2_287, __p3_287) __extension__ ({ \ - int32x2_t __s0_287 = __p0_287; \ - int32x2_t __s2_287 = __p2_287; \ - int32x2_t __rev0_287; __rev0_287 = __builtin_shufflevector(__s0_287, __s0_287, 1, 0); \ - int32x2_t __rev2_287; __rev2_287 = __builtin_shufflevector(__s2_287, __s2_287, 1, 0); \ - int32x2_t __ret_287; \ - __ret_287 = __noswap_vset_lane_s32(__noswap_vget_lane_s32(__rev2_287, __p3_287), __rev0_287, __p1_287); \ - __ret_287 = __builtin_shufflevector(__ret_287, __ret_287, 1, 0); \ - __ret_287; \ +#define vcopy_lane_s32(__p0_379, __p1_379, __p2_379, __p3_379) __extension__ ({ \ + int32x2_t __s0_379 = __p0_379; \ + int32x2_t __s2_379 = __p2_379; \ + int32x2_t __rev0_379; __rev0_379 = __builtin_shufflevector(__s0_379, __s0_379, 1, 0); \ + int32x2_t __rev2_379; __rev2_379 = __builtin_shufflevector(__s2_379, __s2_379, 1, 0); \ + int32x2_t __ret_379; \ + __ret_379 = __noswap_vset_lane_s32(__noswap_vget_lane_s32(__rev2_379, __p3_379), __rev0_379, __p1_379); \ + __ret_379 = __builtin_shufflevector(__ret_379, __ret_379, 1, 0); \ + __ret_379; \ }) #endif -#define vcopy_lane_s64(__p0_288, __p1_288, __p2_288, __p3_288) __extension__ ({ \ - int64x1_t __s0_288 = __p0_288; \ - int64x1_t __s2_288 = __p2_288; \ - int64x1_t __ret_288; \ - __ret_288 = vset_lane_s64(vget_lane_s64(__s2_288, __p3_288), __s0_288, __p1_288); \ - __ret_288; \ +#define vcopy_lane_s64(__p0_380, __p1_380, __p2_380, __p3_380) __extension__ ({ \ + int64x1_t __s0_380 = __p0_380; \ + int64x1_t __s2_380 = __p2_380; \ + int64x1_t __ret_380; \ + __ret_380 = vset_lane_s64(vget_lane_s64(__s2_380, __p3_380), __s0_380, __p1_380); \ + __ret_380; \ }) #ifdef __LITTLE_ENDIAN__ -#define vcopy_lane_s16(__p0_289, __p1_289, __p2_289, __p3_289) __extension__ ({ \ - int16x4_t __s0_289 = __p0_289; \ - int16x4_t __s2_289 = __p2_289; \ - int16x4_t __ret_289; \ - __ret_289 = vset_lane_s16(vget_lane_s16(__s2_289, __p3_289), __s0_289, __p1_289); \ - __ret_289; \ +#define vcopy_lane_s16(__p0_381, __p1_381, __p2_381, __p3_381) __extension__ ({ \ + int16x4_t __s0_381 = __p0_381; \ + int16x4_t __s2_381 = __p2_381; \ + int16x4_t __ret_381; \ + __ret_381 = vset_lane_s16(vget_lane_s16(__s2_381, __p3_381), __s0_381, __p1_381); \ + __ret_381; \ }) #else -#define vcopy_lane_s16(__p0_290, __p1_290, __p2_290, __p3_290) __extension__ ({ \ - int16x4_t __s0_290 = __p0_290; \ - int16x4_t __s2_290 = __p2_290; \ - int16x4_t __rev0_290; __rev0_290 = __builtin_shufflevector(__s0_290, __s0_290, 3, 2, 1, 0); \ - int16x4_t __rev2_290; __rev2_290 = __builtin_shufflevector(__s2_290, __s2_290, 3, 2, 1, 0); \ - int16x4_t __ret_290; \ - __ret_290 = __noswap_vset_lane_s16(__noswap_vget_lane_s16(__rev2_290, __p3_290), __rev0_290, __p1_290); \ - __ret_290 = __builtin_shufflevector(__ret_290, __ret_290, 3, 2, 1, 0); \ - __ret_290; \ +#define vcopy_lane_s16(__p0_382, __p1_382, __p2_382, __p3_382) __extension__ ({ \ + int16x4_t __s0_382 = __p0_382; \ + int16x4_t __s2_382 = __p2_382; \ + int16x4_t __rev0_382; __rev0_382 = __builtin_shufflevector(__s0_382, __s0_382, 3, 2, 1, 0); \ + int16x4_t __rev2_382; __rev2_382 = __builtin_shufflevector(__s2_382, __s2_382, 3, 2, 1, 0); \ + int16x4_t __ret_382; \ + __ret_382 = __noswap_vset_lane_s16(__noswap_vget_lane_s16(__rev2_382, __p3_382), __rev0_382, __p1_382); \ + __ret_382 = __builtin_shufflevector(__ret_382, __ret_382, 3, 2, 1, 0); \ + __ret_382; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_p8(__p0_291, __p1_291, __p2_291, __p3_291) __extension__ ({ \ - poly8x16_t __s0_291 = __p0_291; \ - poly8x16_t __s2_291 = __p2_291; 
\ - poly8x16_t __ret_291; \ - __ret_291 = vsetq_lane_p8(vgetq_lane_p8(__s2_291, __p3_291), __s0_291, __p1_291); \ - __ret_291; \ +#define vcopyq_laneq_p8(__p0_383, __p1_383, __p2_383, __p3_383) __extension__ ({ \ + poly8x16_t __s0_383 = __p0_383; \ + poly8x16_t __s2_383 = __p2_383; \ + poly8x16_t __ret_383; \ + __ret_383 = vsetq_lane_p8(vgetq_lane_p8(__s2_383, __p3_383), __s0_383, __p1_383); \ + __ret_383; \ }) #else -#define vcopyq_laneq_p8(__p0_292, __p1_292, __p2_292, __p3_292) __extension__ ({ \ - poly8x16_t __s0_292 = __p0_292; \ - poly8x16_t __s2_292 = __p2_292; \ - poly8x16_t __rev0_292; __rev0_292 = __builtin_shufflevector(__s0_292, __s0_292, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x16_t __rev2_292; __rev2_292 = __builtin_shufflevector(__s2_292, __s2_292, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x16_t __ret_292; \ - __ret_292 = __noswap_vsetq_lane_p8(__noswap_vgetq_lane_p8(__rev2_292, __p3_292), __rev0_292, __p1_292); \ - __ret_292 = __builtin_shufflevector(__ret_292, __ret_292, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_292; \ +#define vcopyq_laneq_p8(__p0_384, __p1_384, __p2_384, __p3_384) __extension__ ({ \ + poly8x16_t __s0_384 = __p0_384; \ + poly8x16_t __s2_384 = __p2_384; \ + poly8x16_t __rev0_384; __rev0_384 = __builtin_shufflevector(__s0_384, __s0_384, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x16_t __rev2_384; __rev2_384 = __builtin_shufflevector(__s2_384, __s2_384, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x16_t __ret_384; \ + __ret_384 = __noswap_vsetq_lane_p8(__noswap_vgetq_lane_p8(__rev2_384, __p3_384), __rev0_384, __p1_384); \ + __ret_384 = __builtin_shufflevector(__ret_384, __ret_384, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_384; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_p16(__p0_293, __p1_293, __p2_293, __p3_293) __extension__ ({ \ - poly16x8_t __s0_293 = __p0_293; \ - poly16x8_t __s2_293 = __p2_293; \ - poly16x8_t __ret_293; \ - __ret_293 = vsetq_lane_p16(vgetq_lane_p16(__s2_293, __p3_293), __s0_293, __p1_293); \ - __ret_293; \ +#define vcopyq_laneq_p16(__p0_385, __p1_385, __p2_385, __p3_385) __extension__ ({ \ + poly16x8_t __s0_385 = __p0_385; \ + poly16x8_t __s2_385 = __p2_385; \ + poly16x8_t __ret_385; \ + __ret_385 = vsetq_lane_p16(vgetq_lane_p16(__s2_385, __p3_385), __s0_385, __p1_385); \ + __ret_385; \ }) #else -#define vcopyq_laneq_p16(__p0_294, __p1_294, __p2_294, __p3_294) __extension__ ({ \ - poly16x8_t __s0_294 = __p0_294; \ - poly16x8_t __s2_294 = __p2_294; \ - poly16x8_t __rev0_294; __rev0_294 = __builtin_shufflevector(__s0_294, __s0_294, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly16x8_t __rev2_294; __rev2_294 = __builtin_shufflevector(__s2_294, __s2_294, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly16x8_t __ret_294; \ - __ret_294 = __noswap_vsetq_lane_p16(__noswap_vgetq_lane_p16(__rev2_294, __p3_294), __rev0_294, __p1_294); \ - __ret_294 = __builtin_shufflevector(__ret_294, __ret_294, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_294; \ +#define vcopyq_laneq_p16(__p0_386, __p1_386, __p2_386, __p3_386) __extension__ ({ \ + poly16x8_t __s0_386 = __p0_386; \ + poly16x8_t __s2_386 = __p2_386; \ + poly16x8_t __rev0_386; __rev0_386 = __builtin_shufflevector(__s0_386, __s0_386, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x8_t __rev2_386; __rev2_386 = __builtin_shufflevector(__s2_386, __s2_386, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x8_t __ret_386; \ + __ret_386 = __noswap_vsetq_lane_p16(__noswap_vgetq_lane_p16(__rev2_386, __p3_386), __rev0_386, 
__p1_386); \ + __ret_386 = __builtin_shufflevector(__ret_386, __ret_386, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_386; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_u8(__p0_295, __p1_295, __p2_295, __p3_295) __extension__ ({ \ - uint8x16_t __s0_295 = __p0_295; \ - uint8x16_t __s2_295 = __p2_295; \ - uint8x16_t __ret_295; \ - __ret_295 = vsetq_lane_u8(vgetq_lane_u8(__s2_295, __p3_295), __s0_295, __p1_295); \ - __ret_295; \ +#define vcopyq_laneq_u8(__p0_387, __p1_387, __p2_387, __p3_387) __extension__ ({ \ + uint8x16_t __s0_387 = __p0_387; \ + uint8x16_t __s2_387 = __p2_387; \ + uint8x16_t __ret_387; \ + __ret_387 = vsetq_lane_u8(vgetq_lane_u8(__s2_387, __p3_387), __s0_387, __p1_387); \ + __ret_387; \ }) #else -#define vcopyq_laneq_u8(__p0_296, __p1_296, __p2_296, __p3_296) __extension__ ({ \ - uint8x16_t __s0_296 = __p0_296; \ - uint8x16_t __s2_296 = __p2_296; \ - uint8x16_t __rev0_296; __rev0_296 = __builtin_shufflevector(__s0_296, __s0_296, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev2_296; __rev2_296 = __builtin_shufflevector(__s2_296, __s2_296, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __ret_296; \ - __ret_296 = __noswap_vsetq_lane_u8(__noswap_vgetq_lane_u8(__rev2_296, __p3_296), __rev0_296, __p1_296); \ - __ret_296 = __builtin_shufflevector(__ret_296, __ret_296, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_296; \ +#define vcopyq_laneq_u8(__p0_388, __p1_388, __p2_388, __p3_388) __extension__ ({ \ + uint8x16_t __s0_388 = __p0_388; \ + uint8x16_t __s2_388 = __p2_388; \ + uint8x16_t __rev0_388; __rev0_388 = __builtin_shufflevector(__s0_388, __s0_388, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev2_388; __rev2_388 = __builtin_shufflevector(__s2_388, __s2_388, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __ret_388; \ + __ret_388 = __noswap_vsetq_lane_u8(__noswap_vgetq_lane_u8(__rev2_388, __p3_388), __rev0_388, __p1_388); \ + __ret_388 = __builtin_shufflevector(__ret_388, __ret_388, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_388; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_u32(__p0_297, __p1_297, __p2_297, __p3_297) __extension__ ({ \ - uint32x4_t __s0_297 = __p0_297; \ - uint32x4_t __s2_297 = __p2_297; \ - uint32x4_t __ret_297; \ - __ret_297 = vsetq_lane_u32(vgetq_lane_u32(__s2_297, __p3_297), __s0_297, __p1_297); \ - __ret_297; \ +#define vcopyq_laneq_u32(__p0_389, __p1_389, __p2_389, __p3_389) __extension__ ({ \ + uint32x4_t __s0_389 = __p0_389; \ + uint32x4_t __s2_389 = __p2_389; \ + uint32x4_t __ret_389; \ + __ret_389 = vsetq_lane_u32(vgetq_lane_u32(__s2_389, __p3_389), __s0_389, __p1_389); \ + __ret_389; \ }) #else -#define vcopyq_laneq_u32(__p0_298, __p1_298, __p2_298, __p3_298) __extension__ ({ \ - uint32x4_t __s0_298 = __p0_298; \ - uint32x4_t __s2_298 = __p2_298; \ - uint32x4_t __rev0_298; __rev0_298 = __builtin_shufflevector(__s0_298, __s0_298, 3, 2, 1, 0); \ - uint32x4_t __rev2_298; __rev2_298 = __builtin_shufflevector(__s2_298, __s2_298, 3, 2, 1, 0); \ - uint32x4_t __ret_298; \ - __ret_298 = __noswap_vsetq_lane_u32(__noswap_vgetq_lane_u32(__rev2_298, __p3_298), __rev0_298, __p1_298); \ - __ret_298 = __builtin_shufflevector(__ret_298, __ret_298, 3, 2, 1, 0); \ - __ret_298; \ +#define vcopyq_laneq_u32(__p0_390, __p1_390, __p2_390, __p3_390) __extension__ ({ \ + uint32x4_t __s0_390 = __p0_390; \ + uint32x4_t __s2_390 = __p2_390; \ + uint32x4_t __rev0_390; __rev0_390 = __builtin_shufflevector(__s0_390, 
__s0_390, 3, 2, 1, 0); \ + uint32x4_t __rev2_390; __rev2_390 = __builtin_shufflevector(__s2_390, __s2_390, 3, 2, 1, 0); \ + uint32x4_t __ret_390; \ + __ret_390 = __noswap_vsetq_lane_u32(__noswap_vgetq_lane_u32(__rev2_390, __p3_390), __rev0_390, __p1_390); \ + __ret_390 = __builtin_shufflevector(__ret_390, __ret_390, 3, 2, 1, 0); \ + __ret_390; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_u64(__p0_299, __p1_299, __p2_299, __p3_299) __extension__ ({ \ - uint64x2_t __s0_299 = __p0_299; \ - uint64x2_t __s2_299 = __p2_299; \ - uint64x2_t __ret_299; \ - __ret_299 = vsetq_lane_u64(vgetq_lane_u64(__s2_299, __p3_299), __s0_299, __p1_299); \ - __ret_299; \ +#define vcopyq_laneq_u64(__p0_391, __p1_391, __p2_391, __p3_391) __extension__ ({ \ + uint64x2_t __s0_391 = __p0_391; \ + uint64x2_t __s2_391 = __p2_391; \ + uint64x2_t __ret_391; \ + __ret_391 = vsetq_lane_u64(vgetq_lane_u64(__s2_391, __p3_391), __s0_391, __p1_391); \ + __ret_391; \ }) #else -#define vcopyq_laneq_u64(__p0_300, __p1_300, __p2_300, __p3_300) __extension__ ({ \ - uint64x2_t __s0_300 = __p0_300; \ - uint64x2_t __s2_300 = __p2_300; \ - uint64x2_t __rev0_300; __rev0_300 = __builtin_shufflevector(__s0_300, __s0_300, 1, 0); \ - uint64x2_t __rev2_300; __rev2_300 = __builtin_shufflevector(__s2_300, __s2_300, 1, 0); \ - uint64x2_t __ret_300; \ - __ret_300 = __noswap_vsetq_lane_u64(__noswap_vgetq_lane_u64(__rev2_300, __p3_300), __rev0_300, __p1_300); \ - __ret_300 = __builtin_shufflevector(__ret_300, __ret_300, 1, 0); \ - __ret_300; \ +#define vcopyq_laneq_u64(__p0_392, __p1_392, __p2_392, __p3_392) __extension__ ({ \ + uint64x2_t __s0_392 = __p0_392; \ + uint64x2_t __s2_392 = __p2_392; \ + uint64x2_t __rev0_392; __rev0_392 = __builtin_shufflevector(__s0_392, __s0_392, 1, 0); \ + uint64x2_t __rev2_392; __rev2_392 = __builtin_shufflevector(__s2_392, __s2_392, 1, 0); \ + uint64x2_t __ret_392; \ + __ret_392 = __noswap_vsetq_lane_u64(__noswap_vgetq_lane_u64(__rev2_392, __p3_392), __rev0_392, __p1_392); \ + __ret_392 = __builtin_shufflevector(__ret_392, __ret_392, 1, 0); \ + __ret_392; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_u16(__p0_301, __p1_301, __p2_301, __p3_301) __extension__ ({ \ - uint16x8_t __s0_301 = __p0_301; \ - uint16x8_t __s2_301 = __p2_301; \ - uint16x8_t __ret_301; \ - __ret_301 = vsetq_lane_u16(vgetq_lane_u16(__s2_301, __p3_301), __s0_301, __p1_301); \ - __ret_301; \ +#define vcopyq_laneq_u16(__p0_393, __p1_393, __p2_393, __p3_393) __extension__ ({ \ + uint16x8_t __s0_393 = __p0_393; \ + uint16x8_t __s2_393 = __p2_393; \ + uint16x8_t __ret_393; \ + __ret_393 = vsetq_lane_u16(vgetq_lane_u16(__s2_393, __p3_393), __s0_393, __p1_393); \ + __ret_393; \ }) #else -#define vcopyq_laneq_u16(__p0_302, __p1_302, __p2_302, __p3_302) __extension__ ({ \ - uint16x8_t __s0_302 = __p0_302; \ - uint16x8_t __s2_302 = __p2_302; \ - uint16x8_t __rev0_302; __rev0_302 = __builtin_shufflevector(__s0_302, __s0_302, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev2_302; __rev2_302 = __builtin_shufflevector(__s2_302, __s2_302, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __ret_302; \ - __ret_302 = __noswap_vsetq_lane_u16(__noswap_vgetq_lane_u16(__rev2_302, __p3_302), __rev0_302, __p1_302); \ - __ret_302 = __builtin_shufflevector(__ret_302, __ret_302, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_302; \ +#define vcopyq_laneq_u16(__p0_394, __p1_394, __p2_394, __p3_394) __extension__ ({ \ + uint16x8_t __s0_394 = __p0_394; \ + uint16x8_t __s2_394 = __p2_394; \ + uint16x8_t __rev0_394; __rev0_394 = __builtin_shufflevector(__s0_394, __s0_394, 7, 
6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev2_394; __rev2_394 = __builtin_shufflevector(__s2_394, __s2_394, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __ret_394; \ + __ret_394 = __noswap_vsetq_lane_u16(__noswap_vgetq_lane_u16(__rev2_394, __p3_394), __rev0_394, __p1_394); \ + __ret_394 = __builtin_shufflevector(__ret_394, __ret_394, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_394; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_s8(__p0_303, __p1_303, __p2_303, __p3_303) __extension__ ({ \ - int8x16_t __s0_303 = __p0_303; \ - int8x16_t __s2_303 = __p2_303; \ - int8x16_t __ret_303; \ - __ret_303 = vsetq_lane_s8(vgetq_lane_s8(__s2_303, __p3_303), __s0_303, __p1_303); \ - __ret_303; \ +#define vcopyq_laneq_s8(__p0_395, __p1_395, __p2_395, __p3_395) __extension__ ({ \ + int8x16_t __s0_395 = __p0_395; \ + int8x16_t __s2_395 = __p2_395; \ + int8x16_t __ret_395; \ + __ret_395 = vsetq_lane_s8(vgetq_lane_s8(__s2_395, __p3_395), __s0_395, __p1_395); \ + __ret_395; \ }) #else -#define vcopyq_laneq_s8(__p0_304, __p1_304, __p2_304, __p3_304) __extension__ ({ \ - int8x16_t __s0_304 = __p0_304; \ - int8x16_t __s2_304 = __p2_304; \ - int8x16_t __rev0_304; __rev0_304 = __builtin_shufflevector(__s0_304, __s0_304, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __rev2_304; __rev2_304 = __builtin_shufflevector(__s2_304, __s2_304, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __ret_304; \ - __ret_304 = __noswap_vsetq_lane_s8(__noswap_vgetq_lane_s8(__rev2_304, __p3_304), __rev0_304, __p1_304); \ - __ret_304 = __builtin_shufflevector(__ret_304, __ret_304, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_304; \ +#define vcopyq_laneq_s8(__p0_396, __p1_396, __p2_396, __p3_396) __extension__ ({ \ + int8x16_t __s0_396 = __p0_396; \ + int8x16_t __s2_396 = __p2_396; \ + int8x16_t __rev0_396; __rev0_396 = __builtin_shufflevector(__s0_396, __s0_396, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev2_396; __rev2_396 = __builtin_shufflevector(__s2_396, __s2_396, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __ret_396; \ + __ret_396 = __noswap_vsetq_lane_s8(__noswap_vgetq_lane_s8(__rev2_396, __p3_396), __rev0_396, __p1_396); \ + __ret_396 = __builtin_shufflevector(__ret_396, __ret_396, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_396; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_f32(__p0_305, __p1_305, __p2_305, __p3_305) __extension__ ({ \ - float32x4_t __s0_305 = __p0_305; \ - float32x4_t __s2_305 = __p2_305; \ - float32x4_t __ret_305; \ - __ret_305 = vsetq_lane_f32(vgetq_lane_f32(__s2_305, __p3_305), __s0_305, __p1_305); \ - __ret_305; \ +#define vcopyq_laneq_f32(__p0_397, __p1_397, __p2_397, __p3_397) __extension__ ({ \ + float32x4_t __s0_397 = __p0_397; \ + float32x4_t __s2_397 = __p2_397; \ + float32x4_t __ret_397; \ + __ret_397 = vsetq_lane_f32(vgetq_lane_f32(__s2_397, __p3_397), __s0_397, __p1_397); \ + __ret_397; \ }) #else -#define vcopyq_laneq_f32(__p0_306, __p1_306, __p2_306, __p3_306) __extension__ ({ \ - float32x4_t __s0_306 = __p0_306; \ - float32x4_t __s2_306 = __p2_306; \ - float32x4_t __rev0_306; __rev0_306 = __builtin_shufflevector(__s0_306, __s0_306, 3, 2, 1, 0); \ - float32x4_t __rev2_306; __rev2_306 = __builtin_shufflevector(__s2_306, __s2_306, 3, 2, 1, 0); \ - float32x4_t __ret_306; \ - __ret_306 = __noswap_vsetq_lane_f32(__noswap_vgetq_lane_f32(__rev2_306, __p3_306), __rev0_306, __p1_306); \ - __ret_306 = __builtin_shufflevector(__ret_306, __ret_306, 3, 2, 1, 0); \ 
- __ret_306; \ +#define vcopyq_laneq_f32(__p0_398, __p1_398, __p2_398, __p3_398) __extension__ ({ \ + float32x4_t __s0_398 = __p0_398; \ + float32x4_t __s2_398 = __p2_398; \ + float32x4_t __rev0_398; __rev0_398 = __builtin_shufflevector(__s0_398, __s0_398, 3, 2, 1, 0); \ + float32x4_t __rev2_398; __rev2_398 = __builtin_shufflevector(__s2_398, __s2_398, 3, 2, 1, 0); \ + float32x4_t __ret_398; \ + __ret_398 = __noswap_vsetq_lane_f32(__noswap_vgetq_lane_f32(__rev2_398, __p3_398), __rev0_398, __p1_398); \ + __ret_398 = __builtin_shufflevector(__ret_398, __ret_398, 3, 2, 1, 0); \ + __ret_398; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_s32(__p0_307, __p1_307, __p2_307, __p3_307) __extension__ ({ \ - int32x4_t __s0_307 = __p0_307; \ - int32x4_t __s2_307 = __p2_307; \ - int32x4_t __ret_307; \ - __ret_307 = vsetq_lane_s32(vgetq_lane_s32(__s2_307, __p3_307), __s0_307, __p1_307); \ - __ret_307; \ +#define vcopyq_laneq_s32(__p0_399, __p1_399, __p2_399, __p3_399) __extension__ ({ \ + int32x4_t __s0_399 = __p0_399; \ + int32x4_t __s2_399 = __p2_399; \ + int32x4_t __ret_399; \ + __ret_399 = vsetq_lane_s32(vgetq_lane_s32(__s2_399, __p3_399), __s0_399, __p1_399); \ + __ret_399; \ }) #else -#define vcopyq_laneq_s32(__p0_308, __p1_308, __p2_308, __p3_308) __extension__ ({ \ - int32x4_t __s0_308 = __p0_308; \ - int32x4_t __s2_308 = __p2_308; \ - int32x4_t __rev0_308; __rev0_308 = __builtin_shufflevector(__s0_308, __s0_308, 3, 2, 1, 0); \ - int32x4_t __rev2_308; __rev2_308 = __builtin_shufflevector(__s2_308, __s2_308, 3, 2, 1, 0); \ - int32x4_t __ret_308; \ - __ret_308 = __noswap_vsetq_lane_s32(__noswap_vgetq_lane_s32(__rev2_308, __p3_308), __rev0_308, __p1_308); \ - __ret_308 = __builtin_shufflevector(__ret_308, __ret_308, 3, 2, 1, 0); \ - __ret_308; \ +#define vcopyq_laneq_s32(__p0_400, __p1_400, __p2_400, __p3_400) __extension__ ({ \ + int32x4_t __s0_400 = __p0_400; \ + int32x4_t __s2_400 = __p2_400; \ + int32x4_t __rev0_400; __rev0_400 = __builtin_shufflevector(__s0_400, __s0_400, 3, 2, 1, 0); \ + int32x4_t __rev2_400; __rev2_400 = __builtin_shufflevector(__s2_400, __s2_400, 3, 2, 1, 0); \ + int32x4_t __ret_400; \ + __ret_400 = __noswap_vsetq_lane_s32(__noswap_vgetq_lane_s32(__rev2_400, __p3_400), __rev0_400, __p1_400); \ + __ret_400 = __builtin_shufflevector(__ret_400, __ret_400, 3, 2, 1, 0); \ + __ret_400; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_s64(__p0_309, __p1_309, __p2_309, __p3_309) __extension__ ({ \ - int64x2_t __s0_309 = __p0_309; \ - int64x2_t __s2_309 = __p2_309; \ - int64x2_t __ret_309; \ - __ret_309 = vsetq_lane_s64(vgetq_lane_s64(__s2_309, __p3_309), __s0_309, __p1_309); \ - __ret_309; \ +#define vcopyq_laneq_s64(__p0_401, __p1_401, __p2_401, __p3_401) __extension__ ({ \ + int64x2_t __s0_401 = __p0_401; \ + int64x2_t __s2_401 = __p2_401; \ + int64x2_t __ret_401; \ + __ret_401 = vsetq_lane_s64(vgetq_lane_s64(__s2_401, __p3_401), __s0_401, __p1_401); \ + __ret_401; \ }) #else -#define vcopyq_laneq_s64(__p0_310, __p1_310, __p2_310, __p3_310) __extension__ ({ \ - int64x2_t __s0_310 = __p0_310; \ - int64x2_t __s2_310 = __p2_310; \ - int64x2_t __rev0_310; __rev0_310 = __builtin_shufflevector(__s0_310, __s0_310, 1, 0); \ - int64x2_t __rev2_310; __rev2_310 = __builtin_shufflevector(__s2_310, __s2_310, 1, 0); \ - int64x2_t __ret_310; \ - __ret_310 = __noswap_vsetq_lane_s64(__noswap_vgetq_lane_s64(__rev2_310, __p3_310), __rev0_310, __p1_310); \ - __ret_310 = __builtin_shufflevector(__ret_310, __ret_310, 1, 0); \ - __ret_310; \ +#define vcopyq_laneq_s64(__p0_402, 
__p1_402, __p2_402, __p3_402) __extension__ ({ \ + int64x2_t __s0_402 = __p0_402; \ + int64x2_t __s2_402 = __p2_402; \ + int64x2_t __rev0_402; __rev0_402 = __builtin_shufflevector(__s0_402, __s0_402, 1, 0); \ + int64x2_t __rev2_402; __rev2_402 = __builtin_shufflevector(__s2_402, __s2_402, 1, 0); \ + int64x2_t __ret_402; \ + __ret_402 = __noswap_vsetq_lane_s64(__noswap_vgetq_lane_s64(__rev2_402, __p3_402), __rev0_402, __p1_402); \ + __ret_402 = __builtin_shufflevector(__ret_402, __ret_402, 1, 0); \ + __ret_402; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_s16(__p0_311, __p1_311, __p2_311, __p3_311) __extension__ ({ \ - int16x8_t __s0_311 = __p0_311; \ - int16x8_t __s2_311 = __p2_311; \ - int16x8_t __ret_311; \ - __ret_311 = vsetq_lane_s16(vgetq_lane_s16(__s2_311, __p3_311), __s0_311, __p1_311); \ - __ret_311; \ +#define vcopyq_laneq_s16(__p0_403, __p1_403, __p2_403, __p3_403) __extension__ ({ \ + int16x8_t __s0_403 = __p0_403; \ + int16x8_t __s2_403 = __p2_403; \ + int16x8_t __ret_403; \ + __ret_403 = vsetq_lane_s16(vgetq_lane_s16(__s2_403, __p3_403), __s0_403, __p1_403); \ + __ret_403; \ }) #else -#define vcopyq_laneq_s16(__p0_312, __p1_312, __p2_312, __p3_312) __extension__ ({ \ - int16x8_t __s0_312 = __p0_312; \ - int16x8_t __s2_312 = __p2_312; \ - int16x8_t __rev0_312; __rev0_312 = __builtin_shufflevector(__s0_312, __s0_312, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev2_312; __rev2_312 = __builtin_shufflevector(__s2_312, __s2_312, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __ret_312; \ - __ret_312 = __noswap_vsetq_lane_s16(__noswap_vgetq_lane_s16(__rev2_312, __p3_312), __rev0_312, __p1_312); \ - __ret_312 = __builtin_shufflevector(__ret_312, __ret_312, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_312; \ +#define vcopyq_laneq_s16(__p0_404, __p1_404, __p2_404, __p3_404) __extension__ ({ \ + int16x8_t __s0_404 = __p0_404; \ + int16x8_t __s2_404 = __p2_404; \ + int16x8_t __rev0_404; __rev0_404 = __builtin_shufflevector(__s0_404, __s0_404, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev2_404; __rev2_404 = __builtin_shufflevector(__s2_404, __s2_404, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __ret_404; \ + __ret_404 = __noswap_vsetq_lane_s16(__noswap_vgetq_lane_s16(__rev2_404, __p3_404), __rev0_404, __p1_404); \ + __ret_404 = __builtin_shufflevector(__ret_404, __ret_404, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_404; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_p8(__p0_313, __p1_313, __p2_313, __p3_313) __extension__ ({ \ - poly8x8_t __s0_313 = __p0_313; \ - poly8x16_t __s2_313 = __p2_313; \ - poly8x8_t __ret_313; \ - __ret_313 = vset_lane_p8(vgetq_lane_p8(__s2_313, __p3_313), __s0_313, __p1_313); \ - __ret_313; \ +#define vcopy_laneq_p8(__p0_405, __p1_405, __p2_405, __p3_405) __extension__ ({ \ + poly8x8_t __s0_405 = __p0_405; \ + poly8x16_t __s2_405 = __p2_405; \ + poly8x8_t __ret_405; \ + __ret_405 = vset_lane_p8(vgetq_lane_p8(__s2_405, __p3_405), __s0_405, __p1_405); \ + __ret_405; \ }) #else -#define vcopy_laneq_p8(__p0_314, __p1_314, __p2_314, __p3_314) __extension__ ({ \ - poly8x8_t __s0_314 = __p0_314; \ - poly8x16_t __s2_314 = __p2_314; \ - poly8x8_t __rev0_314; __rev0_314 = __builtin_shufflevector(__s0_314, __s0_314, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x16_t __rev2_314; __rev2_314 = __builtin_shufflevector(__s2_314, __s2_314, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x8_t __ret_314; \ - __ret_314 = __noswap_vset_lane_p8(__noswap_vgetq_lane_p8(__rev2_314, __p3_314), __rev0_314, __p1_314); \ - __ret_314 = __builtin_shufflevector(__ret_314, __ret_314, 7, 6, 5, 4, 3, 
2, 1, 0); \ - __ret_314; \ +#define vcopy_laneq_p8(__p0_406, __p1_406, __p2_406, __p3_406) __extension__ ({ \ + poly8x8_t __s0_406 = __p0_406; \ + poly8x16_t __s2_406 = __p2_406; \ + poly8x8_t __rev0_406; __rev0_406 = __builtin_shufflevector(__s0_406, __s0_406, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x16_t __rev2_406; __rev2_406 = __builtin_shufflevector(__s2_406, __s2_406, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x8_t __ret_406; \ + __ret_406 = __noswap_vset_lane_p8(__noswap_vgetq_lane_p8(__rev2_406, __p3_406), __rev0_406, __p1_406); \ + __ret_406 = __builtin_shufflevector(__ret_406, __ret_406, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_406; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_p16(__p0_315, __p1_315, __p2_315, __p3_315) __extension__ ({ \ - poly16x4_t __s0_315 = __p0_315; \ - poly16x8_t __s2_315 = __p2_315; \ - poly16x4_t __ret_315; \ - __ret_315 = vset_lane_p16(vgetq_lane_p16(__s2_315, __p3_315), __s0_315, __p1_315); \ - __ret_315; \ +#define vcopy_laneq_p16(__p0_407, __p1_407, __p2_407, __p3_407) __extension__ ({ \ + poly16x4_t __s0_407 = __p0_407; \ + poly16x8_t __s2_407 = __p2_407; \ + poly16x4_t __ret_407; \ + __ret_407 = vset_lane_p16(vgetq_lane_p16(__s2_407, __p3_407), __s0_407, __p1_407); \ + __ret_407; \ }) #else -#define vcopy_laneq_p16(__p0_316, __p1_316, __p2_316, __p3_316) __extension__ ({ \ - poly16x4_t __s0_316 = __p0_316; \ - poly16x8_t __s2_316 = __p2_316; \ - poly16x4_t __rev0_316; __rev0_316 = __builtin_shufflevector(__s0_316, __s0_316, 3, 2, 1, 0); \ - poly16x8_t __rev2_316; __rev2_316 = __builtin_shufflevector(__s2_316, __s2_316, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly16x4_t __ret_316; \ - __ret_316 = __noswap_vset_lane_p16(__noswap_vgetq_lane_p16(__rev2_316, __p3_316), __rev0_316, __p1_316); \ - __ret_316 = __builtin_shufflevector(__ret_316, __ret_316, 3, 2, 1, 0); \ - __ret_316; \ +#define vcopy_laneq_p16(__p0_408, __p1_408, __p2_408, __p3_408) __extension__ ({ \ + poly16x4_t __s0_408 = __p0_408; \ + poly16x8_t __s2_408 = __p2_408; \ + poly16x4_t __rev0_408; __rev0_408 = __builtin_shufflevector(__s0_408, __s0_408, 3, 2, 1, 0); \ + poly16x8_t __rev2_408; __rev2_408 = __builtin_shufflevector(__s2_408, __s2_408, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x4_t __ret_408; \ + __ret_408 = __noswap_vset_lane_p16(__noswap_vgetq_lane_p16(__rev2_408, __p3_408), __rev0_408, __p1_408); \ + __ret_408 = __builtin_shufflevector(__ret_408, __ret_408, 3, 2, 1, 0); \ + __ret_408; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_u8(__p0_317, __p1_317, __p2_317, __p3_317) __extension__ ({ \ - uint8x8_t __s0_317 = __p0_317; \ - uint8x16_t __s2_317 = __p2_317; \ - uint8x8_t __ret_317; \ - __ret_317 = vset_lane_u8(vgetq_lane_u8(__s2_317, __p3_317), __s0_317, __p1_317); \ - __ret_317; \ +#define vcopy_laneq_u8(__p0_409, __p1_409, __p2_409, __p3_409) __extension__ ({ \ + uint8x8_t __s0_409 = __p0_409; \ + uint8x16_t __s2_409 = __p2_409; \ + uint8x8_t __ret_409; \ + __ret_409 = vset_lane_u8(vgetq_lane_u8(__s2_409, __p3_409), __s0_409, __p1_409); \ + __ret_409; \ }) #else -#define vcopy_laneq_u8(__p0_318, __p1_318, __p2_318, __p3_318) __extension__ ({ \ - uint8x8_t __s0_318 = __p0_318; \ - uint8x16_t __s2_318 = __p2_318; \ - uint8x8_t __rev0_318; __rev0_318 = __builtin_shufflevector(__s0_318, __s0_318, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev2_318; __rev2_318 = __builtin_shufflevector(__s2_318, __s2_318, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __ret_318; \ - __ret_318 = __noswap_vset_lane_u8(__noswap_vgetq_lane_u8(__rev2_318, 
__p3_318), __rev0_318, __p1_318); \ - __ret_318 = __builtin_shufflevector(__ret_318, __ret_318, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_318; \ +#define vcopy_laneq_u8(__p0_410, __p1_410, __p2_410, __p3_410) __extension__ ({ \ + uint8x8_t __s0_410 = __p0_410; \ + uint8x16_t __s2_410 = __p2_410; \ + uint8x8_t __rev0_410; __rev0_410 = __builtin_shufflevector(__s0_410, __s0_410, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev2_410; __rev2_410 = __builtin_shufflevector(__s2_410, __s2_410, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __ret_410; \ + __ret_410 = __noswap_vset_lane_u8(__noswap_vgetq_lane_u8(__rev2_410, __p3_410), __rev0_410, __p1_410); \ + __ret_410 = __builtin_shufflevector(__ret_410, __ret_410, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_410; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_u32(__p0_319, __p1_319, __p2_319, __p3_319) __extension__ ({ \ - uint32x2_t __s0_319 = __p0_319; \ - uint32x4_t __s2_319 = __p2_319; \ - uint32x2_t __ret_319; \ - __ret_319 = vset_lane_u32(vgetq_lane_u32(__s2_319, __p3_319), __s0_319, __p1_319); \ - __ret_319; \ +#define vcopy_laneq_u32(__p0_411, __p1_411, __p2_411, __p3_411) __extension__ ({ \ + uint32x2_t __s0_411 = __p0_411; \ + uint32x4_t __s2_411 = __p2_411; \ + uint32x2_t __ret_411; \ + __ret_411 = vset_lane_u32(vgetq_lane_u32(__s2_411, __p3_411), __s0_411, __p1_411); \ + __ret_411; \ }) #else -#define vcopy_laneq_u32(__p0_320, __p1_320, __p2_320, __p3_320) __extension__ ({ \ - uint32x2_t __s0_320 = __p0_320; \ - uint32x4_t __s2_320 = __p2_320; \ - uint32x2_t __rev0_320; __rev0_320 = __builtin_shufflevector(__s0_320, __s0_320, 1, 0); \ - uint32x4_t __rev2_320; __rev2_320 = __builtin_shufflevector(__s2_320, __s2_320, 3, 2, 1, 0); \ - uint32x2_t __ret_320; \ - __ret_320 = __noswap_vset_lane_u32(__noswap_vgetq_lane_u32(__rev2_320, __p3_320), __rev0_320, __p1_320); \ - __ret_320 = __builtin_shufflevector(__ret_320, __ret_320, 1, 0); \ - __ret_320; \ +#define vcopy_laneq_u32(__p0_412, __p1_412, __p2_412, __p3_412) __extension__ ({ \ + uint32x2_t __s0_412 = __p0_412; \ + uint32x4_t __s2_412 = __p2_412; \ + uint32x2_t __rev0_412; __rev0_412 = __builtin_shufflevector(__s0_412, __s0_412, 1, 0); \ + uint32x4_t __rev2_412; __rev2_412 = __builtin_shufflevector(__s2_412, __s2_412, 3, 2, 1, 0); \ + uint32x2_t __ret_412; \ + __ret_412 = __noswap_vset_lane_u32(__noswap_vgetq_lane_u32(__rev2_412, __p3_412), __rev0_412, __p1_412); \ + __ret_412 = __builtin_shufflevector(__ret_412, __ret_412, 1, 0); \ + __ret_412; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_u64(__p0_321, __p1_321, __p2_321, __p3_321) __extension__ ({ \ - uint64x1_t __s0_321 = __p0_321; \ - uint64x2_t __s2_321 = __p2_321; \ - uint64x1_t __ret_321; \ - __ret_321 = vset_lane_u64(vgetq_lane_u64(__s2_321, __p3_321), __s0_321, __p1_321); \ - __ret_321; \ +#define vcopy_laneq_u64(__p0_413, __p1_413, __p2_413, __p3_413) __extension__ ({ \ + uint64x1_t __s0_413 = __p0_413; \ + uint64x2_t __s2_413 = __p2_413; \ + uint64x1_t __ret_413; \ + __ret_413 = vset_lane_u64(vgetq_lane_u64(__s2_413, __p3_413), __s0_413, __p1_413); \ + __ret_413; \ }) #else -#define vcopy_laneq_u64(__p0_322, __p1_322, __p2_322, __p3_322) __extension__ ({ \ - uint64x1_t __s0_322 = __p0_322; \ - uint64x2_t __s2_322 = __p2_322; \ - uint64x2_t __rev2_322; __rev2_322 = __builtin_shufflevector(__s2_322, __s2_322, 1, 0); \ - uint64x1_t __ret_322; \ - __ret_322 = vset_lane_u64(__noswap_vgetq_lane_u64(__rev2_322, __p3_322), __s0_322, __p1_322); \ - __ret_322; \ +#define vcopy_laneq_u64(__p0_414, __p1_414, 
__p2_414, __p3_414) __extension__ ({ \ + uint64x1_t __s0_414 = __p0_414; \ + uint64x2_t __s2_414 = __p2_414; \ + uint64x2_t __rev2_414; __rev2_414 = __builtin_shufflevector(__s2_414, __s2_414, 1, 0); \ + uint64x1_t __ret_414; \ + __ret_414 = vset_lane_u64(__noswap_vgetq_lane_u64(__rev2_414, __p3_414), __s0_414, __p1_414); \ + __ret_414; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_u16(__p0_323, __p1_323, __p2_323, __p3_323) __extension__ ({ \ - uint16x4_t __s0_323 = __p0_323; \ - uint16x8_t __s2_323 = __p2_323; \ - uint16x4_t __ret_323; \ - __ret_323 = vset_lane_u16(vgetq_lane_u16(__s2_323, __p3_323), __s0_323, __p1_323); \ - __ret_323; \ +#define vcopy_laneq_u16(__p0_415, __p1_415, __p2_415, __p3_415) __extension__ ({ \ + uint16x4_t __s0_415 = __p0_415; \ + uint16x8_t __s2_415 = __p2_415; \ + uint16x4_t __ret_415; \ + __ret_415 = vset_lane_u16(vgetq_lane_u16(__s2_415, __p3_415), __s0_415, __p1_415); \ + __ret_415; \ }) #else -#define vcopy_laneq_u16(__p0_324, __p1_324, __p2_324, __p3_324) __extension__ ({ \ - uint16x4_t __s0_324 = __p0_324; \ - uint16x8_t __s2_324 = __p2_324; \ - uint16x4_t __rev0_324; __rev0_324 = __builtin_shufflevector(__s0_324, __s0_324, 3, 2, 1, 0); \ - uint16x8_t __rev2_324; __rev2_324 = __builtin_shufflevector(__s2_324, __s2_324, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x4_t __ret_324; \ - __ret_324 = __noswap_vset_lane_u16(__noswap_vgetq_lane_u16(__rev2_324, __p3_324), __rev0_324, __p1_324); \ - __ret_324 = __builtin_shufflevector(__ret_324, __ret_324, 3, 2, 1, 0); \ - __ret_324; \ +#define vcopy_laneq_u16(__p0_416, __p1_416, __p2_416, __p3_416) __extension__ ({ \ + uint16x4_t __s0_416 = __p0_416; \ + uint16x8_t __s2_416 = __p2_416; \ + uint16x4_t __rev0_416; __rev0_416 = __builtin_shufflevector(__s0_416, __s0_416, 3, 2, 1, 0); \ + uint16x8_t __rev2_416; __rev2_416 = __builtin_shufflevector(__s2_416, __s2_416, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x4_t __ret_416; \ + __ret_416 = __noswap_vset_lane_u16(__noswap_vgetq_lane_u16(__rev2_416, __p3_416), __rev0_416, __p1_416); \ + __ret_416 = __builtin_shufflevector(__ret_416, __ret_416, 3, 2, 1, 0); \ + __ret_416; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_s8(__p0_325, __p1_325, __p2_325, __p3_325) __extension__ ({ \ - int8x8_t __s0_325 = __p0_325; \ - int8x16_t __s2_325 = __p2_325; \ - int8x8_t __ret_325; \ - __ret_325 = vset_lane_s8(vgetq_lane_s8(__s2_325, __p3_325), __s0_325, __p1_325); \ - __ret_325; \ +#define vcopy_laneq_s8(__p0_417, __p1_417, __p2_417, __p3_417) __extension__ ({ \ + int8x8_t __s0_417 = __p0_417; \ + int8x16_t __s2_417 = __p2_417; \ + int8x8_t __ret_417; \ + __ret_417 = vset_lane_s8(vgetq_lane_s8(__s2_417, __p3_417), __s0_417, __p1_417); \ + __ret_417; \ }) #else -#define vcopy_laneq_s8(__p0_326, __p1_326, __p2_326, __p3_326) __extension__ ({ \ - int8x8_t __s0_326 = __p0_326; \ - int8x16_t __s2_326 = __p2_326; \ - int8x8_t __rev0_326; __rev0_326 = __builtin_shufflevector(__s0_326, __s0_326, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __rev2_326; __rev2_326 = __builtin_shufflevector(__s2_326, __s2_326, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __ret_326; \ - __ret_326 = __noswap_vset_lane_s8(__noswap_vgetq_lane_s8(__rev2_326, __p3_326), __rev0_326, __p1_326); \ - __ret_326 = __builtin_shufflevector(__ret_326, __ret_326, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_326; \ +#define vcopy_laneq_s8(__p0_418, __p1_418, __p2_418, __p3_418) __extension__ ({ \ + int8x8_t __s0_418 = __p0_418; \ + int8x16_t __s2_418 = __p2_418; \ + int8x8_t __rev0_418; __rev0_418 = 
__builtin_shufflevector(__s0_418, __s0_418, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev2_418; __rev2_418 = __builtin_shufflevector(__s2_418, __s2_418, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __ret_418; \ + __ret_418 = __noswap_vset_lane_s8(__noswap_vgetq_lane_s8(__rev2_418, __p3_418), __rev0_418, __p1_418); \ + __ret_418 = __builtin_shufflevector(__ret_418, __ret_418, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_418; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_f32(__p0_327, __p1_327, __p2_327, __p3_327) __extension__ ({ \ - float32x2_t __s0_327 = __p0_327; \ - float32x4_t __s2_327 = __p2_327; \ - float32x2_t __ret_327; \ - __ret_327 = vset_lane_f32(vgetq_lane_f32(__s2_327, __p3_327), __s0_327, __p1_327); \ - __ret_327; \ +#define vcopy_laneq_f32(__p0_419, __p1_419, __p2_419, __p3_419) __extension__ ({ \ + float32x2_t __s0_419 = __p0_419; \ + float32x4_t __s2_419 = __p2_419; \ + float32x2_t __ret_419; \ + __ret_419 = vset_lane_f32(vgetq_lane_f32(__s2_419, __p3_419), __s0_419, __p1_419); \ + __ret_419; \ }) #else -#define vcopy_laneq_f32(__p0_328, __p1_328, __p2_328, __p3_328) __extension__ ({ \ - float32x2_t __s0_328 = __p0_328; \ - float32x4_t __s2_328 = __p2_328; \ - float32x2_t __rev0_328; __rev0_328 = __builtin_shufflevector(__s0_328, __s0_328, 1, 0); \ - float32x4_t __rev2_328; __rev2_328 = __builtin_shufflevector(__s2_328, __s2_328, 3, 2, 1, 0); \ - float32x2_t __ret_328; \ - __ret_328 = __noswap_vset_lane_f32(__noswap_vgetq_lane_f32(__rev2_328, __p3_328), __rev0_328, __p1_328); \ - __ret_328 = __builtin_shufflevector(__ret_328, __ret_328, 1, 0); \ - __ret_328; \ +#define vcopy_laneq_f32(__p0_420, __p1_420, __p2_420, __p3_420) __extension__ ({ \ + float32x2_t __s0_420 = __p0_420; \ + float32x4_t __s2_420 = __p2_420; \ + float32x2_t __rev0_420; __rev0_420 = __builtin_shufflevector(__s0_420, __s0_420, 1, 0); \ + float32x4_t __rev2_420; __rev2_420 = __builtin_shufflevector(__s2_420, __s2_420, 3, 2, 1, 0); \ + float32x2_t __ret_420; \ + __ret_420 = __noswap_vset_lane_f32(__noswap_vgetq_lane_f32(__rev2_420, __p3_420), __rev0_420, __p1_420); \ + __ret_420 = __builtin_shufflevector(__ret_420, __ret_420, 1, 0); \ + __ret_420; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_s32(__p0_329, __p1_329, __p2_329, __p3_329) __extension__ ({ \ - int32x2_t __s0_329 = __p0_329; \ - int32x4_t __s2_329 = __p2_329; \ - int32x2_t __ret_329; \ - __ret_329 = vset_lane_s32(vgetq_lane_s32(__s2_329, __p3_329), __s0_329, __p1_329); \ - __ret_329; \ +#define vcopy_laneq_s32(__p0_421, __p1_421, __p2_421, __p3_421) __extension__ ({ \ + int32x2_t __s0_421 = __p0_421; \ + int32x4_t __s2_421 = __p2_421; \ + int32x2_t __ret_421; \ + __ret_421 = vset_lane_s32(vgetq_lane_s32(__s2_421, __p3_421), __s0_421, __p1_421); \ + __ret_421; \ }) #else -#define vcopy_laneq_s32(__p0_330, __p1_330, __p2_330, __p3_330) __extension__ ({ \ - int32x2_t __s0_330 = __p0_330; \ - int32x4_t __s2_330 = __p2_330; \ - int32x2_t __rev0_330; __rev0_330 = __builtin_shufflevector(__s0_330, __s0_330, 1, 0); \ - int32x4_t __rev2_330; __rev2_330 = __builtin_shufflevector(__s2_330, __s2_330, 3, 2, 1, 0); \ - int32x2_t __ret_330; \ - __ret_330 = __noswap_vset_lane_s32(__noswap_vgetq_lane_s32(__rev2_330, __p3_330), __rev0_330, __p1_330); \ - __ret_330 = __builtin_shufflevector(__ret_330, __ret_330, 1, 0); \ - __ret_330; \ +#define vcopy_laneq_s32(__p0_422, __p1_422, __p2_422, __p3_422) __extension__ ({ \ + int32x2_t __s0_422 = __p0_422; \ + int32x4_t __s2_422 = __p2_422; \ + int32x2_t __rev0_422; __rev0_422 = 
__builtin_shufflevector(__s0_422, __s0_422, 1, 0); \ + int32x4_t __rev2_422; __rev2_422 = __builtin_shufflevector(__s2_422, __s2_422, 3, 2, 1, 0); \ + int32x2_t __ret_422; \ + __ret_422 = __noswap_vset_lane_s32(__noswap_vgetq_lane_s32(__rev2_422, __p3_422), __rev0_422, __p1_422); \ + __ret_422 = __builtin_shufflevector(__ret_422, __ret_422, 1, 0); \ + __ret_422; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_s64(__p0_331, __p1_331, __p2_331, __p3_331) __extension__ ({ \ - int64x1_t __s0_331 = __p0_331; \ - int64x2_t __s2_331 = __p2_331; \ - int64x1_t __ret_331; \ - __ret_331 = vset_lane_s64(vgetq_lane_s64(__s2_331, __p3_331), __s0_331, __p1_331); \ - __ret_331; \ +#define vcopy_laneq_s64(__p0_423, __p1_423, __p2_423, __p3_423) __extension__ ({ \ + int64x1_t __s0_423 = __p0_423; \ + int64x2_t __s2_423 = __p2_423; \ + int64x1_t __ret_423; \ + __ret_423 = vset_lane_s64(vgetq_lane_s64(__s2_423, __p3_423), __s0_423, __p1_423); \ + __ret_423; \ }) #else -#define vcopy_laneq_s64(__p0_332, __p1_332, __p2_332, __p3_332) __extension__ ({ \ - int64x1_t __s0_332 = __p0_332; \ - int64x2_t __s2_332 = __p2_332; \ - int64x2_t __rev2_332; __rev2_332 = __builtin_shufflevector(__s2_332, __s2_332, 1, 0); \ - int64x1_t __ret_332; \ - __ret_332 = vset_lane_s64(__noswap_vgetq_lane_s64(__rev2_332, __p3_332), __s0_332, __p1_332); \ - __ret_332; \ +#define vcopy_laneq_s64(__p0_424, __p1_424, __p2_424, __p3_424) __extension__ ({ \ + int64x1_t __s0_424 = __p0_424; \ + int64x2_t __s2_424 = __p2_424; \ + int64x2_t __rev2_424; __rev2_424 = __builtin_shufflevector(__s2_424, __s2_424, 1, 0); \ + int64x1_t __ret_424; \ + __ret_424 = vset_lane_s64(__noswap_vgetq_lane_s64(__rev2_424, __p3_424), __s0_424, __p1_424); \ + __ret_424; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_s16(__p0_333, __p1_333, __p2_333, __p3_333) __extension__ ({ \ - int16x4_t __s0_333 = __p0_333; \ - int16x8_t __s2_333 = __p2_333; \ - int16x4_t __ret_333; \ - __ret_333 = vset_lane_s16(vgetq_lane_s16(__s2_333, __p3_333), __s0_333, __p1_333); \ - __ret_333; \ +#define vcopy_laneq_s16(__p0_425, __p1_425, __p2_425, __p3_425) __extension__ ({ \ + int16x4_t __s0_425 = __p0_425; \ + int16x8_t __s2_425 = __p2_425; \ + int16x4_t __ret_425; \ + __ret_425 = vset_lane_s16(vgetq_lane_s16(__s2_425, __p3_425), __s0_425, __p1_425); \ + __ret_425; \ }) #else -#define vcopy_laneq_s16(__p0_334, __p1_334, __p2_334, __p3_334) __extension__ ({ \ - int16x4_t __s0_334 = __p0_334; \ - int16x8_t __s2_334 = __p2_334; \ - int16x4_t __rev0_334; __rev0_334 = __builtin_shufflevector(__s0_334, __s0_334, 3, 2, 1, 0); \ - int16x8_t __rev2_334; __rev2_334 = __builtin_shufflevector(__s2_334, __s2_334, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __ret_334; \ - __ret_334 = __noswap_vset_lane_s16(__noswap_vgetq_lane_s16(__rev2_334, __p3_334), __rev0_334, __p1_334); \ - __ret_334 = __builtin_shufflevector(__ret_334, __ret_334, 3, 2, 1, 0); \ - __ret_334; \ +#define vcopy_laneq_s16(__p0_426, __p1_426, __p2_426, __p3_426) __extension__ ({ \ + int16x4_t __s0_426 = __p0_426; \ + int16x8_t __s2_426 = __p2_426; \ + int16x4_t __rev0_426; __rev0_426 = __builtin_shufflevector(__s0_426, __s0_426, 3, 2, 1, 0); \ + int16x8_t __rev2_426; __rev2_426 = __builtin_shufflevector(__s2_426, __s2_426, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __ret_426; \ + __ret_426 = __noswap_vset_lane_s16(__noswap_vgetq_lane_s16(__rev2_426, __p3_426), __rev0_426, __p1_426); \ + __ret_426 = __builtin_shufflevector(__ret_426, __ret_426, 3, 2, 1, 0); \ + __ret_426; \ }) #endif @@ -49009,85 +50713,85 @@ __ai 
float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { }) #endif -#define vdup_lane_p64(__p0_335, __p1_335) __extension__ ({ \ - poly64x1_t __s0_335 = __p0_335; \ - poly64x1_t __ret_335; \ - __ret_335 = splat_lane_p64(__s0_335, __p1_335); \ - __ret_335; \ +#define vdup_lane_p64(__p0_427, __p1_427) __extension__ ({ \ + poly64x1_t __s0_427 = __p0_427; \ + poly64x1_t __ret_427; \ + __ret_427 = splat_lane_p64(__s0_427, __p1_427); \ + __ret_427; \ }) #ifdef __LITTLE_ENDIAN__ -#define vdupq_lane_p64(__p0_336, __p1_336) __extension__ ({ \ - poly64x1_t __s0_336 = __p0_336; \ - poly64x2_t __ret_336; \ - __ret_336 = splatq_lane_p64(__s0_336, __p1_336); \ - __ret_336; \ +#define vdupq_lane_p64(__p0_428, __p1_428) __extension__ ({ \ + poly64x1_t __s0_428 = __p0_428; \ + poly64x2_t __ret_428; \ + __ret_428 = splatq_lane_p64(__s0_428, __p1_428); \ + __ret_428; \ }) #else -#define vdupq_lane_p64(__p0_337, __p1_337) __extension__ ({ \ - poly64x1_t __s0_337 = __p0_337; \ - poly64x2_t __ret_337; \ - __ret_337 = __noswap_splatq_lane_p64(__s0_337, __p1_337); \ - __ret_337 = __builtin_shufflevector(__ret_337, __ret_337, 1, 0); \ - __ret_337; \ +#define vdupq_lane_p64(__p0_429, __p1_429) __extension__ ({ \ + poly64x1_t __s0_429 = __p0_429; \ + poly64x2_t __ret_429; \ + __ret_429 = __noswap_splatq_lane_p64(__s0_429, __p1_429); \ + __ret_429 = __builtin_shufflevector(__ret_429, __ret_429, 1, 0); \ + __ret_429; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_lane_f64(__p0_338, __p1_338) __extension__ ({ \ - float64x1_t __s0_338 = __p0_338; \ - float64x2_t __ret_338; \ - __ret_338 = splatq_lane_f64(__s0_338, __p1_338); \ - __ret_338; \ +#define vdupq_lane_f64(__p0_430, __p1_430) __extension__ ({ \ + float64x1_t __s0_430 = __p0_430; \ + float64x2_t __ret_430; \ + __ret_430 = splatq_lane_f64(__s0_430, __p1_430); \ + __ret_430; \ }) #else -#define vdupq_lane_f64(__p0_339, __p1_339) __extension__ ({ \ - float64x1_t __s0_339 = __p0_339; \ - float64x2_t __ret_339; \ - __ret_339 = __noswap_splatq_lane_f64(__s0_339, __p1_339); \ - __ret_339 = __builtin_shufflevector(__ret_339, __ret_339, 1, 0); \ - __ret_339; \ +#define vdupq_lane_f64(__p0_431, __p1_431) __extension__ ({ \ + float64x1_t __s0_431 = __p0_431; \ + float64x2_t __ret_431; \ + __ret_431 = __noswap_splatq_lane_f64(__s0_431, __p1_431); \ + __ret_431 = __builtin_shufflevector(__ret_431, __ret_431, 1, 0); \ + __ret_431; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_lane_f16(__p0_340, __p1_340) __extension__ ({ \ - float16x4_t __s0_340 = __p0_340; \ - float16x8_t __ret_340; \ - __ret_340 = splatq_lane_f16(__s0_340, __p1_340); \ - __ret_340; \ +#define vdupq_lane_f16(__p0_432, __p1_432) __extension__ ({ \ + float16x4_t __s0_432 = __p0_432; \ + float16x8_t __ret_432; \ + __ret_432 = splatq_lane_f16(__s0_432, __p1_432); \ + __ret_432; \ }) #else -#define vdupq_lane_f16(__p0_341, __p1_341) __extension__ ({ \ - float16x4_t __s0_341 = __p0_341; \ - float16x4_t __rev0_341; __rev0_341 = __builtin_shufflevector(__s0_341, __s0_341, 3, 2, 1, 0); \ - float16x8_t __ret_341; \ - __ret_341 = __noswap_splatq_lane_f16(__rev0_341, __p1_341); \ - __ret_341 = __builtin_shufflevector(__ret_341, __ret_341, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_341; \ +#define vdupq_lane_f16(__p0_433, __p1_433) __extension__ ({ \ + float16x4_t __s0_433 = __p0_433; \ + float16x4_t __rev0_433; __rev0_433 = __builtin_shufflevector(__s0_433, __s0_433, 3, 2, 1, 0); \ + float16x8_t __ret_433; \ + __ret_433 = __noswap_splatq_lane_f16(__rev0_433, __p1_433); \ + __ret_433 = 
__builtin_shufflevector(__ret_433, __ret_433, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_433; \ }) #endif -#define vdup_lane_f64(__p0_342, __p1_342) __extension__ ({ \ - float64x1_t __s0_342 = __p0_342; \ - float64x1_t __ret_342; \ - __ret_342 = splat_lane_f64(__s0_342, __p1_342); \ - __ret_342; \ +#define vdup_lane_f64(__p0_434, __p1_434) __extension__ ({ \ + float64x1_t __s0_434 = __p0_434; \ + float64x1_t __ret_434; \ + __ret_434 = splat_lane_f64(__s0_434, __p1_434); \ + __ret_434; \ }) #ifdef __LITTLE_ENDIAN__ -#define vdup_lane_f16(__p0_343, __p1_343) __extension__ ({ \ - float16x4_t __s0_343 = __p0_343; \ - float16x4_t __ret_343; \ - __ret_343 = splat_lane_f16(__s0_343, __p1_343); \ - __ret_343; \ +#define vdup_lane_f16(__p0_435, __p1_435) __extension__ ({ \ + float16x4_t __s0_435 = __p0_435; \ + float16x4_t __ret_435; \ + __ret_435 = splat_lane_f16(__s0_435, __p1_435); \ + __ret_435; \ }) #else -#define vdup_lane_f16(__p0_344, __p1_344) __extension__ ({ \ - float16x4_t __s0_344 = __p0_344; \ - float16x4_t __rev0_344; __rev0_344 = __builtin_shufflevector(__s0_344, __s0_344, 3, 2, 1, 0); \ - float16x4_t __ret_344; \ - __ret_344 = __noswap_splat_lane_f16(__rev0_344, __p1_344); \ - __ret_344 = __builtin_shufflevector(__ret_344, __ret_344, 3, 2, 1, 0); \ - __ret_344; \ +#define vdup_lane_f16(__p0_436, __p1_436) __extension__ ({ \ + float16x4_t __s0_436 = __p0_436; \ + float16x4_t __rev0_436; __rev0_436 = __builtin_shufflevector(__s0_436, __s0_436, 3, 2, 1, 0); \ + float16x4_t __ret_436; \ + __ret_436 = __noswap_splat_lane_f16(__rev0_436, __p1_436); \ + __ret_436 = __builtin_shufflevector(__ret_436, __ret_436, 3, 2, 1, 0); \ + __ret_436; \ }) #endif @@ -49296,502 +51000,502 @@ __ai float32x2_t vdiv_f32(float32x2_t __p0, float32x2_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_p8(__p0_345, __p1_345) __extension__ ({ \ - poly8x16_t __s0_345 = __p0_345; \ - poly8x8_t __ret_345; \ - __ret_345 = splat_laneq_p8(__s0_345, __p1_345); \ - __ret_345; \ +#define vdup_laneq_p8(__p0_437, __p1_437) __extension__ ({ \ + poly8x16_t __s0_437 = __p0_437; \ + poly8x8_t __ret_437; \ + __ret_437 = splat_laneq_p8(__s0_437, __p1_437); \ + __ret_437; \ }) #else -#define vdup_laneq_p8(__p0_346, __p1_346) __extension__ ({ \ - poly8x16_t __s0_346 = __p0_346; \ - poly8x16_t __rev0_346; __rev0_346 = __builtin_shufflevector(__s0_346, __s0_346, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x8_t __ret_346; \ - __ret_346 = __noswap_splat_laneq_p8(__rev0_346, __p1_346); \ - __ret_346 = __builtin_shufflevector(__ret_346, __ret_346, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_346; \ +#define vdup_laneq_p8(__p0_438, __p1_438) __extension__ ({ \ + poly8x16_t __s0_438 = __p0_438; \ + poly8x16_t __rev0_438; __rev0_438 = __builtin_shufflevector(__s0_438, __s0_438, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x8_t __ret_438; \ + __ret_438 = __noswap_splat_laneq_p8(__rev0_438, __p1_438); \ + __ret_438 = __builtin_shufflevector(__ret_438, __ret_438, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_438; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_p64(__p0_347, __p1_347) __extension__ ({ \ - poly64x2_t __s0_347 = __p0_347; \ - poly64x1_t __ret_347; \ - __ret_347 = splat_laneq_p64(__s0_347, __p1_347); \ - __ret_347; \ +#define vdup_laneq_p64(__p0_439, __p1_439) __extension__ ({ \ + poly64x2_t __s0_439 = __p0_439; \ + poly64x1_t __ret_439; \ + __ret_439 = splat_laneq_p64(__s0_439, __p1_439); \ + __ret_439; \ }) #else -#define vdup_laneq_p64(__p0_348, __p1_348) __extension__ ({ \ - poly64x2_t __s0_348 = 
__p0_348; \ - poly64x2_t __rev0_348; __rev0_348 = __builtin_shufflevector(__s0_348, __s0_348, 1, 0); \ - poly64x1_t __ret_348; \ - __ret_348 = __noswap_splat_laneq_p64(__rev0_348, __p1_348); \ - __ret_348; \ +#define vdup_laneq_p64(__p0_440, __p1_440) __extension__ ({ \ + poly64x2_t __s0_440 = __p0_440; \ + poly64x2_t __rev0_440; __rev0_440 = __builtin_shufflevector(__s0_440, __s0_440, 1, 0); \ + poly64x1_t __ret_440; \ + __ret_440 = __noswap_splat_laneq_p64(__rev0_440, __p1_440); \ + __ret_440; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_p16(__p0_349, __p1_349) __extension__ ({ \ - poly16x8_t __s0_349 = __p0_349; \ - poly16x4_t __ret_349; \ - __ret_349 = splat_laneq_p16(__s0_349, __p1_349); \ - __ret_349; \ +#define vdup_laneq_p16(__p0_441, __p1_441) __extension__ ({ \ + poly16x8_t __s0_441 = __p0_441; \ + poly16x4_t __ret_441; \ + __ret_441 = splat_laneq_p16(__s0_441, __p1_441); \ + __ret_441; \ }) #else -#define vdup_laneq_p16(__p0_350, __p1_350) __extension__ ({ \ - poly16x8_t __s0_350 = __p0_350; \ - poly16x8_t __rev0_350; __rev0_350 = __builtin_shufflevector(__s0_350, __s0_350, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly16x4_t __ret_350; \ - __ret_350 = __noswap_splat_laneq_p16(__rev0_350, __p1_350); \ - __ret_350 = __builtin_shufflevector(__ret_350, __ret_350, 3, 2, 1, 0); \ - __ret_350; \ +#define vdup_laneq_p16(__p0_442, __p1_442) __extension__ ({ \ + poly16x8_t __s0_442 = __p0_442; \ + poly16x8_t __rev0_442; __rev0_442 = __builtin_shufflevector(__s0_442, __s0_442, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x4_t __ret_442; \ + __ret_442 = __noswap_splat_laneq_p16(__rev0_442, __p1_442); \ + __ret_442 = __builtin_shufflevector(__ret_442, __ret_442, 3, 2, 1, 0); \ + __ret_442; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_p8(__p0_351, __p1_351) __extension__ ({ \ - poly8x16_t __s0_351 = __p0_351; \ - poly8x16_t __ret_351; \ - __ret_351 = splatq_laneq_p8(__s0_351, __p1_351); \ - __ret_351; \ +#define vdupq_laneq_p8(__p0_443, __p1_443) __extension__ ({ \ + poly8x16_t __s0_443 = __p0_443; \ + poly8x16_t __ret_443; \ + __ret_443 = splatq_laneq_p8(__s0_443, __p1_443); \ + __ret_443; \ }) #else -#define vdupq_laneq_p8(__p0_352, __p1_352) __extension__ ({ \ - poly8x16_t __s0_352 = __p0_352; \ - poly8x16_t __rev0_352; __rev0_352 = __builtin_shufflevector(__s0_352, __s0_352, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly8x16_t __ret_352; \ - __ret_352 = __noswap_splatq_laneq_p8(__rev0_352, __p1_352); \ - __ret_352 = __builtin_shufflevector(__ret_352, __ret_352, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_352; \ +#define vdupq_laneq_p8(__p0_444, __p1_444) __extension__ ({ \ + poly8x16_t __s0_444 = __p0_444; \ + poly8x16_t __rev0_444; __rev0_444 = __builtin_shufflevector(__s0_444, __s0_444, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly8x16_t __ret_444; \ + __ret_444 = __noswap_splatq_laneq_p8(__rev0_444, __p1_444); \ + __ret_444 = __builtin_shufflevector(__ret_444, __ret_444, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_444; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_p64(__p0_353, __p1_353) __extension__ ({ \ - poly64x2_t __s0_353 = __p0_353; \ - poly64x2_t __ret_353; \ - __ret_353 = splatq_laneq_p64(__s0_353, __p1_353); \ - __ret_353; \ +#define vdupq_laneq_p64(__p0_445, __p1_445) __extension__ ({ \ + poly64x2_t __s0_445 = __p0_445; \ + poly64x2_t __ret_445; \ + __ret_445 = splatq_laneq_p64(__s0_445, __p1_445); \ + __ret_445; \ }) #else -#define vdupq_laneq_p64(__p0_354, __p1_354) __extension__ ({ 
\ - poly64x2_t __s0_354 = __p0_354; \ - poly64x2_t __rev0_354; __rev0_354 = __builtin_shufflevector(__s0_354, __s0_354, 1, 0); \ - poly64x2_t __ret_354; \ - __ret_354 = __noswap_splatq_laneq_p64(__rev0_354, __p1_354); \ - __ret_354 = __builtin_shufflevector(__ret_354, __ret_354, 1, 0); \ - __ret_354; \ +#define vdupq_laneq_p64(__p0_446, __p1_446) __extension__ ({ \ + poly64x2_t __s0_446 = __p0_446; \ + poly64x2_t __rev0_446; __rev0_446 = __builtin_shufflevector(__s0_446, __s0_446, 1, 0); \ + poly64x2_t __ret_446; \ + __ret_446 = __noswap_splatq_laneq_p64(__rev0_446, __p1_446); \ + __ret_446 = __builtin_shufflevector(__ret_446, __ret_446, 1, 0); \ + __ret_446; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_p16(__p0_355, __p1_355) __extension__ ({ \ - poly16x8_t __s0_355 = __p0_355; \ - poly16x8_t __ret_355; \ - __ret_355 = splatq_laneq_p16(__s0_355, __p1_355); \ - __ret_355; \ +#define vdupq_laneq_p16(__p0_447, __p1_447) __extension__ ({ \ + poly16x8_t __s0_447 = __p0_447; \ + poly16x8_t __ret_447; \ + __ret_447 = splatq_laneq_p16(__s0_447, __p1_447); \ + __ret_447; \ }) #else -#define vdupq_laneq_p16(__p0_356, __p1_356) __extension__ ({ \ - poly16x8_t __s0_356 = __p0_356; \ - poly16x8_t __rev0_356; __rev0_356 = __builtin_shufflevector(__s0_356, __s0_356, 7, 6, 5, 4, 3, 2, 1, 0); \ - poly16x8_t __ret_356; \ - __ret_356 = __noswap_splatq_laneq_p16(__rev0_356, __p1_356); \ - __ret_356 = __builtin_shufflevector(__ret_356, __ret_356, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_356; \ +#define vdupq_laneq_p16(__p0_448, __p1_448) __extension__ ({ \ + poly16x8_t __s0_448 = __p0_448; \ + poly16x8_t __rev0_448; __rev0_448 = __builtin_shufflevector(__s0_448, __s0_448, 7, 6, 5, 4, 3, 2, 1, 0); \ + poly16x8_t __ret_448; \ + __ret_448 = __noswap_splatq_laneq_p16(__rev0_448, __p1_448); \ + __ret_448 = __builtin_shufflevector(__ret_448, __ret_448, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_448; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_u8(__p0_357, __p1_357) __extension__ ({ \ - uint8x16_t __s0_357 = __p0_357; \ - uint8x16_t __ret_357; \ - __ret_357 = splatq_laneq_u8(__s0_357, __p1_357); \ - __ret_357; \ +#define vdupq_laneq_u8(__p0_449, __p1_449) __extension__ ({ \ + uint8x16_t __s0_449 = __p0_449; \ + uint8x16_t __ret_449; \ + __ret_449 = splatq_laneq_u8(__s0_449, __p1_449); \ + __ret_449; \ }) #else -#define vdupq_laneq_u8(__p0_358, __p1_358) __extension__ ({ \ - uint8x16_t __s0_358 = __p0_358; \ - uint8x16_t __rev0_358; __rev0_358 = __builtin_shufflevector(__s0_358, __s0_358, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __ret_358; \ - __ret_358 = __noswap_splatq_laneq_u8(__rev0_358, __p1_358); \ - __ret_358 = __builtin_shufflevector(__ret_358, __ret_358, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_358; \ +#define vdupq_laneq_u8(__p0_450, __p1_450) __extension__ ({ \ + uint8x16_t __s0_450 = __p0_450; \ + uint8x16_t __rev0_450; __rev0_450 = __builtin_shufflevector(__s0_450, __s0_450, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __ret_450; \ + __ret_450 = __noswap_splatq_laneq_u8(__rev0_450, __p1_450); \ + __ret_450 = __builtin_shufflevector(__ret_450, __ret_450, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_450; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_u32(__p0_359, __p1_359) __extension__ ({ \ - uint32x4_t __s0_359 = __p0_359; \ - uint32x4_t __ret_359; \ - __ret_359 = splatq_laneq_u32(__s0_359, __p1_359); \ - __ret_359; \ +#define vdupq_laneq_u32(__p0_451, __p1_451) __extension__ ({ \ + 
uint32x4_t __s0_451 = __p0_451; \ + uint32x4_t __ret_451; \ + __ret_451 = splatq_laneq_u32(__s0_451, __p1_451); \ + __ret_451; \ }) #else -#define vdupq_laneq_u32(__p0_360, __p1_360) __extension__ ({ \ - uint32x4_t __s0_360 = __p0_360; \ - uint32x4_t __rev0_360; __rev0_360 = __builtin_shufflevector(__s0_360, __s0_360, 3, 2, 1, 0); \ - uint32x4_t __ret_360; \ - __ret_360 = __noswap_splatq_laneq_u32(__rev0_360, __p1_360); \ - __ret_360 = __builtin_shufflevector(__ret_360, __ret_360, 3, 2, 1, 0); \ - __ret_360; \ +#define vdupq_laneq_u32(__p0_452, __p1_452) __extension__ ({ \ + uint32x4_t __s0_452 = __p0_452; \ + uint32x4_t __rev0_452; __rev0_452 = __builtin_shufflevector(__s0_452, __s0_452, 3, 2, 1, 0); \ + uint32x4_t __ret_452; \ + __ret_452 = __noswap_splatq_laneq_u32(__rev0_452, __p1_452); \ + __ret_452 = __builtin_shufflevector(__ret_452, __ret_452, 3, 2, 1, 0); \ + __ret_452; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_u64(__p0_361, __p1_361) __extension__ ({ \ - uint64x2_t __s0_361 = __p0_361; \ - uint64x2_t __ret_361; \ - __ret_361 = splatq_laneq_u64(__s0_361, __p1_361); \ - __ret_361; \ +#define vdupq_laneq_u64(__p0_453, __p1_453) __extension__ ({ \ + uint64x2_t __s0_453 = __p0_453; \ + uint64x2_t __ret_453; \ + __ret_453 = splatq_laneq_u64(__s0_453, __p1_453); \ + __ret_453; \ }) #else -#define vdupq_laneq_u64(__p0_362, __p1_362) __extension__ ({ \ - uint64x2_t __s0_362 = __p0_362; \ - uint64x2_t __rev0_362; __rev0_362 = __builtin_shufflevector(__s0_362, __s0_362, 1, 0); \ - uint64x2_t __ret_362; \ - __ret_362 = __noswap_splatq_laneq_u64(__rev0_362, __p1_362); \ - __ret_362 = __builtin_shufflevector(__ret_362, __ret_362, 1, 0); \ - __ret_362; \ +#define vdupq_laneq_u64(__p0_454, __p1_454) __extension__ ({ \ + uint64x2_t __s0_454 = __p0_454; \ + uint64x2_t __rev0_454; __rev0_454 = __builtin_shufflevector(__s0_454, __s0_454, 1, 0); \ + uint64x2_t __ret_454; \ + __ret_454 = __noswap_splatq_laneq_u64(__rev0_454, __p1_454); \ + __ret_454 = __builtin_shufflevector(__ret_454, __ret_454, 1, 0); \ + __ret_454; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_u16(__p0_363, __p1_363) __extension__ ({ \ - uint16x8_t __s0_363 = __p0_363; \ - uint16x8_t __ret_363; \ - __ret_363 = splatq_laneq_u16(__s0_363, __p1_363); \ - __ret_363; \ +#define vdupq_laneq_u16(__p0_455, __p1_455) __extension__ ({ \ + uint16x8_t __s0_455 = __p0_455; \ + uint16x8_t __ret_455; \ + __ret_455 = splatq_laneq_u16(__s0_455, __p1_455); \ + __ret_455; \ }) #else -#define vdupq_laneq_u16(__p0_364, __p1_364) __extension__ ({ \ - uint16x8_t __s0_364 = __p0_364; \ - uint16x8_t __rev0_364; __rev0_364 = __builtin_shufflevector(__s0_364, __s0_364, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __ret_364; \ - __ret_364 = __noswap_splatq_laneq_u16(__rev0_364, __p1_364); \ - __ret_364 = __builtin_shufflevector(__ret_364, __ret_364, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_364; \ +#define vdupq_laneq_u16(__p0_456, __p1_456) __extension__ ({ \ + uint16x8_t __s0_456 = __p0_456; \ + uint16x8_t __rev0_456; __rev0_456 = __builtin_shufflevector(__s0_456, __s0_456, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __ret_456; \ + __ret_456 = __noswap_splatq_laneq_u16(__rev0_456, __p1_456); \ + __ret_456 = __builtin_shufflevector(__ret_456, __ret_456, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_456; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_s8(__p0_365, __p1_365) __extension__ ({ \ - int8x16_t __s0_365 = __p0_365; \ - int8x16_t __ret_365; \ - __ret_365 = splatq_laneq_s8(__s0_365, __p1_365); \ - __ret_365; \ +#define 
vdupq_laneq_s8(__p0_457, __p1_457) __extension__ ({ \ + int8x16_t __s0_457 = __p0_457; \ + int8x16_t __ret_457; \ + __ret_457 = splatq_laneq_s8(__s0_457, __p1_457); \ + __ret_457; \ }) #else -#define vdupq_laneq_s8(__p0_366, __p1_366) __extension__ ({ \ - int8x16_t __s0_366 = __p0_366; \ - int8x16_t __rev0_366; __rev0_366 = __builtin_shufflevector(__s0_366, __s0_366, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __ret_366; \ - __ret_366 = __noswap_splatq_laneq_s8(__rev0_366, __p1_366); \ - __ret_366 = __builtin_shufflevector(__ret_366, __ret_366, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_366; \ +#define vdupq_laneq_s8(__p0_458, __p1_458) __extension__ ({ \ + int8x16_t __s0_458 = __p0_458; \ + int8x16_t __rev0_458; __rev0_458 = __builtin_shufflevector(__s0_458, __s0_458, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __ret_458; \ + __ret_458 = __noswap_splatq_laneq_s8(__rev0_458, __p1_458); \ + __ret_458 = __builtin_shufflevector(__ret_458, __ret_458, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_458; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_f64(__p0_367, __p1_367) __extension__ ({ \ - float64x2_t __s0_367 = __p0_367; \ - float64x2_t __ret_367; \ - __ret_367 = splatq_laneq_f64(__s0_367, __p1_367); \ - __ret_367; \ +#define vdupq_laneq_f64(__p0_459, __p1_459) __extension__ ({ \ + float64x2_t __s0_459 = __p0_459; \ + float64x2_t __ret_459; \ + __ret_459 = splatq_laneq_f64(__s0_459, __p1_459); \ + __ret_459; \ }) #else -#define vdupq_laneq_f64(__p0_368, __p1_368) __extension__ ({ \ - float64x2_t __s0_368 = __p0_368; \ - float64x2_t __rev0_368; __rev0_368 = __builtin_shufflevector(__s0_368, __s0_368, 1, 0); \ - float64x2_t __ret_368; \ - __ret_368 = __noswap_splatq_laneq_f64(__rev0_368, __p1_368); \ - __ret_368 = __builtin_shufflevector(__ret_368, __ret_368, 1, 0); \ - __ret_368; \ +#define vdupq_laneq_f64(__p0_460, __p1_460) __extension__ ({ \ + float64x2_t __s0_460 = __p0_460; \ + float64x2_t __rev0_460; __rev0_460 = __builtin_shufflevector(__s0_460, __s0_460, 1, 0); \ + float64x2_t __ret_460; \ + __ret_460 = __noswap_splatq_laneq_f64(__rev0_460, __p1_460); \ + __ret_460 = __builtin_shufflevector(__ret_460, __ret_460, 1, 0); \ + __ret_460; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_f32(__p0_369, __p1_369) __extension__ ({ \ - float32x4_t __s0_369 = __p0_369; \ - float32x4_t __ret_369; \ - __ret_369 = splatq_laneq_f32(__s0_369, __p1_369); \ - __ret_369; \ +#define vdupq_laneq_f32(__p0_461, __p1_461) __extension__ ({ \ + float32x4_t __s0_461 = __p0_461; \ + float32x4_t __ret_461; \ + __ret_461 = splatq_laneq_f32(__s0_461, __p1_461); \ + __ret_461; \ }) #else -#define vdupq_laneq_f32(__p0_370, __p1_370) __extension__ ({ \ - float32x4_t __s0_370 = __p0_370; \ - float32x4_t __rev0_370; __rev0_370 = __builtin_shufflevector(__s0_370, __s0_370, 3, 2, 1, 0); \ - float32x4_t __ret_370; \ - __ret_370 = __noswap_splatq_laneq_f32(__rev0_370, __p1_370); \ - __ret_370 = __builtin_shufflevector(__ret_370, __ret_370, 3, 2, 1, 0); \ - __ret_370; \ +#define vdupq_laneq_f32(__p0_462, __p1_462) __extension__ ({ \ + float32x4_t __s0_462 = __p0_462; \ + float32x4_t __rev0_462; __rev0_462 = __builtin_shufflevector(__s0_462, __s0_462, 3, 2, 1, 0); \ + float32x4_t __ret_462; \ + __ret_462 = __noswap_splatq_laneq_f32(__rev0_462, __p1_462); \ + __ret_462 = __builtin_shufflevector(__ret_462, __ret_462, 3, 2, 1, 0); \ + __ret_462; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_f16(__p0_371, 
__p1_371) __extension__ ({ \ - float16x8_t __s0_371 = __p0_371; \ - float16x8_t __ret_371; \ - __ret_371 = splatq_laneq_f16(__s0_371, __p1_371); \ - __ret_371; \ +#define vdupq_laneq_f16(__p0_463, __p1_463) __extension__ ({ \ + float16x8_t __s0_463 = __p0_463; \ + float16x8_t __ret_463; \ + __ret_463 = splatq_laneq_f16(__s0_463, __p1_463); \ + __ret_463; \ }) #else -#define vdupq_laneq_f16(__p0_372, __p1_372) __extension__ ({ \ - float16x8_t __s0_372 = __p0_372; \ - float16x8_t __rev0_372; __rev0_372 = __builtin_shufflevector(__s0_372, __s0_372, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __ret_372; \ - __ret_372 = __noswap_splatq_laneq_f16(__rev0_372, __p1_372); \ - __ret_372 = __builtin_shufflevector(__ret_372, __ret_372, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_372; \ +#define vdupq_laneq_f16(__p0_464, __p1_464) __extension__ ({ \ + float16x8_t __s0_464 = __p0_464; \ + float16x8_t __rev0_464; __rev0_464 = __builtin_shufflevector(__s0_464, __s0_464, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __ret_464; \ + __ret_464 = __noswap_splatq_laneq_f16(__rev0_464, __p1_464); \ + __ret_464 = __builtin_shufflevector(__ret_464, __ret_464, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_464; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_s32(__p0_373, __p1_373) __extension__ ({ \ - int32x4_t __s0_373 = __p0_373; \ - int32x4_t __ret_373; \ - __ret_373 = splatq_laneq_s32(__s0_373, __p1_373); \ - __ret_373; \ +#define vdupq_laneq_s32(__p0_465, __p1_465) __extension__ ({ \ + int32x4_t __s0_465 = __p0_465; \ + int32x4_t __ret_465; \ + __ret_465 = splatq_laneq_s32(__s0_465, __p1_465); \ + __ret_465; \ }) #else -#define vdupq_laneq_s32(__p0_374, __p1_374) __extension__ ({ \ - int32x4_t __s0_374 = __p0_374; \ - int32x4_t __rev0_374; __rev0_374 = __builtin_shufflevector(__s0_374, __s0_374, 3, 2, 1, 0); \ - int32x4_t __ret_374; \ - __ret_374 = __noswap_splatq_laneq_s32(__rev0_374, __p1_374); \ - __ret_374 = __builtin_shufflevector(__ret_374, __ret_374, 3, 2, 1, 0); \ - __ret_374; \ +#define vdupq_laneq_s32(__p0_466, __p1_466) __extension__ ({ \ + int32x4_t __s0_466 = __p0_466; \ + int32x4_t __rev0_466; __rev0_466 = __builtin_shufflevector(__s0_466, __s0_466, 3, 2, 1, 0); \ + int32x4_t __ret_466; \ + __ret_466 = __noswap_splatq_laneq_s32(__rev0_466, __p1_466); \ + __ret_466 = __builtin_shufflevector(__ret_466, __ret_466, 3, 2, 1, 0); \ + __ret_466; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdupq_laneq_s64(__p0_375, __p1_375) __extension__ ({ \ - int64x2_t __s0_375 = __p0_375; \ - int64x2_t __ret_375; \ - __ret_375 = splatq_laneq_s64(__s0_375, __p1_375); \ - __ret_375; \ +#define vdupq_laneq_s64(__p0_467, __p1_467) __extension__ ({ \ + int64x2_t __s0_467 = __p0_467; \ + int64x2_t __ret_467; \ + __ret_467 = splatq_laneq_s64(__s0_467, __p1_467); \ + __ret_467; \ }) #else -#define vdupq_laneq_s64(__p0_376, __p1_376) __extension__ ({ \ - int64x2_t __s0_376 = __p0_376; \ - int64x2_t __rev0_376; __rev0_376 = __builtin_shufflevector(__s0_376, __s0_376, 1, 0); \ - int64x2_t __ret_376; \ - __ret_376 = __noswap_splatq_laneq_s64(__rev0_376, __p1_376); \ - __ret_376 = __builtin_shufflevector(__ret_376, __ret_376, 1, 0); \ - __ret_376; \ +#define vdupq_laneq_s64(__p0_468, __p1_468) __extension__ ({ \ + int64x2_t __s0_468 = __p0_468; \ + int64x2_t __rev0_468; __rev0_468 = __builtin_shufflevector(__s0_468, __s0_468, 1, 0); \ + int64x2_t __ret_468; \ + __ret_468 = __noswap_splatq_laneq_s64(__rev0_468, __p1_468); \ + __ret_468 = __builtin_shufflevector(__ret_468, __ret_468, 1, 0); \ + __ret_468; \ }) #endif #ifdef __LITTLE_ENDIAN__ 
-#define vdupq_laneq_s16(__p0_377, __p1_377) __extension__ ({ \ - int16x8_t __s0_377 = __p0_377; \ - int16x8_t __ret_377; \ - __ret_377 = splatq_laneq_s16(__s0_377, __p1_377); \ - __ret_377; \ +#define vdupq_laneq_s16(__p0_469, __p1_469) __extension__ ({ \ + int16x8_t __s0_469 = __p0_469; \ + int16x8_t __ret_469; \ + __ret_469 = splatq_laneq_s16(__s0_469, __p1_469); \ + __ret_469; \ }) #else -#define vdupq_laneq_s16(__p0_378, __p1_378) __extension__ ({ \ - int16x8_t __s0_378 = __p0_378; \ - int16x8_t __rev0_378; __rev0_378 = __builtin_shufflevector(__s0_378, __s0_378, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __ret_378; \ - __ret_378 = __noswap_splatq_laneq_s16(__rev0_378, __p1_378); \ - __ret_378 = __builtin_shufflevector(__ret_378, __ret_378, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_378; \ +#define vdupq_laneq_s16(__p0_470, __p1_470) __extension__ ({ \ + int16x8_t __s0_470 = __p0_470; \ + int16x8_t __rev0_470; __rev0_470 = __builtin_shufflevector(__s0_470, __s0_470, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __ret_470; \ + __ret_470 = __noswap_splatq_laneq_s16(__rev0_470, __p1_470); \ + __ret_470 = __builtin_shufflevector(__ret_470, __ret_470, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_470; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_u8(__p0_379, __p1_379) __extension__ ({ \ - uint8x16_t __s0_379 = __p0_379; \ - uint8x8_t __ret_379; \ - __ret_379 = splat_laneq_u8(__s0_379, __p1_379); \ - __ret_379; \ +#define vdup_laneq_u8(__p0_471, __p1_471) __extension__ ({ \ + uint8x16_t __s0_471 = __p0_471; \ + uint8x8_t __ret_471; \ + __ret_471 = splat_laneq_u8(__s0_471, __p1_471); \ + __ret_471; \ }) #else -#define vdup_laneq_u8(__p0_380, __p1_380) __extension__ ({ \ - uint8x16_t __s0_380 = __p0_380; \ - uint8x16_t __rev0_380; __rev0_380 = __builtin_shufflevector(__s0_380, __s0_380, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __ret_380; \ - __ret_380 = __noswap_splat_laneq_u8(__rev0_380, __p1_380); \ - __ret_380 = __builtin_shufflevector(__ret_380, __ret_380, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_380; \ +#define vdup_laneq_u8(__p0_472, __p1_472) __extension__ ({ \ + uint8x16_t __s0_472 = __p0_472; \ + uint8x16_t __rev0_472; __rev0_472 = __builtin_shufflevector(__s0_472, __s0_472, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __ret_472; \ + __ret_472 = __noswap_splat_laneq_u8(__rev0_472, __p1_472); \ + __ret_472 = __builtin_shufflevector(__ret_472, __ret_472, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_472; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_u32(__p0_381, __p1_381) __extension__ ({ \ - uint32x4_t __s0_381 = __p0_381; \ - uint32x2_t __ret_381; \ - __ret_381 = splat_laneq_u32(__s0_381, __p1_381); \ - __ret_381; \ +#define vdup_laneq_u32(__p0_473, __p1_473) __extension__ ({ \ + uint32x4_t __s0_473 = __p0_473; \ + uint32x2_t __ret_473; \ + __ret_473 = splat_laneq_u32(__s0_473, __p1_473); \ + __ret_473; \ }) #else -#define vdup_laneq_u32(__p0_382, __p1_382) __extension__ ({ \ - uint32x4_t __s0_382 = __p0_382; \ - uint32x4_t __rev0_382; __rev0_382 = __builtin_shufflevector(__s0_382, __s0_382, 3, 2, 1, 0); \ - uint32x2_t __ret_382; \ - __ret_382 = __noswap_splat_laneq_u32(__rev0_382, __p1_382); \ - __ret_382 = __builtin_shufflevector(__ret_382, __ret_382, 1, 0); \ - __ret_382; \ +#define vdup_laneq_u32(__p0_474, __p1_474) __extension__ ({ \ + uint32x4_t __s0_474 = __p0_474; \ + uint32x4_t __rev0_474; __rev0_474 = __builtin_shufflevector(__s0_474, __s0_474, 3, 2, 1, 0); \ + uint32x2_t __ret_474; \ + __ret_474 = __noswap_splat_laneq_u32(__rev0_474, __p1_474); 
\ + __ret_474 = __builtin_shufflevector(__ret_474, __ret_474, 1, 0); \ + __ret_474; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_u64(__p0_383, __p1_383) __extension__ ({ \ - uint64x2_t __s0_383 = __p0_383; \ - uint64x1_t __ret_383; \ - __ret_383 = splat_laneq_u64(__s0_383, __p1_383); \ - __ret_383; \ +#define vdup_laneq_u64(__p0_475, __p1_475) __extension__ ({ \ + uint64x2_t __s0_475 = __p0_475; \ + uint64x1_t __ret_475; \ + __ret_475 = splat_laneq_u64(__s0_475, __p1_475); \ + __ret_475; \ }) #else -#define vdup_laneq_u64(__p0_384, __p1_384) __extension__ ({ \ - uint64x2_t __s0_384 = __p0_384; \ - uint64x2_t __rev0_384; __rev0_384 = __builtin_shufflevector(__s0_384, __s0_384, 1, 0); \ - uint64x1_t __ret_384; \ - __ret_384 = __noswap_splat_laneq_u64(__rev0_384, __p1_384); \ - __ret_384; \ +#define vdup_laneq_u64(__p0_476, __p1_476) __extension__ ({ \ + uint64x2_t __s0_476 = __p0_476; \ + uint64x2_t __rev0_476; __rev0_476 = __builtin_shufflevector(__s0_476, __s0_476, 1, 0); \ + uint64x1_t __ret_476; \ + __ret_476 = __noswap_splat_laneq_u64(__rev0_476, __p1_476); \ + __ret_476; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_u16(__p0_385, __p1_385) __extension__ ({ \ - uint16x8_t __s0_385 = __p0_385; \ - uint16x4_t __ret_385; \ - __ret_385 = splat_laneq_u16(__s0_385, __p1_385); \ - __ret_385; \ +#define vdup_laneq_u16(__p0_477, __p1_477) __extension__ ({ \ + uint16x8_t __s0_477 = __p0_477; \ + uint16x4_t __ret_477; \ + __ret_477 = splat_laneq_u16(__s0_477, __p1_477); \ + __ret_477; \ }) #else -#define vdup_laneq_u16(__p0_386, __p1_386) __extension__ ({ \ - uint16x8_t __s0_386 = __p0_386; \ - uint16x8_t __rev0_386; __rev0_386 = __builtin_shufflevector(__s0_386, __s0_386, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x4_t __ret_386; \ - __ret_386 = __noswap_splat_laneq_u16(__rev0_386, __p1_386); \ - __ret_386 = __builtin_shufflevector(__ret_386, __ret_386, 3, 2, 1, 0); \ - __ret_386; \ +#define vdup_laneq_u16(__p0_478, __p1_478) __extension__ ({ \ + uint16x8_t __s0_478 = __p0_478; \ + uint16x8_t __rev0_478; __rev0_478 = __builtin_shufflevector(__s0_478, __s0_478, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x4_t __ret_478; \ + __ret_478 = __noswap_splat_laneq_u16(__rev0_478, __p1_478); \ + __ret_478 = __builtin_shufflevector(__ret_478, __ret_478, 3, 2, 1, 0); \ + __ret_478; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_s8(__p0_387, __p1_387) __extension__ ({ \ - int8x16_t __s0_387 = __p0_387; \ - int8x8_t __ret_387; \ - __ret_387 = splat_laneq_s8(__s0_387, __p1_387); \ - __ret_387; \ +#define vdup_laneq_s8(__p0_479, __p1_479) __extension__ ({ \ + int8x16_t __s0_479 = __p0_479; \ + int8x8_t __ret_479; \ + __ret_479 = splat_laneq_s8(__s0_479, __p1_479); \ + __ret_479; \ }) #else -#define vdup_laneq_s8(__p0_388, __p1_388) __extension__ ({ \ - int8x16_t __s0_388 = __p0_388; \ - int8x16_t __rev0_388; __rev0_388 = __builtin_shufflevector(__s0_388, __s0_388, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x8_t __ret_388; \ - __ret_388 = __noswap_splat_laneq_s8(__rev0_388, __p1_388); \ - __ret_388 = __builtin_shufflevector(__ret_388, __ret_388, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_388; \ +#define vdup_laneq_s8(__p0_480, __p1_480) __extension__ ({ \ + int8x16_t __s0_480 = __p0_480; \ + int8x16_t __rev0_480; __rev0_480 = __builtin_shufflevector(__s0_480, __s0_480, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x8_t __ret_480; \ + __ret_480 = __noswap_splat_laneq_s8(__rev0_480, __p1_480); \ + __ret_480 = __builtin_shufflevector(__ret_480, __ret_480, 7, 6, 5, 4, 3, 
2, 1, 0); \ + __ret_480; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_f64(__p0_389, __p1_389) __extension__ ({ \ - float64x2_t __s0_389 = __p0_389; \ - float64x1_t __ret_389; \ - __ret_389 = splat_laneq_f64(__s0_389, __p1_389); \ - __ret_389; \ +#define vdup_laneq_f64(__p0_481, __p1_481) __extension__ ({ \ + float64x2_t __s0_481 = __p0_481; \ + float64x1_t __ret_481; \ + __ret_481 = splat_laneq_f64(__s0_481, __p1_481); \ + __ret_481; \ }) #else -#define vdup_laneq_f64(__p0_390, __p1_390) __extension__ ({ \ - float64x2_t __s0_390 = __p0_390; \ - float64x2_t __rev0_390; __rev0_390 = __builtin_shufflevector(__s0_390, __s0_390, 1, 0); \ - float64x1_t __ret_390; \ - __ret_390 = __noswap_splat_laneq_f64(__rev0_390, __p1_390); \ - __ret_390; \ +#define vdup_laneq_f64(__p0_482, __p1_482) __extension__ ({ \ + float64x2_t __s0_482 = __p0_482; \ + float64x2_t __rev0_482; __rev0_482 = __builtin_shufflevector(__s0_482, __s0_482, 1, 0); \ + float64x1_t __ret_482; \ + __ret_482 = __noswap_splat_laneq_f64(__rev0_482, __p1_482); \ + __ret_482; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_f32(__p0_391, __p1_391) __extension__ ({ \ - float32x4_t __s0_391 = __p0_391; \ - float32x2_t __ret_391; \ - __ret_391 = splat_laneq_f32(__s0_391, __p1_391); \ - __ret_391; \ +#define vdup_laneq_f32(__p0_483, __p1_483) __extension__ ({ \ + float32x4_t __s0_483 = __p0_483; \ + float32x2_t __ret_483; \ + __ret_483 = splat_laneq_f32(__s0_483, __p1_483); \ + __ret_483; \ }) #else -#define vdup_laneq_f32(__p0_392, __p1_392) __extension__ ({ \ - float32x4_t __s0_392 = __p0_392; \ - float32x4_t __rev0_392; __rev0_392 = __builtin_shufflevector(__s0_392, __s0_392, 3, 2, 1, 0); \ - float32x2_t __ret_392; \ - __ret_392 = __noswap_splat_laneq_f32(__rev0_392, __p1_392); \ - __ret_392 = __builtin_shufflevector(__ret_392, __ret_392, 1, 0); \ - __ret_392; \ +#define vdup_laneq_f32(__p0_484, __p1_484) __extension__ ({ \ + float32x4_t __s0_484 = __p0_484; \ + float32x4_t __rev0_484; __rev0_484 = __builtin_shufflevector(__s0_484, __s0_484, 3, 2, 1, 0); \ + float32x2_t __ret_484; \ + __ret_484 = __noswap_splat_laneq_f32(__rev0_484, __p1_484); \ + __ret_484 = __builtin_shufflevector(__ret_484, __ret_484, 1, 0); \ + __ret_484; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_f16(__p0_393, __p1_393) __extension__ ({ \ - float16x8_t __s0_393 = __p0_393; \ - float16x4_t __ret_393; \ - __ret_393 = splat_laneq_f16(__s0_393, __p1_393); \ - __ret_393; \ +#define vdup_laneq_f16(__p0_485, __p1_485) __extension__ ({ \ + float16x8_t __s0_485 = __p0_485; \ + float16x4_t __ret_485; \ + __ret_485 = splat_laneq_f16(__s0_485, __p1_485); \ + __ret_485; \ }) #else -#define vdup_laneq_f16(__p0_394, __p1_394) __extension__ ({ \ - float16x8_t __s0_394 = __p0_394; \ - float16x8_t __rev0_394; __rev0_394 = __builtin_shufflevector(__s0_394, __s0_394, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __ret_394; \ - __ret_394 = __noswap_splat_laneq_f16(__rev0_394, __p1_394); \ - __ret_394 = __builtin_shufflevector(__ret_394, __ret_394, 3, 2, 1, 0); \ - __ret_394; \ +#define vdup_laneq_f16(__p0_486, __p1_486) __extension__ ({ \ + float16x8_t __s0_486 = __p0_486; \ + float16x8_t __rev0_486; __rev0_486 = __builtin_shufflevector(__s0_486, __s0_486, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __ret_486; \ + __ret_486 = __noswap_splat_laneq_f16(__rev0_486, __p1_486); \ + __ret_486 = __builtin_shufflevector(__ret_486, __ret_486, 3, 2, 1, 0); \ + __ret_486; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_s32(__p0_395, __p1_395) __extension__ ({ \ - 
int32x4_t __s0_395 = __p0_395; \ - int32x2_t __ret_395; \ - __ret_395 = splat_laneq_s32(__s0_395, __p1_395); \ - __ret_395; \ +#define vdup_laneq_s32(__p0_487, __p1_487) __extension__ ({ \ + int32x4_t __s0_487 = __p0_487; \ + int32x2_t __ret_487; \ + __ret_487 = splat_laneq_s32(__s0_487, __p1_487); \ + __ret_487; \ }) #else -#define vdup_laneq_s32(__p0_396, __p1_396) __extension__ ({ \ - int32x4_t __s0_396 = __p0_396; \ - int32x4_t __rev0_396; __rev0_396 = __builtin_shufflevector(__s0_396, __s0_396, 3, 2, 1, 0); \ - int32x2_t __ret_396; \ - __ret_396 = __noswap_splat_laneq_s32(__rev0_396, __p1_396); \ - __ret_396 = __builtin_shufflevector(__ret_396, __ret_396, 1, 0); \ - __ret_396; \ +#define vdup_laneq_s32(__p0_488, __p1_488) __extension__ ({ \ + int32x4_t __s0_488 = __p0_488; \ + int32x4_t __rev0_488; __rev0_488 = __builtin_shufflevector(__s0_488, __s0_488, 3, 2, 1, 0); \ + int32x2_t __ret_488; \ + __ret_488 = __noswap_splat_laneq_s32(__rev0_488, __p1_488); \ + __ret_488 = __builtin_shufflevector(__ret_488, __ret_488, 1, 0); \ + __ret_488; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_s64(__p0_397, __p1_397) __extension__ ({ \ - int64x2_t __s0_397 = __p0_397; \ - int64x1_t __ret_397; \ - __ret_397 = splat_laneq_s64(__s0_397, __p1_397); \ - __ret_397; \ +#define vdup_laneq_s64(__p0_489, __p1_489) __extension__ ({ \ + int64x2_t __s0_489 = __p0_489; \ + int64x1_t __ret_489; \ + __ret_489 = splat_laneq_s64(__s0_489, __p1_489); \ + __ret_489; \ }) #else -#define vdup_laneq_s64(__p0_398, __p1_398) __extension__ ({ \ - int64x2_t __s0_398 = __p0_398; \ - int64x2_t __rev0_398; __rev0_398 = __builtin_shufflevector(__s0_398, __s0_398, 1, 0); \ - int64x1_t __ret_398; \ - __ret_398 = __noswap_splat_laneq_s64(__rev0_398, __p1_398); \ - __ret_398; \ +#define vdup_laneq_s64(__p0_490, __p1_490) __extension__ ({ \ + int64x2_t __s0_490 = __p0_490; \ + int64x2_t __rev0_490; __rev0_490 = __builtin_shufflevector(__s0_490, __s0_490, 1, 0); \ + int64x1_t __ret_490; \ + __ret_490 = __noswap_splat_laneq_s64(__rev0_490, __p1_490); \ + __ret_490; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vdup_laneq_s16(__p0_399, __p1_399) __extension__ ({ \ - int16x8_t __s0_399 = __p0_399; \ - int16x4_t __ret_399; \ - __ret_399 = splat_laneq_s16(__s0_399, __p1_399); \ - __ret_399; \ +#define vdup_laneq_s16(__p0_491, __p1_491) __extension__ ({ \ + int16x8_t __s0_491 = __p0_491; \ + int16x4_t __ret_491; \ + __ret_491 = splat_laneq_s16(__s0_491, __p1_491); \ + __ret_491; \ }) #else -#define vdup_laneq_s16(__p0_400, __p1_400) __extension__ ({ \ - int16x8_t __s0_400 = __p0_400; \ - int16x8_t __rev0_400; __rev0_400 = __builtin_shufflevector(__s0_400, __s0_400, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __ret_400; \ - __ret_400 = __noswap_splat_laneq_s16(__rev0_400, __p1_400); \ - __ret_400 = __builtin_shufflevector(__ret_400, __ret_400, 3, 2, 1, 0); \ - __ret_400; \ +#define vdup_laneq_s16(__p0_492, __p1_492) __extension__ ({ \ + int16x8_t __s0_492 = __p0_492; \ + int16x8_t __rev0_492; __rev0_492 = __builtin_shufflevector(__s0_492, __s0_492, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __ret_492; \ + __ret_492 = __noswap_splat_laneq_s16(__rev0_492, __p1_492); \ + __ret_492 = __builtin_shufflevector(__ret_492, __ret_492, 3, 2, 1, 0); \ + __ret_492; \ }) #endif @@ -50287,246 +51991,246 @@ __ai float64x1_t vfms_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) __ret = vfma_f64(__p0, -__p1, __p2); return __ret; } -#define vfmsd_lane_f64(__p0_401, __p1_401, __p2_401, __p3_401) __extension__ ({ \ - float64_t __s0_401 = __p0_401; \ 
- float64_t __s1_401 = __p1_401; \ - float64x1_t __s2_401 = __p2_401; \ - float64_t __ret_401; \ - __ret_401 = vfmad_lane_f64(__s0_401, -__s1_401, __s2_401, __p3_401); \ - __ret_401; \ +#define vfmsd_lane_f64(__p0_493, __p1_493, __p2_493, __p3_493) __extension__ ({ \ + float64_t __s0_493 = __p0_493; \ + float64_t __s1_493 = __p1_493; \ + float64x1_t __s2_493 = __p2_493; \ + float64_t __ret_493; \ + __ret_493 = vfmad_lane_f64(__s0_493, -__s1_493, __s2_493, __p3_493); \ + __ret_493; \ }) #ifdef __LITTLE_ENDIAN__ -#define vfmss_lane_f32(__p0_402, __p1_402, __p2_402, __p3_402) __extension__ ({ \ - float32_t __s0_402 = __p0_402; \ - float32_t __s1_402 = __p1_402; \ - float32x2_t __s2_402 = __p2_402; \ - float32_t __ret_402; \ - __ret_402 = vfmas_lane_f32(__s0_402, -__s1_402, __s2_402, __p3_402); \ - __ret_402; \ +#define vfmss_lane_f32(__p0_494, __p1_494, __p2_494, __p3_494) __extension__ ({ \ + float32_t __s0_494 = __p0_494; \ + float32_t __s1_494 = __p1_494; \ + float32x2_t __s2_494 = __p2_494; \ + float32_t __ret_494; \ + __ret_494 = vfmas_lane_f32(__s0_494, -__s1_494, __s2_494, __p3_494); \ + __ret_494; \ }) #else -#define vfmss_lane_f32(__p0_403, __p1_403, __p2_403, __p3_403) __extension__ ({ \ - float32_t __s0_403 = __p0_403; \ - float32_t __s1_403 = __p1_403; \ - float32x2_t __s2_403 = __p2_403; \ - float32x2_t __rev2_403; __rev2_403 = __builtin_shufflevector(__s2_403, __s2_403, 1, 0); \ - float32_t __ret_403; \ - __ret_403 = __noswap_vfmas_lane_f32(__s0_403, -__s1_403, __rev2_403, __p3_403); \ - __ret_403; \ +#define vfmss_lane_f32(__p0_495, __p1_495, __p2_495, __p3_495) __extension__ ({ \ + float32_t __s0_495 = __p0_495; \ + float32_t __s1_495 = __p1_495; \ + float32x2_t __s2_495 = __p2_495; \ + float32x2_t __rev2_495; __rev2_495 = __builtin_shufflevector(__s2_495, __s2_495, 1, 0); \ + float32_t __ret_495; \ + __ret_495 = __noswap_vfmas_lane_f32(__s0_495, -__s1_495, __rev2_495, __p3_495); \ + __ret_495; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmsq_lane_f64(__p0_404, __p1_404, __p2_404, __p3_404) __extension__ ({ \ - float64x2_t __s0_404 = __p0_404; \ - float64x2_t __s1_404 = __p1_404; \ - float64x1_t __s2_404 = __p2_404; \ - float64x2_t __ret_404; \ - __ret_404 = vfmaq_lane_f64(__s0_404, -__s1_404, __s2_404, __p3_404); \ - __ret_404; \ +#define vfmsq_lane_f64(__p0_496, __p1_496, __p2_496, __p3_496) __extension__ ({ \ + float64x2_t __s0_496 = __p0_496; \ + float64x2_t __s1_496 = __p1_496; \ + float64x1_t __s2_496 = __p2_496; \ + float64x2_t __ret_496; \ + __ret_496 = vfmaq_lane_f64(__s0_496, -__s1_496, __s2_496, __p3_496); \ + __ret_496; \ }) #else -#define vfmsq_lane_f64(__p0_405, __p1_405, __p2_405, __p3_405) __extension__ ({ \ - float64x2_t __s0_405 = __p0_405; \ - float64x2_t __s1_405 = __p1_405; \ - float64x1_t __s2_405 = __p2_405; \ - float64x2_t __rev0_405; __rev0_405 = __builtin_shufflevector(__s0_405, __s0_405, 1, 0); \ - float64x2_t __rev1_405; __rev1_405 = __builtin_shufflevector(__s1_405, __s1_405, 1, 0); \ - float64x2_t __ret_405; \ - __ret_405 = __noswap_vfmaq_lane_f64(__rev0_405, -__rev1_405, __s2_405, __p3_405); \ - __ret_405 = __builtin_shufflevector(__ret_405, __ret_405, 1, 0); \ - __ret_405; \ +#define vfmsq_lane_f64(__p0_497, __p1_497, __p2_497, __p3_497) __extension__ ({ \ + float64x2_t __s0_497 = __p0_497; \ + float64x2_t __s1_497 = __p1_497; \ + float64x1_t __s2_497 = __p2_497; \ + float64x2_t __rev0_497; __rev0_497 = __builtin_shufflevector(__s0_497, __s0_497, 1, 0); \ + float64x2_t __rev1_497; __rev1_497 = __builtin_shufflevector(__s1_497, __s1_497, 1, 
0); \ + float64x2_t __ret_497; \ + __ret_497 = __noswap_vfmaq_lane_f64(__rev0_497, -__rev1_497, __s2_497, __p3_497); \ + __ret_497 = __builtin_shufflevector(__ret_497, __ret_497, 1, 0); \ + __ret_497; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmsq_lane_f32(__p0_406, __p1_406, __p2_406, __p3_406) __extension__ ({ \ - float32x4_t __s0_406 = __p0_406; \ - float32x4_t __s1_406 = __p1_406; \ - float32x2_t __s2_406 = __p2_406; \ - float32x4_t __ret_406; \ - __ret_406 = vfmaq_lane_f32(__s0_406, -__s1_406, __s2_406, __p3_406); \ - __ret_406; \ +#define vfmsq_lane_f32(__p0_498, __p1_498, __p2_498, __p3_498) __extension__ ({ \ + float32x4_t __s0_498 = __p0_498; \ + float32x4_t __s1_498 = __p1_498; \ + float32x2_t __s2_498 = __p2_498; \ + float32x4_t __ret_498; \ + __ret_498 = vfmaq_lane_f32(__s0_498, -__s1_498, __s2_498, __p3_498); \ + __ret_498; \ }) #else -#define vfmsq_lane_f32(__p0_407, __p1_407, __p2_407, __p3_407) __extension__ ({ \ - float32x4_t __s0_407 = __p0_407; \ - float32x4_t __s1_407 = __p1_407; \ - float32x2_t __s2_407 = __p2_407; \ - float32x4_t __rev0_407; __rev0_407 = __builtin_shufflevector(__s0_407, __s0_407, 3, 2, 1, 0); \ - float32x4_t __rev1_407; __rev1_407 = __builtin_shufflevector(__s1_407, __s1_407, 3, 2, 1, 0); \ - float32x2_t __rev2_407; __rev2_407 = __builtin_shufflevector(__s2_407, __s2_407, 1, 0); \ - float32x4_t __ret_407; \ - __ret_407 = __noswap_vfmaq_lane_f32(__rev0_407, -__rev1_407, __rev2_407, __p3_407); \ - __ret_407 = __builtin_shufflevector(__ret_407, __ret_407, 3, 2, 1, 0); \ - __ret_407; \ +#define vfmsq_lane_f32(__p0_499, __p1_499, __p2_499, __p3_499) __extension__ ({ \ + float32x4_t __s0_499 = __p0_499; \ + float32x4_t __s1_499 = __p1_499; \ + float32x2_t __s2_499 = __p2_499; \ + float32x4_t __rev0_499; __rev0_499 = __builtin_shufflevector(__s0_499, __s0_499, 3, 2, 1, 0); \ + float32x4_t __rev1_499; __rev1_499 = __builtin_shufflevector(__s1_499, __s1_499, 3, 2, 1, 0); \ + float32x2_t __rev2_499; __rev2_499 = __builtin_shufflevector(__s2_499, __s2_499, 1, 0); \ + float32x4_t __ret_499; \ + __ret_499 = __noswap_vfmaq_lane_f32(__rev0_499, -__rev1_499, __rev2_499, __p3_499); \ + __ret_499 = __builtin_shufflevector(__ret_499, __ret_499, 3, 2, 1, 0); \ + __ret_499; \ }) #endif -#define vfms_lane_f64(__p0_408, __p1_408, __p2_408, __p3_408) __extension__ ({ \ - float64x1_t __s0_408 = __p0_408; \ - float64x1_t __s1_408 = __p1_408; \ - float64x1_t __s2_408 = __p2_408; \ - float64x1_t __ret_408; \ - __ret_408 = vfma_lane_f64(__s0_408, -__s1_408, __s2_408, __p3_408); \ - __ret_408; \ +#define vfms_lane_f64(__p0_500, __p1_500, __p2_500, __p3_500) __extension__ ({ \ + float64x1_t __s0_500 = __p0_500; \ + float64x1_t __s1_500 = __p1_500; \ + float64x1_t __s2_500 = __p2_500; \ + float64x1_t __ret_500; \ + __ret_500 = vfma_lane_f64(__s0_500, -__s1_500, __s2_500, __p3_500); \ + __ret_500; \ }) #ifdef __LITTLE_ENDIAN__ -#define vfms_lane_f32(__p0_409, __p1_409, __p2_409, __p3_409) __extension__ ({ \ - float32x2_t __s0_409 = __p0_409; \ - float32x2_t __s1_409 = __p1_409; \ - float32x2_t __s2_409 = __p2_409; \ - float32x2_t __ret_409; \ - __ret_409 = vfma_lane_f32(__s0_409, -__s1_409, __s2_409, __p3_409); \ - __ret_409; \ +#define vfms_lane_f32(__p0_501, __p1_501, __p2_501, __p3_501) __extension__ ({ \ + float32x2_t __s0_501 = __p0_501; \ + float32x2_t __s1_501 = __p1_501; \ + float32x2_t __s2_501 = __p2_501; \ + float32x2_t __ret_501; \ + __ret_501 = vfma_lane_f32(__s0_501, -__s1_501, __s2_501, __p3_501); \ + __ret_501; \ }) #else -#define vfms_lane_f32(__p0_410, 
__p1_410, __p2_410, __p3_410) __extension__ ({ \ - float32x2_t __s0_410 = __p0_410; \ - float32x2_t __s1_410 = __p1_410; \ - float32x2_t __s2_410 = __p2_410; \ - float32x2_t __rev0_410; __rev0_410 = __builtin_shufflevector(__s0_410, __s0_410, 1, 0); \ - float32x2_t __rev1_410; __rev1_410 = __builtin_shufflevector(__s1_410, __s1_410, 1, 0); \ - float32x2_t __rev2_410; __rev2_410 = __builtin_shufflevector(__s2_410, __s2_410, 1, 0); \ - float32x2_t __ret_410; \ - __ret_410 = __noswap_vfma_lane_f32(__rev0_410, -__rev1_410, __rev2_410, __p3_410); \ - __ret_410 = __builtin_shufflevector(__ret_410, __ret_410, 1, 0); \ - __ret_410; \ +#define vfms_lane_f32(__p0_502, __p1_502, __p2_502, __p3_502) __extension__ ({ \ + float32x2_t __s0_502 = __p0_502; \ + float32x2_t __s1_502 = __p1_502; \ + float32x2_t __s2_502 = __p2_502; \ + float32x2_t __rev0_502; __rev0_502 = __builtin_shufflevector(__s0_502, __s0_502, 1, 0); \ + float32x2_t __rev1_502; __rev1_502 = __builtin_shufflevector(__s1_502, __s1_502, 1, 0); \ + float32x2_t __rev2_502; __rev2_502 = __builtin_shufflevector(__s2_502, __s2_502, 1, 0); \ + float32x2_t __ret_502; \ + __ret_502 = __noswap_vfma_lane_f32(__rev0_502, -__rev1_502, __rev2_502, __p3_502); \ + __ret_502 = __builtin_shufflevector(__ret_502, __ret_502, 1, 0); \ + __ret_502; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmsd_laneq_f64(__p0_411, __p1_411, __p2_411, __p3_411) __extension__ ({ \ - float64_t __s0_411 = __p0_411; \ - float64_t __s1_411 = __p1_411; \ - float64x2_t __s2_411 = __p2_411; \ - float64_t __ret_411; \ - __ret_411 = vfmad_laneq_f64(__s0_411, -__s1_411, __s2_411, __p3_411); \ - __ret_411; \ +#define vfmsd_laneq_f64(__p0_503, __p1_503, __p2_503, __p3_503) __extension__ ({ \ + float64_t __s0_503 = __p0_503; \ + float64_t __s1_503 = __p1_503; \ + float64x2_t __s2_503 = __p2_503; \ + float64_t __ret_503; \ + __ret_503 = vfmad_laneq_f64(__s0_503, -__s1_503, __s2_503, __p3_503); \ + __ret_503; \ }) #else -#define vfmsd_laneq_f64(__p0_412, __p1_412, __p2_412, __p3_412) __extension__ ({ \ - float64_t __s0_412 = __p0_412; \ - float64_t __s1_412 = __p1_412; \ - float64x2_t __s2_412 = __p2_412; \ - float64x2_t __rev2_412; __rev2_412 = __builtin_shufflevector(__s2_412, __s2_412, 1, 0); \ - float64_t __ret_412; \ - __ret_412 = __noswap_vfmad_laneq_f64(__s0_412, -__s1_412, __rev2_412, __p3_412); \ - __ret_412; \ +#define vfmsd_laneq_f64(__p0_504, __p1_504, __p2_504, __p3_504) __extension__ ({ \ + float64_t __s0_504 = __p0_504; \ + float64_t __s1_504 = __p1_504; \ + float64x2_t __s2_504 = __p2_504; \ + float64x2_t __rev2_504; __rev2_504 = __builtin_shufflevector(__s2_504, __s2_504, 1, 0); \ + float64_t __ret_504; \ + __ret_504 = __noswap_vfmad_laneq_f64(__s0_504, -__s1_504, __rev2_504, __p3_504); \ + __ret_504; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmss_laneq_f32(__p0_413, __p1_413, __p2_413, __p3_413) __extension__ ({ \ - float32_t __s0_413 = __p0_413; \ - float32_t __s1_413 = __p1_413; \ - float32x4_t __s2_413 = __p2_413; \ - float32_t __ret_413; \ - __ret_413 = vfmas_laneq_f32(__s0_413, -__s1_413, __s2_413, __p3_413); \ - __ret_413; \ +#define vfmss_laneq_f32(__p0_505, __p1_505, __p2_505, __p3_505) __extension__ ({ \ + float32_t __s0_505 = __p0_505; \ + float32_t __s1_505 = __p1_505; \ + float32x4_t __s2_505 = __p2_505; \ + float32_t __ret_505; \ + __ret_505 = vfmas_laneq_f32(__s0_505, -__s1_505, __s2_505, __p3_505); \ + __ret_505; \ }) #else -#define vfmss_laneq_f32(__p0_414, __p1_414, __p2_414, __p3_414) __extension__ ({ \ - float32_t __s0_414 = __p0_414; \ - 
float32_t __s1_414 = __p1_414; \ - float32x4_t __s2_414 = __p2_414; \ - float32x4_t __rev2_414; __rev2_414 = __builtin_shufflevector(__s2_414, __s2_414, 3, 2, 1, 0); \ - float32_t __ret_414; \ - __ret_414 = __noswap_vfmas_laneq_f32(__s0_414, -__s1_414, __rev2_414, __p3_414); \ - __ret_414; \ +#define vfmss_laneq_f32(__p0_506, __p1_506, __p2_506, __p3_506) __extension__ ({ \ + float32_t __s0_506 = __p0_506; \ + float32_t __s1_506 = __p1_506; \ + float32x4_t __s2_506 = __p2_506; \ + float32x4_t __rev2_506; __rev2_506 = __builtin_shufflevector(__s2_506, __s2_506, 3, 2, 1, 0); \ + float32_t __ret_506; \ + __ret_506 = __noswap_vfmas_laneq_f32(__s0_506, -__s1_506, __rev2_506, __p3_506); \ + __ret_506; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmsq_laneq_f64(__p0_415, __p1_415, __p2_415, __p3_415) __extension__ ({ \ - float64x2_t __s0_415 = __p0_415; \ - float64x2_t __s1_415 = __p1_415; \ - float64x2_t __s2_415 = __p2_415; \ - float64x2_t __ret_415; \ - __ret_415 = vfmaq_laneq_f64(__s0_415, -__s1_415, __s2_415, __p3_415); \ - __ret_415; \ +#define vfmsq_laneq_f64(__p0_507, __p1_507, __p2_507, __p3_507) __extension__ ({ \ + float64x2_t __s0_507 = __p0_507; \ + float64x2_t __s1_507 = __p1_507; \ + float64x2_t __s2_507 = __p2_507; \ + float64x2_t __ret_507; \ + __ret_507 = vfmaq_laneq_f64(__s0_507, -__s1_507, __s2_507, __p3_507); \ + __ret_507; \ }) #else -#define vfmsq_laneq_f64(__p0_416, __p1_416, __p2_416, __p3_416) __extension__ ({ \ - float64x2_t __s0_416 = __p0_416; \ - float64x2_t __s1_416 = __p1_416; \ - float64x2_t __s2_416 = __p2_416; \ - float64x2_t __rev0_416; __rev0_416 = __builtin_shufflevector(__s0_416, __s0_416, 1, 0); \ - float64x2_t __rev1_416; __rev1_416 = __builtin_shufflevector(__s1_416, __s1_416, 1, 0); \ - float64x2_t __rev2_416; __rev2_416 = __builtin_shufflevector(__s2_416, __s2_416, 1, 0); \ - float64x2_t __ret_416; \ - __ret_416 = __noswap_vfmaq_laneq_f64(__rev0_416, -__rev1_416, __rev2_416, __p3_416); \ - __ret_416 = __builtin_shufflevector(__ret_416, __ret_416, 1, 0); \ - __ret_416; \ +#define vfmsq_laneq_f64(__p0_508, __p1_508, __p2_508, __p3_508) __extension__ ({ \ + float64x2_t __s0_508 = __p0_508; \ + float64x2_t __s1_508 = __p1_508; \ + float64x2_t __s2_508 = __p2_508; \ + float64x2_t __rev0_508; __rev0_508 = __builtin_shufflevector(__s0_508, __s0_508, 1, 0); \ + float64x2_t __rev1_508; __rev1_508 = __builtin_shufflevector(__s1_508, __s1_508, 1, 0); \ + float64x2_t __rev2_508; __rev2_508 = __builtin_shufflevector(__s2_508, __s2_508, 1, 0); \ + float64x2_t __ret_508; \ + __ret_508 = __noswap_vfmaq_laneq_f64(__rev0_508, -__rev1_508, __rev2_508, __p3_508); \ + __ret_508 = __builtin_shufflevector(__ret_508, __ret_508, 1, 0); \ + __ret_508; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmsq_laneq_f32(__p0_417, __p1_417, __p2_417, __p3_417) __extension__ ({ \ - float32x4_t __s0_417 = __p0_417; \ - float32x4_t __s1_417 = __p1_417; \ - float32x4_t __s2_417 = __p2_417; \ - float32x4_t __ret_417; \ - __ret_417 = vfmaq_laneq_f32(__s0_417, -__s1_417, __s2_417, __p3_417); \ - __ret_417; \ +#define vfmsq_laneq_f32(__p0_509, __p1_509, __p2_509, __p3_509) __extension__ ({ \ + float32x4_t __s0_509 = __p0_509; \ + float32x4_t __s1_509 = __p1_509; \ + float32x4_t __s2_509 = __p2_509; \ + float32x4_t __ret_509; \ + __ret_509 = vfmaq_laneq_f32(__s0_509, -__s1_509, __s2_509, __p3_509); \ + __ret_509; \ }) #else -#define vfmsq_laneq_f32(__p0_418, __p1_418, __p2_418, __p3_418) __extension__ ({ \ - float32x4_t __s0_418 = __p0_418; \ - float32x4_t __s1_418 = __p1_418; \ - float32x4_t 
__s2_418 = __p2_418; \ - float32x4_t __rev0_418; __rev0_418 = __builtin_shufflevector(__s0_418, __s0_418, 3, 2, 1, 0); \ - float32x4_t __rev1_418; __rev1_418 = __builtin_shufflevector(__s1_418, __s1_418, 3, 2, 1, 0); \ - float32x4_t __rev2_418; __rev2_418 = __builtin_shufflevector(__s2_418, __s2_418, 3, 2, 1, 0); \ - float32x4_t __ret_418; \ - __ret_418 = __noswap_vfmaq_laneq_f32(__rev0_418, -__rev1_418, __rev2_418, __p3_418); \ - __ret_418 = __builtin_shufflevector(__ret_418, __ret_418, 3, 2, 1, 0); \ - __ret_418; \ +#define vfmsq_laneq_f32(__p0_510, __p1_510, __p2_510, __p3_510) __extension__ ({ \ + float32x4_t __s0_510 = __p0_510; \ + float32x4_t __s1_510 = __p1_510; \ + float32x4_t __s2_510 = __p2_510; \ + float32x4_t __rev0_510; __rev0_510 = __builtin_shufflevector(__s0_510, __s0_510, 3, 2, 1, 0); \ + float32x4_t __rev1_510; __rev1_510 = __builtin_shufflevector(__s1_510, __s1_510, 3, 2, 1, 0); \ + float32x4_t __rev2_510; __rev2_510 = __builtin_shufflevector(__s2_510, __s2_510, 3, 2, 1, 0); \ + float32x4_t __ret_510; \ + __ret_510 = __noswap_vfmaq_laneq_f32(__rev0_510, -__rev1_510, __rev2_510, __p3_510); \ + __ret_510 = __builtin_shufflevector(__ret_510, __ret_510, 3, 2, 1, 0); \ + __ret_510; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfms_laneq_f64(__p0_419, __p1_419, __p2_419, __p3_419) __extension__ ({ \ - float64x1_t __s0_419 = __p0_419; \ - float64x1_t __s1_419 = __p1_419; \ - float64x2_t __s2_419 = __p2_419; \ - float64x1_t __ret_419; \ - __ret_419 = vfma_laneq_f64(__s0_419, -__s1_419, __s2_419, __p3_419); \ - __ret_419; \ +#define vfms_laneq_f64(__p0_511, __p1_511, __p2_511, __p3_511) __extension__ ({ \ + float64x1_t __s0_511 = __p0_511; \ + float64x1_t __s1_511 = __p1_511; \ + float64x2_t __s2_511 = __p2_511; \ + float64x1_t __ret_511; \ + __ret_511 = vfma_laneq_f64(__s0_511, -__s1_511, __s2_511, __p3_511); \ + __ret_511; \ }) #else -#define vfms_laneq_f64(__p0_420, __p1_420, __p2_420, __p3_420) __extension__ ({ \ - float64x1_t __s0_420 = __p0_420; \ - float64x1_t __s1_420 = __p1_420; \ - float64x2_t __s2_420 = __p2_420; \ - float64x2_t __rev2_420; __rev2_420 = __builtin_shufflevector(__s2_420, __s2_420, 1, 0); \ - float64x1_t __ret_420; \ - __ret_420 = __noswap_vfma_laneq_f64(__s0_420, -__s1_420, __rev2_420, __p3_420); \ - __ret_420; \ +#define vfms_laneq_f64(__p0_512, __p1_512, __p2_512, __p3_512) __extension__ ({ \ + float64x1_t __s0_512 = __p0_512; \ + float64x1_t __s1_512 = __p1_512; \ + float64x2_t __s2_512 = __p2_512; \ + float64x2_t __rev2_512; __rev2_512 = __builtin_shufflevector(__s2_512, __s2_512, 1, 0); \ + float64x1_t __ret_512; \ + __ret_512 = __noswap_vfma_laneq_f64(__s0_512, -__s1_512, __rev2_512, __p3_512); \ + __ret_512; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfms_laneq_f32(__p0_421, __p1_421, __p2_421, __p3_421) __extension__ ({ \ - float32x2_t __s0_421 = __p0_421; \ - float32x2_t __s1_421 = __p1_421; \ - float32x4_t __s2_421 = __p2_421; \ - float32x2_t __ret_421; \ - __ret_421 = vfma_laneq_f32(__s0_421, -__s1_421, __s2_421, __p3_421); \ - __ret_421; \ +#define vfms_laneq_f32(__p0_513, __p1_513, __p2_513, __p3_513) __extension__ ({ \ + float32x2_t __s0_513 = __p0_513; \ + float32x2_t __s1_513 = __p1_513; \ + float32x4_t __s2_513 = __p2_513; \ + float32x2_t __ret_513; \ + __ret_513 = vfma_laneq_f32(__s0_513, -__s1_513, __s2_513, __p3_513); \ + __ret_513; \ }) #else -#define vfms_laneq_f32(__p0_422, __p1_422, __p2_422, __p3_422) __extension__ ({ \ - float32x2_t __s0_422 = __p0_422; \ - float32x2_t __s1_422 = __p1_422; \ - float32x4_t __s2_422 = 
__p2_422; \ - float32x2_t __rev0_422; __rev0_422 = __builtin_shufflevector(__s0_422, __s0_422, 1, 0); \ - float32x2_t __rev1_422; __rev1_422 = __builtin_shufflevector(__s1_422, __s1_422, 1, 0); \ - float32x4_t __rev2_422; __rev2_422 = __builtin_shufflevector(__s2_422, __s2_422, 3, 2, 1, 0); \ - float32x2_t __ret_422; \ - __ret_422 = __noswap_vfma_laneq_f32(__rev0_422, -__rev1_422, __rev2_422, __p3_422); \ - __ret_422 = __builtin_shufflevector(__ret_422, __ret_422, 1, 0); \ - __ret_422; \ +#define vfms_laneq_f32(__p0_514, __p1_514, __p2_514, __p3_514) __extension__ ({ \ + float32x2_t __s0_514 = __p0_514; \ + float32x2_t __s1_514 = __p1_514; \ + float32x4_t __s2_514 = __p2_514; \ + float32x2_t __rev0_514; __rev0_514 = __builtin_shufflevector(__s0_514, __s0_514, 1, 0); \ + float32x2_t __rev1_514; __rev1_514 = __builtin_shufflevector(__s1_514, __s1_514, 1, 0); \ + float32x4_t __rev2_514; __rev2_514 = __builtin_shufflevector(__s2_514, __s2_514, 3, 2, 1, 0); \ + float32x2_t __ret_514; \ + __ret_514 = __noswap_vfma_laneq_f32(__rev0_514, -__rev1_514, __rev2_514, __p3_514); \ + __ret_514 = __builtin_shufflevector(__ret_514, __ret_514, 1, 0); \ + __ret_514; \ }) #endif @@ -52548,547 +54252,530 @@ __ai float64x1_t vmla_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) return __ret; } #ifdef __LITTLE_ENDIAN__ -#define vmlaq_laneq_u32(__p0_423, __p1_423, __p2_423, __p3_423) __extension__ ({ \ - uint32x4_t __s0_423 = __p0_423; \ - uint32x4_t __s1_423 = __p1_423; \ - uint32x4_t __s2_423 = __p2_423; \ - uint32x4_t __ret_423; \ - __ret_423 = __s0_423 + __s1_423 * splatq_laneq_u32(__s2_423, __p3_423); \ - __ret_423; \ +#define vmlaq_laneq_u32(__p0_515, __p1_515, __p2_515, __p3_515) __extension__ ({ \ + uint32x4_t __s0_515 = __p0_515; \ + uint32x4_t __s1_515 = __p1_515; \ + uint32x4_t __s2_515 = __p2_515; \ + uint32x4_t __ret_515; \ + __ret_515 = __s0_515 + __s1_515 * splatq_laneq_u32(__s2_515, __p3_515); \ + __ret_515; \ }) #else -#define vmlaq_laneq_u32(__p0_424, __p1_424, __p2_424, __p3_424) __extension__ ({ \ - uint32x4_t __s0_424 = __p0_424; \ - uint32x4_t __s1_424 = __p1_424; \ - uint32x4_t __s2_424 = __p2_424; \ - uint32x4_t __rev0_424; __rev0_424 = __builtin_shufflevector(__s0_424, __s0_424, 3, 2, 1, 0); \ - uint32x4_t __rev1_424; __rev1_424 = __builtin_shufflevector(__s1_424, __s1_424, 3, 2, 1, 0); \ - uint32x4_t __rev2_424; __rev2_424 = __builtin_shufflevector(__s2_424, __s2_424, 3, 2, 1, 0); \ - uint32x4_t __ret_424; \ - __ret_424 = __rev0_424 + __rev1_424 * __noswap_splatq_laneq_u32(__rev2_424, __p3_424); \ - __ret_424 = __builtin_shufflevector(__ret_424, __ret_424, 3, 2, 1, 0); \ - __ret_424; \ +#define vmlaq_laneq_u32(__p0_516, __p1_516, __p2_516, __p3_516) __extension__ ({ \ + uint32x4_t __s0_516 = __p0_516; \ + uint32x4_t __s1_516 = __p1_516; \ + uint32x4_t __s2_516 = __p2_516; \ + uint32x4_t __rev0_516; __rev0_516 = __builtin_shufflevector(__s0_516, __s0_516, 3, 2, 1, 0); \ + uint32x4_t __rev1_516; __rev1_516 = __builtin_shufflevector(__s1_516, __s1_516, 3, 2, 1, 0); \ + uint32x4_t __rev2_516; __rev2_516 = __builtin_shufflevector(__s2_516, __s2_516, 3, 2, 1, 0); \ + uint32x4_t __ret_516; \ + __ret_516 = __rev0_516 + __rev1_516 * __noswap_splatq_laneq_u32(__rev2_516, __p3_516); \ + __ret_516 = __builtin_shufflevector(__ret_516, __ret_516, 3, 2, 1, 0); \ + __ret_516; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlaq_laneq_u16(__p0_425, __p1_425, __p2_425, __p3_425) __extension__ ({ \ - uint16x8_t __s0_425 = __p0_425; \ - uint16x8_t __s1_425 = __p1_425; \ - uint16x8_t __s2_425 = 
__p2_425; \ - uint16x8_t __ret_425; \ - __ret_425 = __s0_425 + __s1_425 * splatq_laneq_u16(__s2_425, __p3_425); \ - __ret_425; \ +#define vmlaq_laneq_u16(__p0_517, __p1_517, __p2_517, __p3_517) __extension__ ({ \ + uint16x8_t __s0_517 = __p0_517; \ + uint16x8_t __s1_517 = __p1_517; \ + uint16x8_t __s2_517 = __p2_517; \ + uint16x8_t __ret_517; \ + __ret_517 = __s0_517 + __s1_517 * splatq_laneq_u16(__s2_517, __p3_517); \ + __ret_517; \ }) #else -#define vmlaq_laneq_u16(__p0_426, __p1_426, __p2_426, __p3_426) __extension__ ({ \ - uint16x8_t __s0_426 = __p0_426; \ - uint16x8_t __s1_426 = __p1_426; \ - uint16x8_t __s2_426 = __p2_426; \ - uint16x8_t __rev0_426; __rev0_426 = __builtin_shufflevector(__s0_426, __s0_426, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1_426; __rev1_426 = __builtin_shufflevector(__s1_426, __s1_426, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev2_426; __rev2_426 = __builtin_shufflevector(__s2_426, __s2_426, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __ret_426; \ - __ret_426 = __rev0_426 + __rev1_426 * __noswap_splatq_laneq_u16(__rev2_426, __p3_426); \ - __ret_426 = __builtin_shufflevector(__ret_426, __ret_426, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_426; \ +#define vmlaq_laneq_u16(__p0_518, __p1_518, __p2_518, __p3_518) __extension__ ({ \ + uint16x8_t __s0_518 = __p0_518; \ + uint16x8_t __s1_518 = __p1_518; \ + uint16x8_t __s2_518 = __p2_518; \ + uint16x8_t __rev0_518; __rev0_518 = __builtin_shufflevector(__s0_518, __s0_518, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev1_518; __rev1_518 = __builtin_shufflevector(__s1_518, __s1_518, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev2_518; __rev2_518 = __builtin_shufflevector(__s2_518, __s2_518, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __ret_518; \ + __ret_518 = __rev0_518 + __rev1_518 * __noswap_splatq_laneq_u16(__rev2_518, __p3_518); \ + __ret_518 = __builtin_shufflevector(__ret_518, __ret_518, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_518; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlaq_laneq_f32(__p0_427, __p1_427, __p2_427, __p3_427) __extension__ ({ \ - float32x4_t __s0_427 = __p0_427; \ - float32x4_t __s1_427 = __p1_427; \ - float32x4_t __s2_427 = __p2_427; \ - float32x4_t __ret_427; \ - __ret_427 = __s0_427 + __s1_427 * splatq_laneq_f32(__s2_427, __p3_427); \ - __ret_427; \ +#define vmlaq_laneq_f32(__p0_519, __p1_519, __p2_519, __p3_519) __extension__ ({ \ + float32x4_t __s0_519 = __p0_519; \ + float32x4_t __s1_519 = __p1_519; \ + float32x4_t __s2_519 = __p2_519; \ + float32x4_t __ret_519; \ + __ret_519 = __s0_519 + __s1_519 * splatq_laneq_f32(__s2_519, __p3_519); \ + __ret_519; \ }) #else -#define vmlaq_laneq_f32(__p0_428, __p1_428, __p2_428, __p3_428) __extension__ ({ \ - float32x4_t __s0_428 = __p0_428; \ - float32x4_t __s1_428 = __p1_428; \ - float32x4_t __s2_428 = __p2_428; \ - float32x4_t __rev0_428; __rev0_428 = __builtin_shufflevector(__s0_428, __s0_428, 3, 2, 1, 0); \ - float32x4_t __rev1_428; __rev1_428 = __builtin_shufflevector(__s1_428, __s1_428, 3, 2, 1, 0); \ - float32x4_t __rev2_428; __rev2_428 = __builtin_shufflevector(__s2_428, __s2_428, 3, 2, 1, 0); \ - float32x4_t __ret_428; \ - __ret_428 = __rev0_428 + __rev1_428 * __noswap_splatq_laneq_f32(__rev2_428, __p3_428); \ - __ret_428 = __builtin_shufflevector(__ret_428, __ret_428, 3, 2, 1, 0); \ - __ret_428; \ +#define vmlaq_laneq_f32(__p0_520, __p1_520, __p2_520, __p3_520) __extension__ ({ \ + float32x4_t __s0_520 = __p0_520; \ + float32x4_t __s1_520 = __p1_520; \ + float32x4_t __s2_520 = __p2_520; \ + float32x4_t __rev0_520; __rev0_520 = 
__builtin_shufflevector(__s0_520, __s0_520, 3, 2, 1, 0); \ + float32x4_t __rev1_520; __rev1_520 = __builtin_shufflevector(__s1_520, __s1_520, 3, 2, 1, 0); \ + float32x4_t __rev2_520; __rev2_520 = __builtin_shufflevector(__s2_520, __s2_520, 3, 2, 1, 0); \ + float32x4_t __ret_520; \ + __ret_520 = __rev0_520 + __rev1_520 * __noswap_splatq_laneq_f32(__rev2_520, __p3_520); \ + __ret_520 = __builtin_shufflevector(__ret_520, __ret_520, 3, 2, 1, 0); \ + __ret_520; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlaq_laneq_s32(__p0_429, __p1_429, __p2_429, __p3_429) __extension__ ({ \ - int32x4_t __s0_429 = __p0_429; \ - int32x4_t __s1_429 = __p1_429; \ - int32x4_t __s2_429 = __p2_429; \ - int32x4_t __ret_429; \ - __ret_429 = __s0_429 + __s1_429 * splatq_laneq_s32(__s2_429, __p3_429); \ - __ret_429; \ +#define vmlaq_laneq_s32(__p0_521, __p1_521, __p2_521, __p3_521) __extension__ ({ \ + int32x4_t __s0_521 = __p0_521; \ + int32x4_t __s1_521 = __p1_521; \ + int32x4_t __s2_521 = __p2_521; \ + int32x4_t __ret_521; \ + __ret_521 = __s0_521 + __s1_521 * splatq_laneq_s32(__s2_521, __p3_521); \ + __ret_521; \ }) #else -#define vmlaq_laneq_s32(__p0_430, __p1_430, __p2_430, __p3_430) __extension__ ({ \ - int32x4_t __s0_430 = __p0_430; \ - int32x4_t __s1_430 = __p1_430; \ - int32x4_t __s2_430 = __p2_430; \ - int32x4_t __rev0_430; __rev0_430 = __builtin_shufflevector(__s0_430, __s0_430, 3, 2, 1, 0); \ - int32x4_t __rev1_430; __rev1_430 = __builtin_shufflevector(__s1_430, __s1_430, 3, 2, 1, 0); \ - int32x4_t __rev2_430; __rev2_430 = __builtin_shufflevector(__s2_430, __s2_430, 3, 2, 1, 0); \ - int32x4_t __ret_430; \ - __ret_430 = __rev0_430 + __rev1_430 * __noswap_splatq_laneq_s32(__rev2_430, __p3_430); \ - __ret_430 = __builtin_shufflevector(__ret_430, __ret_430, 3, 2, 1, 0); \ - __ret_430; \ +#define vmlaq_laneq_s32(__p0_522, __p1_522, __p2_522, __p3_522) __extension__ ({ \ + int32x4_t __s0_522 = __p0_522; \ + int32x4_t __s1_522 = __p1_522; \ + int32x4_t __s2_522 = __p2_522; \ + int32x4_t __rev0_522; __rev0_522 = __builtin_shufflevector(__s0_522, __s0_522, 3, 2, 1, 0); \ + int32x4_t __rev1_522; __rev1_522 = __builtin_shufflevector(__s1_522, __s1_522, 3, 2, 1, 0); \ + int32x4_t __rev2_522; __rev2_522 = __builtin_shufflevector(__s2_522, __s2_522, 3, 2, 1, 0); \ + int32x4_t __ret_522; \ + __ret_522 = __rev0_522 + __rev1_522 * __noswap_splatq_laneq_s32(__rev2_522, __p3_522); \ + __ret_522 = __builtin_shufflevector(__ret_522, __ret_522, 3, 2, 1, 0); \ + __ret_522; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlaq_laneq_s16(__p0_431, __p1_431, __p2_431, __p3_431) __extension__ ({ \ - int16x8_t __s0_431 = __p0_431; \ - int16x8_t __s1_431 = __p1_431; \ - int16x8_t __s2_431 = __p2_431; \ - int16x8_t __ret_431; \ - __ret_431 = __s0_431 + __s1_431 * splatq_laneq_s16(__s2_431, __p3_431); \ - __ret_431; \ +#define vmlaq_laneq_s16(__p0_523, __p1_523, __p2_523, __p3_523) __extension__ ({ \ + int16x8_t __s0_523 = __p0_523; \ + int16x8_t __s1_523 = __p1_523; \ + int16x8_t __s2_523 = __p2_523; \ + int16x8_t __ret_523; \ + __ret_523 = __s0_523 + __s1_523 * splatq_laneq_s16(__s2_523, __p3_523); \ + __ret_523; \ }) #else -#define vmlaq_laneq_s16(__p0_432, __p1_432, __p2_432, __p3_432) __extension__ ({ \ - int16x8_t __s0_432 = __p0_432; \ - int16x8_t __s1_432 = __p1_432; \ - int16x8_t __s2_432 = __p2_432; \ - int16x8_t __rev0_432; __rev0_432 = __builtin_shufflevector(__s0_432, __s0_432, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_432; __rev1_432 = __builtin_shufflevector(__s1_432, __s1_432, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t 
__rev2_432; __rev2_432 = __builtin_shufflevector(__s2_432, __s2_432, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __ret_432; \ - __ret_432 = __rev0_432 + __rev1_432 * __noswap_splatq_laneq_s16(__rev2_432, __p3_432); \ - __ret_432 = __builtin_shufflevector(__ret_432, __ret_432, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_432; \ +#define vmlaq_laneq_s16(__p0_524, __p1_524, __p2_524, __p3_524) __extension__ ({ \ + int16x8_t __s0_524 = __p0_524; \ + int16x8_t __s1_524 = __p1_524; \ + int16x8_t __s2_524 = __p2_524; \ + int16x8_t __rev0_524; __rev0_524 = __builtin_shufflevector(__s0_524, __s0_524, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1_524; __rev1_524 = __builtin_shufflevector(__s1_524, __s1_524, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev2_524; __rev2_524 = __builtin_shufflevector(__s2_524, __s2_524, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __ret_524; \ + __ret_524 = __rev0_524 + __rev1_524 * __noswap_splatq_laneq_s16(__rev2_524, __p3_524); \ + __ret_524 = __builtin_shufflevector(__ret_524, __ret_524, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_524; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmla_laneq_u32(__p0_433, __p1_433, __p2_433, __p3_433) __extension__ ({ \ - uint32x2_t __s0_433 = __p0_433; \ - uint32x2_t __s1_433 = __p1_433; \ - uint32x4_t __s2_433 = __p2_433; \ - uint32x2_t __ret_433; \ - __ret_433 = __s0_433 + __s1_433 * splat_laneq_u32(__s2_433, __p3_433); \ - __ret_433; \ +#define vmla_laneq_u32(__p0_525, __p1_525, __p2_525, __p3_525) __extension__ ({ \ + uint32x2_t __s0_525 = __p0_525; \ + uint32x2_t __s1_525 = __p1_525; \ + uint32x4_t __s2_525 = __p2_525; \ + uint32x2_t __ret_525; \ + __ret_525 = __s0_525 + __s1_525 * splat_laneq_u32(__s2_525, __p3_525); \ + __ret_525; \ }) #else -#define vmla_laneq_u32(__p0_434, __p1_434, __p2_434, __p3_434) __extension__ ({ \ - uint32x2_t __s0_434 = __p0_434; \ - uint32x2_t __s1_434 = __p1_434; \ - uint32x4_t __s2_434 = __p2_434; \ - uint32x2_t __rev0_434; __rev0_434 = __builtin_shufflevector(__s0_434, __s0_434, 1, 0); \ - uint32x2_t __rev1_434; __rev1_434 = __builtin_shufflevector(__s1_434, __s1_434, 1, 0); \ - uint32x4_t __rev2_434; __rev2_434 = __builtin_shufflevector(__s2_434, __s2_434, 3, 2, 1, 0); \ - uint32x2_t __ret_434; \ - __ret_434 = __rev0_434 + __rev1_434 * __noswap_splat_laneq_u32(__rev2_434, __p3_434); \ - __ret_434 = __builtin_shufflevector(__ret_434, __ret_434, 1, 0); \ - __ret_434; \ +#define vmla_laneq_u32(__p0_526, __p1_526, __p2_526, __p3_526) __extension__ ({ \ + uint32x2_t __s0_526 = __p0_526; \ + uint32x2_t __s1_526 = __p1_526; \ + uint32x4_t __s2_526 = __p2_526; \ + uint32x2_t __rev0_526; __rev0_526 = __builtin_shufflevector(__s0_526, __s0_526, 1, 0); \ + uint32x2_t __rev1_526; __rev1_526 = __builtin_shufflevector(__s1_526, __s1_526, 1, 0); \ + uint32x4_t __rev2_526; __rev2_526 = __builtin_shufflevector(__s2_526, __s2_526, 3, 2, 1, 0); \ + uint32x2_t __ret_526; \ + __ret_526 = __rev0_526 + __rev1_526 * __noswap_splat_laneq_u32(__rev2_526, __p3_526); \ + __ret_526 = __builtin_shufflevector(__ret_526, __ret_526, 1, 0); \ + __ret_526; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmla_laneq_u16(__p0_435, __p1_435, __p2_435, __p3_435) __extension__ ({ \ - uint16x4_t __s0_435 = __p0_435; \ - uint16x4_t __s1_435 = __p1_435; \ - uint16x8_t __s2_435 = __p2_435; \ - uint16x4_t __ret_435; \ - __ret_435 = __s0_435 + __s1_435 * splat_laneq_u16(__s2_435, __p3_435); \ - __ret_435; \ +#define vmla_laneq_u16(__p0_527, __p1_527, __p2_527, __p3_527) __extension__ ({ \ + uint16x4_t __s0_527 = __p0_527; \ + uint16x4_t __s1_527 = __p1_527; \ + uint16x8_t 
__s2_527 = __p2_527; \ + uint16x4_t __ret_527; \ + __ret_527 = __s0_527 + __s1_527 * splat_laneq_u16(__s2_527, __p3_527); \ + __ret_527; \ }) #else -#define vmla_laneq_u16(__p0_436, __p1_436, __p2_436, __p3_436) __extension__ ({ \ - uint16x4_t __s0_436 = __p0_436; \ - uint16x4_t __s1_436 = __p1_436; \ - uint16x8_t __s2_436 = __p2_436; \ - uint16x4_t __rev0_436; __rev0_436 = __builtin_shufflevector(__s0_436, __s0_436, 3, 2, 1, 0); \ - uint16x4_t __rev1_436; __rev1_436 = __builtin_shufflevector(__s1_436, __s1_436, 3, 2, 1, 0); \ - uint16x8_t __rev2_436; __rev2_436 = __builtin_shufflevector(__s2_436, __s2_436, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x4_t __ret_436; \ - __ret_436 = __rev0_436 + __rev1_436 * __noswap_splat_laneq_u16(__rev2_436, __p3_436); \ - __ret_436 = __builtin_shufflevector(__ret_436, __ret_436, 3, 2, 1, 0); \ - __ret_436; \ +#define vmla_laneq_u16(__p0_528, __p1_528, __p2_528, __p3_528) __extension__ ({ \ + uint16x4_t __s0_528 = __p0_528; \ + uint16x4_t __s1_528 = __p1_528; \ + uint16x8_t __s2_528 = __p2_528; \ + uint16x4_t __rev0_528; __rev0_528 = __builtin_shufflevector(__s0_528, __s0_528, 3, 2, 1, 0); \ + uint16x4_t __rev1_528; __rev1_528 = __builtin_shufflevector(__s1_528, __s1_528, 3, 2, 1, 0); \ + uint16x8_t __rev2_528; __rev2_528 = __builtin_shufflevector(__s2_528, __s2_528, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x4_t __ret_528; \ + __ret_528 = __rev0_528 + __rev1_528 * __noswap_splat_laneq_u16(__rev2_528, __p3_528); \ + __ret_528 = __builtin_shufflevector(__ret_528, __ret_528, 3, 2, 1, 0); \ + __ret_528; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmla_laneq_f32(__p0_437, __p1_437, __p2_437, __p3_437) __extension__ ({ \ - float32x2_t __s0_437 = __p0_437; \ - float32x2_t __s1_437 = __p1_437; \ - float32x4_t __s2_437 = __p2_437; \ - float32x2_t __ret_437; \ - __ret_437 = __s0_437 + __s1_437 * splat_laneq_f32(__s2_437, __p3_437); \ - __ret_437; \ +#define vmla_laneq_f32(__p0_529, __p1_529, __p2_529, __p3_529) __extension__ ({ \ + float32x2_t __s0_529 = __p0_529; \ + float32x2_t __s1_529 = __p1_529; \ + float32x4_t __s2_529 = __p2_529; \ + float32x2_t __ret_529; \ + __ret_529 = __s0_529 + __s1_529 * splat_laneq_f32(__s2_529, __p3_529); \ + __ret_529; \ }) #else -#define vmla_laneq_f32(__p0_438, __p1_438, __p2_438, __p3_438) __extension__ ({ \ - float32x2_t __s0_438 = __p0_438; \ - float32x2_t __s1_438 = __p1_438; \ - float32x4_t __s2_438 = __p2_438; \ - float32x2_t __rev0_438; __rev0_438 = __builtin_shufflevector(__s0_438, __s0_438, 1, 0); \ - float32x2_t __rev1_438; __rev1_438 = __builtin_shufflevector(__s1_438, __s1_438, 1, 0); \ - float32x4_t __rev2_438; __rev2_438 = __builtin_shufflevector(__s2_438, __s2_438, 3, 2, 1, 0); \ - float32x2_t __ret_438; \ - __ret_438 = __rev0_438 + __rev1_438 * __noswap_splat_laneq_f32(__rev2_438, __p3_438); \ - __ret_438 = __builtin_shufflevector(__ret_438, __ret_438, 1, 0); \ - __ret_438; \ +#define vmla_laneq_f32(__p0_530, __p1_530, __p2_530, __p3_530) __extension__ ({ \ + float32x2_t __s0_530 = __p0_530; \ + float32x2_t __s1_530 = __p1_530; \ + float32x4_t __s2_530 = __p2_530; \ + float32x2_t __rev0_530; __rev0_530 = __builtin_shufflevector(__s0_530, __s0_530, 1, 0); \ + float32x2_t __rev1_530; __rev1_530 = __builtin_shufflevector(__s1_530, __s1_530, 1, 0); \ + float32x4_t __rev2_530; __rev2_530 = __builtin_shufflevector(__s2_530, __s2_530, 3, 2, 1, 0); \ + float32x2_t __ret_530; \ + __ret_530 = __rev0_530 + __rev1_530 * __noswap_splat_laneq_f32(__rev2_530, __p3_530); \ + __ret_530 = __builtin_shufflevector(__ret_530, __ret_530, 1, 0); \ + 
__ret_530; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmla_laneq_s32(__p0_439, __p1_439, __p2_439, __p3_439) __extension__ ({ \ - int32x2_t __s0_439 = __p0_439; \ - int32x2_t __s1_439 = __p1_439; \ - int32x4_t __s2_439 = __p2_439; \ - int32x2_t __ret_439; \ - __ret_439 = __s0_439 + __s1_439 * splat_laneq_s32(__s2_439, __p3_439); \ - __ret_439; \ +#define vmla_laneq_s32(__p0_531, __p1_531, __p2_531, __p3_531) __extension__ ({ \ + int32x2_t __s0_531 = __p0_531; \ + int32x2_t __s1_531 = __p1_531; \ + int32x4_t __s2_531 = __p2_531; \ + int32x2_t __ret_531; \ + __ret_531 = __s0_531 + __s1_531 * splat_laneq_s32(__s2_531, __p3_531); \ + __ret_531; \ }) #else -#define vmla_laneq_s32(__p0_440, __p1_440, __p2_440, __p3_440) __extension__ ({ \ - int32x2_t __s0_440 = __p0_440; \ - int32x2_t __s1_440 = __p1_440; \ - int32x4_t __s2_440 = __p2_440; \ - int32x2_t __rev0_440; __rev0_440 = __builtin_shufflevector(__s0_440, __s0_440, 1, 0); \ - int32x2_t __rev1_440; __rev1_440 = __builtin_shufflevector(__s1_440, __s1_440, 1, 0); \ - int32x4_t __rev2_440; __rev2_440 = __builtin_shufflevector(__s2_440, __s2_440, 3, 2, 1, 0); \ - int32x2_t __ret_440; \ - __ret_440 = __rev0_440 + __rev1_440 * __noswap_splat_laneq_s32(__rev2_440, __p3_440); \ - __ret_440 = __builtin_shufflevector(__ret_440, __ret_440, 1, 0); \ - __ret_440; \ +#define vmla_laneq_s32(__p0_532, __p1_532, __p2_532, __p3_532) __extension__ ({ \ + int32x2_t __s0_532 = __p0_532; \ + int32x2_t __s1_532 = __p1_532; \ + int32x4_t __s2_532 = __p2_532; \ + int32x2_t __rev0_532; __rev0_532 = __builtin_shufflevector(__s0_532, __s0_532, 1, 0); \ + int32x2_t __rev1_532; __rev1_532 = __builtin_shufflevector(__s1_532, __s1_532, 1, 0); \ + int32x4_t __rev2_532; __rev2_532 = __builtin_shufflevector(__s2_532, __s2_532, 3, 2, 1, 0); \ + int32x2_t __ret_532; \ + __ret_532 = __rev0_532 + __rev1_532 * __noswap_splat_laneq_s32(__rev2_532, __p3_532); \ + __ret_532 = __builtin_shufflevector(__ret_532, __ret_532, 1, 0); \ + __ret_532; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmla_laneq_s16(__p0_441, __p1_441, __p2_441, __p3_441) __extension__ ({ \ - int16x4_t __s0_441 = __p0_441; \ - int16x4_t __s1_441 = __p1_441; \ - int16x8_t __s2_441 = __p2_441; \ - int16x4_t __ret_441; \ - __ret_441 = __s0_441 + __s1_441 * splat_laneq_s16(__s2_441, __p3_441); \ - __ret_441; \ +#define vmla_laneq_s16(__p0_533, __p1_533, __p2_533, __p3_533) __extension__ ({ \ + int16x4_t __s0_533 = __p0_533; \ + int16x4_t __s1_533 = __p1_533; \ + int16x8_t __s2_533 = __p2_533; \ + int16x4_t __ret_533; \ + __ret_533 = __s0_533 + __s1_533 * splat_laneq_s16(__s2_533, __p3_533); \ + __ret_533; \ }) #else -#define vmla_laneq_s16(__p0_442, __p1_442, __p2_442, __p3_442) __extension__ ({ \ - int16x4_t __s0_442 = __p0_442; \ - int16x4_t __s1_442 = __p1_442; \ - int16x8_t __s2_442 = __p2_442; \ - int16x4_t __rev0_442; __rev0_442 = __builtin_shufflevector(__s0_442, __s0_442, 3, 2, 1, 0); \ - int16x4_t __rev1_442; __rev1_442 = __builtin_shufflevector(__s1_442, __s1_442, 3, 2, 1, 0); \ - int16x8_t __rev2_442; __rev2_442 = __builtin_shufflevector(__s2_442, __s2_442, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __ret_442; \ - __ret_442 = __rev0_442 + __rev1_442 * __noswap_splat_laneq_s16(__rev2_442, __p3_442); \ - __ret_442 = __builtin_shufflevector(__ret_442, __ret_442, 3, 2, 1, 0); \ - __ret_442; \ +#define vmla_laneq_s16(__p0_534, __p1_534, __p2_534, __p3_534) __extension__ ({ \ + int16x4_t __s0_534 = __p0_534; \ + int16x4_t __s1_534 = __p1_534; \ + int16x8_t __s2_534 = __p2_534; \ + int16x4_t __rev0_534; __rev0_534 
= __builtin_shufflevector(__s0_534, __s0_534, 3, 2, 1, 0); \ + int16x4_t __rev1_534; __rev1_534 = __builtin_shufflevector(__s1_534, __s1_534, 3, 2, 1, 0); \ + int16x8_t __rev2_534; __rev2_534 = __builtin_shufflevector(__s2_534, __s2_534, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __ret_534; \ + __ret_534 = __rev0_534 + __rev1_534 * __noswap_splat_laneq_s16(__rev2_534, __p3_534); \ + __ret_534 = __builtin_shufflevector(__ret_534, __ret_534, 3, 2, 1, 0); \ + __ret_534; \ }) #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vmlaq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) { - float64x2_t __ret; - __ret = __p0 + __p1 * (float64x2_t) {__p2, __p2}; - return __ret; -} -#else -__ai float64x2_t vmlaq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __ret; - __ret = __rev0 + __rev1 * (float64x2_t) {__p2, __p2}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmlal_high_lane_u32(__p0_443, __p1_443, __p2_443, __p3_443) __extension__ ({ \ - uint64x2_t __s0_443 = __p0_443; \ - uint32x4_t __s1_443 = __p1_443; \ - uint32x2_t __s2_443 = __p2_443; \ - uint64x2_t __ret_443; \ - __ret_443 = __s0_443 + vmull_u32(vget_high_u32(__s1_443), splat_lane_u32(__s2_443, __p3_443)); \ - __ret_443; \ +#define vmlal_high_lane_u32(__p0_535, __p1_535, __p2_535, __p3_535) __extension__ ({ \ + uint64x2_t __s0_535 = __p0_535; \ + uint32x4_t __s1_535 = __p1_535; \ + uint32x2_t __s2_535 = __p2_535; \ + uint64x2_t __ret_535; \ + __ret_535 = __s0_535 + vmull_u32(vget_high_u32(__s1_535), splat_lane_u32(__s2_535, __p3_535)); \ + __ret_535; \ }) #else -#define vmlal_high_lane_u32(__p0_444, __p1_444, __p2_444, __p3_444) __extension__ ({ \ - uint64x2_t __s0_444 = __p0_444; \ - uint32x4_t __s1_444 = __p1_444; \ - uint32x2_t __s2_444 = __p2_444; \ - uint64x2_t __rev0_444; __rev0_444 = __builtin_shufflevector(__s0_444, __s0_444, 1, 0); \ - uint32x4_t __rev1_444; __rev1_444 = __builtin_shufflevector(__s1_444, __s1_444, 3, 2, 1, 0); \ - uint32x2_t __rev2_444; __rev2_444 = __builtin_shufflevector(__s2_444, __s2_444, 1, 0); \ - uint64x2_t __ret_444; \ - __ret_444 = __rev0_444 + __noswap_vmull_u32(__noswap_vget_high_u32(__rev1_444), __noswap_splat_lane_u32(__rev2_444, __p3_444)); \ - __ret_444 = __builtin_shufflevector(__ret_444, __ret_444, 1, 0); \ - __ret_444; \ +#define vmlal_high_lane_u32(__p0_536, __p1_536, __p2_536, __p3_536) __extension__ ({ \ + uint64x2_t __s0_536 = __p0_536; \ + uint32x4_t __s1_536 = __p1_536; \ + uint32x2_t __s2_536 = __p2_536; \ + uint64x2_t __rev0_536; __rev0_536 = __builtin_shufflevector(__s0_536, __s0_536, 1, 0); \ + uint32x4_t __rev1_536; __rev1_536 = __builtin_shufflevector(__s1_536, __s1_536, 3, 2, 1, 0); \ + uint32x2_t __rev2_536; __rev2_536 = __builtin_shufflevector(__s2_536, __s2_536, 1, 0); \ + uint64x2_t __ret_536; \ + __ret_536 = __rev0_536 + __noswap_vmull_u32(__noswap_vget_high_u32(__rev1_536), __noswap_splat_lane_u32(__rev2_536, __p3_536)); \ + __ret_536 = __builtin_shufflevector(__ret_536, __ret_536, 1, 0); \ + __ret_536; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_high_lane_u16(__p0_445, __p1_445, __p2_445, __p3_445) __extension__ ({ \ - uint32x4_t __s0_445 = __p0_445; \ - uint16x8_t __s1_445 = __p1_445; \ - uint16x4_t __s2_445 = __p2_445; \ - uint32x4_t __ret_445; \ - __ret_445 = __s0_445 + vmull_u16(vget_high_u16(__s1_445), 
splat_lane_u16(__s2_445, __p3_445)); \ - __ret_445; \ +#define vmlal_high_lane_u16(__p0_537, __p1_537, __p2_537, __p3_537) __extension__ ({ \ + uint32x4_t __s0_537 = __p0_537; \ + uint16x8_t __s1_537 = __p1_537; \ + uint16x4_t __s2_537 = __p2_537; \ + uint32x4_t __ret_537; \ + __ret_537 = __s0_537 + vmull_u16(vget_high_u16(__s1_537), splat_lane_u16(__s2_537, __p3_537)); \ + __ret_537; \ }) #else -#define vmlal_high_lane_u16(__p0_446, __p1_446, __p2_446, __p3_446) __extension__ ({ \ - uint32x4_t __s0_446 = __p0_446; \ - uint16x8_t __s1_446 = __p1_446; \ - uint16x4_t __s2_446 = __p2_446; \ - uint32x4_t __rev0_446; __rev0_446 = __builtin_shufflevector(__s0_446, __s0_446, 3, 2, 1, 0); \ - uint16x8_t __rev1_446; __rev1_446 = __builtin_shufflevector(__s1_446, __s1_446, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x4_t __rev2_446; __rev2_446 = __builtin_shufflevector(__s2_446, __s2_446, 3, 2, 1, 0); \ - uint32x4_t __ret_446; \ - __ret_446 = __rev0_446 + __noswap_vmull_u16(__noswap_vget_high_u16(__rev1_446), __noswap_splat_lane_u16(__rev2_446, __p3_446)); \ - __ret_446 = __builtin_shufflevector(__ret_446, __ret_446, 3, 2, 1, 0); \ - __ret_446; \ +#define vmlal_high_lane_u16(__p0_538, __p1_538, __p2_538, __p3_538) __extension__ ({ \ + uint32x4_t __s0_538 = __p0_538; \ + uint16x8_t __s1_538 = __p1_538; \ + uint16x4_t __s2_538 = __p2_538; \ + uint32x4_t __rev0_538; __rev0_538 = __builtin_shufflevector(__s0_538, __s0_538, 3, 2, 1, 0); \ + uint16x8_t __rev1_538; __rev1_538 = __builtin_shufflevector(__s1_538, __s1_538, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x4_t __rev2_538; __rev2_538 = __builtin_shufflevector(__s2_538, __s2_538, 3, 2, 1, 0); \ + uint32x4_t __ret_538; \ + __ret_538 = __rev0_538 + __noswap_vmull_u16(__noswap_vget_high_u16(__rev1_538), __noswap_splat_lane_u16(__rev2_538, __p3_538)); \ + __ret_538 = __builtin_shufflevector(__ret_538, __ret_538, 3, 2, 1, 0); \ + __ret_538; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_high_lane_s32(__p0_447, __p1_447, __p2_447, __p3_447) __extension__ ({ \ - int64x2_t __s0_447 = __p0_447; \ - int32x4_t __s1_447 = __p1_447; \ - int32x2_t __s2_447 = __p2_447; \ - int64x2_t __ret_447; \ - __ret_447 = __s0_447 + vmull_s32(vget_high_s32(__s1_447), splat_lane_s32(__s2_447, __p3_447)); \ - __ret_447; \ +#define vmlal_high_lane_s32(__p0_539, __p1_539, __p2_539, __p3_539) __extension__ ({ \ + int64x2_t __s0_539 = __p0_539; \ + int32x4_t __s1_539 = __p1_539; \ + int32x2_t __s2_539 = __p2_539; \ + int64x2_t __ret_539; \ + __ret_539 = __s0_539 + vmull_s32(vget_high_s32(__s1_539), splat_lane_s32(__s2_539, __p3_539)); \ + __ret_539; \ }) #else -#define vmlal_high_lane_s32(__p0_448, __p1_448, __p2_448, __p3_448) __extension__ ({ \ - int64x2_t __s0_448 = __p0_448; \ - int32x4_t __s1_448 = __p1_448; \ - int32x2_t __s2_448 = __p2_448; \ - int64x2_t __rev0_448; __rev0_448 = __builtin_shufflevector(__s0_448, __s0_448, 1, 0); \ - int32x4_t __rev1_448; __rev1_448 = __builtin_shufflevector(__s1_448, __s1_448, 3, 2, 1, 0); \ - int32x2_t __rev2_448; __rev2_448 = __builtin_shufflevector(__s2_448, __s2_448, 1, 0); \ - int64x2_t __ret_448; \ - __ret_448 = __rev0_448 + __noswap_vmull_s32(__noswap_vget_high_s32(__rev1_448), __noswap_splat_lane_s32(__rev2_448, __p3_448)); \ - __ret_448 = __builtin_shufflevector(__ret_448, __ret_448, 1, 0); \ - __ret_448; \ +#define vmlal_high_lane_s32(__p0_540, __p1_540, __p2_540, __p3_540) __extension__ ({ \ + int64x2_t __s0_540 = __p0_540; \ + int32x4_t __s1_540 = __p1_540; \ + int32x2_t __s2_540 = __p2_540; \ + int64x2_t __rev0_540; __rev0_540 = 
__builtin_shufflevector(__s0_540, __s0_540, 1, 0); \ + int32x4_t __rev1_540; __rev1_540 = __builtin_shufflevector(__s1_540, __s1_540, 3, 2, 1, 0); \ + int32x2_t __rev2_540; __rev2_540 = __builtin_shufflevector(__s2_540, __s2_540, 1, 0); \ + int64x2_t __ret_540; \ + __ret_540 = __rev0_540 + __noswap_vmull_s32(__noswap_vget_high_s32(__rev1_540), __noswap_splat_lane_s32(__rev2_540, __p3_540)); \ + __ret_540 = __builtin_shufflevector(__ret_540, __ret_540, 1, 0); \ + __ret_540; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_high_lane_s16(__p0_449, __p1_449, __p2_449, __p3_449) __extension__ ({ \ - int32x4_t __s0_449 = __p0_449; \ - int16x8_t __s1_449 = __p1_449; \ - int16x4_t __s2_449 = __p2_449; \ - int32x4_t __ret_449; \ - __ret_449 = __s0_449 + vmull_s16(vget_high_s16(__s1_449), splat_lane_s16(__s2_449, __p3_449)); \ - __ret_449; \ +#define vmlal_high_lane_s16(__p0_541, __p1_541, __p2_541, __p3_541) __extension__ ({ \ + int32x4_t __s0_541 = __p0_541; \ + int16x8_t __s1_541 = __p1_541; \ + int16x4_t __s2_541 = __p2_541; \ + int32x4_t __ret_541; \ + __ret_541 = __s0_541 + vmull_s16(vget_high_s16(__s1_541), splat_lane_s16(__s2_541, __p3_541)); \ + __ret_541; \ }) #else -#define vmlal_high_lane_s16(__p0_450, __p1_450, __p2_450, __p3_450) __extension__ ({ \ - int32x4_t __s0_450 = __p0_450; \ - int16x8_t __s1_450 = __p1_450; \ - int16x4_t __s2_450 = __p2_450; \ - int32x4_t __rev0_450; __rev0_450 = __builtin_shufflevector(__s0_450, __s0_450, 3, 2, 1, 0); \ - int16x8_t __rev1_450; __rev1_450 = __builtin_shufflevector(__s1_450, __s1_450, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev2_450; __rev2_450 = __builtin_shufflevector(__s2_450, __s2_450, 3, 2, 1, 0); \ - int32x4_t __ret_450; \ - __ret_450 = __rev0_450 + __noswap_vmull_s16(__noswap_vget_high_s16(__rev1_450), __noswap_splat_lane_s16(__rev2_450, __p3_450)); \ - __ret_450 = __builtin_shufflevector(__ret_450, __ret_450, 3, 2, 1, 0); \ - __ret_450; \ +#define vmlal_high_lane_s16(__p0_542, __p1_542, __p2_542, __p3_542) __extension__ ({ \ + int32x4_t __s0_542 = __p0_542; \ + int16x8_t __s1_542 = __p1_542; \ + int16x4_t __s2_542 = __p2_542; \ + int32x4_t __rev0_542; __rev0_542 = __builtin_shufflevector(__s0_542, __s0_542, 3, 2, 1, 0); \ + int16x8_t __rev1_542; __rev1_542 = __builtin_shufflevector(__s1_542, __s1_542, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __rev2_542; __rev2_542 = __builtin_shufflevector(__s2_542, __s2_542, 3, 2, 1, 0); \ + int32x4_t __ret_542; \ + __ret_542 = __rev0_542 + __noswap_vmull_s16(__noswap_vget_high_s16(__rev1_542), __noswap_splat_lane_s16(__rev2_542, __p3_542)); \ + __ret_542 = __builtin_shufflevector(__ret_542, __ret_542, 3, 2, 1, 0); \ + __ret_542; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_high_laneq_u32(__p0_451, __p1_451, __p2_451, __p3_451) __extension__ ({ \ - uint64x2_t __s0_451 = __p0_451; \ - uint32x4_t __s1_451 = __p1_451; \ - uint32x4_t __s2_451 = __p2_451; \ - uint64x2_t __ret_451; \ - __ret_451 = __s0_451 + vmull_u32(vget_high_u32(__s1_451), splat_laneq_u32(__s2_451, __p3_451)); \ - __ret_451; \ +#define vmlal_high_laneq_u32(__p0_543, __p1_543, __p2_543, __p3_543) __extension__ ({ \ + uint64x2_t __s0_543 = __p0_543; \ + uint32x4_t __s1_543 = __p1_543; \ + uint32x4_t __s2_543 = __p2_543; \ + uint64x2_t __ret_543; \ + __ret_543 = __s0_543 + vmull_u32(vget_high_u32(__s1_543), splat_laneq_u32(__s2_543, __p3_543)); \ + __ret_543; \ }) #else -#define vmlal_high_laneq_u32(__p0_452, __p1_452, __p2_452, __p3_452) __extension__ ({ \ - uint64x2_t __s0_452 = __p0_452; \ - uint32x4_t __s1_452 = __p1_452; \ - 
uint32x4_t __s2_452 = __p2_452; \ - uint64x2_t __rev0_452; __rev0_452 = __builtin_shufflevector(__s0_452, __s0_452, 1, 0); \ - uint32x4_t __rev1_452; __rev1_452 = __builtin_shufflevector(__s1_452, __s1_452, 3, 2, 1, 0); \ - uint32x4_t __rev2_452; __rev2_452 = __builtin_shufflevector(__s2_452, __s2_452, 3, 2, 1, 0); \ - uint64x2_t __ret_452; \ - __ret_452 = __rev0_452 + __noswap_vmull_u32(__noswap_vget_high_u32(__rev1_452), __noswap_splat_laneq_u32(__rev2_452, __p3_452)); \ - __ret_452 = __builtin_shufflevector(__ret_452, __ret_452, 1, 0); \ - __ret_452; \ +#define vmlal_high_laneq_u32(__p0_544, __p1_544, __p2_544, __p3_544) __extension__ ({ \ + uint64x2_t __s0_544 = __p0_544; \ + uint32x4_t __s1_544 = __p1_544; \ + uint32x4_t __s2_544 = __p2_544; \ + uint64x2_t __rev0_544; __rev0_544 = __builtin_shufflevector(__s0_544, __s0_544, 1, 0); \ + uint32x4_t __rev1_544; __rev1_544 = __builtin_shufflevector(__s1_544, __s1_544, 3, 2, 1, 0); \ + uint32x4_t __rev2_544; __rev2_544 = __builtin_shufflevector(__s2_544, __s2_544, 3, 2, 1, 0); \ + uint64x2_t __ret_544; \ + __ret_544 = __rev0_544 + __noswap_vmull_u32(__noswap_vget_high_u32(__rev1_544), __noswap_splat_laneq_u32(__rev2_544, __p3_544)); \ + __ret_544 = __builtin_shufflevector(__ret_544, __ret_544, 1, 0); \ + __ret_544; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_high_laneq_u16(__p0_453, __p1_453, __p2_453, __p3_453) __extension__ ({ \ - uint32x4_t __s0_453 = __p0_453; \ - uint16x8_t __s1_453 = __p1_453; \ - uint16x8_t __s2_453 = __p2_453; \ - uint32x4_t __ret_453; \ - __ret_453 = __s0_453 + vmull_u16(vget_high_u16(__s1_453), splat_laneq_u16(__s2_453, __p3_453)); \ - __ret_453; \ +#define vmlal_high_laneq_u16(__p0_545, __p1_545, __p2_545, __p3_545) __extension__ ({ \ + uint32x4_t __s0_545 = __p0_545; \ + uint16x8_t __s1_545 = __p1_545; \ + uint16x8_t __s2_545 = __p2_545; \ + uint32x4_t __ret_545; \ + __ret_545 = __s0_545 + vmull_u16(vget_high_u16(__s1_545), splat_laneq_u16(__s2_545, __p3_545)); \ + __ret_545; \ }) #else -#define vmlal_high_laneq_u16(__p0_454, __p1_454, __p2_454, __p3_454) __extension__ ({ \ - uint32x4_t __s0_454 = __p0_454; \ - uint16x8_t __s1_454 = __p1_454; \ - uint16x8_t __s2_454 = __p2_454; \ - uint32x4_t __rev0_454; __rev0_454 = __builtin_shufflevector(__s0_454, __s0_454, 3, 2, 1, 0); \ - uint16x8_t __rev1_454; __rev1_454 = __builtin_shufflevector(__s1_454, __s1_454, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev2_454; __rev2_454 = __builtin_shufflevector(__s2_454, __s2_454, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint32x4_t __ret_454; \ - __ret_454 = __rev0_454 + __noswap_vmull_u16(__noswap_vget_high_u16(__rev1_454), __noswap_splat_laneq_u16(__rev2_454, __p3_454)); \ - __ret_454 = __builtin_shufflevector(__ret_454, __ret_454, 3, 2, 1, 0); \ - __ret_454; \ +#define vmlal_high_laneq_u16(__p0_546, __p1_546, __p2_546, __p3_546) __extension__ ({ \ + uint32x4_t __s0_546 = __p0_546; \ + uint16x8_t __s1_546 = __p1_546; \ + uint16x8_t __s2_546 = __p2_546; \ + uint32x4_t __rev0_546; __rev0_546 = __builtin_shufflevector(__s0_546, __s0_546, 3, 2, 1, 0); \ + uint16x8_t __rev1_546; __rev1_546 = __builtin_shufflevector(__s1_546, __s1_546, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev2_546; __rev2_546 = __builtin_shufflevector(__s2_546, __s2_546, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint32x4_t __ret_546; \ + __ret_546 = __rev0_546 + __noswap_vmull_u16(__noswap_vget_high_u16(__rev1_546), __noswap_splat_laneq_u16(__rev2_546, __p3_546)); \ + __ret_546 = __builtin_shufflevector(__ret_546, __ret_546, 3, 2, 1, 0); \ + __ret_546; \ }) #endif #ifdef 
__LITTLE_ENDIAN__ -#define vmlal_high_laneq_s32(__p0_455, __p1_455, __p2_455, __p3_455) __extension__ ({ \ - int64x2_t __s0_455 = __p0_455; \ - int32x4_t __s1_455 = __p1_455; \ - int32x4_t __s2_455 = __p2_455; \ - int64x2_t __ret_455; \ - __ret_455 = __s0_455 + vmull_s32(vget_high_s32(__s1_455), splat_laneq_s32(__s2_455, __p3_455)); \ - __ret_455; \ +#define vmlal_high_laneq_s32(__p0_547, __p1_547, __p2_547, __p3_547) __extension__ ({ \ + int64x2_t __s0_547 = __p0_547; \ + int32x4_t __s1_547 = __p1_547; \ + int32x4_t __s2_547 = __p2_547; \ + int64x2_t __ret_547; \ + __ret_547 = __s0_547 + vmull_s32(vget_high_s32(__s1_547), splat_laneq_s32(__s2_547, __p3_547)); \ + __ret_547; \ }) #else -#define vmlal_high_laneq_s32(__p0_456, __p1_456, __p2_456, __p3_456) __extension__ ({ \ - int64x2_t __s0_456 = __p0_456; \ - int32x4_t __s1_456 = __p1_456; \ - int32x4_t __s2_456 = __p2_456; \ - int64x2_t __rev0_456; __rev0_456 = __builtin_shufflevector(__s0_456, __s0_456, 1, 0); \ - int32x4_t __rev1_456; __rev1_456 = __builtin_shufflevector(__s1_456, __s1_456, 3, 2, 1, 0); \ - int32x4_t __rev2_456; __rev2_456 = __builtin_shufflevector(__s2_456, __s2_456, 3, 2, 1, 0); \ - int64x2_t __ret_456; \ - __ret_456 = __rev0_456 + __noswap_vmull_s32(__noswap_vget_high_s32(__rev1_456), __noswap_splat_laneq_s32(__rev2_456, __p3_456)); \ - __ret_456 = __builtin_shufflevector(__ret_456, __ret_456, 1, 0); \ - __ret_456; \ +#define vmlal_high_laneq_s32(__p0_548, __p1_548, __p2_548, __p3_548) __extension__ ({ \ + int64x2_t __s0_548 = __p0_548; \ + int32x4_t __s1_548 = __p1_548; \ + int32x4_t __s2_548 = __p2_548; \ + int64x2_t __rev0_548; __rev0_548 = __builtin_shufflevector(__s0_548, __s0_548, 1, 0); \ + int32x4_t __rev1_548; __rev1_548 = __builtin_shufflevector(__s1_548, __s1_548, 3, 2, 1, 0); \ + int32x4_t __rev2_548; __rev2_548 = __builtin_shufflevector(__s2_548, __s2_548, 3, 2, 1, 0); \ + int64x2_t __ret_548; \ + __ret_548 = __rev0_548 + __noswap_vmull_s32(__noswap_vget_high_s32(__rev1_548), __noswap_splat_laneq_s32(__rev2_548, __p3_548)); \ + __ret_548 = __builtin_shufflevector(__ret_548, __ret_548, 1, 0); \ + __ret_548; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_high_laneq_s16(__p0_457, __p1_457, __p2_457, __p3_457) __extension__ ({ \ - int32x4_t __s0_457 = __p0_457; \ - int16x8_t __s1_457 = __p1_457; \ - int16x8_t __s2_457 = __p2_457; \ - int32x4_t __ret_457; \ - __ret_457 = __s0_457 + vmull_s16(vget_high_s16(__s1_457), splat_laneq_s16(__s2_457, __p3_457)); \ - __ret_457; \ +#define vmlal_high_laneq_s16(__p0_549, __p1_549, __p2_549, __p3_549) __extension__ ({ \ + int32x4_t __s0_549 = __p0_549; \ + int16x8_t __s1_549 = __p1_549; \ + int16x8_t __s2_549 = __p2_549; \ + int32x4_t __ret_549; \ + __ret_549 = __s0_549 + vmull_s16(vget_high_s16(__s1_549), splat_laneq_s16(__s2_549, __p3_549)); \ + __ret_549; \ }) #else -#define vmlal_high_laneq_s16(__p0_458, __p1_458, __p2_458, __p3_458) __extension__ ({ \ - int32x4_t __s0_458 = __p0_458; \ - int16x8_t __s1_458 = __p1_458; \ - int16x8_t __s2_458 = __p2_458; \ - int32x4_t __rev0_458; __rev0_458 = __builtin_shufflevector(__s0_458, __s0_458, 3, 2, 1, 0); \ - int16x8_t __rev1_458; __rev1_458 = __builtin_shufflevector(__s1_458, __s1_458, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev2_458; __rev2_458 = __builtin_shufflevector(__s2_458, __s2_458, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x4_t __ret_458; \ - __ret_458 = __rev0_458 + __noswap_vmull_s16(__noswap_vget_high_s16(__rev1_458), __noswap_splat_laneq_s16(__rev2_458, __p3_458)); \ - __ret_458 = 
__builtin_shufflevector(__ret_458, __ret_458, 3, 2, 1, 0); \ - __ret_458; \ +#define vmlal_high_laneq_s16(__p0_550, __p1_550, __p2_550, __p3_550) __extension__ ({ \ + int32x4_t __s0_550 = __p0_550; \ + int16x8_t __s1_550 = __p1_550; \ + int16x8_t __s2_550 = __p2_550; \ + int32x4_t __rev0_550; __rev0_550 = __builtin_shufflevector(__s0_550, __s0_550, 3, 2, 1, 0); \ + int16x8_t __rev1_550; __rev1_550 = __builtin_shufflevector(__s1_550, __s1_550, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev2_550; __rev2_550 = __builtin_shufflevector(__s2_550, __s2_550, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x4_t __ret_550; \ + __ret_550 = __rev0_550 + __noswap_vmull_s16(__noswap_vget_high_s16(__rev1_550), __noswap_splat_laneq_s16(__rev2_550, __p3_550)); \ + __ret_550 = __builtin_shufflevector(__ret_550, __ret_550, 3, 2, 1, 0); \ + __ret_550; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_laneq_u32(__p0_459, __p1_459, __p2_459, __p3_459) __extension__ ({ \ - uint64x2_t __s0_459 = __p0_459; \ - uint32x2_t __s1_459 = __p1_459; \ - uint32x4_t __s2_459 = __p2_459; \ - uint64x2_t __ret_459; \ - __ret_459 = __s0_459 + vmull_u32(__s1_459, splat_laneq_u32(__s2_459, __p3_459)); \ - __ret_459; \ +#define vmlal_laneq_u32(__p0_551, __p1_551, __p2_551, __p3_551) __extension__ ({ \ + uint64x2_t __s0_551 = __p0_551; \ + uint32x2_t __s1_551 = __p1_551; \ + uint32x4_t __s2_551 = __p2_551; \ + uint64x2_t __ret_551; \ + __ret_551 = __s0_551 + vmull_u32(__s1_551, splat_laneq_u32(__s2_551, __p3_551)); \ + __ret_551; \ }) #else -#define vmlal_laneq_u32(__p0_460, __p1_460, __p2_460, __p3_460) __extension__ ({ \ - uint64x2_t __s0_460 = __p0_460; \ - uint32x2_t __s1_460 = __p1_460; \ - uint32x4_t __s2_460 = __p2_460; \ - uint64x2_t __rev0_460; __rev0_460 = __builtin_shufflevector(__s0_460, __s0_460, 1, 0); \ - uint32x2_t __rev1_460; __rev1_460 = __builtin_shufflevector(__s1_460, __s1_460, 1, 0); \ - uint32x4_t __rev2_460; __rev2_460 = __builtin_shufflevector(__s2_460, __s2_460, 3, 2, 1, 0); \ - uint64x2_t __ret_460; \ - __ret_460 = __rev0_460 + __noswap_vmull_u32(__rev1_460, __noswap_splat_laneq_u32(__rev2_460, __p3_460)); \ - __ret_460 = __builtin_shufflevector(__ret_460, __ret_460, 1, 0); \ - __ret_460; \ +#define vmlal_laneq_u32(__p0_552, __p1_552, __p2_552, __p3_552) __extension__ ({ \ + uint64x2_t __s0_552 = __p0_552; \ + uint32x2_t __s1_552 = __p1_552; \ + uint32x4_t __s2_552 = __p2_552; \ + uint64x2_t __rev0_552; __rev0_552 = __builtin_shufflevector(__s0_552, __s0_552, 1, 0); \ + uint32x2_t __rev1_552; __rev1_552 = __builtin_shufflevector(__s1_552, __s1_552, 1, 0); \ + uint32x4_t __rev2_552; __rev2_552 = __builtin_shufflevector(__s2_552, __s2_552, 3, 2, 1, 0); \ + uint64x2_t __ret_552; \ + __ret_552 = __rev0_552 + __noswap_vmull_u32(__rev1_552, __noswap_splat_laneq_u32(__rev2_552, __p3_552)); \ + __ret_552 = __builtin_shufflevector(__ret_552, __ret_552, 1, 0); \ + __ret_552; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_laneq_u16(__p0_461, __p1_461, __p2_461, __p3_461) __extension__ ({ \ - uint32x4_t __s0_461 = __p0_461; \ - uint16x4_t __s1_461 = __p1_461; \ - uint16x8_t __s2_461 = __p2_461; \ - uint32x4_t __ret_461; \ - __ret_461 = __s0_461 + vmull_u16(__s1_461, splat_laneq_u16(__s2_461, __p3_461)); \ - __ret_461; \ +#define vmlal_laneq_u16(__p0_553, __p1_553, __p2_553, __p3_553) __extension__ ({ \ + uint32x4_t __s0_553 = __p0_553; \ + uint16x4_t __s1_553 = __p1_553; \ + uint16x8_t __s2_553 = __p2_553; \ + uint32x4_t __ret_553; \ + __ret_553 = __s0_553 + vmull_u16(__s1_553, splat_laneq_u16(__s2_553, __p3_553)); \ + 
__ret_553; \ }) #else -#define vmlal_laneq_u16(__p0_462, __p1_462, __p2_462, __p3_462) __extension__ ({ \ - uint32x4_t __s0_462 = __p0_462; \ - uint16x4_t __s1_462 = __p1_462; \ - uint16x8_t __s2_462 = __p2_462; \ - uint32x4_t __rev0_462; __rev0_462 = __builtin_shufflevector(__s0_462, __s0_462, 3, 2, 1, 0); \ - uint16x4_t __rev1_462; __rev1_462 = __builtin_shufflevector(__s1_462, __s1_462, 3, 2, 1, 0); \ - uint16x8_t __rev2_462; __rev2_462 = __builtin_shufflevector(__s2_462, __s2_462, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint32x4_t __ret_462; \ - __ret_462 = __rev0_462 + __noswap_vmull_u16(__rev1_462, __noswap_splat_laneq_u16(__rev2_462, __p3_462)); \ - __ret_462 = __builtin_shufflevector(__ret_462, __ret_462, 3, 2, 1, 0); \ - __ret_462; \ +#define vmlal_laneq_u16(__p0_554, __p1_554, __p2_554, __p3_554) __extension__ ({ \ + uint32x4_t __s0_554 = __p0_554; \ + uint16x4_t __s1_554 = __p1_554; \ + uint16x8_t __s2_554 = __p2_554; \ + uint32x4_t __rev0_554; __rev0_554 = __builtin_shufflevector(__s0_554, __s0_554, 3, 2, 1, 0); \ + uint16x4_t __rev1_554; __rev1_554 = __builtin_shufflevector(__s1_554, __s1_554, 3, 2, 1, 0); \ + uint16x8_t __rev2_554; __rev2_554 = __builtin_shufflevector(__s2_554, __s2_554, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint32x4_t __ret_554; \ + __ret_554 = __rev0_554 + __noswap_vmull_u16(__rev1_554, __noswap_splat_laneq_u16(__rev2_554, __p3_554)); \ + __ret_554 = __builtin_shufflevector(__ret_554, __ret_554, 3, 2, 1, 0); \ + __ret_554; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_laneq_s32(__p0_463, __p1_463, __p2_463, __p3_463) __extension__ ({ \ - int64x2_t __s0_463 = __p0_463; \ - int32x2_t __s1_463 = __p1_463; \ - int32x4_t __s2_463 = __p2_463; \ - int64x2_t __ret_463; \ - __ret_463 = __s0_463 + vmull_s32(__s1_463, splat_laneq_s32(__s2_463, __p3_463)); \ - __ret_463; \ +#define vmlal_laneq_s32(__p0_555, __p1_555, __p2_555, __p3_555) __extension__ ({ \ + int64x2_t __s0_555 = __p0_555; \ + int32x2_t __s1_555 = __p1_555; \ + int32x4_t __s2_555 = __p2_555; \ + int64x2_t __ret_555; \ + __ret_555 = __s0_555 + vmull_s32(__s1_555, splat_laneq_s32(__s2_555, __p3_555)); \ + __ret_555; \ }) #else -#define vmlal_laneq_s32(__p0_464, __p1_464, __p2_464, __p3_464) __extension__ ({ \ - int64x2_t __s0_464 = __p0_464; \ - int32x2_t __s1_464 = __p1_464; \ - int32x4_t __s2_464 = __p2_464; \ - int64x2_t __rev0_464; __rev0_464 = __builtin_shufflevector(__s0_464, __s0_464, 1, 0); \ - int32x2_t __rev1_464; __rev1_464 = __builtin_shufflevector(__s1_464, __s1_464, 1, 0); \ - int32x4_t __rev2_464; __rev2_464 = __builtin_shufflevector(__s2_464, __s2_464, 3, 2, 1, 0); \ - int64x2_t __ret_464; \ - __ret_464 = __rev0_464 + __noswap_vmull_s32(__rev1_464, __noswap_splat_laneq_s32(__rev2_464, __p3_464)); \ - __ret_464 = __builtin_shufflevector(__ret_464, __ret_464, 1, 0); \ - __ret_464; \ +#define vmlal_laneq_s32(__p0_556, __p1_556, __p2_556, __p3_556) __extension__ ({ \ + int64x2_t __s0_556 = __p0_556; \ + int32x2_t __s1_556 = __p1_556; \ + int32x4_t __s2_556 = __p2_556; \ + int64x2_t __rev0_556; __rev0_556 = __builtin_shufflevector(__s0_556, __s0_556, 1, 0); \ + int32x2_t __rev1_556; __rev1_556 = __builtin_shufflevector(__s1_556, __s1_556, 1, 0); \ + int32x4_t __rev2_556; __rev2_556 = __builtin_shufflevector(__s2_556, __s2_556, 3, 2, 1, 0); \ + int64x2_t __ret_556; \ + __ret_556 = __rev0_556 + __noswap_vmull_s32(__rev1_556, __noswap_splat_laneq_s32(__rev2_556, __p3_556)); \ + __ret_556 = __builtin_shufflevector(__ret_556, __ret_556, 1, 0); \ + __ret_556; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define 
vmlal_laneq_s16(__p0_465, __p1_465, __p2_465, __p3_465) __extension__ ({ \ - int32x4_t __s0_465 = __p0_465; \ - int16x4_t __s1_465 = __p1_465; \ - int16x8_t __s2_465 = __p2_465; \ - int32x4_t __ret_465; \ - __ret_465 = __s0_465 + vmull_s16(__s1_465, splat_laneq_s16(__s2_465, __p3_465)); \ - __ret_465; \ +#define vmlal_laneq_s16(__p0_557, __p1_557, __p2_557, __p3_557) __extension__ ({ \ + int32x4_t __s0_557 = __p0_557; \ + int16x4_t __s1_557 = __p1_557; \ + int16x8_t __s2_557 = __p2_557; \ + int32x4_t __ret_557; \ + __ret_557 = __s0_557 + vmull_s16(__s1_557, splat_laneq_s16(__s2_557, __p3_557)); \ + __ret_557; \ }) #else -#define vmlal_laneq_s16(__p0_466, __p1_466, __p2_466, __p3_466) __extension__ ({ \ - int32x4_t __s0_466 = __p0_466; \ - int16x4_t __s1_466 = __p1_466; \ - int16x8_t __s2_466 = __p2_466; \ - int32x4_t __rev0_466; __rev0_466 = __builtin_shufflevector(__s0_466, __s0_466, 3, 2, 1, 0); \ - int16x4_t __rev1_466; __rev1_466 = __builtin_shufflevector(__s1_466, __s1_466, 3, 2, 1, 0); \ - int16x8_t __rev2_466; __rev2_466 = __builtin_shufflevector(__s2_466, __s2_466, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x4_t __ret_466; \ - __ret_466 = __rev0_466 + __noswap_vmull_s16(__rev1_466, __noswap_splat_laneq_s16(__rev2_466, __p3_466)); \ - __ret_466 = __builtin_shufflevector(__ret_466, __ret_466, 3, 2, 1, 0); \ - __ret_466; \ +#define vmlal_laneq_s16(__p0_558, __p1_558, __p2_558, __p3_558) __extension__ ({ \ + int32x4_t __s0_558 = __p0_558; \ + int16x4_t __s1_558 = __p1_558; \ + int16x8_t __s2_558 = __p2_558; \ + int32x4_t __rev0_558; __rev0_558 = __builtin_shufflevector(__s0_558, __s0_558, 3, 2, 1, 0); \ + int16x4_t __rev1_558; __rev1_558 = __builtin_shufflevector(__s1_558, __s1_558, 3, 2, 1, 0); \ + int16x8_t __rev2_558; __rev2_558 = __builtin_shufflevector(__s2_558, __s2_558, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x4_t __ret_558; \ + __ret_558 = __rev0_558 + __noswap_vmull_s16(__rev1_558, __noswap_splat_laneq_s16(__rev2_558, __p3_558)); \ + __ret_558 = __builtin_shufflevector(__ret_558, __ret_558, 3, 2, 1, 0); \ + __ret_558; \ }) #endif @@ -53116,547 +54803,530 @@ __ai float64x1_t vmls_f64(float64x1_t __p0, float64x1_t __p1, float64x1_t __p2) return __ret; } #ifdef __LITTLE_ENDIAN__ -#define vmlsq_laneq_u32(__p0_467, __p1_467, __p2_467, __p3_467) __extension__ ({ \ - uint32x4_t __s0_467 = __p0_467; \ - uint32x4_t __s1_467 = __p1_467; \ - uint32x4_t __s2_467 = __p2_467; \ - uint32x4_t __ret_467; \ - __ret_467 = __s0_467 - __s1_467 * splatq_laneq_u32(__s2_467, __p3_467); \ - __ret_467; \ +#define vmlsq_laneq_u32(__p0_559, __p1_559, __p2_559, __p3_559) __extension__ ({ \ + uint32x4_t __s0_559 = __p0_559; \ + uint32x4_t __s1_559 = __p1_559; \ + uint32x4_t __s2_559 = __p2_559; \ + uint32x4_t __ret_559; \ + __ret_559 = __s0_559 - __s1_559 * splatq_laneq_u32(__s2_559, __p3_559); \ + __ret_559; \ }) #else -#define vmlsq_laneq_u32(__p0_468, __p1_468, __p2_468, __p3_468) __extension__ ({ \ - uint32x4_t __s0_468 = __p0_468; \ - uint32x4_t __s1_468 = __p1_468; \ - uint32x4_t __s2_468 = __p2_468; \ - uint32x4_t __rev0_468; __rev0_468 = __builtin_shufflevector(__s0_468, __s0_468, 3, 2, 1, 0); \ - uint32x4_t __rev1_468; __rev1_468 = __builtin_shufflevector(__s1_468, __s1_468, 3, 2, 1, 0); \ - uint32x4_t __rev2_468; __rev2_468 = __builtin_shufflevector(__s2_468, __s2_468, 3, 2, 1, 0); \ - uint32x4_t __ret_468; \ - __ret_468 = __rev0_468 - __rev1_468 * __noswap_splatq_laneq_u32(__rev2_468, __p3_468); \ - __ret_468 = __builtin_shufflevector(__ret_468, __ret_468, 3, 2, 1, 0); \ - __ret_468; \ +#define 
vmlsq_laneq_u32(__p0_560, __p1_560, __p2_560, __p3_560) __extension__ ({ \ + uint32x4_t __s0_560 = __p0_560; \ + uint32x4_t __s1_560 = __p1_560; \ + uint32x4_t __s2_560 = __p2_560; \ + uint32x4_t __rev0_560; __rev0_560 = __builtin_shufflevector(__s0_560, __s0_560, 3, 2, 1, 0); \ + uint32x4_t __rev1_560; __rev1_560 = __builtin_shufflevector(__s1_560, __s1_560, 3, 2, 1, 0); \ + uint32x4_t __rev2_560; __rev2_560 = __builtin_shufflevector(__s2_560, __s2_560, 3, 2, 1, 0); \ + uint32x4_t __ret_560; \ + __ret_560 = __rev0_560 - __rev1_560 * __noswap_splatq_laneq_u32(__rev2_560, __p3_560); \ + __ret_560 = __builtin_shufflevector(__ret_560, __ret_560, 3, 2, 1, 0); \ + __ret_560; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsq_laneq_u16(__p0_469, __p1_469, __p2_469, __p3_469) __extension__ ({ \ - uint16x8_t __s0_469 = __p0_469; \ - uint16x8_t __s1_469 = __p1_469; \ - uint16x8_t __s2_469 = __p2_469; \ - uint16x8_t __ret_469; \ - __ret_469 = __s0_469 - __s1_469 * splatq_laneq_u16(__s2_469, __p3_469); \ - __ret_469; \ +#define vmlsq_laneq_u16(__p0_561, __p1_561, __p2_561, __p3_561) __extension__ ({ \ + uint16x8_t __s0_561 = __p0_561; \ + uint16x8_t __s1_561 = __p1_561; \ + uint16x8_t __s2_561 = __p2_561; \ + uint16x8_t __ret_561; \ + __ret_561 = __s0_561 - __s1_561 * splatq_laneq_u16(__s2_561, __p3_561); \ + __ret_561; \ }) #else -#define vmlsq_laneq_u16(__p0_470, __p1_470, __p2_470, __p3_470) __extension__ ({ \ - uint16x8_t __s0_470 = __p0_470; \ - uint16x8_t __s1_470 = __p1_470; \ - uint16x8_t __s2_470 = __p2_470; \ - uint16x8_t __rev0_470; __rev0_470 = __builtin_shufflevector(__s0_470, __s0_470, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1_470; __rev1_470 = __builtin_shufflevector(__s1_470, __s1_470, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev2_470; __rev2_470 = __builtin_shufflevector(__s2_470, __s2_470, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __ret_470; \ - __ret_470 = __rev0_470 - __rev1_470 * __noswap_splatq_laneq_u16(__rev2_470, __p3_470); \ - __ret_470 = __builtin_shufflevector(__ret_470, __ret_470, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_470; \ +#define vmlsq_laneq_u16(__p0_562, __p1_562, __p2_562, __p3_562) __extension__ ({ \ + uint16x8_t __s0_562 = __p0_562; \ + uint16x8_t __s1_562 = __p1_562; \ + uint16x8_t __s2_562 = __p2_562; \ + uint16x8_t __rev0_562; __rev0_562 = __builtin_shufflevector(__s0_562, __s0_562, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev1_562; __rev1_562 = __builtin_shufflevector(__s1_562, __s1_562, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev2_562; __rev2_562 = __builtin_shufflevector(__s2_562, __s2_562, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __ret_562; \ + __ret_562 = __rev0_562 - __rev1_562 * __noswap_splatq_laneq_u16(__rev2_562, __p3_562); \ + __ret_562 = __builtin_shufflevector(__ret_562, __ret_562, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_562; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsq_laneq_f32(__p0_471, __p1_471, __p2_471, __p3_471) __extension__ ({ \ - float32x4_t __s0_471 = __p0_471; \ - float32x4_t __s1_471 = __p1_471; \ - float32x4_t __s2_471 = __p2_471; \ - float32x4_t __ret_471; \ - __ret_471 = __s0_471 - __s1_471 * splatq_laneq_f32(__s2_471, __p3_471); \ - __ret_471; \ +#define vmlsq_laneq_f32(__p0_563, __p1_563, __p2_563, __p3_563) __extension__ ({ \ + float32x4_t __s0_563 = __p0_563; \ + float32x4_t __s1_563 = __p1_563; \ + float32x4_t __s2_563 = __p2_563; \ + float32x4_t __ret_563; \ + __ret_563 = __s0_563 - __s1_563 * splatq_laneq_f32(__s2_563, __p3_563); \ + __ret_563; \ }) #else -#define vmlsq_laneq_f32(__p0_472, __p1_472, __p2_472, __p3_472) 
__extension__ ({ \ - float32x4_t __s0_472 = __p0_472; \ - float32x4_t __s1_472 = __p1_472; \ - float32x4_t __s2_472 = __p2_472; \ - float32x4_t __rev0_472; __rev0_472 = __builtin_shufflevector(__s0_472, __s0_472, 3, 2, 1, 0); \ - float32x4_t __rev1_472; __rev1_472 = __builtin_shufflevector(__s1_472, __s1_472, 3, 2, 1, 0); \ - float32x4_t __rev2_472; __rev2_472 = __builtin_shufflevector(__s2_472, __s2_472, 3, 2, 1, 0); \ - float32x4_t __ret_472; \ - __ret_472 = __rev0_472 - __rev1_472 * __noswap_splatq_laneq_f32(__rev2_472, __p3_472); \ - __ret_472 = __builtin_shufflevector(__ret_472, __ret_472, 3, 2, 1, 0); \ - __ret_472; \ +#define vmlsq_laneq_f32(__p0_564, __p1_564, __p2_564, __p3_564) __extension__ ({ \ + float32x4_t __s0_564 = __p0_564; \ + float32x4_t __s1_564 = __p1_564; \ + float32x4_t __s2_564 = __p2_564; \ + float32x4_t __rev0_564; __rev0_564 = __builtin_shufflevector(__s0_564, __s0_564, 3, 2, 1, 0); \ + float32x4_t __rev1_564; __rev1_564 = __builtin_shufflevector(__s1_564, __s1_564, 3, 2, 1, 0); \ + float32x4_t __rev2_564; __rev2_564 = __builtin_shufflevector(__s2_564, __s2_564, 3, 2, 1, 0); \ + float32x4_t __ret_564; \ + __ret_564 = __rev0_564 - __rev1_564 * __noswap_splatq_laneq_f32(__rev2_564, __p3_564); \ + __ret_564 = __builtin_shufflevector(__ret_564, __ret_564, 3, 2, 1, 0); \ + __ret_564; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsq_laneq_s32(__p0_473, __p1_473, __p2_473, __p3_473) __extension__ ({ \ - int32x4_t __s0_473 = __p0_473; \ - int32x4_t __s1_473 = __p1_473; \ - int32x4_t __s2_473 = __p2_473; \ - int32x4_t __ret_473; \ - __ret_473 = __s0_473 - __s1_473 * splatq_laneq_s32(__s2_473, __p3_473); \ - __ret_473; \ +#define vmlsq_laneq_s32(__p0_565, __p1_565, __p2_565, __p3_565) __extension__ ({ \ + int32x4_t __s0_565 = __p0_565; \ + int32x4_t __s1_565 = __p1_565; \ + int32x4_t __s2_565 = __p2_565; \ + int32x4_t __ret_565; \ + __ret_565 = __s0_565 - __s1_565 * splatq_laneq_s32(__s2_565, __p3_565); \ + __ret_565; \ }) #else -#define vmlsq_laneq_s32(__p0_474, __p1_474, __p2_474, __p3_474) __extension__ ({ \ - int32x4_t __s0_474 = __p0_474; \ - int32x4_t __s1_474 = __p1_474; \ - int32x4_t __s2_474 = __p2_474; \ - int32x4_t __rev0_474; __rev0_474 = __builtin_shufflevector(__s0_474, __s0_474, 3, 2, 1, 0); \ - int32x4_t __rev1_474; __rev1_474 = __builtin_shufflevector(__s1_474, __s1_474, 3, 2, 1, 0); \ - int32x4_t __rev2_474; __rev2_474 = __builtin_shufflevector(__s2_474, __s2_474, 3, 2, 1, 0); \ - int32x4_t __ret_474; \ - __ret_474 = __rev0_474 - __rev1_474 * __noswap_splatq_laneq_s32(__rev2_474, __p3_474); \ - __ret_474 = __builtin_shufflevector(__ret_474, __ret_474, 3, 2, 1, 0); \ - __ret_474; \ +#define vmlsq_laneq_s32(__p0_566, __p1_566, __p2_566, __p3_566) __extension__ ({ \ + int32x4_t __s0_566 = __p0_566; \ + int32x4_t __s1_566 = __p1_566; \ + int32x4_t __s2_566 = __p2_566; \ + int32x4_t __rev0_566; __rev0_566 = __builtin_shufflevector(__s0_566, __s0_566, 3, 2, 1, 0); \ + int32x4_t __rev1_566; __rev1_566 = __builtin_shufflevector(__s1_566, __s1_566, 3, 2, 1, 0); \ + int32x4_t __rev2_566; __rev2_566 = __builtin_shufflevector(__s2_566, __s2_566, 3, 2, 1, 0); \ + int32x4_t __ret_566; \ + __ret_566 = __rev0_566 - __rev1_566 * __noswap_splatq_laneq_s32(__rev2_566, __p3_566); \ + __ret_566 = __builtin_shufflevector(__ret_566, __ret_566, 3, 2, 1, 0); \ + __ret_566; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsq_laneq_s16(__p0_475, __p1_475, __p2_475, __p3_475) __extension__ ({ \ - int16x8_t __s0_475 = __p0_475; \ - int16x8_t __s1_475 = __p1_475; \ - int16x8_t 
__s2_475 = __p2_475; \ - int16x8_t __ret_475; \ - __ret_475 = __s0_475 - __s1_475 * splatq_laneq_s16(__s2_475, __p3_475); \ - __ret_475; \ +#define vmlsq_laneq_s16(__p0_567, __p1_567, __p2_567, __p3_567) __extension__ ({ \ + int16x8_t __s0_567 = __p0_567; \ + int16x8_t __s1_567 = __p1_567; \ + int16x8_t __s2_567 = __p2_567; \ + int16x8_t __ret_567; \ + __ret_567 = __s0_567 - __s1_567 * splatq_laneq_s16(__s2_567, __p3_567); \ + __ret_567; \ }) #else -#define vmlsq_laneq_s16(__p0_476, __p1_476, __p2_476, __p3_476) __extension__ ({ \ - int16x8_t __s0_476 = __p0_476; \ - int16x8_t __s1_476 = __p1_476; \ - int16x8_t __s2_476 = __p2_476; \ - int16x8_t __rev0_476; __rev0_476 = __builtin_shufflevector(__s0_476, __s0_476, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_476; __rev1_476 = __builtin_shufflevector(__s1_476, __s1_476, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev2_476; __rev2_476 = __builtin_shufflevector(__s2_476, __s2_476, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __ret_476; \ - __ret_476 = __rev0_476 - __rev1_476 * __noswap_splatq_laneq_s16(__rev2_476, __p3_476); \ - __ret_476 = __builtin_shufflevector(__ret_476, __ret_476, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_476; \ +#define vmlsq_laneq_s16(__p0_568, __p1_568, __p2_568, __p3_568) __extension__ ({ \ + int16x8_t __s0_568 = __p0_568; \ + int16x8_t __s1_568 = __p1_568; \ + int16x8_t __s2_568 = __p2_568; \ + int16x8_t __rev0_568; __rev0_568 = __builtin_shufflevector(__s0_568, __s0_568, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1_568; __rev1_568 = __builtin_shufflevector(__s1_568, __s1_568, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev2_568; __rev2_568 = __builtin_shufflevector(__s2_568, __s2_568, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __ret_568; \ + __ret_568 = __rev0_568 - __rev1_568 * __noswap_splatq_laneq_s16(__rev2_568, __p3_568); \ + __ret_568 = __builtin_shufflevector(__ret_568, __ret_568, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_568; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmls_laneq_u32(__p0_477, __p1_477, __p2_477, __p3_477) __extension__ ({ \ - uint32x2_t __s0_477 = __p0_477; \ - uint32x2_t __s1_477 = __p1_477; \ - uint32x4_t __s2_477 = __p2_477; \ - uint32x2_t __ret_477; \ - __ret_477 = __s0_477 - __s1_477 * splat_laneq_u32(__s2_477, __p3_477); \ - __ret_477; \ +#define vmls_laneq_u32(__p0_569, __p1_569, __p2_569, __p3_569) __extension__ ({ \ + uint32x2_t __s0_569 = __p0_569; \ + uint32x2_t __s1_569 = __p1_569; \ + uint32x4_t __s2_569 = __p2_569; \ + uint32x2_t __ret_569; \ + __ret_569 = __s0_569 - __s1_569 * splat_laneq_u32(__s2_569, __p3_569); \ + __ret_569; \ }) #else -#define vmls_laneq_u32(__p0_478, __p1_478, __p2_478, __p3_478) __extension__ ({ \ - uint32x2_t __s0_478 = __p0_478; \ - uint32x2_t __s1_478 = __p1_478; \ - uint32x4_t __s2_478 = __p2_478; \ - uint32x2_t __rev0_478; __rev0_478 = __builtin_shufflevector(__s0_478, __s0_478, 1, 0); \ - uint32x2_t __rev1_478; __rev1_478 = __builtin_shufflevector(__s1_478, __s1_478, 1, 0); \ - uint32x4_t __rev2_478; __rev2_478 = __builtin_shufflevector(__s2_478, __s2_478, 3, 2, 1, 0); \ - uint32x2_t __ret_478; \ - __ret_478 = __rev0_478 - __rev1_478 * __noswap_splat_laneq_u32(__rev2_478, __p3_478); \ - __ret_478 = __builtin_shufflevector(__ret_478, __ret_478, 1, 0); \ - __ret_478; \ +#define vmls_laneq_u32(__p0_570, __p1_570, __p2_570, __p3_570) __extension__ ({ \ + uint32x2_t __s0_570 = __p0_570; \ + uint32x2_t __s1_570 = __p1_570; \ + uint32x4_t __s2_570 = __p2_570; \ + uint32x2_t __rev0_570; __rev0_570 = __builtin_shufflevector(__s0_570, __s0_570, 1, 0); \ + uint32x2_t __rev1_570; 
__rev1_570 = __builtin_shufflevector(__s1_570, __s1_570, 1, 0); \ + uint32x4_t __rev2_570; __rev2_570 = __builtin_shufflevector(__s2_570, __s2_570, 3, 2, 1, 0); \ + uint32x2_t __ret_570; \ + __ret_570 = __rev0_570 - __rev1_570 * __noswap_splat_laneq_u32(__rev2_570, __p3_570); \ + __ret_570 = __builtin_shufflevector(__ret_570, __ret_570, 1, 0); \ + __ret_570; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmls_laneq_u16(__p0_479, __p1_479, __p2_479, __p3_479) __extension__ ({ \ - uint16x4_t __s0_479 = __p0_479; \ - uint16x4_t __s1_479 = __p1_479; \ - uint16x8_t __s2_479 = __p2_479; \ - uint16x4_t __ret_479; \ - __ret_479 = __s0_479 - __s1_479 * splat_laneq_u16(__s2_479, __p3_479); \ - __ret_479; \ +#define vmls_laneq_u16(__p0_571, __p1_571, __p2_571, __p3_571) __extension__ ({ \ + uint16x4_t __s0_571 = __p0_571; \ + uint16x4_t __s1_571 = __p1_571; \ + uint16x8_t __s2_571 = __p2_571; \ + uint16x4_t __ret_571; \ + __ret_571 = __s0_571 - __s1_571 * splat_laneq_u16(__s2_571, __p3_571); \ + __ret_571; \ }) #else -#define vmls_laneq_u16(__p0_480, __p1_480, __p2_480, __p3_480) __extension__ ({ \ - uint16x4_t __s0_480 = __p0_480; \ - uint16x4_t __s1_480 = __p1_480; \ - uint16x8_t __s2_480 = __p2_480; \ - uint16x4_t __rev0_480; __rev0_480 = __builtin_shufflevector(__s0_480, __s0_480, 3, 2, 1, 0); \ - uint16x4_t __rev1_480; __rev1_480 = __builtin_shufflevector(__s1_480, __s1_480, 3, 2, 1, 0); \ - uint16x8_t __rev2_480; __rev2_480 = __builtin_shufflevector(__s2_480, __s2_480, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x4_t __ret_480; \ - __ret_480 = __rev0_480 - __rev1_480 * __noswap_splat_laneq_u16(__rev2_480, __p3_480); \ - __ret_480 = __builtin_shufflevector(__ret_480, __ret_480, 3, 2, 1, 0); \ - __ret_480; \ +#define vmls_laneq_u16(__p0_572, __p1_572, __p2_572, __p3_572) __extension__ ({ \ + uint16x4_t __s0_572 = __p0_572; \ + uint16x4_t __s1_572 = __p1_572; \ + uint16x8_t __s2_572 = __p2_572; \ + uint16x4_t __rev0_572; __rev0_572 = __builtin_shufflevector(__s0_572, __s0_572, 3, 2, 1, 0); \ + uint16x4_t __rev1_572; __rev1_572 = __builtin_shufflevector(__s1_572, __s1_572, 3, 2, 1, 0); \ + uint16x8_t __rev2_572; __rev2_572 = __builtin_shufflevector(__s2_572, __s2_572, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x4_t __ret_572; \ + __ret_572 = __rev0_572 - __rev1_572 * __noswap_splat_laneq_u16(__rev2_572, __p3_572); \ + __ret_572 = __builtin_shufflevector(__ret_572, __ret_572, 3, 2, 1, 0); \ + __ret_572; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmls_laneq_f32(__p0_481, __p1_481, __p2_481, __p3_481) __extension__ ({ \ - float32x2_t __s0_481 = __p0_481; \ - float32x2_t __s1_481 = __p1_481; \ - float32x4_t __s2_481 = __p2_481; \ - float32x2_t __ret_481; \ - __ret_481 = __s0_481 - __s1_481 * splat_laneq_f32(__s2_481, __p3_481); \ - __ret_481; \ +#define vmls_laneq_f32(__p0_573, __p1_573, __p2_573, __p3_573) __extension__ ({ \ + float32x2_t __s0_573 = __p0_573; \ + float32x2_t __s1_573 = __p1_573; \ + float32x4_t __s2_573 = __p2_573; \ + float32x2_t __ret_573; \ + __ret_573 = __s0_573 - __s1_573 * splat_laneq_f32(__s2_573, __p3_573); \ + __ret_573; \ }) #else -#define vmls_laneq_f32(__p0_482, __p1_482, __p2_482, __p3_482) __extension__ ({ \ - float32x2_t __s0_482 = __p0_482; \ - float32x2_t __s1_482 = __p1_482; \ - float32x4_t __s2_482 = __p2_482; \ - float32x2_t __rev0_482; __rev0_482 = __builtin_shufflevector(__s0_482, __s0_482, 1, 0); \ - float32x2_t __rev1_482; __rev1_482 = __builtin_shufflevector(__s1_482, __s1_482, 1, 0); \ - float32x4_t __rev2_482; __rev2_482 = __builtin_shufflevector(__s2_482, __s2_482, 3, 2, 
1, 0); \ - float32x2_t __ret_482; \ - __ret_482 = __rev0_482 - __rev1_482 * __noswap_splat_laneq_f32(__rev2_482, __p3_482); \ - __ret_482 = __builtin_shufflevector(__ret_482, __ret_482, 1, 0); \ - __ret_482; \ +#define vmls_laneq_f32(__p0_574, __p1_574, __p2_574, __p3_574) __extension__ ({ \ + float32x2_t __s0_574 = __p0_574; \ + float32x2_t __s1_574 = __p1_574; \ + float32x4_t __s2_574 = __p2_574; \ + float32x2_t __rev0_574; __rev0_574 = __builtin_shufflevector(__s0_574, __s0_574, 1, 0); \ + float32x2_t __rev1_574; __rev1_574 = __builtin_shufflevector(__s1_574, __s1_574, 1, 0); \ + float32x4_t __rev2_574; __rev2_574 = __builtin_shufflevector(__s2_574, __s2_574, 3, 2, 1, 0); \ + float32x2_t __ret_574; \ + __ret_574 = __rev0_574 - __rev1_574 * __noswap_splat_laneq_f32(__rev2_574, __p3_574); \ + __ret_574 = __builtin_shufflevector(__ret_574, __ret_574, 1, 0); \ + __ret_574; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmls_laneq_s32(__p0_483, __p1_483, __p2_483, __p3_483) __extension__ ({ \ - int32x2_t __s0_483 = __p0_483; \ - int32x2_t __s1_483 = __p1_483; \ - int32x4_t __s2_483 = __p2_483; \ - int32x2_t __ret_483; \ - __ret_483 = __s0_483 - __s1_483 * splat_laneq_s32(__s2_483, __p3_483); \ - __ret_483; \ +#define vmls_laneq_s32(__p0_575, __p1_575, __p2_575, __p3_575) __extension__ ({ \ + int32x2_t __s0_575 = __p0_575; \ + int32x2_t __s1_575 = __p1_575; \ + int32x4_t __s2_575 = __p2_575; \ + int32x2_t __ret_575; \ + __ret_575 = __s0_575 - __s1_575 * splat_laneq_s32(__s2_575, __p3_575); \ + __ret_575; \ }) #else -#define vmls_laneq_s32(__p0_484, __p1_484, __p2_484, __p3_484) __extension__ ({ \ - int32x2_t __s0_484 = __p0_484; \ - int32x2_t __s1_484 = __p1_484; \ - int32x4_t __s2_484 = __p2_484; \ - int32x2_t __rev0_484; __rev0_484 = __builtin_shufflevector(__s0_484, __s0_484, 1, 0); \ - int32x2_t __rev1_484; __rev1_484 = __builtin_shufflevector(__s1_484, __s1_484, 1, 0); \ - int32x4_t __rev2_484; __rev2_484 = __builtin_shufflevector(__s2_484, __s2_484, 3, 2, 1, 0); \ - int32x2_t __ret_484; \ - __ret_484 = __rev0_484 - __rev1_484 * __noswap_splat_laneq_s32(__rev2_484, __p3_484); \ - __ret_484 = __builtin_shufflevector(__ret_484, __ret_484, 1, 0); \ - __ret_484; \ +#define vmls_laneq_s32(__p0_576, __p1_576, __p2_576, __p3_576) __extension__ ({ \ + int32x2_t __s0_576 = __p0_576; \ + int32x2_t __s1_576 = __p1_576; \ + int32x4_t __s2_576 = __p2_576; \ + int32x2_t __rev0_576; __rev0_576 = __builtin_shufflevector(__s0_576, __s0_576, 1, 0); \ + int32x2_t __rev1_576; __rev1_576 = __builtin_shufflevector(__s1_576, __s1_576, 1, 0); \ + int32x4_t __rev2_576; __rev2_576 = __builtin_shufflevector(__s2_576, __s2_576, 3, 2, 1, 0); \ + int32x2_t __ret_576; \ + __ret_576 = __rev0_576 - __rev1_576 * __noswap_splat_laneq_s32(__rev2_576, __p3_576); \ + __ret_576 = __builtin_shufflevector(__ret_576, __ret_576, 1, 0); \ + __ret_576; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmls_laneq_s16(__p0_485, __p1_485, __p2_485, __p3_485) __extension__ ({ \ - int16x4_t __s0_485 = __p0_485; \ - int16x4_t __s1_485 = __p1_485; \ - int16x8_t __s2_485 = __p2_485; \ - int16x4_t __ret_485; \ - __ret_485 = __s0_485 - __s1_485 * splat_laneq_s16(__s2_485, __p3_485); \ - __ret_485; \ +#define vmls_laneq_s16(__p0_577, __p1_577, __p2_577, __p3_577) __extension__ ({ \ + int16x4_t __s0_577 = __p0_577; \ + int16x4_t __s1_577 = __p1_577; \ + int16x8_t __s2_577 = __p2_577; \ + int16x4_t __ret_577; \ + __ret_577 = __s0_577 - __s1_577 * splat_laneq_s16(__s2_577, __p3_577); \ + __ret_577; \ }) #else -#define vmls_laneq_s16(__p0_486, 
__p1_486, __p2_486, __p3_486) __extension__ ({ \ - int16x4_t __s0_486 = __p0_486; \ - int16x4_t __s1_486 = __p1_486; \ - int16x8_t __s2_486 = __p2_486; \ - int16x4_t __rev0_486; __rev0_486 = __builtin_shufflevector(__s0_486, __s0_486, 3, 2, 1, 0); \ - int16x4_t __rev1_486; __rev1_486 = __builtin_shufflevector(__s1_486, __s1_486, 3, 2, 1, 0); \ - int16x8_t __rev2_486; __rev2_486 = __builtin_shufflevector(__s2_486, __s2_486, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __ret_486; \ - __ret_486 = __rev0_486 - __rev1_486 * __noswap_splat_laneq_s16(__rev2_486, __p3_486); \ - __ret_486 = __builtin_shufflevector(__ret_486, __ret_486, 3, 2, 1, 0); \ - __ret_486; \ +#define vmls_laneq_s16(__p0_578, __p1_578, __p2_578, __p3_578) __extension__ ({ \ + int16x4_t __s0_578 = __p0_578; \ + int16x4_t __s1_578 = __p1_578; \ + int16x8_t __s2_578 = __p2_578; \ + int16x4_t __rev0_578; __rev0_578 = __builtin_shufflevector(__s0_578, __s0_578, 3, 2, 1, 0); \ + int16x4_t __rev1_578; __rev1_578 = __builtin_shufflevector(__s1_578, __s1_578, 3, 2, 1, 0); \ + int16x8_t __rev2_578; __rev2_578 = __builtin_shufflevector(__s2_578, __s2_578, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __ret_578; \ + __ret_578 = __rev0_578 - __rev1_578 * __noswap_splat_laneq_s16(__rev2_578, __p3_578); \ + __ret_578 = __builtin_shufflevector(__ret_578, __ret_578, 3, 2, 1, 0); \ + __ret_578; \ }) #endif #ifdef __LITTLE_ENDIAN__ -__ai float64x2_t vmlsq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) { - float64x2_t __ret; - __ret = __p0 - __p1 * (float64x2_t) {__p2, __p2}; - return __ret; -} -#else -__ai float64x2_t vmlsq_n_f64(float64x2_t __p0, float64x2_t __p1, float64_t __p2) { - float64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - float64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); - float64x2_t __ret; - __ret = __rev0 - __rev1 * (float64x2_t) {__p2, __p2}; - __ret = __builtin_shufflevector(__ret, __ret, 1, 0); - return __ret; -} -#endif - -#ifdef __LITTLE_ENDIAN__ -#define vmlsl_high_lane_u32(__p0_487, __p1_487, __p2_487, __p3_487) __extension__ ({ \ - uint64x2_t __s0_487 = __p0_487; \ - uint32x4_t __s1_487 = __p1_487; \ - uint32x2_t __s2_487 = __p2_487; \ - uint64x2_t __ret_487; \ - __ret_487 = __s0_487 - vmull_u32(vget_high_u32(__s1_487), splat_lane_u32(__s2_487, __p3_487)); \ - __ret_487; \ +#define vmlsl_high_lane_u32(__p0_579, __p1_579, __p2_579, __p3_579) __extension__ ({ \ + uint64x2_t __s0_579 = __p0_579; \ + uint32x4_t __s1_579 = __p1_579; \ + uint32x2_t __s2_579 = __p2_579; \ + uint64x2_t __ret_579; \ + __ret_579 = __s0_579 - vmull_u32(vget_high_u32(__s1_579), splat_lane_u32(__s2_579, __p3_579)); \ + __ret_579; \ }) #else -#define vmlsl_high_lane_u32(__p0_488, __p1_488, __p2_488, __p3_488) __extension__ ({ \ - uint64x2_t __s0_488 = __p0_488; \ - uint32x4_t __s1_488 = __p1_488; \ - uint32x2_t __s2_488 = __p2_488; \ - uint64x2_t __rev0_488; __rev0_488 = __builtin_shufflevector(__s0_488, __s0_488, 1, 0); \ - uint32x4_t __rev1_488; __rev1_488 = __builtin_shufflevector(__s1_488, __s1_488, 3, 2, 1, 0); \ - uint32x2_t __rev2_488; __rev2_488 = __builtin_shufflevector(__s2_488, __s2_488, 1, 0); \ - uint64x2_t __ret_488; \ - __ret_488 = __rev0_488 - __noswap_vmull_u32(__noswap_vget_high_u32(__rev1_488), __noswap_splat_lane_u32(__rev2_488, __p3_488)); \ - __ret_488 = __builtin_shufflevector(__ret_488, __ret_488, 1, 0); \ - __ret_488; \ +#define vmlsl_high_lane_u32(__p0_580, __p1_580, __p2_580, __p3_580) __extension__ ({ \ + uint64x2_t __s0_580 = __p0_580; \ + uint32x4_t __s1_580 = __p1_580; \ + 
uint32x2_t __s2_580 = __p2_580; \ + uint64x2_t __rev0_580; __rev0_580 = __builtin_shufflevector(__s0_580, __s0_580, 1, 0); \ + uint32x4_t __rev1_580; __rev1_580 = __builtin_shufflevector(__s1_580, __s1_580, 3, 2, 1, 0); \ + uint32x2_t __rev2_580; __rev2_580 = __builtin_shufflevector(__s2_580, __s2_580, 1, 0); \ + uint64x2_t __ret_580; \ + __ret_580 = __rev0_580 - __noswap_vmull_u32(__noswap_vget_high_u32(__rev1_580), __noswap_splat_lane_u32(__rev2_580, __p3_580)); \ + __ret_580 = __builtin_shufflevector(__ret_580, __ret_580, 1, 0); \ + __ret_580; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_high_lane_u16(__p0_489, __p1_489, __p2_489, __p3_489) __extension__ ({ \ - uint32x4_t __s0_489 = __p0_489; \ - uint16x8_t __s1_489 = __p1_489; \ - uint16x4_t __s2_489 = __p2_489; \ - uint32x4_t __ret_489; \ - __ret_489 = __s0_489 - vmull_u16(vget_high_u16(__s1_489), splat_lane_u16(__s2_489, __p3_489)); \ - __ret_489; \ +#define vmlsl_high_lane_u16(__p0_581, __p1_581, __p2_581, __p3_581) __extension__ ({ \ + uint32x4_t __s0_581 = __p0_581; \ + uint16x8_t __s1_581 = __p1_581; \ + uint16x4_t __s2_581 = __p2_581; \ + uint32x4_t __ret_581; \ + __ret_581 = __s0_581 - vmull_u16(vget_high_u16(__s1_581), splat_lane_u16(__s2_581, __p3_581)); \ + __ret_581; \ }) #else -#define vmlsl_high_lane_u16(__p0_490, __p1_490, __p2_490, __p3_490) __extension__ ({ \ - uint32x4_t __s0_490 = __p0_490; \ - uint16x8_t __s1_490 = __p1_490; \ - uint16x4_t __s2_490 = __p2_490; \ - uint32x4_t __rev0_490; __rev0_490 = __builtin_shufflevector(__s0_490, __s0_490, 3, 2, 1, 0); \ - uint16x8_t __rev1_490; __rev1_490 = __builtin_shufflevector(__s1_490, __s1_490, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x4_t __rev2_490; __rev2_490 = __builtin_shufflevector(__s2_490, __s2_490, 3, 2, 1, 0); \ - uint32x4_t __ret_490; \ - __ret_490 = __rev0_490 - __noswap_vmull_u16(__noswap_vget_high_u16(__rev1_490), __noswap_splat_lane_u16(__rev2_490, __p3_490)); \ - __ret_490 = __builtin_shufflevector(__ret_490, __ret_490, 3, 2, 1, 0); \ - __ret_490; \ +#define vmlsl_high_lane_u16(__p0_582, __p1_582, __p2_582, __p3_582) __extension__ ({ \ + uint32x4_t __s0_582 = __p0_582; \ + uint16x8_t __s1_582 = __p1_582; \ + uint16x4_t __s2_582 = __p2_582; \ + uint32x4_t __rev0_582; __rev0_582 = __builtin_shufflevector(__s0_582, __s0_582, 3, 2, 1, 0); \ + uint16x8_t __rev1_582; __rev1_582 = __builtin_shufflevector(__s1_582, __s1_582, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x4_t __rev2_582; __rev2_582 = __builtin_shufflevector(__s2_582, __s2_582, 3, 2, 1, 0); \ + uint32x4_t __ret_582; \ + __ret_582 = __rev0_582 - __noswap_vmull_u16(__noswap_vget_high_u16(__rev1_582), __noswap_splat_lane_u16(__rev2_582, __p3_582)); \ + __ret_582 = __builtin_shufflevector(__ret_582, __ret_582, 3, 2, 1, 0); \ + __ret_582; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_high_lane_s32(__p0_491, __p1_491, __p2_491, __p3_491) __extension__ ({ \ - int64x2_t __s0_491 = __p0_491; \ - int32x4_t __s1_491 = __p1_491; \ - int32x2_t __s2_491 = __p2_491; \ - int64x2_t __ret_491; \ - __ret_491 = __s0_491 - vmull_s32(vget_high_s32(__s1_491), splat_lane_s32(__s2_491, __p3_491)); \ - __ret_491; \ +#define vmlsl_high_lane_s32(__p0_583, __p1_583, __p2_583, __p3_583) __extension__ ({ \ + int64x2_t __s0_583 = __p0_583; \ + int32x4_t __s1_583 = __p1_583; \ + int32x2_t __s2_583 = __p2_583; \ + int64x2_t __ret_583; \ + __ret_583 = __s0_583 - vmull_s32(vget_high_s32(__s1_583), splat_lane_s32(__s2_583, __p3_583)); \ + __ret_583; \ }) #else -#define vmlsl_high_lane_s32(__p0_492, __p1_492, __p2_492, __p3_492) 
__extension__ ({ \ - int64x2_t __s0_492 = __p0_492; \ - int32x4_t __s1_492 = __p1_492; \ - int32x2_t __s2_492 = __p2_492; \ - int64x2_t __rev0_492; __rev0_492 = __builtin_shufflevector(__s0_492, __s0_492, 1, 0); \ - int32x4_t __rev1_492; __rev1_492 = __builtin_shufflevector(__s1_492, __s1_492, 3, 2, 1, 0); \ - int32x2_t __rev2_492; __rev2_492 = __builtin_shufflevector(__s2_492, __s2_492, 1, 0); \ - int64x2_t __ret_492; \ - __ret_492 = __rev0_492 - __noswap_vmull_s32(__noswap_vget_high_s32(__rev1_492), __noswap_splat_lane_s32(__rev2_492, __p3_492)); \ - __ret_492 = __builtin_shufflevector(__ret_492, __ret_492, 1, 0); \ - __ret_492; \ +#define vmlsl_high_lane_s32(__p0_584, __p1_584, __p2_584, __p3_584) __extension__ ({ \ + int64x2_t __s0_584 = __p0_584; \ + int32x4_t __s1_584 = __p1_584; \ + int32x2_t __s2_584 = __p2_584; \ + int64x2_t __rev0_584; __rev0_584 = __builtin_shufflevector(__s0_584, __s0_584, 1, 0); \ + int32x4_t __rev1_584; __rev1_584 = __builtin_shufflevector(__s1_584, __s1_584, 3, 2, 1, 0); \ + int32x2_t __rev2_584; __rev2_584 = __builtin_shufflevector(__s2_584, __s2_584, 1, 0); \ + int64x2_t __ret_584; \ + __ret_584 = __rev0_584 - __noswap_vmull_s32(__noswap_vget_high_s32(__rev1_584), __noswap_splat_lane_s32(__rev2_584, __p3_584)); \ + __ret_584 = __builtin_shufflevector(__ret_584, __ret_584, 1, 0); \ + __ret_584; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_high_lane_s16(__p0_493, __p1_493, __p2_493, __p3_493) __extension__ ({ \ - int32x4_t __s0_493 = __p0_493; \ - int16x8_t __s1_493 = __p1_493; \ - int16x4_t __s2_493 = __p2_493; \ - int32x4_t __ret_493; \ - __ret_493 = __s0_493 - vmull_s16(vget_high_s16(__s1_493), splat_lane_s16(__s2_493, __p3_493)); \ - __ret_493; \ +#define vmlsl_high_lane_s16(__p0_585, __p1_585, __p2_585, __p3_585) __extension__ ({ \ + int32x4_t __s0_585 = __p0_585; \ + int16x8_t __s1_585 = __p1_585; \ + int16x4_t __s2_585 = __p2_585; \ + int32x4_t __ret_585; \ + __ret_585 = __s0_585 - vmull_s16(vget_high_s16(__s1_585), splat_lane_s16(__s2_585, __p3_585)); \ + __ret_585; \ }) #else -#define vmlsl_high_lane_s16(__p0_494, __p1_494, __p2_494, __p3_494) __extension__ ({ \ - int32x4_t __s0_494 = __p0_494; \ - int16x8_t __s1_494 = __p1_494; \ - int16x4_t __s2_494 = __p2_494; \ - int32x4_t __rev0_494; __rev0_494 = __builtin_shufflevector(__s0_494, __s0_494, 3, 2, 1, 0); \ - int16x8_t __rev1_494; __rev1_494 = __builtin_shufflevector(__s1_494, __s1_494, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev2_494; __rev2_494 = __builtin_shufflevector(__s2_494, __s2_494, 3, 2, 1, 0); \ - int32x4_t __ret_494; \ - __ret_494 = __rev0_494 - __noswap_vmull_s16(__noswap_vget_high_s16(__rev1_494), __noswap_splat_lane_s16(__rev2_494, __p3_494)); \ - __ret_494 = __builtin_shufflevector(__ret_494, __ret_494, 3, 2, 1, 0); \ - __ret_494; \ +#define vmlsl_high_lane_s16(__p0_586, __p1_586, __p2_586, __p3_586) __extension__ ({ \ + int32x4_t __s0_586 = __p0_586; \ + int16x8_t __s1_586 = __p1_586; \ + int16x4_t __s2_586 = __p2_586; \ + int32x4_t __rev0_586; __rev0_586 = __builtin_shufflevector(__s0_586, __s0_586, 3, 2, 1, 0); \ + int16x8_t __rev1_586; __rev1_586 = __builtin_shufflevector(__s1_586, __s1_586, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __rev2_586; __rev2_586 = __builtin_shufflevector(__s2_586, __s2_586, 3, 2, 1, 0); \ + int32x4_t __ret_586; \ + __ret_586 = __rev0_586 - __noswap_vmull_s16(__noswap_vget_high_s16(__rev1_586), __noswap_splat_lane_s16(__rev2_586, __p3_586)); \ + __ret_586 = __builtin_shufflevector(__ret_586, __ret_586, 3, 2, 1, 0); \ + __ret_586; \ }) #endif 
#ifdef __LITTLE_ENDIAN__ -#define vmlsl_high_laneq_u32(__p0_495, __p1_495, __p2_495, __p3_495) __extension__ ({ \ - uint64x2_t __s0_495 = __p0_495; \ - uint32x4_t __s1_495 = __p1_495; \ - uint32x4_t __s2_495 = __p2_495; \ - uint64x2_t __ret_495; \ - __ret_495 = __s0_495 - vmull_u32(vget_high_u32(__s1_495), splat_laneq_u32(__s2_495, __p3_495)); \ - __ret_495; \ +#define vmlsl_high_laneq_u32(__p0_587, __p1_587, __p2_587, __p3_587) __extension__ ({ \ + uint64x2_t __s0_587 = __p0_587; \ + uint32x4_t __s1_587 = __p1_587; \ + uint32x4_t __s2_587 = __p2_587; \ + uint64x2_t __ret_587; \ + __ret_587 = __s0_587 - vmull_u32(vget_high_u32(__s1_587), splat_laneq_u32(__s2_587, __p3_587)); \ + __ret_587; \ }) #else -#define vmlsl_high_laneq_u32(__p0_496, __p1_496, __p2_496, __p3_496) __extension__ ({ \ - uint64x2_t __s0_496 = __p0_496; \ - uint32x4_t __s1_496 = __p1_496; \ - uint32x4_t __s2_496 = __p2_496; \ - uint64x2_t __rev0_496; __rev0_496 = __builtin_shufflevector(__s0_496, __s0_496, 1, 0); \ - uint32x4_t __rev1_496; __rev1_496 = __builtin_shufflevector(__s1_496, __s1_496, 3, 2, 1, 0); \ - uint32x4_t __rev2_496; __rev2_496 = __builtin_shufflevector(__s2_496, __s2_496, 3, 2, 1, 0); \ - uint64x2_t __ret_496; \ - __ret_496 = __rev0_496 - __noswap_vmull_u32(__noswap_vget_high_u32(__rev1_496), __noswap_splat_laneq_u32(__rev2_496, __p3_496)); \ - __ret_496 = __builtin_shufflevector(__ret_496, __ret_496, 1, 0); \ - __ret_496; \ +#define vmlsl_high_laneq_u32(__p0_588, __p1_588, __p2_588, __p3_588) __extension__ ({ \ + uint64x2_t __s0_588 = __p0_588; \ + uint32x4_t __s1_588 = __p1_588; \ + uint32x4_t __s2_588 = __p2_588; \ + uint64x2_t __rev0_588; __rev0_588 = __builtin_shufflevector(__s0_588, __s0_588, 1, 0); \ + uint32x4_t __rev1_588; __rev1_588 = __builtin_shufflevector(__s1_588, __s1_588, 3, 2, 1, 0); \ + uint32x4_t __rev2_588; __rev2_588 = __builtin_shufflevector(__s2_588, __s2_588, 3, 2, 1, 0); \ + uint64x2_t __ret_588; \ + __ret_588 = __rev0_588 - __noswap_vmull_u32(__noswap_vget_high_u32(__rev1_588), __noswap_splat_laneq_u32(__rev2_588, __p3_588)); \ + __ret_588 = __builtin_shufflevector(__ret_588, __ret_588, 1, 0); \ + __ret_588; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_high_laneq_u16(__p0_497, __p1_497, __p2_497, __p3_497) __extension__ ({ \ - uint32x4_t __s0_497 = __p0_497; \ - uint16x8_t __s1_497 = __p1_497; \ - uint16x8_t __s2_497 = __p2_497; \ - uint32x4_t __ret_497; \ - __ret_497 = __s0_497 - vmull_u16(vget_high_u16(__s1_497), splat_laneq_u16(__s2_497, __p3_497)); \ - __ret_497; \ +#define vmlsl_high_laneq_u16(__p0_589, __p1_589, __p2_589, __p3_589) __extension__ ({ \ + uint32x4_t __s0_589 = __p0_589; \ + uint16x8_t __s1_589 = __p1_589; \ + uint16x8_t __s2_589 = __p2_589; \ + uint32x4_t __ret_589; \ + __ret_589 = __s0_589 - vmull_u16(vget_high_u16(__s1_589), splat_laneq_u16(__s2_589, __p3_589)); \ + __ret_589; \ }) #else -#define vmlsl_high_laneq_u16(__p0_498, __p1_498, __p2_498, __p3_498) __extension__ ({ \ - uint32x4_t __s0_498 = __p0_498; \ - uint16x8_t __s1_498 = __p1_498; \ - uint16x8_t __s2_498 = __p2_498; \ - uint32x4_t __rev0_498; __rev0_498 = __builtin_shufflevector(__s0_498, __s0_498, 3, 2, 1, 0); \ - uint16x8_t __rev1_498; __rev1_498 = __builtin_shufflevector(__s1_498, __s1_498, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev2_498; __rev2_498 = __builtin_shufflevector(__s2_498, __s2_498, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint32x4_t __ret_498; \ - __ret_498 = __rev0_498 - __noswap_vmull_u16(__noswap_vget_high_u16(__rev1_498), __noswap_splat_laneq_u16(__rev2_498, __p3_498)); \ - 
__ret_498 = __builtin_shufflevector(__ret_498, __ret_498, 3, 2, 1, 0); \ - __ret_498; \ +#define vmlsl_high_laneq_u16(__p0_590, __p1_590, __p2_590, __p3_590) __extension__ ({ \ + uint32x4_t __s0_590 = __p0_590; \ + uint16x8_t __s1_590 = __p1_590; \ + uint16x8_t __s2_590 = __p2_590; \ + uint32x4_t __rev0_590; __rev0_590 = __builtin_shufflevector(__s0_590, __s0_590, 3, 2, 1, 0); \ + uint16x8_t __rev1_590; __rev1_590 = __builtin_shufflevector(__s1_590, __s1_590, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev2_590; __rev2_590 = __builtin_shufflevector(__s2_590, __s2_590, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint32x4_t __ret_590; \ + __ret_590 = __rev0_590 - __noswap_vmull_u16(__noswap_vget_high_u16(__rev1_590), __noswap_splat_laneq_u16(__rev2_590, __p3_590)); \ + __ret_590 = __builtin_shufflevector(__ret_590, __ret_590, 3, 2, 1, 0); \ + __ret_590; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_high_laneq_s32(__p0_499, __p1_499, __p2_499, __p3_499) __extension__ ({ \ - int64x2_t __s0_499 = __p0_499; \ - int32x4_t __s1_499 = __p1_499; \ - int32x4_t __s2_499 = __p2_499; \ - int64x2_t __ret_499; \ - __ret_499 = __s0_499 - vmull_s32(vget_high_s32(__s1_499), splat_laneq_s32(__s2_499, __p3_499)); \ - __ret_499; \ +#define vmlsl_high_laneq_s32(__p0_591, __p1_591, __p2_591, __p3_591) __extension__ ({ \ + int64x2_t __s0_591 = __p0_591; \ + int32x4_t __s1_591 = __p1_591; \ + int32x4_t __s2_591 = __p2_591; \ + int64x2_t __ret_591; \ + __ret_591 = __s0_591 - vmull_s32(vget_high_s32(__s1_591), splat_laneq_s32(__s2_591, __p3_591)); \ + __ret_591; \ }) #else -#define vmlsl_high_laneq_s32(__p0_500, __p1_500, __p2_500, __p3_500) __extension__ ({ \ - int64x2_t __s0_500 = __p0_500; \ - int32x4_t __s1_500 = __p1_500; \ - int32x4_t __s2_500 = __p2_500; \ - int64x2_t __rev0_500; __rev0_500 = __builtin_shufflevector(__s0_500, __s0_500, 1, 0); \ - int32x4_t __rev1_500; __rev1_500 = __builtin_shufflevector(__s1_500, __s1_500, 3, 2, 1, 0); \ - int32x4_t __rev2_500; __rev2_500 = __builtin_shufflevector(__s2_500, __s2_500, 3, 2, 1, 0); \ - int64x2_t __ret_500; \ - __ret_500 = __rev0_500 - __noswap_vmull_s32(__noswap_vget_high_s32(__rev1_500), __noswap_splat_laneq_s32(__rev2_500, __p3_500)); \ - __ret_500 = __builtin_shufflevector(__ret_500, __ret_500, 1, 0); \ - __ret_500; \ +#define vmlsl_high_laneq_s32(__p0_592, __p1_592, __p2_592, __p3_592) __extension__ ({ \ + int64x2_t __s0_592 = __p0_592; \ + int32x4_t __s1_592 = __p1_592; \ + int32x4_t __s2_592 = __p2_592; \ + int64x2_t __rev0_592; __rev0_592 = __builtin_shufflevector(__s0_592, __s0_592, 1, 0); \ + int32x4_t __rev1_592; __rev1_592 = __builtin_shufflevector(__s1_592, __s1_592, 3, 2, 1, 0); \ + int32x4_t __rev2_592; __rev2_592 = __builtin_shufflevector(__s2_592, __s2_592, 3, 2, 1, 0); \ + int64x2_t __ret_592; \ + __ret_592 = __rev0_592 - __noswap_vmull_s32(__noswap_vget_high_s32(__rev1_592), __noswap_splat_laneq_s32(__rev2_592, __p3_592)); \ + __ret_592 = __builtin_shufflevector(__ret_592, __ret_592, 1, 0); \ + __ret_592; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_high_laneq_s16(__p0_501, __p1_501, __p2_501, __p3_501) __extension__ ({ \ - int32x4_t __s0_501 = __p0_501; \ - int16x8_t __s1_501 = __p1_501; \ - int16x8_t __s2_501 = __p2_501; \ - int32x4_t __ret_501; \ - __ret_501 = __s0_501 - vmull_s16(vget_high_s16(__s1_501), splat_laneq_s16(__s2_501, __p3_501)); \ - __ret_501; \ +#define vmlsl_high_laneq_s16(__p0_593, __p1_593, __p2_593, __p3_593) __extension__ ({ \ + int32x4_t __s0_593 = __p0_593; \ + int16x8_t __s1_593 = __p1_593; \ + int16x8_t __s2_593 = 
__p2_593; \ + int32x4_t __ret_593; \ + __ret_593 = __s0_593 - vmull_s16(vget_high_s16(__s1_593), splat_laneq_s16(__s2_593, __p3_593)); \ + __ret_593; \ }) #else -#define vmlsl_high_laneq_s16(__p0_502, __p1_502, __p2_502, __p3_502) __extension__ ({ \ - int32x4_t __s0_502 = __p0_502; \ - int16x8_t __s1_502 = __p1_502; \ - int16x8_t __s2_502 = __p2_502; \ - int32x4_t __rev0_502; __rev0_502 = __builtin_shufflevector(__s0_502, __s0_502, 3, 2, 1, 0); \ - int16x8_t __rev1_502; __rev1_502 = __builtin_shufflevector(__s1_502, __s1_502, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev2_502; __rev2_502 = __builtin_shufflevector(__s2_502, __s2_502, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x4_t __ret_502; \ - __ret_502 = __rev0_502 - __noswap_vmull_s16(__noswap_vget_high_s16(__rev1_502), __noswap_splat_laneq_s16(__rev2_502, __p3_502)); \ - __ret_502 = __builtin_shufflevector(__ret_502, __ret_502, 3, 2, 1, 0); \ - __ret_502; \ +#define vmlsl_high_laneq_s16(__p0_594, __p1_594, __p2_594, __p3_594) __extension__ ({ \ + int32x4_t __s0_594 = __p0_594; \ + int16x8_t __s1_594 = __p1_594; \ + int16x8_t __s2_594 = __p2_594; \ + int32x4_t __rev0_594; __rev0_594 = __builtin_shufflevector(__s0_594, __s0_594, 3, 2, 1, 0); \ + int16x8_t __rev1_594; __rev1_594 = __builtin_shufflevector(__s1_594, __s1_594, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev2_594; __rev2_594 = __builtin_shufflevector(__s2_594, __s2_594, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x4_t __ret_594; \ + __ret_594 = __rev0_594 - __noswap_vmull_s16(__noswap_vget_high_s16(__rev1_594), __noswap_splat_laneq_s16(__rev2_594, __p3_594)); \ + __ret_594 = __builtin_shufflevector(__ret_594, __ret_594, 3, 2, 1, 0); \ + __ret_594; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_laneq_u32(__p0_503, __p1_503, __p2_503, __p3_503) __extension__ ({ \ - uint64x2_t __s0_503 = __p0_503; \ - uint32x2_t __s1_503 = __p1_503; \ - uint32x4_t __s2_503 = __p2_503; \ - uint64x2_t __ret_503; \ - __ret_503 = __s0_503 - vmull_u32(__s1_503, splat_laneq_u32(__s2_503, __p3_503)); \ - __ret_503; \ +#define vmlsl_laneq_u32(__p0_595, __p1_595, __p2_595, __p3_595) __extension__ ({ \ + uint64x2_t __s0_595 = __p0_595; \ + uint32x2_t __s1_595 = __p1_595; \ + uint32x4_t __s2_595 = __p2_595; \ + uint64x2_t __ret_595; \ + __ret_595 = __s0_595 - vmull_u32(__s1_595, splat_laneq_u32(__s2_595, __p3_595)); \ + __ret_595; \ }) #else -#define vmlsl_laneq_u32(__p0_504, __p1_504, __p2_504, __p3_504) __extension__ ({ \ - uint64x2_t __s0_504 = __p0_504; \ - uint32x2_t __s1_504 = __p1_504; \ - uint32x4_t __s2_504 = __p2_504; \ - uint64x2_t __rev0_504; __rev0_504 = __builtin_shufflevector(__s0_504, __s0_504, 1, 0); \ - uint32x2_t __rev1_504; __rev1_504 = __builtin_shufflevector(__s1_504, __s1_504, 1, 0); \ - uint32x4_t __rev2_504; __rev2_504 = __builtin_shufflevector(__s2_504, __s2_504, 3, 2, 1, 0); \ - uint64x2_t __ret_504; \ - __ret_504 = __rev0_504 - __noswap_vmull_u32(__rev1_504, __noswap_splat_laneq_u32(__rev2_504, __p3_504)); \ - __ret_504 = __builtin_shufflevector(__ret_504, __ret_504, 1, 0); \ - __ret_504; \ +#define vmlsl_laneq_u32(__p0_596, __p1_596, __p2_596, __p3_596) __extension__ ({ \ + uint64x2_t __s0_596 = __p0_596; \ + uint32x2_t __s1_596 = __p1_596; \ + uint32x4_t __s2_596 = __p2_596; \ + uint64x2_t __rev0_596; __rev0_596 = __builtin_shufflevector(__s0_596, __s0_596, 1, 0); \ + uint32x2_t __rev1_596; __rev1_596 = __builtin_shufflevector(__s1_596, __s1_596, 1, 0); \ + uint32x4_t __rev2_596; __rev2_596 = __builtin_shufflevector(__s2_596, __s2_596, 3, 2, 1, 0); \ + uint64x2_t __ret_596; \ + __ret_596 = 
__rev0_596 - __noswap_vmull_u32(__rev1_596, __noswap_splat_laneq_u32(__rev2_596, __p3_596)); \ + __ret_596 = __builtin_shufflevector(__ret_596, __ret_596, 1, 0); \ + __ret_596; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_laneq_u16(__p0_505, __p1_505, __p2_505, __p3_505) __extension__ ({ \ - uint32x4_t __s0_505 = __p0_505; \ - uint16x4_t __s1_505 = __p1_505; \ - uint16x8_t __s2_505 = __p2_505; \ - uint32x4_t __ret_505; \ - __ret_505 = __s0_505 - vmull_u16(__s1_505, splat_laneq_u16(__s2_505, __p3_505)); \ - __ret_505; \ +#define vmlsl_laneq_u16(__p0_597, __p1_597, __p2_597, __p3_597) __extension__ ({ \ + uint32x4_t __s0_597 = __p0_597; \ + uint16x4_t __s1_597 = __p1_597; \ + uint16x8_t __s2_597 = __p2_597; \ + uint32x4_t __ret_597; \ + __ret_597 = __s0_597 - vmull_u16(__s1_597, splat_laneq_u16(__s2_597, __p3_597)); \ + __ret_597; \ }) #else -#define vmlsl_laneq_u16(__p0_506, __p1_506, __p2_506, __p3_506) __extension__ ({ \ - uint32x4_t __s0_506 = __p0_506; \ - uint16x4_t __s1_506 = __p1_506; \ - uint16x8_t __s2_506 = __p2_506; \ - uint32x4_t __rev0_506; __rev0_506 = __builtin_shufflevector(__s0_506, __s0_506, 3, 2, 1, 0); \ - uint16x4_t __rev1_506; __rev1_506 = __builtin_shufflevector(__s1_506, __s1_506, 3, 2, 1, 0); \ - uint16x8_t __rev2_506; __rev2_506 = __builtin_shufflevector(__s2_506, __s2_506, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint32x4_t __ret_506; \ - __ret_506 = __rev0_506 - __noswap_vmull_u16(__rev1_506, __noswap_splat_laneq_u16(__rev2_506, __p3_506)); \ - __ret_506 = __builtin_shufflevector(__ret_506, __ret_506, 3, 2, 1, 0); \ - __ret_506; \ +#define vmlsl_laneq_u16(__p0_598, __p1_598, __p2_598, __p3_598) __extension__ ({ \ + uint32x4_t __s0_598 = __p0_598; \ + uint16x4_t __s1_598 = __p1_598; \ + uint16x8_t __s2_598 = __p2_598; \ + uint32x4_t __rev0_598; __rev0_598 = __builtin_shufflevector(__s0_598, __s0_598, 3, 2, 1, 0); \ + uint16x4_t __rev1_598; __rev1_598 = __builtin_shufflevector(__s1_598, __s1_598, 3, 2, 1, 0); \ + uint16x8_t __rev2_598; __rev2_598 = __builtin_shufflevector(__s2_598, __s2_598, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint32x4_t __ret_598; \ + __ret_598 = __rev0_598 - __noswap_vmull_u16(__rev1_598, __noswap_splat_laneq_u16(__rev2_598, __p3_598)); \ + __ret_598 = __builtin_shufflevector(__ret_598, __ret_598, 3, 2, 1, 0); \ + __ret_598; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_laneq_s32(__p0_507, __p1_507, __p2_507, __p3_507) __extension__ ({ \ - int64x2_t __s0_507 = __p0_507; \ - int32x2_t __s1_507 = __p1_507; \ - int32x4_t __s2_507 = __p2_507; \ - int64x2_t __ret_507; \ - __ret_507 = __s0_507 - vmull_s32(__s1_507, splat_laneq_s32(__s2_507, __p3_507)); \ - __ret_507; \ +#define vmlsl_laneq_s32(__p0_599, __p1_599, __p2_599, __p3_599) __extension__ ({ \ + int64x2_t __s0_599 = __p0_599; \ + int32x2_t __s1_599 = __p1_599; \ + int32x4_t __s2_599 = __p2_599; \ + int64x2_t __ret_599; \ + __ret_599 = __s0_599 - vmull_s32(__s1_599, splat_laneq_s32(__s2_599, __p3_599)); \ + __ret_599; \ }) #else -#define vmlsl_laneq_s32(__p0_508, __p1_508, __p2_508, __p3_508) __extension__ ({ \ - int64x2_t __s0_508 = __p0_508; \ - int32x2_t __s1_508 = __p1_508; \ - int32x4_t __s2_508 = __p2_508; \ - int64x2_t __rev0_508; __rev0_508 = __builtin_shufflevector(__s0_508, __s0_508, 1, 0); \ - int32x2_t __rev1_508; __rev1_508 = __builtin_shufflevector(__s1_508, __s1_508, 1, 0); \ - int32x4_t __rev2_508; __rev2_508 = __builtin_shufflevector(__s2_508, __s2_508, 3, 2, 1, 0); \ - int64x2_t __ret_508; \ - __ret_508 = __rev0_508 - __noswap_vmull_s32(__rev1_508, 
__noswap_splat_laneq_s32(__rev2_508, __p3_508)); \ - __ret_508 = __builtin_shufflevector(__ret_508, __ret_508, 1, 0); \ - __ret_508; \ +#define vmlsl_laneq_s32(__p0_600, __p1_600, __p2_600, __p3_600) __extension__ ({ \ + int64x2_t __s0_600 = __p0_600; \ + int32x2_t __s1_600 = __p1_600; \ + int32x4_t __s2_600 = __p2_600; \ + int64x2_t __rev0_600; __rev0_600 = __builtin_shufflevector(__s0_600, __s0_600, 1, 0); \ + int32x2_t __rev1_600; __rev1_600 = __builtin_shufflevector(__s1_600, __s1_600, 1, 0); \ + int32x4_t __rev2_600; __rev2_600 = __builtin_shufflevector(__s2_600, __s2_600, 3, 2, 1, 0); \ + int64x2_t __ret_600; \ + __ret_600 = __rev0_600 - __noswap_vmull_s32(__rev1_600, __noswap_splat_laneq_s32(__rev2_600, __p3_600)); \ + __ret_600 = __builtin_shufflevector(__ret_600, __ret_600, 1, 0); \ + __ret_600; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_laneq_s16(__p0_509, __p1_509, __p2_509, __p3_509) __extension__ ({ \ - int32x4_t __s0_509 = __p0_509; \ - int16x4_t __s1_509 = __p1_509; \ - int16x8_t __s2_509 = __p2_509; \ - int32x4_t __ret_509; \ - __ret_509 = __s0_509 - vmull_s16(__s1_509, splat_laneq_s16(__s2_509, __p3_509)); \ - __ret_509; \ +#define vmlsl_laneq_s16(__p0_601, __p1_601, __p2_601, __p3_601) __extension__ ({ \ + int32x4_t __s0_601 = __p0_601; \ + int16x4_t __s1_601 = __p1_601; \ + int16x8_t __s2_601 = __p2_601; \ + int32x4_t __ret_601; \ + __ret_601 = __s0_601 - vmull_s16(__s1_601, splat_laneq_s16(__s2_601, __p3_601)); \ + __ret_601; \ }) #else -#define vmlsl_laneq_s16(__p0_510, __p1_510, __p2_510, __p3_510) __extension__ ({ \ - int32x4_t __s0_510 = __p0_510; \ - int16x4_t __s1_510 = __p1_510; \ - int16x8_t __s2_510 = __p2_510; \ - int32x4_t __rev0_510; __rev0_510 = __builtin_shufflevector(__s0_510, __s0_510, 3, 2, 1, 0); \ - int16x4_t __rev1_510; __rev1_510 = __builtin_shufflevector(__s1_510, __s1_510, 3, 2, 1, 0); \ - int16x8_t __rev2_510; __rev2_510 = __builtin_shufflevector(__s2_510, __s2_510, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x4_t __ret_510; \ - __ret_510 = __rev0_510 - __noswap_vmull_s16(__rev1_510, __noswap_splat_laneq_s16(__rev2_510, __p3_510)); \ - __ret_510 = __builtin_shufflevector(__ret_510, __ret_510, 3, 2, 1, 0); \ - __ret_510; \ +#define vmlsl_laneq_s16(__p0_602, __p1_602, __p2_602, __p3_602) __extension__ ({ \ + int32x4_t __s0_602 = __p0_602; \ + int16x4_t __s1_602 = __p1_602; \ + int16x8_t __s2_602 = __p2_602; \ + int32x4_t __rev0_602; __rev0_602 = __builtin_shufflevector(__s0_602, __s0_602, 3, 2, 1, 0); \ + int16x4_t __rev1_602; __rev1_602 = __builtin_shufflevector(__s1_602, __s1_602, 3, 2, 1, 0); \ + int16x8_t __rev2_602; __rev2_602 = __builtin_shufflevector(__s2_602, __s2_602, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x4_t __ret_602; \ + __ret_602 = __rev0_602 - __noswap_vmull_s16(__rev1_602, __noswap_splat_laneq_s16(__rev2_602, __p3_602)); \ + __ret_602 = __builtin_shufflevector(__ret_602, __ret_602, 3, 2, 1, 0); \ + __ret_602; \ }) #endif @@ -53701,146 +55371,146 @@ __ai float64x1_t vmov_n_f64(float64_t __p0) { return __ret; } #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vmovl_high_u8(uint8x16_t __p0_511) { - uint16x8_t __ret_511; - uint8x8_t __a1_511 = vget_high_u8(__p0_511); - __ret_511 = (uint16x8_t)(vshll_n_u8(__a1_511, 0)); - return __ret_511; +__ai uint16x8_t vmovl_high_u8(uint8x16_t __p0_603) { + uint16x8_t __ret_603; + uint8x8_t __a1_603 = vget_high_u8(__p0_603); + __ret_603 = (uint16x8_t)(vshll_n_u8(__a1_603, 0)); + return __ret_603; } #else -__ai uint16x8_t vmovl_high_u8(uint8x16_t __p0_512) { - uint8x16_t __rev0_512; __rev0_512 = 
__builtin_shufflevector(__p0_512, __p0_512, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __ret_512; - uint8x8_t __a1_512 = __noswap_vget_high_u8(__rev0_512); - __ret_512 = (uint16x8_t)(__noswap_vshll_n_u8(__a1_512, 0)); - __ret_512 = __builtin_shufflevector(__ret_512, __ret_512, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret_512; +__ai uint16x8_t vmovl_high_u8(uint8x16_t __p0_604) { + uint8x16_t __rev0_604; __rev0_604 = __builtin_shufflevector(__p0_604, __p0_604, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __ret_604; + uint8x8_t __a1_604 = __noswap_vget_high_u8(__rev0_604); + __ret_604 = (uint16x8_t)(__noswap_vshll_n_u8(__a1_604, 0)); + __ret_604 = __builtin_shufflevector(__ret_604, __ret_604, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret_604; } -__ai uint16x8_t __noswap_vmovl_high_u8(uint8x16_t __p0_513) { - uint16x8_t __ret_513; - uint8x8_t __a1_513 = __noswap_vget_high_u8(__p0_513); - __ret_513 = (uint16x8_t)(__noswap_vshll_n_u8(__a1_513, 0)); - return __ret_513; +__ai uint16x8_t __noswap_vmovl_high_u8(uint8x16_t __p0_605) { + uint16x8_t __ret_605; + uint8x8_t __a1_605 = __noswap_vget_high_u8(__p0_605); + __ret_605 = (uint16x8_t)(__noswap_vshll_n_u8(__a1_605, 0)); + return __ret_605; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vmovl_high_u32(uint32x4_t __p0_514) { - uint64x2_t __ret_514; - uint32x2_t __a1_514 = vget_high_u32(__p0_514); - __ret_514 = (uint64x2_t)(vshll_n_u32(__a1_514, 0)); - return __ret_514; +__ai uint64x2_t vmovl_high_u32(uint32x4_t __p0_606) { + uint64x2_t __ret_606; + uint32x2_t __a1_606 = vget_high_u32(__p0_606); + __ret_606 = (uint64x2_t)(vshll_n_u32(__a1_606, 0)); + return __ret_606; } #else -__ai uint64x2_t vmovl_high_u32(uint32x4_t __p0_515) { - uint32x4_t __rev0_515; __rev0_515 = __builtin_shufflevector(__p0_515, __p0_515, 3, 2, 1, 0); - uint64x2_t __ret_515; - uint32x2_t __a1_515 = __noswap_vget_high_u32(__rev0_515); - __ret_515 = (uint64x2_t)(__noswap_vshll_n_u32(__a1_515, 0)); - __ret_515 = __builtin_shufflevector(__ret_515, __ret_515, 1, 0); - return __ret_515; +__ai uint64x2_t vmovl_high_u32(uint32x4_t __p0_607) { + uint32x4_t __rev0_607; __rev0_607 = __builtin_shufflevector(__p0_607, __p0_607, 3, 2, 1, 0); + uint64x2_t __ret_607; + uint32x2_t __a1_607 = __noswap_vget_high_u32(__rev0_607); + __ret_607 = (uint64x2_t)(__noswap_vshll_n_u32(__a1_607, 0)); + __ret_607 = __builtin_shufflevector(__ret_607, __ret_607, 1, 0); + return __ret_607; } -__ai uint64x2_t __noswap_vmovl_high_u32(uint32x4_t __p0_516) { - uint64x2_t __ret_516; - uint32x2_t __a1_516 = __noswap_vget_high_u32(__p0_516); - __ret_516 = (uint64x2_t)(__noswap_vshll_n_u32(__a1_516, 0)); - return __ret_516; +__ai uint64x2_t __noswap_vmovl_high_u32(uint32x4_t __p0_608) { + uint64x2_t __ret_608; + uint32x2_t __a1_608 = __noswap_vget_high_u32(__p0_608); + __ret_608 = (uint64x2_t)(__noswap_vshll_n_u32(__a1_608, 0)); + return __ret_608; } #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vmovl_high_u16(uint16x8_t __p0_517) { - uint32x4_t __ret_517; - uint16x4_t __a1_517 = vget_high_u16(__p0_517); - __ret_517 = (uint32x4_t)(vshll_n_u16(__a1_517, 0)); - return __ret_517; +__ai uint32x4_t vmovl_high_u16(uint16x8_t __p0_609) { + uint32x4_t __ret_609; + uint16x4_t __a1_609 = vget_high_u16(__p0_609); + __ret_609 = (uint32x4_t)(vshll_n_u16(__a1_609, 0)); + return __ret_609; } #else -__ai uint32x4_t vmovl_high_u16(uint16x8_t __p0_518) { - uint16x8_t __rev0_518; __rev0_518 = __builtin_shufflevector(__p0_518, __p0_518, 7, 6, 5, 4, 3, 2, 1, 0); - uint32x4_t __ret_518; - 
uint16x4_t __a1_518 = __noswap_vget_high_u16(__rev0_518); - __ret_518 = (uint32x4_t)(__noswap_vshll_n_u16(__a1_518, 0)); - __ret_518 = __builtin_shufflevector(__ret_518, __ret_518, 3, 2, 1, 0); - return __ret_518; +__ai uint32x4_t vmovl_high_u16(uint16x8_t __p0_610) { + uint16x8_t __rev0_610; __rev0_610 = __builtin_shufflevector(__p0_610, __p0_610, 7, 6, 5, 4, 3, 2, 1, 0); + uint32x4_t __ret_610; + uint16x4_t __a1_610 = __noswap_vget_high_u16(__rev0_610); + __ret_610 = (uint32x4_t)(__noswap_vshll_n_u16(__a1_610, 0)); + __ret_610 = __builtin_shufflevector(__ret_610, __ret_610, 3, 2, 1, 0); + return __ret_610; } -__ai uint32x4_t __noswap_vmovl_high_u16(uint16x8_t __p0_519) { - uint32x4_t __ret_519; - uint16x4_t __a1_519 = __noswap_vget_high_u16(__p0_519); - __ret_519 = (uint32x4_t)(__noswap_vshll_n_u16(__a1_519, 0)); - return __ret_519; +__ai uint32x4_t __noswap_vmovl_high_u16(uint16x8_t __p0_611) { + uint32x4_t __ret_611; + uint16x4_t __a1_611 = __noswap_vget_high_u16(__p0_611); + __ret_611 = (uint32x4_t)(__noswap_vshll_n_u16(__a1_611, 0)); + return __ret_611; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vmovl_high_s8(int8x16_t __p0_520) { - int16x8_t __ret_520; - int8x8_t __a1_520 = vget_high_s8(__p0_520); - __ret_520 = (int16x8_t)(vshll_n_s8(__a1_520, 0)); - return __ret_520; +__ai int16x8_t vmovl_high_s8(int8x16_t __p0_612) { + int16x8_t __ret_612; + int8x8_t __a1_612 = vget_high_s8(__p0_612); + __ret_612 = (int16x8_t)(vshll_n_s8(__a1_612, 0)); + return __ret_612; } #else -__ai int16x8_t vmovl_high_s8(int8x16_t __p0_521) { - int8x16_t __rev0_521; __rev0_521 = __builtin_shufflevector(__p0_521, __p0_521, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __ret_521; - int8x8_t __a1_521 = __noswap_vget_high_s8(__rev0_521); - __ret_521 = (int16x8_t)(__noswap_vshll_n_s8(__a1_521, 0)); - __ret_521 = __builtin_shufflevector(__ret_521, __ret_521, 7, 6, 5, 4, 3, 2, 1, 0); - return __ret_521; +__ai int16x8_t vmovl_high_s8(int8x16_t __p0_613) { + int8x16_t __rev0_613; __rev0_613 = __builtin_shufflevector(__p0_613, __p0_613, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __ret_613; + int8x8_t __a1_613 = __noswap_vget_high_s8(__rev0_613); + __ret_613 = (int16x8_t)(__noswap_vshll_n_s8(__a1_613, 0)); + __ret_613 = __builtin_shufflevector(__ret_613, __ret_613, 7, 6, 5, 4, 3, 2, 1, 0); + return __ret_613; } -__ai int16x8_t __noswap_vmovl_high_s8(int8x16_t __p0_522) { - int16x8_t __ret_522; - int8x8_t __a1_522 = __noswap_vget_high_s8(__p0_522); - __ret_522 = (int16x8_t)(__noswap_vshll_n_s8(__a1_522, 0)); - return __ret_522; +__ai int16x8_t __noswap_vmovl_high_s8(int8x16_t __p0_614) { + int16x8_t __ret_614; + int8x8_t __a1_614 = __noswap_vget_high_s8(__p0_614); + __ret_614 = (int16x8_t)(__noswap_vshll_n_s8(__a1_614, 0)); + return __ret_614; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vmovl_high_s32(int32x4_t __p0_523) { - int64x2_t __ret_523; - int32x2_t __a1_523 = vget_high_s32(__p0_523); - __ret_523 = (int64x2_t)(vshll_n_s32(__a1_523, 0)); - return __ret_523; +__ai int64x2_t vmovl_high_s32(int32x4_t __p0_615) { + int64x2_t __ret_615; + int32x2_t __a1_615 = vget_high_s32(__p0_615); + __ret_615 = (int64x2_t)(vshll_n_s32(__a1_615, 0)); + return __ret_615; } #else -__ai int64x2_t vmovl_high_s32(int32x4_t __p0_524) { - int32x4_t __rev0_524; __rev0_524 = __builtin_shufflevector(__p0_524, __p0_524, 3, 2, 1, 0); - int64x2_t __ret_524; - int32x2_t __a1_524 = __noswap_vget_high_s32(__rev0_524); - __ret_524 = (int64x2_t)(__noswap_vshll_n_s32(__a1_524, 0)); - __ret_524 = 
__builtin_shufflevector(__ret_524, __ret_524, 1, 0); - return __ret_524; +__ai int64x2_t vmovl_high_s32(int32x4_t __p0_616) { + int32x4_t __rev0_616; __rev0_616 = __builtin_shufflevector(__p0_616, __p0_616, 3, 2, 1, 0); + int64x2_t __ret_616; + int32x2_t __a1_616 = __noswap_vget_high_s32(__rev0_616); + __ret_616 = (int64x2_t)(__noswap_vshll_n_s32(__a1_616, 0)); + __ret_616 = __builtin_shufflevector(__ret_616, __ret_616, 1, 0); + return __ret_616; } -__ai int64x2_t __noswap_vmovl_high_s32(int32x4_t __p0_525) { - int64x2_t __ret_525; - int32x2_t __a1_525 = __noswap_vget_high_s32(__p0_525); - __ret_525 = (int64x2_t)(__noswap_vshll_n_s32(__a1_525, 0)); - return __ret_525; +__ai int64x2_t __noswap_vmovl_high_s32(int32x4_t __p0_617) { + int64x2_t __ret_617; + int32x2_t __a1_617 = __noswap_vget_high_s32(__p0_617); + __ret_617 = (int64x2_t)(__noswap_vshll_n_s32(__a1_617, 0)); + return __ret_617; } #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vmovl_high_s16(int16x8_t __p0_526) { - int32x4_t __ret_526; - int16x4_t __a1_526 = vget_high_s16(__p0_526); - __ret_526 = (int32x4_t)(vshll_n_s16(__a1_526, 0)); - return __ret_526; +__ai int32x4_t vmovl_high_s16(int16x8_t __p0_618) { + int32x4_t __ret_618; + int16x4_t __a1_618 = vget_high_s16(__p0_618); + __ret_618 = (int32x4_t)(vshll_n_s16(__a1_618, 0)); + return __ret_618; } #else -__ai int32x4_t vmovl_high_s16(int16x8_t __p0_527) { - int16x8_t __rev0_527; __rev0_527 = __builtin_shufflevector(__p0_527, __p0_527, 7, 6, 5, 4, 3, 2, 1, 0); - int32x4_t __ret_527; - int16x4_t __a1_527 = __noswap_vget_high_s16(__rev0_527); - __ret_527 = (int32x4_t)(__noswap_vshll_n_s16(__a1_527, 0)); - __ret_527 = __builtin_shufflevector(__ret_527, __ret_527, 3, 2, 1, 0); - return __ret_527; +__ai int32x4_t vmovl_high_s16(int16x8_t __p0_619) { + int16x8_t __rev0_619; __rev0_619 = __builtin_shufflevector(__p0_619, __p0_619, 7, 6, 5, 4, 3, 2, 1, 0); + int32x4_t __ret_619; + int16x4_t __a1_619 = __noswap_vget_high_s16(__rev0_619); + __ret_619 = (int32x4_t)(__noswap_vshll_n_s16(__a1_619, 0)); + __ret_619 = __builtin_shufflevector(__ret_619, __ret_619, 3, 2, 1, 0); + return __ret_619; } -__ai int32x4_t __noswap_vmovl_high_s16(int16x8_t __p0_528) { - int32x4_t __ret_528; - int16x4_t __a1_528 = __noswap_vget_high_s16(__p0_528); - __ret_528 = (int32x4_t)(__noswap_vshll_n_s16(__a1_528, 0)); - return __ret_528; +__ai int32x4_t __noswap_vmovl_high_s16(int16x8_t __p0_620) { + int32x4_t __ret_620; + int16x4_t __a1_620 = __noswap_vget_high_s16(__p0_620); + __ret_620 = (int32x4_t)(__noswap_vshll_n_s16(__a1_620, 0)); + return __ret_620; } #endif @@ -53968,29 +55638,29 @@ __ai float64x1_t vmul_f64(float64x1_t __p0, float64x1_t __p1) { __ret = __p0 * __p1; return __ret; } -#define vmuld_lane_f64(__p0_529, __p1_529, __p2_529) __extension__ ({ \ - float64_t __s0_529 = __p0_529; \ - float64x1_t __s1_529 = __p1_529; \ - float64_t __ret_529; \ - __ret_529 = __s0_529 * vget_lane_f64(__s1_529, __p2_529); \ - __ret_529; \ +#define vmuld_lane_f64(__p0_621, __p1_621, __p2_621) __extension__ ({ \ + float64_t __s0_621 = __p0_621; \ + float64x1_t __s1_621 = __p1_621; \ + float64_t __ret_621; \ + __ret_621 = __s0_621 * vget_lane_f64(__s1_621, __p2_621); \ + __ret_621; \ }) #ifdef __LITTLE_ENDIAN__ -#define vmuls_lane_f32(__p0_530, __p1_530, __p2_530) __extension__ ({ \ - float32_t __s0_530 = __p0_530; \ - float32x2_t __s1_530 = __p1_530; \ - float32_t __ret_530; \ - __ret_530 = __s0_530 * vget_lane_f32(__s1_530, __p2_530); \ - __ret_530; \ +#define vmuls_lane_f32(__p0_622, __p1_622, __p2_622) __extension__ ({ 
\ + float32_t __s0_622 = __p0_622; \ + float32x2_t __s1_622 = __p1_622; \ + float32_t __ret_622; \ + __ret_622 = __s0_622 * vget_lane_f32(__s1_622, __p2_622); \ + __ret_622; \ }) #else -#define vmuls_lane_f32(__p0_531, __p1_531, __p2_531) __extension__ ({ \ - float32_t __s0_531 = __p0_531; \ - float32x2_t __s1_531 = __p1_531; \ - float32x2_t __rev1_531; __rev1_531 = __builtin_shufflevector(__s1_531, __s1_531, 1, 0); \ - float32_t __ret_531; \ - __ret_531 = __s0_531 * __noswap_vget_lane_f32(__rev1_531, __p2_531); \ - __ret_531; \ +#define vmuls_lane_f32(__p0_623, __p1_623, __p2_623) __extension__ ({ \ + float32_t __s0_623 = __p0_623; \ + float32x2_t __s1_623 = __p1_623; \ + float32x2_t __rev1_623; __rev1_623 = __builtin_shufflevector(__s1_623, __s1_623, 1, 0); \ + float32_t __ret_623; \ + __ret_623 = __s0_623 * __noswap_vget_lane_f32(__rev1_623, __p2_623); \ + __ret_623; \ }) #endif @@ -54002,60 +55672,60 @@ __ai float64x1_t vmul_f64(float64x1_t __p0, float64x1_t __p1) { __ret; \ }) #ifdef __LITTLE_ENDIAN__ -#define vmulq_lane_f64(__p0_532, __p1_532, __p2_532) __extension__ ({ \ - float64x2_t __s0_532 = __p0_532; \ - float64x1_t __s1_532 = __p1_532; \ - float64x2_t __ret_532; \ - __ret_532 = __s0_532 * splatq_lane_f64(__s1_532, __p2_532); \ - __ret_532; \ +#define vmulq_lane_f64(__p0_624, __p1_624, __p2_624) __extension__ ({ \ + float64x2_t __s0_624 = __p0_624; \ + float64x1_t __s1_624 = __p1_624; \ + float64x2_t __ret_624; \ + __ret_624 = __s0_624 * splatq_lane_f64(__s1_624, __p2_624); \ + __ret_624; \ }) #else -#define vmulq_lane_f64(__p0_533, __p1_533, __p2_533) __extension__ ({ \ - float64x2_t __s0_533 = __p0_533; \ - float64x1_t __s1_533 = __p1_533; \ - float64x2_t __rev0_533; __rev0_533 = __builtin_shufflevector(__s0_533, __s0_533, 1, 0); \ - float64x2_t __ret_533; \ - __ret_533 = __rev0_533 * __noswap_splatq_lane_f64(__s1_533, __p2_533); \ - __ret_533 = __builtin_shufflevector(__ret_533, __ret_533, 1, 0); \ - __ret_533; \ +#define vmulq_lane_f64(__p0_625, __p1_625, __p2_625) __extension__ ({ \ + float64x2_t __s0_625 = __p0_625; \ + float64x1_t __s1_625 = __p1_625; \ + float64x2_t __rev0_625; __rev0_625 = __builtin_shufflevector(__s0_625, __s0_625, 1, 0); \ + float64x2_t __ret_625; \ + __ret_625 = __rev0_625 * __noswap_splatq_lane_f64(__s1_625, __p2_625); \ + __ret_625 = __builtin_shufflevector(__ret_625, __ret_625, 1, 0); \ + __ret_625; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmuld_laneq_f64(__p0_534, __p1_534, __p2_534) __extension__ ({ \ - float64_t __s0_534 = __p0_534; \ - float64x2_t __s1_534 = __p1_534; \ - float64_t __ret_534; \ - __ret_534 = __s0_534 * vgetq_lane_f64(__s1_534, __p2_534); \ - __ret_534; \ +#define vmuld_laneq_f64(__p0_626, __p1_626, __p2_626) __extension__ ({ \ + float64_t __s0_626 = __p0_626; \ + float64x2_t __s1_626 = __p1_626; \ + float64_t __ret_626; \ + __ret_626 = __s0_626 * vgetq_lane_f64(__s1_626, __p2_626); \ + __ret_626; \ }) #else -#define vmuld_laneq_f64(__p0_535, __p1_535, __p2_535) __extension__ ({ \ - float64_t __s0_535 = __p0_535; \ - float64x2_t __s1_535 = __p1_535; \ - float64x2_t __rev1_535; __rev1_535 = __builtin_shufflevector(__s1_535, __s1_535, 1, 0); \ - float64_t __ret_535; \ - __ret_535 = __s0_535 * __noswap_vgetq_lane_f64(__rev1_535, __p2_535); \ - __ret_535; \ +#define vmuld_laneq_f64(__p0_627, __p1_627, __p2_627) __extension__ ({ \ + float64_t __s0_627 = __p0_627; \ + float64x2_t __s1_627 = __p1_627; \ + float64x2_t __rev1_627; __rev1_627 = __builtin_shufflevector(__s1_627, __s1_627, 1, 0); \ + float64_t __ret_627; \ + 
__ret_627 = __s0_627 * __noswap_vgetq_lane_f64(__rev1_627, __p2_627); \ + __ret_627; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmuls_laneq_f32(__p0_536, __p1_536, __p2_536) __extension__ ({ \ - float32_t __s0_536 = __p0_536; \ - float32x4_t __s1_536 = __p1_536; \ - float32_t __ret_536; \ - __ret_536 = __s0_536 * vgetq_lane_f32(__s1_536, __p2_536); \ - __ret_536; \ +#define vmuls_laneq_f32(__p0_628, __p1_628, __p2_628) __extension__ ({ \ + float32_t __s0_628 = __p0_628; \ + float32x4_t __s1_628 = __p1_628; \ + float32_t __ret_628; \ + __ret_628 = __s0_628 * vgetq_lane_f32(__s1_628, __p2_628); \ + __ret_628; \ }) #else -#define vmuls_laneq_f32(__p0_537, __p1_537, __p2_537) __extension__ ({ \ - float32_t __s0_537 = __p0_537; \ - float32x4_t __s1_537 = __p1_537; \ - float32x4_t __rev1_537; __rev1_537 = __builtin_shufflevector(__s1_537, __s1_537, 3, 2, 1, 0); \ - float32_t __ret_537; \ - __ret_537 = __s0_537 * __noswap_vgetq_lane_f32(__rev1_537, __p2_537); \ - __ret_537; \ +#define vmuls_laneq_f32(__p0_629, __p1_629, __p2_629) __extension__ ({ \ + float32_t __s0_629 = __p0_629; \ + float32x4_t __s1_629 = __p1_629; \ + float32x4_t __rev1_629; __rev1_629 = __builtin_shufflevector(__s1_629, __s1_629, 3, 2, 1, 0); \ + float32_t __ret_629; \ + __ret_629 = __s0_629 * __noswap_vgetq_lane_f32(__rev1_629, __p2_629); \ + __ret_629; \ }) #endif @@ -54079,233 +55749,233 @@ __ai float64x1_t vmul_f64(float64x1_t __p0, float64x1_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vmulq_laneq_u32(__p0_538, __p1_538, __p2_538) __extension__ ({ \ - uint32x4_t __s0_538 = __p0_538; \ - uint32x4_t __s1_538 = __p1_538; \ - uint32x4_t __ret_538; \ - __ret_538 = __s0_538 * splatq_laneq_u32(__s1_538, __p2_538); \ - __ret_538; \ +#define vmulq_laneq_u32(__p0_630, __p1_630, __p2_630) __extension__ ({ \ + uint32x4_t __s0_630 = __p0_630; \ + uint32x4_t __s1_630 = __p1_630; \ + uint32x4_t __ret_630; \ + __ret_630 = __s0_630 * splatq_laneq_u32(__s1_630, __p2_630); \ + __ret_630; \ }) #else -#define vmulq_laneq_u32(__p0_539, __p1_539, __p2_539) __extension__ ({ \ - uint32x4_t __s0_539 = __p0_539; \ - uint32x4_t __s1_539 = __p1_539; \ - uint32x4_t __rev0_539; __rev0_539 = __builtin_shufflevector(__s0_539, __s0_539, 3, 2, 1, 0); \ - uint32x4_t __rev1_539; __rev1_539 = __builtin_shufflevector(__s1_539, __s1_539, 3, 2, 1, 0); \ - uint32x4_t __ret_539; \ - __ret_539 = __rev0_539 * __noswap_splatq_laneq_u32(__rev1_539, __p2_539); \ - __ret_539 = __builtin_shufflevector(__ret_539, __ret_539, 3, 2, 1, 0); \ - __ret_539; \ +#define vmulq_laneq_u32(__p0_631, __p1_631, __p2_631) __extension__ ({ \ + uint32x4_t __s0_631 = __p0_631; \ + uint32x4_t __s1_631 = __p1_631; \ + uint32x4_t __rev0_631; __rev0_631 = __builtin_shufflevector(__s0_631, __s0_631, 3, 2, 1, 0); \ + uint32x4_t __rev1_631; __rev1_631 = __builtin_shufflevector(__s1_631, __s1_631, 3, 2, 1, 0); \ + uint32x4_t __ret_631; \ + __ret_631 = __rev0_631 * __noswap_splatq_laneq_u32(__rev1_631, __p2_631); \ + __ret_631 = __builtin_shufflevector(__ret_631, __ret_631, 3, 2, 1, 0); \ + __ret_631; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulq_laneq_u16(__p0_540, __p1_540, __p2_540) __extension__ ({ \ - uint16x8_t __s0_540 = __p0_540; \ - uint16x8_t __s1_540 = __p1_540; \ - uint16x8_t __ret_540; \ - __ret_540 = __s0_540 * splatq_laneq_u16(__s1_540, __p2_540); \ - __ret_540; \ +#define vmulq_laneq_u16(__p0_632, __p1_632, __p2_632) __extension__ ({ \ + uint16x8_t __s0_632 = __p0_632; \ + uint16x8_t __s1_632 = __p1_632; \ + uint16x8_t __ret_632; \ + __ret_632 = __s0_632 * 
splatq_laneq_u16(__s1_632, __p2_632); \ + __ret_632; \ }) #else -#define vmulq_laneq_u16(__p0_541, __p1_541, __p2_541) __extension__ ({ \ - uint16x8_t __s0_541 = __p0_541; \ - uint16x8_t __s1_541 = __p1_541; \ - uint16x8_t __rev0_541; __rev0_541 = __builtin_shufflevector(__s0_541, __s0_541, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1_541; __rev1_541 = __builtin_shufflevector(__s1_541, __s1_541, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __ret_541; \ - __ret_541 = __rev0_541 * __noswap_splatq_laneq_u16(__rev1_541, __p2_541); \ - __ret_541 = __builtin_shufflevector(__ret_541, __ret_541, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_541; \ +#define vmulq_laneq_u16(__p0_633, __p1_633, __p2_633) __extension__ ({ \ + uint16x8_t __s0_633 = __p0_633; \ + uint16x8_t __s1_633 = __p1_633; \ + uint16x8_t __rev0_633; __rev0_633 = __builtin_shufflevector(__s0_633, __s0_633, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev1_633; __rev1_633 = __builtin_shufflevector(__s1_633, __s1_633, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __ret_633; \ + __ret_633 = __rev0_633 * __noswap_splatq_laneq_u16(__rev1_633, __p2_633); \ + __ret_633 = __builtin_shufflevector(__ret_633, __ret_633, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_633; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulq_laneq_f64(__p0_542, __p1_542, __p2_542) __extension__ ({ \ - float64x2_t __s0_542 = __p0_542; \ - float64x2_t __s1_542 = __p1_542; \ - float64x2_t __ret_542; \ - __ret_542 = __s0_542 * splatq_laneq_f64(__s1_542, __p2_542); \ - __ret_542; \ +#define vmulq_laneq_f64(__p0_634, __p1_634, __p2_634) __extension__ ({ \ + float64x2_t __s0_634 = __p0_634; \ + float64x2_t __s1_634 = __p1_634; \ + float64x2_t __ret_634; \ + __ret_634 = __s0_634 * splatq_laneq_f64(__s1_634, __p2_634); \ + __ret_634; \ }) #else -#define vmulq_laneq_f64(__p0_543, __p1_543, __p2_543) __extension__ ({ \ - float64x2_t __s0_543 = __p0_543; \ - float64x2_t __s1_543 = __p1_543; \ - float64x2_t __rev0_543; __rev0_543 = __builtin_shufflevector(__s0_543, __s0_543, 1, 0); \ - float64x2_t __rev1_543; __rev1_543 = __builtin_shufflevector(__s1_543, __s1_543, 1, 0); \ - float64x2_t __ret_543; \ - __ret_543 = __rev0_543 * __noswap_splatq_laneq_f64(__rev1_543, __p2_543); \ - __ret_543 = __builtin_shufflevector(__ret_543, __ret_543, 1, 0); \ - __ret_543; \ +#define vmulq_laneq_f64(__p0_635, __p1_635, __p2_635) __extension__ ({ \ + float64x2_t __s0_635 = __p0_635; \ + float64x2_t __s1_635 = __p1_635; \ + float64x2_t __rev0_635; __rev0_635 = __builtin_shufflevector(__s0_635, __s0_635, 1, 0); \ + float64x2_t __rev1_635; __rev1_635 = __builtin_shufflevector(__s1_635, __s1_635, 1, 0); \ + float64x2_t __ret_635; \ + __ret_635 = __rev0_635 * __noswap_splatq_laneq_f64(__rev1_635, __p2_635); \ + __ret_635 = __builtin_shufflevector(__ret_635, __ret_635, 1, 0); \ + __ret_635; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulq_laneq_f32(__p0_544, __p1_544, __p2_544) __extension__ ({ \ - float32x4_t __s0_544 = __p0_544; \ - float32x4_t __s1_544 = __p1_544; \ - float32x4_t __ret_544; \ - __ret_544 = __s0_544 * splatq_laneq_f32(__s1_544, __p2_544); \ - __ret_544; \ +#define vmulq_laneq_f32(__p0_636, __p1_636, __p2_636) __extension__ ({ \ + float32x4_t __s0_636 = __p0_636; \ + float32x4_t __s1_636 = __p1_636; \ + float32x4_t __ret_636; \ + __ret_636 = __s0_636 * splatq_laneq_f32(__s1_636, __p2_636); \ + __ret_636; \ }) #else -#define vmulq_laneq_f32(__p0_545, __p1_545, __p2_545) __extension__ ({ \ - float32x4_t __s0_545 = __p0_545; \ - float32x4_t __s1_545 = __p1_545; \ - float32x4_t __rev0_545; __rev0_545 = 
__builtin_shufflevector(__s0_545, __s0_545, 3, 2, 1, 0); \ - float32x4_t __rev1_545; __rev1_545 = __builtin_shufflevector(__s1_545, __s1_545, 3, 2, 1, 0); \ - float32x4_t __ret_545; \ - __ret_545 = __rev0_545 * __noswap_splatq_laneq_f32(__rev1_545, __p2_545); \ - __ret_545 = __builtin_shufflevector(__ret_545, __ret_545, 3, 2, 1, 0); \ - __ret_545; \ +#define vmulq_laneq_f32(__p0_637, __p1_637, __p2_637) __extension__ ({ \ + float32x4_t __s0_637 = __p0_637; \ + float32x4_t __s1_637 = __p1_637; \ + float32x4_t __rev0_637; __rev0_637 = __builtin_shufflevector(__s0_637, __s0_637, 3, 2, 1, 0); \ + float32x4_t __rev1_637; __rev1_637 = __builtin_shufflevector(__s1_637, __s1_637, 3, 2, 1, 0); \ + float32x4_t __ret_637; \ + __ret_637 = __rev0_637 * __noswap_splatq_laneq_f32(__rev1_637, __p2_637); \ + __ret_637 = __builtin_shufflevector(__ret_637, __ret_637, 3, 2, 1, 0); \ + __ret_637; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulq_laneq_s32(__p0_546, __p1_546, __p2_546) __extension__ ({ \ - int32x4_t __s0_546 = __p0_546; \ - int32x4_t __s1_546 = __p1_546; \ - int32x4_t __ret_546; \ - __ret_546 = __s0_546 * splatq_laneq_s32(__s1_546, __p2_546); \ - __ret_546; \ +#define vmulq_laneq_s32(__p0_638, __p1_638, __p2_638) __extension__ ({ \ + int32x4_t __s0_638 = __p0_638; \ + int32x4_t __s1_638 = __p1_638; \ + int32x4_t __ret_638; \ + __ret_638 = __s0_638 * splatq_laneq_s32(__s1_638, __p2_638); \ + __ret_638; \ }) #else -#define vmulq_laneq_s32(__p0_547, __p1_547, __p2_547) __extension__ ({ \ - int32x4_t __s0_547 = __p0_547; \ - int32x4_t __s1_547 = __p1_547; \ - int32x4_t __rev0_547; __rev0_547 = __builtin_shufflevector(__s0_547, __s0_547, 3, 2, 1, 0); \ - int32x4_t __rev1_547; __rev1_547 = __builtin_shufflevector(__s1_547, __s1_547, 3, 2, 1, 0); \ - int32x4_t __ret_547; \ - __ret_547 = __rev0_547 * __noswap_splatq_laneq_s32(__rev1_547, __p2_547); \ - __ret_547 = __builtin_shufflevector(__ret_547, __ret_547, 3, 2, 1, 0); \ - __ret_547; \ +#define vmulq_laneq_s32(__p0_639, __p1_639, __p2_639) __extension__ ({ \ + int32x4_t __s0_639 = __p0_639; \ + int32x4_t __s1_639 = __p1_639; \ + int32x4_t __rev0_639; __rev0_639 = __builtin_shufflevector(__s0_639, __s0_639, 3, 2, 1, 0); \ + int32x4_t __rev1_639; __rev1_639 = __builtin_shufflevector(__s1_639, __s1_639, 3, 2, 1, 0); \ + int32x4_t __ret_639; \ + __ret_639 = __rev0_639 * __noswap_splatq_laneq_s32(__rev1_639, __p2_639); \ + __ret_639 = __builtin_shufflevector(__ret_639, __ret_639, 3, 2, 1, 0); \ + __ret_639; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulq_laneq_s16(__p0_548, __p1_548, __p2_548) __extension__ ({ \ - int16x8_t __s0_548 = __p0_548; \ - int16x8_t __s1_548 = __p1_548; \ - int16x8_t __ret_548; \ - __ret_548 = __s0_548 * splatq_laneq_s16(__s1_548, __p2_548); \ - __ret_548; \ +#define vmulq_laneq_s16(__p0_640, __p1_640, __p2_640) __extension__ ({ \ + int16x8_t __s0_640 = __p0_640; \ + int16x8_t __s1_640 = __p1_640; \ + int16x8_t __ret_640; \ + __ret_640 = __s0_640 * splatq_laneq_s16(__s1_640, __p2_640); \ + __ret_640; \ }) #else -#define vmulq_laneq_s16(__p0_549, __p1_549, __p2_549) __extension__ ({ \ - int16x8_t __s0_549 = __p0_549; \ - int16x8_t __s1_549 = __p1_549; \ - int16x8_t __rev0_549; __rev0_549 = __builtin_shufflevector(__s0_549, __s0_549, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_549; __rev1_549 = __builtin_shufflevector(__s1_549, __s1_549, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __ret_549; \ - __ret_549 = __rev0_549 * __noswap_splatq_laneq_s16(__rev1_549, __p2_549); \ - __ret_549 = __builtin_shufflevector(__ret_549, 
__ret_549, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_549; \ +#define vmulq_laneq_s16(__p0_641, __p1_641, __p2_641) __extension__ ({ \ + int16x8_t __s0_641 = __p0_641; \ + int16x8_t __s1_641 = __p1_641; \ + int16x8_t __rev0_641; __rev0_641 = __builtin_shufflevector(__s0_641, __s0_641, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1_641; __rev1_641 = __builtin_shufflevector(__s1_641, __s1_641, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __ret_641; \ + __ret_641 = __rev0_641 * __noswap_splatq_laneq_s16(__rev1_641, __p2_641); \ + __ret_641 = __builtin_shufflevector(__ret_641, __ret_641, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_641; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmul_laneq_u32(__p0_550, __p1_550, __p2_550) __extension__ ({ \ - uint32x2_t __s0_550 = __p0_550; \ - uint32x4_t __s1_550 = __p1_550; \ - uint32x2_t __ret_550; \ - __ret_550 = __s0_550 * splat_laneq_u32(__s1_550, __p2_550); \ - __ret_550; \ +#define vmul_laneq_u32(__p0_642, __p1_642, __p2_642) __extension__ ({ \ + uint32x2_t __s0_642 = __p0_642; \ + uint32x4_t __s1_642 = __p1_642; \ + uint32x2_t __ret_642; \ + __ret_642 = __s0_642 * splat_laneq_u32(__s1_642, __p2_642); \ + __ret_642; \ }) #else -#define vmul_laneq_u32(__p0_551, __p1_551, __p2_551) __extension__ ({ \ - uint32x2_t __s0_551 = __p0_551; \ - uint32x4_t __s1_551 = __p1_551; \ - uint32x2_t __rev0_551; __rev0_551 = __builtin_shufflevector(__s0_551, __s0_551, 1, 0); \ - uint32x4_t __rev1_551; __rev1_551 = __builtin_shufflevector(__s1_551, __s1_551, 3, 2, 1, 0); \ - uint32x2_t __ret_551; \ - __ret_551 = __rev0_551 * __noswap_splat_laneq_u32(__rev1_551, __p2_551); \ - __ret_551 = __builtin_shufflevector(__ret_551, __ret_551, 1, 0); \ - __ret_551; \ +#define vmul_laneq_u32(__p0_643, __p1_643, __p2_643) __extension__ ({ \ + uint32x2_t __s0_643 = __p0_643; \ + uint32x4_t __s1_643 = __p1_643; \ + uint32x2_t __rev0_643; __rev0_643 = __builtin_shufflevector(__s0_643, __s0_643, 1, 0); \ + uint32x4_t __rev1_643; __rev1_643 = __builtin_shufflevector(__s1_643, __s1_643, 3, 2, 1, 0); \ + uint32x2_t __ret_643; \ + __ret_643 = __rev0_643 * __noswap_splat_laneq_u32(__rev1_643, __p2_643); \ + __ret_643 = __builtin_shufflevector(__ret_643, __ret_643, 1, 0); \ + __ret_643; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmul_laneq_u16(__p0_552, __p1_552, __p2_552) __extension__ ({ \ - uint16x4_t __s0_552 = __p0_552; \ - uint16x8_t __s1_552 = __p1_552; \ - uint16x4_t __ret_552; \ - __ret_552 = __s0_552 * splat_laneq_u16(__s1_552, __p2_552); \ - __ret_552; \ +#define vmul_laneq_u16(__p0_644, __p1_644, __p2_644) __extension__ ({ \ + uint16x4_t __s0_644 = __p0_644; \ + uint16x8_t __s1_644 = __p1_644; \ + uint16x4_t __ret_644; \ + __ret_644 = __s0_644 * splat_laneq_u16(__s1_644, __p2_644); \ + __ret_644; \ }) #else -#define vmul_laneq_u16(__p0_553, __p1_553, __p2_553) __extension__ ({ \ - uint16x4_t __s0_553 = __p0_553; \ - uint16x8_t __s1_553 = __p1_553; \ - uint16x4_t __rev0_553; __rev0_553 = __builtin_shufflevector(__s0_553, __s0_553, 3, 2, 1, 0); \ - uint16x8_t __rev1_553; __rev1_553 = __builtin_shufflevector(__s1_553, __s1_553, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x4_t __ret_553; \ - __ret_553 = __rev0_553 * __noswap_splat_laneq_u16(__rev1_553, __p2_553); \ - __ret_553 = __builtin_shufflevector(__ret_553, __ret_553, 3, 2, 1, 0); \ - __ret_553; \ +#define vmul_laneq_u16(__p0_645, __p1_645, __p2_645) __extension__ ({ \ + uint16x4_t __s0_645 = __p0_645; \ + uint16x8_t __s1_645 = __p1_645; \ + uint16x4_t __rev0_645; __rev0_645 = __builtin_shufflevector(__s0_645, __s0_645, 3, 2, 1, 0); \ + uint16x8_t 
__rev1_645; __rev1_645 = __builtin_shufflevector(__s1_645, __s1_645, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x4_t __ret_645; \ + __ret_645 = __rev0_645 * __noswap_splat_laneq_u16(__rev1_645, __p2_645); \ + __ret_645 = __builtin_shufflevector(__ret_645, __ret_645, 3, 2, 1, 0); \ + __ret_645; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmul_laneq_f32(__p0_554, __p1_554, __p2_554) __extension__ ({ \ - float32x2_t __s0_554 = __p0_554; \ - float32x4_t __s1_554 = __p1_554; \ - float32x2_t __ret_554; \ - __ret_554 = __s0_554 * splat_laneq_f32(__s1_554, __p2_554); \ - __ret_554; \ +#define vmul_laneq_f32(__p0_646, __p1_646, __p2_646) __extension__ ({ \ + float32x2_t __s0_646 = __p0_646; \ + float32x4_t __s1_646 = __p1_646; \ + float32x2_t __ret_646; \ + __ret_646 = __s0_646 * splat_laneq_f32(__s1_646, __p2_646); \ + __ret_646; \ }) #else -#define vmul_laneq_f32(__p0_555, __p1_555, __p2_555) __extension__ ({ \ - float32x2_t __s0_555 = __p0_555; \ - float32x4_t __s1_555 = __p1_555; \ - float32x2_t __rev0_555; __rev0_555 = __builtin_shufflevector(__s0_555, __s0_555, 1, 0); \ - float32x4_t __rev1_555; __rev1_555 = __builtin_shufflevector(__s1_555, __s1_555, 3, 2, 1, 0); \ - float32x2_t __ret_555; \ - __ret_555 = __rev0_555 * __noswap_splat_laneq_f32(__rev1_555, __p2_555); \ - __ret_555 = __builtin_shufflevector(__ret_555, __ret_555, 1, 0); \ - __ret_555; \ +#define vmul_laneq_f32(__p0_647, __p1_647, __p2_647) __extension__ ({ \ + float32x2_t __s0_647 = __p0_647; \ + float32x4_t __s1_647 = __p1_647; \ + float32x2_t __rev0_647; __rev0_647 = __builtin_shufflevector(__s0_647, __s0_647, 1, 0); \ + float32x4_t __rev1_647; __rev1_647 = __builtin_shufflevector(__s1_647, __s1_647, 3, 2, 1, 0); \ + float32x2_t __ret_647; \ + __ret_647 = __rev0_647 * __noswap_splat_laneq_f32(__rev1_647, __p2_647); \ + __ret_647 = __builtin_shufflevector(__ret_647, __ret_647, 1, 0); \ + __ret_647; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmul_laneq_s32(__p0_556, __p1_556, __p2_556) __extension__ ({ \ - int32x2_t __s0_556 = __p0_556; \ - int32x4_t __s1_556 = __p1_556; \ - int32x2_t __ret_556; \ - __ret_556 = __s0_556 * splat_laneq_s32(__s1_556, __p2_556); \ - __ret_556; \ +#define vmul_laneq_s32(__p0_648, __p1_648, __p2_648) __extension__ ({ \ + int32x2_t __s0_648 = __p0_648; \ + int32x4_t __s1_648 = __p1_648; \ + int32x2_t __ret_648; \ + __ret_648 = __s0_648 * splat_laneq_s32(__s1_648, __p2_648); \ + __ret_648; \ }) #else -#define vmul_laneq_s32(__p0_557, __p1_557, __p2_557) __extension__ ({ \ - int32x2_t __s0_557 = __p0_557; \ - int32x4_t __s1_557 = __p1_557; \ - int32x2_t __rev0_557; __rev0_557 = __builtin_shufflevector(__s0_557, __s0_557, 1, 0); \ - int32x4_t __rev1_557; __rev1_557 = __builtin_shufflevector(__s1_557, __s1_557, 3, 2, 1, 0); \ - int32x2_t __ret_557; \ - __ret_557 = __rev0_557 * __noswap_splat_laneq_s32(__rev1_557, __p2_557); \ - __ret_557 = __builtin_shufflevector(__ret_557, __ret_557, 1, 0); \ - __ret_557; \ +#define vmul_laneq_s32(__p0_649, __p1_649, __p2_649) __extension__ ({ \ + int32x2_t __s0_649 = __p0_649; \ + int32x4_t __s1_649 = __p1_649; \ + int32x2_t __rev0_649; __rev0_649 = __builtin_shufflevector(__s0_649, __s0_649, 1, 0); \ + int32x4_t __rev1_649; __rev1_649 = __builtin_shufflevector(__s1_649, __s1_649, 3, 2, 1, 0); \ + int32x2_t __ret_649; \ + __ret_649 = __rev0_649 * __noswap_splat_laneq_s32(__rev1_649, __p2_649); \ + __ret_649 = __builtin_shufflevector(__ret_649, __ret_649, 1, 0); \ + __ret_649; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmul_laneq_s16(__p0_558, __p1_558, __p2_558) 
__extension__ ({ \ - int16x4_t __s0_558 = __p0_558; \ - int16x8_t __s1_558 = __p1_558; \ - int16x4_t __ret_558; \ - __ret_558 = __s0_558 * splat_laneq_s16(__s1_558, __p2_558); \ - __ret_558; \ +#define vmul_laneq_s16(__p0_650, __p1_650, __p2_650) __extension__ ({ \ + int16x4_t __s0_650 = __p0_650; \ + int16x8_t __s1_650 = __p1_650; \ + int16x4_t __ret_650; \ + __ret_650 = __s0_650 * splat_laneq_s16(__s1_650, __p2_650); \ + __ret_650; \ }) #else -#define vmul_laneq_s16(__p0_559, __p1_559, __p2_559) __extension__ ({ \ - int16x4_t __s0_559 = __p0_559; \ - int16x8_t __s1_559 = __p1_559; \ - int16x4_t __rev0_559; __rev0_559 = __builtin_shufflevector(__s0_559, __s0_559, 3, 2, 1, 0); \ - int16x8_t __rev1_559; __rev1_559 = __builtin_shufflevector(__s1_559, __s1_559, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __ret_559; \ - __ret_559 = __rev0_559 * __noswap_splat_laneq_s16(__rev1_559, __p2_559); \ - __ret_559 = __builtin_shufflevector(__ret_559, __ret_559, 3, 2, 1, 0); \ - __ret_559; \ +#define vmul_laneq_s16(__p0_651, __p1_651, __p2_651) __extension__ ({ \ + int16x4_t __s0_651 = __p0_651; \ + int16x8_t __s1_651 = __p1_651; \ + int16x4_t __rev0_651; __rev0_651 = __builtin_shufflevector(__s0_651, __s0_651, 3, 2, 1, 0); \ + int16x8_t __rev1_651; __rev1_651 = __builtin_shufflevector(__s1_651, __s1_651, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __ret_651; \ + __ret_651 = __rev0_651 * __noswap_splat_laneq_s16(__rev1_651, __p2_651); \ + __ret_651 = __builtin_shufflevector(__ret_651, __ret_651, 3, 2, 1, 0); \ + __ret_651; \ }) #endif @@ -54471,170 +56141,170 @@ __ai poly128_t vmull_high_p64(poly64x2_t __p0, poly64x2_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_high_lane_u32(__p0_560, __p1_560, __p2_560) __extension__ ({ \ - uint32x4_t __s0_560 = __p0_560; \ - uint32x2_t __s1_560 = __p1_560; \ - uint64x2_t __ret_560; \ - __ret_560 = vmull_u32(vget_high_u32(__s0_560), splat_lane_u32(__s1_560, __p2_560)); \ - __ret_560; \ +#define vmull_high_lane_u32(__p0_652, __p1_652, __p2_652) __extension__ ({ \ + uint32x4_t __s0_652 = __p0_652; \ + uint32x2_t __s1_652 = __p1_652; \ + uint64x2_t __ret_652; \ + __ret_652 = vmull_u32(vget_high_u32(__s0_652), splat_lane_u32(__s1_652, __p2_652)); \ + __ret_652; \ }) #else -#define vmull_high_lane_u32(__p0_561, __p1_561, __p2_561) __extension__ ({ \ - uint32x4_t __s0_561 = __p0_561; \ - uint32x2_t __s1_561 = __p1_561; \ - uint32x4_t __rev0_561; __rev0_561 = __builtin_shufflevector(__s0_561, __s0_561, 3, 2, 1, 0); \ - uint32x2_t __rev1_561; __rev1_561 = __builtin_shufflevector(__s1_561, __s1_561, 1, 0); \ - uint64x2_t __ret_561; \ - __ret_561 = __noswap_vmull_u32(__noswap_vget_high_u32(__rev0_561), __noswap_splat_lane_u32(__rev1_561, __p2_561)); \ - __ret_561 = __builtin_shufflevector(__ret_561, __ret_561, 1, 0); \ - __ret_561; \ +#define vmull_high_lane_u32(__p0_653, __p1_653, __p2_653) __extension__ ({ \ + uint32x4_t __s0_653 = __p0_653; \ + uint32x2_t __s1_653 = __p1_653; \ + uint32x4_t __rev0_653; __rev0_653 = __builtin_shufflevector(__s0_653, __s0_653, 3, 2, 1, 0); \ + uint32x2_t __rev1_653; __rev1_653 = __builtin_shufflevector(__s1_653, __s1_653, 1, 0); \ + uint64x2_t __ret_653; \ + __ret_653 = __noswap_vmull_u32(__noswap_vget_high_u32(__rev0_653), __noswap_splat_lane_u32(__rev1_653, __p2_653)); \ + __ret_653 = __builtin_shufflevector(__ret_653, __ret_653, 1, 0); \ + __ret_653; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_high_lane_u16(__p0_562, __p1_562, __p2_562) __extension__ ({ \ - uint16x8_t __s0_562 = __p0_562; \ - uint16x4_t __s1_562 = __p1_562; \ - 
uint32x4_t __ret_562; \ - __ret_562 = vmull_u16(vget_high_u16(__s0_562), splat_lane_u16(__s1_562, __p2_562)); \ - __ret_562; \ +#define vmull_high_lane_u16(__p0_654, __p1_654, __p2_654) __extension__ ({ \ + uint16x8_t __s0_654 = __p0_654; \ + uint16x4_t __s1_654 = __p1_654; \ + uint32x4_t __ret_654; \ + __ret_654 = vmull_u16(vget_high_u16(__s0_654), splat_lane_u16(__s1_654, __p2_654)); \ + __ret_654; \ }) #else -#define vmull_high_lane_u16(__p0_563, __p1_563, __p2_563) __extension__ ({ \ - uint16x8_t __s0_563 = __p0_563; \ - uint16x4_t __s1_563 = __p1_563; \ - uint16x8_t __rev0_563; __rev0_563 = __builtin_shufflevector(__s0_563, __s0_563, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x4_t __rev1_563; __rev1_563 = __builtin_shufflevector(__s1_563, __s1_563, 3, 2, 1, 0); \ - uint32x4_t __ret_563; \ - __ret_563 = __noswap_vmull_u16(__noswap_vget_high_u16(__rev0_563), __noswap_splat_lane_u16(__rev1_563, __p2_563)); \ - __ret_563 = __builtin_shufflevector(__ret_563, __ret_563, 3, 2, 1, 0); \ - __ret_563; \ +#define vmull_high_lane_u16(__p0_655, __p1_655, __p2_655) __extension__ ({ \ + uint16x8_t __s0_655 = __p0_655; \ + uint16x4_t __s1_655 = __p1_655; \ + uint16x8_t __rev0_655; __rev0_655 = __builtin_shufflevector(__s0_655, __s0_655, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x4_t __rev1_655; __rev1_655 = __builtin_shufflevector(__s1_655, __s1_655, 3, 2, 1, 0); \ + uint32x4_t __ret_655; \ + __ret_655 = __noswap_vmull_u16(__noswap_vget_high_u16(__rev0_655), __noswap_splat_lane_u16(__rev1_655, __p2_655)); \ + __ret_655 = __builtin_shufflevector(__ret_655, __ret_655, 3, 2, 1, 0); \ + __ret_655; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_high_lane_s32(__p0_564, __p1_564, __p2_564) __extension__ ({ \ - int32x4_t __s0_564 = __p0_564; \ - int32x2_t __s1_564 = __p1_564; \ - int64x2_t __ret_564; \ - __ret_564 = vmull_s32(vget_high_s32(__s0_564), splat_lane_s32(__s1_564, __p2_564)); \ - __ret_564; \ +#define vmull_high_lane_s32(__p0_656, __p1_656, __p2_656) __extension__ ({ \ + int32x4_t __s0_656 = __p0_656; \ + int32x2_t __s1_656 = __p1_656; \ + int64x2_t __ret_656; \ + __ret_656 = vmull_s32(vget_high_s32(__s0_656), splat_lane_s32(__s1_656, __p2_656)); \ + __ret_656; \ }) #else -#define vmull_high_lane_s32(__p0_565, __p1_565, __p2_565) __extension__ ({ \ - int32x4_t __s0_565 = __p0_565; \ - int32x2_t __s1_565 = __p1_565; \ - int32x4_t __rev0_565; __rev0_565 = __builtin_shufflevector(__s0_565, __s0_565, 3, 2, 1, 0); \ - int32x2_t __rev1_565; __rev1_565 = __builtin_shufflevector(__s1_565, __s1_565, 1, 0); \ - int64x2_t __ret_565; \ - __ret_565 = __noswap_vmull_s32(__noswap_vget_high_s32(__rev0_565), __noswap_splat_lane_s32(__rev1_565, __p2_565)); \ - __ret_565 = __builtin_shufflevector(__ret_565, __ret_565, 1, 0); \ - __ret_565; \ +#define vmull_high_lane_s32(__p0_657, __p1_657, __p2_657) __extension__ ({ \ + int32x4_t __s0_657 = __p0_657; \ + int32x2_t __s1_657 = __p1_657; \ + int32x4_t __rev0_657; __rev0_657 = __builtin_shufflevector(__s0_657, __s0_657, 3, 2, 1, 0); \ + int32x2_t __rev1_657; __rev1_657 = __builtin_shufflevector(__s1_657, __s1_657, 1, 0); \ + int64x2_t __ret_657; \ + __ret_657 = __noswap_vmull_s32(__noswap_vget_high_s32(__rev0_657), __noswap_splat_lane_s32(__rev1_657, __p2_657)); \ + __ret_657 = __builtin_shufflevector(__ret_657, __ret_657, 1, 0); \ + __ret_657; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_high_lane_s16(__p0_566, __p1_566, __p2_566) __extension__ ({ \ - int16x8_t __s0_566 = __p0_566; \ - int16x4_t __s1_566 = __p1_566; \ - int32x4_t __ret_566; \ - __ret_566 = 
vmull_s16(vget_high_s16(__s0_566), splat_lane_s16(__s1_566, __p2_566)); \ - __ret_566; \ +#define vmull_high_lane_s16(__p0_658, __p1_658, __p2_658) __extension__ ({ \ + int16x8_t __s0_658 = __p0_658; \ + int16x4_t __s1_658 = __p1_658; \ + int32x4_t __ret_658; \ + __ret_658 = vmull_s16(vget_high_s16(__s0_658), splat_lane_s16(__s1_658, __p2_658)); \ + __ret_658; \ }) #else -#define vmull_high_lane_s16(__p0_567, __p1_567, __p2_567) __extension__ ({ \ - int16x8_t __s0_567 = __p0_567; \ - int16x4_t __s1_567 = __p1_567; \ - int16x8_t __rev0_567; __rev0_567 = __builtin_shufflevector(__s0_567, __s0_567, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev1_567; __rev1_567 = __builtin_shufflevector(__s1_567, __s1_567, 3, 2, 1, 0); \ - int32x4_t __ret_567; \ - __ret_567 = __noswap_vmull_s16(__noswap_vget_high_s16(__rev0_567), __noswap_splat_lane_s16(__rev1_567, __p2_567)); \ - __ret_567 = __builtin_shufflevector(__ret_567, __ret_567, 3, 2, 1, 0); \ - __ret_567; \ +#define vmull_high_lane_s16(__p0_659, __p1_659, __p2_659) __extension__ ({ \ + int16x8_t __s0_659 = __p0_659; \ + int16x4_t __s1_659 = __p1_659; \ + int16x8_t __rev0_659; __rev0_659 = __builtin_shufflevector(__s0_659, __s0_659, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __rev1_659; __rev1_659 = __builtin_shufflevector(__s1_659, __s1_659, 3, 2, 1, 0); \ + int32x4_t __ret_659; \ + __ret_659 = __noswap_vmull_s16(__noswap_vget_high_s16(__rev0_659), __noswap_splat_lane_s16(__rev1_659, __p2_659)); \ + __ret_659 = __builtin_shufflevector(__ret_659, __ret_659, 3, 2, 1, 0); \ + __ret_659; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_high_laneq_u32(__p0_568, __p1_568, __p2_568) __extension__ ({ \ - uint32x4_t __s0_568 = __p0_568; \ - uint32x4_t __s1_568 = __p1_568; \ - uint64x2_t __ret_568; \ - __ret_568 = vmull_u32(vget_high_u32(__s0_568), splat_laneq_u32(__s1_568, __p2_568)); \ - __ret_568; \ +#define vmull_high_laneq_u32(__p0_660, __p1_660, __p2_660) __extension__ ({ \ + uint32x4_t __s0_660 = __p0_660; \ + uint32x4_t __s1_660 = __p1_660; \ + uint64x2_t __ret_660; \ + __ret_660 = vmull_u32(vget_high_u32(__s0_660), splat_laneq_u32(__s1_660, __p2_660)); \ + __ret_660; \ }) #else -#define vmull_high_laneq_u32(__p0_569, __p1_569, __p2_569) __extension__ ({ \ - uint32x4_t __s0_569 = __p0_569; \ - uint32x4_t __s1_569 = __p1_569; \ - uint32x4_t __rev0_569; __rev0_569 = __builtin_shufflevector(__s0_569, __s0_569, 3, 2, 1, 0); \ - uint32x4_t __rev1_569; __rev1_569 = __builtin_shufflevector(__s1_569, __s1_569, 3, 2, 1, 0); \ - uint64x2_t __ret_569; \ - __ret_569 = __noswap_vmull_u32(__noswap_vget_high_u32(__rev0_569), __noswap_splat_laneq_u32(__rev1_569, __p2_569)); \ - __ret_569 = __builtin_shufflevector(__ret_569, __ret_569, 1, 0); \ - __ret_569; \ +#define vmull_high_laneq_u32(__p0_661, __p1_661, __p2_661) __extension__ ({ \ + uint32x4_t __s0_661 = __p0_661; \ + uint32x4_t __s1_661 = __p1_661; \ + uint32x4_t __rev0_661; __rev0_661 = __builtin_shufflevector(__s0_661, __s0_661, 3, 2, 1, 0); \ + uint32x4_t __rev1_661; __rev1_661 = __builtin_shufflevector(__s1_661, __s1_661, 3, 2, 1, 0); \ + uint64x2_t __ret_661; \ + __ret_661 = __noswap_vmull_u32(__noswap_vget_high_u32(__rev0_661), __noswap_splat_laneq_u32(__rev1_661, __p2_661)); \ + __ret_661 = __builtin_shufflevector(__ret_661, __ret_661, 1, 0); \ + __ret_661; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_high_laneq_u16(__p0_570, __p1_570, __p2_570) __extension__ ({ \ - uint16x8_t __s0_570 = __p0_570; \ - uint16x8_t __s1_570 = __p1_570; \ - uint32x4_t __ret_570; \ - __ret_570 = 
vmull_u16(vget_high_u16(__s0_570), splat_laneq_u16(__s1_570, __p2_570)); \ - __ret_570; \ +#define vmull_high_laneq_u16(__p0_662, __p1_662, __p2_662) __extension__ ({ \ + uint16x8_t __s0_662 = __p0_662; \ + uint16x8_t __s1_662 = __p1_662; \ + uint32x4_t __ret_662; \ + __ret_662 = vmull_u16(vget_high_u16(__s0_662), splat_laneq_u16(__s1_662, __p2_662)); \ + __ret_662; \ }) #else -#define vmull_high_laneq_u16(__p0_571, __p1_571, __p2_571) __extension__ ({ \ - uint16x8_t __s0_571 = __p0_571; \ - uint16x8_t __s1_571 = __p1_571; \ - uint16x8_t __rev0_571; __rev0_571 = __builtin_shufflevector(__s0_571, __s0_571, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1_571; __rev1_571 = __builtin_shufflevector(__s1_571, __s1_571, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint32x4_t __ret_571; \ - __ret_571 = __noswap_vmull_u16(__noswap_vget_high_u16(__rev0_571), __noswap_splat_laneq_u16(__rev1_571, __p2_571)); \ - __ret_571 = __builtin_shufflevector(__ret_571, __ret_571, 3, 2, 1, 0); \ - __ret_571; \ +#define vmull_high_laneq_u16(__p0_663, __p1_663, __p2_663) __extension__ ({ \ + uint16x8_t __s0_663 = __p0_663; \ + uint16x8_t __s1_663 = __p1_663; \ + uint16x8_t __rev0_663; __rev0_663 = __builtin_shufflevector(__s0_663, __s0_663, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev1_663; __rev1_663 = __builtin_shufflevector(__s1_663, __s1_663, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint32x4_t __ret_663; \ + __ret_663 = __noswap_vmull_u16(__noswap_vget_high_u16(__rev0_663), __noswap_splat_laneq_u16(__rev1_663, __p2_663)); \ + __ret_663 = __builtin_shufflevector(__ret_663, __ret_663, 3, 2, 1, 0); \ + __ret_663; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_high_laneq_s32(__p0_572, __p1_572, __p2_572) __extension__ ({ \ - int32x4_t __s0_572 = __p0_572; \ - int32x4_t __s1_572 = __p1_572; \ - int64x2_t __ret_572; \ - __ret_572 = vmull_s32(vget_high_s32(__s0_572), splat_laneq_s32(__s1_572, __p2_572)); \ - __ret_572; \ +#define vmull_high_laneq_s32(__p0_664, __p1_664, __p2_664) __extension__ ({ \ + int32x4_t __s0_664 = __p0_664; \ + int32x4_t __s1_664 = __p1_664; \ + int64x2_t __ret_664; \ + __ret_664 = vmull_s32(vget_high_s32(__s0_664), splat_laneq_s32(__s1_664, __p2_664)); \ + __ret_664; \ }) #else -#define vmull_high_laneq_s32(__p0_573, __p1_573, __p2_573) __extension__ ({ \ - int32x4_t __s0_573 = __p0_573; \ - int32x4_t __s1_573 = __p1_573; \ - int32x4_t __rev0_573; __rev0_573 = __builtin_shufflevector(__s0_573, __s0_573, 3, 2, 1, 0); \ - int32x4_t __rev1_573; __rev1_573 = __builtin_shufflevector(__s1_573, __s1_573, 3, 2, 1, 0); \ - int64x2_t __ret_573; \ - __ret_573 = __noswap_vmull_s32(__noswap_vget_high_s32(__rev0_573), __noswap_splat_laneq_s32(__rev1_573, __p2_573)); \ - __ret_573 = __builtin_shufflevector(__ret_573, __ret_573, 1, 0); \ - __ret_573; \ +#define vmull_high_laneq_s32(__p0_665, __p1_665, __p2_665) __extension__ ({ \ + int32x4_t __s0_665 = __p0_665; \ + int32x4_t __s1_665 = __p1_665; \ + int32x4_t __rev0_665; __rev0_665 = __builtin_shufflevector(__s0_665, __s0_665, 3, 2, 1, 0); \ + int32x4_t __rev1_665; __rev1_665 = __builtin_shufflevector(__s1_665, __s1_665, 3, 2, 1, 0); \ + int64x2_t __ret_665; \ + __ret_665 = __noswap_vmull_s32(__noswap_vget_high_s32(__rev0_665), __noswap_splat_laneq_s32(__rev1_665, __p2_665)); \ + __ret_665 = __builtin_shufflevector(__ret_665, __ret_665, 1, 0); \ + __ret_665; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_high_laneq_s16(__p0_574, __p1_574, __p2_574) __extension__ ({ \ - int16x8_t __s0_574 = __p0_574; \ - int16x8_t __s1_574 = __p1_574; \ - int32x4_t __ret_574; \ - __ret_574 = 
vmull_s16(vget_high_s16(__s0_574), splat_laneq_s16(__s1_574, __p2_574)); \ - __ret_574; \ +#define vmull_high_laneq_s16(__p0_666, __p1_666, __p2_666) __extension__ ({ \ + int16x8_t __s0_666 = __p0_666; \ + int16x8_t __s1_666 = __p1_666; \ + int32x4_t __ret_666; \ + __ret_666 = vmull_s16(vget_high_s16(__s0_666), splat_laneq_s16(__s1_666, __p2_666)); \ + __ret_666; \ }) #else -#define vmull_high_laneq_s16(__p0_575, __p1_575, __p2_575) __extension__ ({ \ - int16x8_t __s0_575 = __p0_575; \ - int16x8_t __s1_575 = __p1_575; \ - int16x8_t __rev0_575; __rev0_575 = __builtin_shufflevector(__s0_575, __s0_575, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_575; __rev1_575 = __builtin_shufflevector(__s1_575, __s1_575, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x4_t __ret_575; \ - __ret_575 = __noswap_vmull_s16(__noswap_vget_high_s16(__rev0_575), __noswap_splat_laneq_s16(__rev1_575, __p2_575)); \ - __ret_575 = __builtin_shufflevector(__ret_575, __ret_575, 3, 2, 1, 0); \ - __ret_575; \ +#define vmull_high_laneq_s16(__p0_667, __p1_667, __p2_667) __extension__ ({ \ + int16x8_t __s0_667 = __p0_667; \ + int16x8_t __s1_667 = __p1_667; \ + int16x8_t __rev0_667; __rev0_667 = __builtin_shufflevector(__s0_667, __s0_667, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1_667; __rev1_667 = __builtin_shufflevector(__s1_667, __s1_667, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x4_t __ret_667; \ + __ret_667 = __noswap_vmull_s16(__noswap_vget_high_s16(__rev0_667), __noswap_splat_laneq_s16(__rev1_667, __p2_667)); \ + __ret_667 = __builtin_shufflevector(__ret_667, __ret_667, 3, 2, 1, 0); \ + __ret_667; \ }) #endif @@ -54703,86 +56373,86 @@ __ai int32x4_t vmull_high_n_s16(int16x8_t __p0, int16_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_laneq_u32(__p0_576, __p1_576, __p2_576) __extension__ ({ \ - uint32x2_t __s0_576 = __p0_576; \ - uint32x4_t __s1_576 = __p1_576; \ - uint64x2_t __ret_576; \ - __ret_576 = vmull_u32(__s0_576, splat_laneq_u32(__s1_576, __p2_576)); \ - __ret_576; \ +#define vmull_laneq_u32(__p0_668, __p1_668, __p2_668) __extension__ ({ \ + uint32x2_t __s0_668 = __p0_668; \ + uint32x4_t __s1_668 = __p1_668; \ + uint64x2_t __ret_668; \ + __ret_668 = vmull_u32(__s0_668, splat_laneq_u32(__s1_668, __p2_668)); \ + __ret_668; \ }) #else -#define vmull_laneq_u32(__p0_577, __p1_577, __p2_577) __extension__ ({ \ - uint32x2_t __s0_577 = __p0_577; \ - uint32x4_t __s1_577 = __p1_577; \ - uint32x2_t __rev0_577; __rev0_577 = __builtin_shufflevector(__s0_577, __s0_577, 1, 0); \ - uint32x4_t __rev1_577; __rev1_577 = __builtin_shufflevector(__s1_577, __s1_577, 3, 2, 1, 0); \ - uint64x2_t __ret_577; \ - __ret_577 = __noswap_vmull_u32(__rev0_577, __noswap_splat_laneq_u32(__rev1_577, __p2_577)); \ - __ret_577 = __builtin_shufflevector(__ret_577, __ret_577, 1, 0); \ - __ret_577; \ +#define vmull_laneq_u32(__p0_669, __p1_669, __p2_669) __extension__ ({ \ + uint32x2_t __s0_669 = __p0_669; \ + uint32x4_t __s1_669 = __p1_669; \ + uint32x2_t __rev0_669; __rev0_669 = __builtin_shufflevector(__s0_669, __s0_669, 1, 0); \ + uint32x4_t __rev1_669; __rev1_669 = __builtin_shufflevector(__s1_669, __s1_669, 3, 2, 1, 0); \ + uint64x2_t __ret_669; \ + __ret_669 = __noswap_vmull_u32(__rev0_669, __noswap_splat_laneq_u32(__rev1_669, __p2_669)); \ + __ret_669 = __builtin_shufflevector(__ret_669, __ret_669, 1, 0); \ + __ret_669; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_laneq_u16(__p0_578, __p1_578, __p2_578) __extension__ ({ \ - uint16x4_t __s0_578 = __p0_578; \ - uint16x8_t __s1_578 = __p1_578; \ - uint32x4_t __ret_578; \ - __ret_578 = 
vmull_u16(__s0_578, splat_laneq_u16(__s1_578, __p2_578)); \ - __ret_578; \ +#define vmull_laneq_u16(__p0_670, __p1_670, __p2_670) __extension__ ({ \ + uint16x4_t __s0_670 = __p0_670; \ + uint16x8_t __s1_670 = __p1_670; \ + uint32x4_t __ret_670; \ + __ret_670 = vmull_u16(__s0_670, splat_laneq_u16(__s1_670, __p2_670)); \ + __ret_670; \ }) #else -#define vmull_laneq_u16(__p0_579, __p1_579, __p2_579) __extension__ ({ \ - uint16x4_t __s0_579 = __p0_579; \ - uint16x8_t __s1_579 = __p1_579; \ - uint16x4_t __rev0_579; __rev0_579 = __builtin_shufflevector(__s0_579, __s0_579, 3, 2, 1, 0); \ - uint16x8_t __rev1_579; __rev1_579 = __builtin_shufflevector(__s1_579, __s1_579, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint32x4_t __ret_579; \ - __ret_579 = __noswap_vmull_u16(__rev0_579, __noswap_splat_laneq_u16(__rev1_579, __p2_579)); \ - __ret_579 = __builtin_shufflevector(__ret_579, __ret_579, 3, 2, 1, 0); \ - __ret_579; \ +#define vmull_laneq_u16(__p0_671, __p1_671, __p2_671) __extension__ ({ \ + uint16x4_t __s0_671 = __p0_671; \ + uint16x8_t __s1_671 = __p1_671; \ + uint16x4_t __rev0_671; __rev0_671 = __builtin_shufflevector(__s0_671, __s0_671, 3, 2, 1, 0); \ + uint16x8_t __rev1_671; __rev1_671 = __builtin_shufflevector(__s1_671, __s1_671, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint32x4_t __ret_671; \ + __ret_671 = __noswap_vmull_u16(__rev0_671, __noswap_splat_laneq_u16(__rev1_671, __p2_671)); \ + __ret_671 = __builtin_shufflevector(__ret_671, __ret_671, 3, 2, 1, 0); \ + __ret_671; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_laneq_s32(__p0_580, __p1_580, __p2_580) __extension__ ({ \ - int32x2_t __s0_580 = __p0_580; \ - int32x4_t __s1_580 = __p1_580; \ - int64x2_t __ret_580; \ - __ret_580 = vmull_s32(__s0_580, splat_laneq_s32(__s1_580, __p2_580)); \ - __ret_580; \ +#define vmull_laneq_s32(__p0_672, __p1_672, __p2_672) __extension__ ({ \ + int32x2_t __s0_672 = __p0_672; \ + int32x4_t __s1_672 = __p1_672; \ + int64x2_t __ret_672; \ + __ret_672 = vmull_s32(__s0_672, splat_laneq_s32(__s1_672, __p2_672)); \ + __ret_672; \ }) #else -#define vmull_laneq_s32(__p0_581, __p1_581, __p2_581) __extension__ ({ \ - int32x2_t __s0_581 = __p0_581; \ - int32x4_t __s1_581 = __p1_581; \ - int32x2_t __rev0_581; __rev0_581 = __builtin_shufflevector(__s0_581, __s0_581, 1, 0); \ - int32x4_t __rev1_581; __rev1_581 = __builtin_shufflevector(__s1_581, __s1_581, 3, 2, 1, 0); \ - int64x2_t __ret_581; \ - __ret_581 = __noswap_vmull_s32(__rev0_581, __noswap_splat_laneq_s32(__rev1_581, __p2_581)); \ - __ret_581 = __builtin_shufflevector(__ret_581, __ret_581, 1, 0); \ - __ret_581; \ +#define vmull_laneq_s32(__p0_673, __p1_673, __p2_673) __extension__ ({ \ + int32x2_t __s0_673 = __p0_673; \ + int32x4_t __s1_673 = __p1_673; \ + int32x2_t __rev0_673; __rev0_673 = __builtin_shufflevector(__s0_673, __s0_673, 1, 0); \ + int32x4_t __rev1_673; __rev1_673 = __builtin_shufflevector(__s1_673, __s1_673, 3, 2, 1, 0); \ + int64x2_t __ret_673; \ + __ret_673 = __noswap_vmull_s32(__rev0_673, __noswap_splat_laneq_s32(__rev1_673, __p2_673)); \ + __ret_673 = __builtin_shufflevector(__ret_673, __ret_673, 1, 0); \ + __ret_673; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmull_laneq_s16(__p0_582, __p1_582, __p2_582) __extension__ ({ \ - int16x4_t __s0_582 = __p0_582; \ - int16x8_t __s1_582 = __p1_582; \ - int32x4_t __ret_582; \ - __ret_582 = vmull_s16(__s0_582, splat_laneq_s16(__s1_582, __p2_582)); \ - __ret_582; \ +#define vmull_laneq_s16(__p0_674, __p1_674, __p2_674) __extension__ ({ \ + int16x4_t __s0_674 = __p0_674; \ + int16x8_t __s1_674 = __p1_674; \ + int32x4_t 
__ret_674; \ + __ret_674 = vmull_s16(__s0_674, splat_laneq_s16(__s1_674, __p2_674)); \ + __ret_674; \ }) #else -#define vmull_laneq_s16(__p0_583, __p1_583, __p2_583) __extension__ ({ \ - int16x4_t __s0_583 = __p0_583; \ - int16x8_t __s1_583 = __p1_583; \ - int16x4_t __rev0_583; __rev0_583 = __builtin_shufflevector(__s0_583, __s0_583, 3, 2, 1, 0); \ - int16x8_t __rev1_583; __rev1_583 = __builtin_shufflevector(__s1_583, __s1_583, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x4_t __ret_583; \ - __ret_583 = __noswap_vmull_s16(__rev0_583, __noswap_splat_laneq_s16(__rev1_583, __p2_583)); \ - __ret_583 = __builtin_shufflevector(__ret_583, __ret_583, 3, 2, 1, 0); \ - __ret_583; \ +#define vmull_laneq_s16(__p0_675, __p1_675, __p2_675) __extension__ ({ \ + int16x4_t __s0_675 = __p0_675; \ + int16x8_t __s1_675 = __p1_675; \ + int16x4_t __rev0_675; __rev0_675 = __builtin_shufflevector(__s0_675, __s0_675, 3, 2, 1, 0); \ + int16x8_t __rev1_675; __rev1_675 = __builtin_shufflevector(__s1_675, __s1_675, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x4_t __ret_675; \ + __ret_675 = __noswap_vmull_s16(__rev0_675, __noswap_splat_laneq_s16(__rev1_675, __p2_675)); \ + __ret_675 = __builtin_shufflevector(__ret_675, __ret_675, 3, 2, 1, 0); \ + __ret_675; \ }) #endif @@ -54867,192 +56537,192 @@ __ai float32_t vmulxs_f32(float32_t __p0, float32_t __p1) { __ret = (float32_t) __builtin_neon_vmulxs_f32(__p0, __p1); return __ret; } -#define vmulxd_lane_f64(__p0_584, __p1_584, __p2_584) __extension__ ({ \ - float64_t __s0_584 = __p0_584; \ - float64x1_t __s1_584 = __p1_584; \ - float64_t __ret_584; \ - __ret_584 = vmulxd_f64(__s0_584, vget_lane_f64(__s1_584, __p2_584)); \ - __ret_584; \ +#define vmulxd_lane_f64(__p0_676, __p1_676, __p2_676) __extension__ ({ \ + float64_t __s0_676 = __p0_676; \ + float64x1_t __s1_676 = __p1_676; \ + float64_t __ret_676; \ + __ret_676 = vmulxd_f64(__s0_676, vget_lane_f64(__s1_676, __p2_676)); \ + __ret_676; \ }) #ifdef __LITTLE_ENDIAN__ -#define vmulxs_lane_f32(__p0_585, __p1_585, __p2_585) __extension__ ({ \ - float32_t __s0_585 = __p0_585; \ - float32x2_t __s1_585 = __p1_585; \ - float32_t __ret_585; \ - __ret_585 = vmulxs_f32(__s0_585, vget_lane_f32(__s1_585, __p2_585)); \ - __ret_585; \ +#define vmulxs_lane_f32(__p0_677, __p1_677, __p2_677) __extension__ ({ \ + float32_t __s0_677 = __p0_677; \ + float32x2_t __s1_677 = __p1_677; \ + float32_t __ret_677; \ + __ret_677 = vmulxs_f32(__s0_677, vget_lane_f32(__s1_677, __p2_677)); \ + __ret_677; \ }) #else -#define vmulxs_lane_f32(__p0_586, __p1_586, __p2_586) __extension__ ({ \ - float32_t __s0_586 = __p0_586; \ - float32x2_t __s1_586 = __p1_586; \ - float32x2_t __rev1_586; __rev1_586 = __builtin_shufflevector(__s1_586, __s1_586, 1, 0); \ - float32_t __ret_586; \ - __ret_586 = vmulxs_f32(__s0_586, __noswap_vget_lane_f32(__rev1_586, __p2_586)); \ - __ret_586; \ +#define vmulxs_lane_f32(__p0_678, __p1_678, __p2_678) __extension__ ({ \ + float32_t __s0_678 = __p0_678; \ + float32x2_t __s1_678 = __p1_678; \ + float32x2_t __rev1_678; __rev1_678 = __builtin_shufflevector(__s1_678, __s1_678, 1, 0); \ + float32_t __ret_678; \ + __ret_678 = vmulxs_f32(__s0_678, __noswap_vget_lane_f32(__rev1_678, __p2_678)); \ + __ret_678; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulxq_lane_f64(__p0_587, __p1_587, __p2_587) __extension__ ({ \ - float64x2_t __s0_587 = __p0_587; \ - float64x1_t __s1_587 = __p1_587; \ - float64x2_t __ret_587; \ - __ret_587 = vmulxq_f64(__s0_587, splatq_lane_f64(__s1_587, __p2_587)); \ - __ret_587; \ +#define vmulxq_lane_f64(__p0_679, __p1_679, __p2_679) 
__extension__ ({ \ + float64x2_t __s0_679 = __p0_679; \ + float64x1_t __s1_679 = __p1_679; \ + float64x2_t __ret_679; \ + __ret_679 = vmulxq_f64(__s0_679, splatq_lane_f64(__s1_679, __p2_679)); \ + __ret_679; \ }) #else -#define vmulxq_lane_f64(__p0_588, __p1_588, __p2_588) __extension__ ({ \ - float64x2_t __s0_588 = __p0_588; \ - float64x1_t __s1_588 = __p1_588; \ - float64x2_t __rev0_588; __rev0_588 = __builtin_shufflevector(__s0_588, __s0_588, 1, 0); \ - float64x2_t __ret_588; \ - __ret_588 = __noswap_vmulxq_f64(__rev0_588, __noswap_splatq_lane_f64(__s1_588, __p2_588)); \ - __ret_588 = __builtin_shufflevector(__ret_588, __ret_588, 1, 0); \ - __ret_588; \ +#define vmulxq_lane_f64(__p0_680, __p1_680, __p2_680) __extension__ ({ \ + float64x2_t __s0_680 = __p0_680; \ + float64x1_t __s1_680 = __p1_680; \ + float64x2_t __rev0_680; __rev0_680 = __builtin_shufflevector(__s0_680, __s0_680, 1, 0); \ + float64x2_t __ret_680; \ + __ret_680 = __noswap_vmulxq_f64(__rev0_680, __noswap_splatq_lane_f64(__s1_680, __p2_680)); \ + __ret_680 = __builtin_shufflevector(__ret_680, __ret_680, 1, 0); \ + __ret_680; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulxq_lane_f32(__p0_589, __p1_589, __p2_589) __extension__ ({ \ - float32x4_t __s0_589 = __p0_589; \ - float32x2_t __s1_589 = __p1_589; \ - float32x4_t __ret_589; \ - __ret_589 = vmulxq_f32(__s0_589, splatq_lane_f32(__s1_589, __p2_589)); \ - __ret_589; \ +#define vmulxq_lane_f32(__p0_681, __p1_681, __p2_681) __extension__ ({ \ + float32x4_t __s0_681 = __p0_681; \ + float32x2_t __s1_681 = __p1_681; \ + float32x4_t __ret_681; \ + __ret_681 = vmulxq_f32(__s0_681, splatq_lane_f32(__s1_681, __p2_681)); \ + __ret_681; \ }) #else -#define vmulxq_lane_f32(__p0_590, __p1_590, __p2_590) __extension__ ({ \ - float32x4_t __s0_590 = __p0_590; \ - float32x2_t __s1_590 = __p1_590; \ - float32x4_t __rev0_590; __rev0_590 = __builtin_shufflevector(__s0_590, __s0_590, 3, 2, 1, 0); \ - float32x2_t __rev1_590; __rev1_590 = __builtin_shufflevector(__s1_590, __s1_590, 1, 0); \ - float32x4_t __ret_590; \ - __ret_590 = __noswap_vmulxq_f32(__rev0_590, __noswap_splatq_lane_f32(__rev1_590, __p2_590)); \ - __ret_590 = __builtin_shufflevector(__ret_590, __ret_590, 3, 2, 1, 0); \ - __ret_590; \ +#define vmulxq_lane_f32(__p0_682, __p1_682, __p2_682) __extension__ ({ \ + float32x4_t __s0_682 = __p0_682; \ + float32x2_t __s1_682 = __p1_682; \ + float32x4_t __rev0_682; __rev0_682 = __builtin_shufflevector(__s0_682, __s0_682, 3, 2, 1, 0); \ + float32x2_t __rev1_682; __rev1_682 = __builtin_shufflevector(__s1_682, __s1_682, 1, 0); \ + float32x4_t __ret_682; \ + __ret_682 = __noswap_vmulxq_f32(__rev0_682, __noswap_splatq_lane_f32(__rev1_682, __p2_682)); \ + __ret_682 = __builtin_shufflevector(__ret_682, __ret_682, 3, 2, 1, 0); \ + __ret_682; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulx_lane_f32(__p0_591, __p1_591, __p2_591) __extension__ ({ \ - float32x2_t __s0_591 = __p0_591; \ - float32x2_t __s1_591 = __p1_591; \ - float32x2_t __ret_591; \ - __ret_591 = vmulx_f32(__s0_591, splat_lane_f32(__s1_591, __p2_591)); \ - __ret_591; \ +#define vmulx_lane_f32(__p0_683, __p1_683, __p2_683) __extension__ ({ \ + float32x2_t __s0_683 = __p0_683; \ + float32x2_t __s1_683 = __p1_683; \ + float32x2_t __ret_683; \ + __ret_683 = vmulx_f32(__s0_683, splat_lane_f32(__s1_683, __p2_683)); \ + __ret_683; \ }) #else -#define vmulx_lane_f32(__p0_592, __p1_592, __p2_592) __extension__ ({ \ - float32x2_t __s0_592 = __p0_592; \ - float32x2_t __s1_592 = __p1_592; \ - float32x2_t __rev0_592; __rev0_592 = 
__builtin_shufflevector(__s0_592, __s0_592, 1, 0); \ - float32x2_t __rev1_592; __rev1_592 = __builtin_shufflevector(__s1_592, __s1_592, 1, 0); \ - float32x2_t __ret_592; \ - __ret_592 = __noswap_vmulx_f32(__rev0_592, __noswap_splat_lane_f32(__rev1_592, __p2_592)); \ - __ret_592 = __builtin_shufflevector(__ret_592, __ret_592, 1, 0); \ - __ret_592; \ +#define vmulx_lane_f32(__p0_684, __p1_684, __p2_684) __extension__ ({ \ + float32x2_t __s0_684 = __p0_684; \ + float32x2_t __s1_684 = __p1_684; \ + float32x2_t __rev0_684; __rev0_684 = __builtin_shufflevector(__s0_684, __s0_684, 1, 0); \ + float32x2_t __rev1_684; __rev1_684 = __builtin_shufflevector(__s1_684, __s1_684, 1, 0); \ + float32x2_t __ret_684; \ + __ret_684 = __noswap_vmulx_f32(__rev0_684, __noswap_splat_lane_f32(__rev1_684, __p2_684)); \ + __ret_684 = __builtin_shufflevector(__ret_684, __ret_684, 1, 0); \ + __ret_684; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulxd_laneq_f64(__p0_593, __p1_593, __p2_593) __extension__ ({ \ - float64_t __s0_593 = __p0_593; \ - float64x2_t __s1_593 = __p1_593; \ - float64_t __ret_593; \ - __ret_593 = vmulxd_f64(__s0_593, vgetq_lane_f64(__s1_593, __p2_593)); \ - __ret_593; \ +#define vmulxd_laneq_f64(__p0_685, __p1_685, __p2_685) __extension__ ({ \ + float64_t __s0_685 = __p0_685; \ + float64x2_t __s1_685 = __p1_685; \ + float64_t __ret_685; \ + __ret_685 = vmulxd_f64(__s0_685, vgetq_lane_f64(__s1_685, __p2_685)); \ + __ret_685; \ }) #else -#define vmulxd_laneq_f64(__p0_594, __p1_594, __p2_594) __extension__ ({ \ - float64_t __s0_594 = __p0_594; \ - float64x2_t __s1_594 = __p1_594; \ - float64x2_t __rev1_594; __rev1_594 = __builtin_shufflevector(__s1_594, __s1_594, 1, 0); \ - float64_t __ret_594; \ - __ret_594 = vmulxd_f64(__s0_594, __noswap_vgetq_lane_f64(__rev1_594, __p2_594)); \ - __ret_594; \ +#define vmulxd_laneq_f64(__p0_686, __p1_686, __p2_686) __extension__ ({ \ + float64_t __s0_686 = __p0_686; \ + float64x2_t __s1_686 = __p1_686; \ + float64x2_t __rev1_686; __rev1_686 = __builtin_shufflevector(__s1_686, __s1_686, 1, 0); \ + float64_t __ret_686; \ + __ret_686 = vmulxd_f64(__s0_686, __noswap_vgetq_lane_f64(__rev1_686, __p2_686)); \ + __ret_686; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulxs_laneq_f32(__p0_595, __p1_595, __p2_595) __extension__ ({ \ - float32_t __s0_595 = __p0_595; \ - float32x4_t __s1_595 = __p1_595; \ - float32_t __ret_595; \ - __ret_595 = vmulxs_f32(__s0_595, vgetq_lane_f32(__s1_595, __p2_595)); \ - __ret_595; \ +#define vmulxs_laneq_f32(__p0_687, __p1_687, __p2_687) __extension__ ({ \ + float32_t __s0_687 = __p0_687; \ + float32x4_t __s1_687 = __p1_687; \ + float32_t __ret_687; \ + __ret_687 = vmulxs_f32(__s0_687, vgetq_lane_f32(__s1_687, __p2_687)); \ + __ret_687; \ }) #else -#define vmulxs_laneq_f32(__p0_596, __p1_596, __p2_596) __extension__ ({ \ - float32_t __s0_596 = __p0_596; \ - float32x4_t __s1_596 = __p1_596; \ - float32x4_t __rev1_596; __rev1_596 = __builtin_shufflevector(__s1_596, __s1_596, 3, 2, 1, 0); \ - float32_t __ret_596; \ - __ret_596 = vmulxs_f32(__s0_596, __noswap_vgetq_lane_f32(__rev1_596, __p2_596)); \ - __ret_596; \ +#define vmulxs_laneq_f32(__p0_688, __p1_688, __p2_688) __extension__ ({ \ + float32_t __s0_688 = __p0_688; \ + float32x4_t __s1_688 = __p1_688; \ + float32x4_t __rev1_688; __rev1_688 = __builtin_shufflevector(__s1_688, __s1_688, 3, 2, 1, 0); \ + float32_t __ret_688; \ + __ret_688 = vmulxs_f32(__s0_688, __noswap_vgetq_lane_f32(__rev1_688, __p2_688)); \ + __ret_688; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define 
vmulxq_laneq_f64(__p0_597, __p1_597, __p2_597) __extension__ ({ \ - float64x2_t __s0_597 = __p0_597; \ - float64x2_t __s1_597 = __p1_597; \ - float64x2_t __ret_597; \ - __ret_597 = vmulxq_f64(__s0_597, splatq_laneq_f64(__s1_597, __p2_597)); \ - __ret_597; \ +#define vmulxq_laneq_f64(__p0_689, __p1_689, __p2_689) __extension__ ({ \ + float64x2_t __s0_689 = __p0_689; \ + float64x2_t __s1_689 = __p1_689; \ + float64x2_t __ret_689; \ + __ret_689 = vmulxq_f64(__s0_689, splatq_laneq_f64(__s1_689, __p2_689)); \ + __ret_689; \ }) #else -#define vmulxq_laneq_f64(__p0_598, __p1_598, __p2_598) __extension__ ({ \ - float64x2_t __s0_598 = __p0_598; \ - float64x2_t __s1_598 = __p1_598; \ - float64x2_t __rev0_598; __rev0_598 = __builtin_shufflevector(__s0_598, __s0_598, 1, 0); \ - float64x2_t __rev1_598; __rev1_598 = __builtin_shufflevector(__s1_598, __s1_598, 1, 0); \ - float64x2_t __ret_598; \ - __ret_598 = __noswap_vmulxq_f64(__rev0_598, __noswap_splatq_laneq_f64(__rev1_598, __p2_598)); \ - __ret_598 = __builtin_shufflevector(__ret_598, __ret_598, 1, 0); \ - __ret_598; \ +#define vmulxq_laneq_f64(__p0_690, __p1_690, __p2_690) __extension__ ({ \ + float64x2_t __s0_690 = __p0_690; \ + float64x2_t __s1_690 = __p1_690; \ + float64x2_t __rev0_690; __rev0_690 = __builtin_shufflevector(__s0_690, __s0_690, 1, 0); \ + float64x2_t __rev1_690; __rev1_690 = __builtin_shufflevector(__s1_690, __s1_690, 1, 0); \ + float64x2_t __ret_690; \ + __ret_690 = __noswap_vmulxq_f64(__rev0_690, __noswap_splatq_laneq_f64(__rev1_690, __p2_690)); \ + __ret_690 = __builtin_shufflevector(__ret_690, __ret_690, 1, 0); \ + __ret_690; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulxq_laneq_f32(__p0_599, __p1_599, __p2_599) __extension__ ({ \ - float32x4_t __s0_599 = __p0_599; \ - float32x4_t __s1_599 = __p1_599; \ - float32x4_t __ret_599; \ - __ret_599 = vmulxq_f32(__s0_599, splatq_laneq_f32(__s1_599, __p2_599)); \ - __ret_599; \ +#define vmulxq_laneq_f32(__p0_691, __p1_691, __p2_691) __extension__ ({ \ + float32x4_t __s0_691 = __p0_691; \ + float32x4_t __s1_691 = __p1_691; \ + float32x4_t __ret_691; \ + __ret_691 = vmulxq_f32(__s0_691, splatq_laneq_f32(__s1_691, __p2_691)); \ + __ret_691; \ }) #else -#define vmulxq_laneq_f32(__p0_600, __p1_600, __p2_600) __extension__ ({ \ - float32x4_t __s0_600 = __p0_600; \ - float32x4_t __s1_600 = __p1_600; \ - float32x4_t __rev0_600; __rev0_600 = __builtin_shufflevector(__s0_600, __s0_600, 3, 2, 1, 0); \ - float32x4_t __rev1_600; __rev1_600 = __builtin_shufflevector(__s1_600, __s1_600, 3, 2, 1, 0); \ - float32x4_t __ret_600; \ - __ret_600 = __noswap_vmulxq_f32(__rev0_600, __noswap_splatq_laneq_f32(__rev1_600, __p2_600)); \ - __ret_600 = __builtin_shufflevector(__ret_600, __ret_600, 3, 2, 1, 0); \ - __ret_600; \ +#define vmulxq_laneq_f32(__p0_692, __p1_692, __p2_692) __extension__ ({ \ + float32x4_t __s0_692 = __p0_692; \ + float32x4_t __s1_692 = __p1_692; \ + float32x4_t __rev0_692; __rev0_692 = __builtin_shufflevector(__s0_692, __s0_692, 3, 2, 1, 0); \ + float32x4_t __rev1_692; __rev1_692 = __builtin_shufflevector(__s1_692, __s1_692, 3, 2, 1, 0); \ + float32x4_t __ret_692; \ + __ret_692 = __noswap_vmulxq_f32(__rev0_692, __noswap_splatq_laneq_f32(__rev1_692, __p2_692)); \ + __ret_692 = __builtin_shufflevector(__ret_692, __ret_692, 3, 2, 1, 0); \ + __ret_692; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulx_laneq_f32(__p0_601, __p1_601, __p2_601) __extension__ ({ \ - float32x2_t __s0_601 = __p0_601; \ - float32x4_t __s1_601 = __p1_601; \ - float32x2_t __ret_601; \ - __ret_601 = 
vmulx_f32(__s0_601, splat_laneq_f32(__s1_601, __p2_601)); \ - __ret_601; \ +#define vmulx_laneq_f32(__p0_693, __p1_693, __p2_693) __extension__ ({ \ + float32x2_t __s0_693 = __p0_693; \ + float32x4_t __s1_693 = __p1_693; \ + float32x2_t __ret_693; \ + __ret_693 = vmulx_f32(__s0_693, splat_laneq_f32(__s1_693, __p2_693)); \ + __ret_693; \ }) #else -#define vmulx_laneq_f32(__p0_602, __p1_602, __p2_602) __extension__ ({ \ - float32x2_t __s0_602 = __p0_602; \ - float32x4_t __s1_602 = __p1_602; \ - float32x2_t __rev0_602; __rev0_602 = __builtin_shufflevector(__s0_602, __s0_602, 1, 0); \ - float32x4_t __rev1_602; __rev1_602 = __builtin_shufflevector(__s1_602, __s1_602, 3, 2, 1, 0); \ - float32x2_t __ret_602; \ - __ret_602 = __noswap_vmulx_f32(__rev0_602, __noswap_splat_laneq_f32(__rev1_602, __p2_602)); \ - __ret_602 = __builtin_shufflevector(__ret_602, __ret_602, 1, 0); \ - __ret_602; \ +#define vmulx_laneq_f32(__p0_694, __p1_694, __p2_694) __extension__ ({ \ + float32x2_t __s0_694 = __p0_694; \ + float32x4_t __s1_694 = __p1_694; \ + float32x2_t __rev0_694; __rev0_694 = __builtin_shufflevector(__s0_694, __s0_694, 1, 0); \ + float32x4_t __rev1_694; __rev1_694 = __builtin_shufflevector(__s1_694, __s1_694, 3, 2, 1, 0); \ + float32x2_t __ret_694; \ + __ret_694 = __noswap_vmulx_f32(__rev0_694, __noswap_splat_laneq_f32(__rev1_694, __p2_694)); \ + __ret_694 = __builtin_shufflevector(__ret_694, __ret_694, 1, 0); \ + __ret_694; \ }) #endif @@ -55955,98 +57625,98 @@ __ai int32x4_t vqdmlal_high_s16(int32x4_t __p0, int16x8_t __p1, int16x8_t __p2) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlal_high_lane_s32(__p0_603, __p1_603, __p2_603, __p3_603) __extension__ ({ \ - int64x2_t __s0_603 = __p0_603; \ - int32x4_t __s1_603 = __p1_603; \ - int32x2_t __s2_603 = __p2_603; \ - int64x2_t __ret_603; \ - __ret_603 = vqdmlal_s32(__s0_603, vget_high_s32(__s1_603), splat_lane_s32(__s2_603, __p3_603)); \ - __ret_603; \ +#define vqdmlal_high_lane_s32(__p0_695, __p1_695, __p2_695, __p3_695) __extension__ ({ \ + int64x2_t __s0_695 = __p0_695; \ + int32x4_t __s1_695 = __p1_695; \ + int32x2_t __s2_695 = __p2_695; \ + int64x2_t __ret_695; \ + __ret_695 = vqdmlal_s32(__s0_695, vget_high_s32(__s1_695), splat_lane_s32(__s2_695, __p3_695)); \ + __ret_695; \ }) #else -#define vqdmlal_high_lane_s32(__p0_604, __p1_604, __p2_604, __p3_604) __extension__ ({ \ - int64x2_t __s0_604 = __p0_604; \ - int32x4_t __s1_604 = __p1_604; \ - int32x2_t __s2_604 = __p2_604; \ - int64x2_t __rev0_604; __rev0_604 = __builtin_shufflevector(__s0_604, __s0_604, 1, 0); \ - int32x4_t __rev1_604; __rev1_604 = __builtin_shufflevector(__s1_604, __s1_604, 3, 2, 1, 0); \ - int32x2_t __rev2_604; __rev2_604 = __builtin_shufflevector(__s2_604, __s2_604, 1, 0); \ - int64x2_t __ret_604; \ - __ret_604 = __noswap_vqdmlal_s32(__rev0_604, __noswap_vget_high_s32(__rev1_604), __noswap_splat_lane_s32(__rev2_604, __p3_604)); \ - __ret_604 = __builtin_shufflevector(__ret_604, __ret_604, 1, 0); \ - __ret_604; \ +#define vqdmlal_high_lane_s32(__p0_696, __p1_696, __p2_696, __p3_696) __extension__ ({ \ + int64x2_t __s0_696 = __p0_696; \ + int32x4_t __s1_696 = __p1_696; \ + int32x2_t __s2_696 = __p2_696; \ + int64x2_t __rev0_696; __rev0_696 = __builtin_shufflevector(__s0_696, __s0_696, 1, 0); \ + int32x4_t __rev1_696; __rev1_696 = __builtin_shufflevector(__s1_696, __s1_696, 3, 2, 1, 0); \ + int32x2_t __rev2_696; __rev2_696 = __builtin_shufflevector(__s2_696, __s2_696, 1, 0); \ + int64x2_t __ret_696; \ + __ret_696 = __noswap_vqdmlal_s32(__rev0_696, 
__noswap_vget_high_s32(__rev1_696), __noswap_splat_lane_s32(__rev2_696, __p3_696)); \ + __ret_696 = __builtin_shufflevector(__ret_696, __ret_696, 1, 0); \ + __ret_696; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlal_high_lane_s16(__p0_605, __p1_605, __p2_605, __p3_605) __extension__ ({ \ - int32x4_t __s0_605 = __p0_605; \ - int16x8_t __s1_605 = __p1_605; \ - int16x4_t __s2_605 = __p2_605; \ - int32x4_t __ret_605; \ - __ret_605 = vqdmlal_s16(__s0_605, vget_high_s16(__s1_605), splat_lane_s16(__s2_605, __p3_605)); \ - __ret_605; \ +#define vqdmlal_high_lane_s16(__p0_697, __p1_697, __p2_697, __p3_697) __extension__ ({ \ + int32x4_t __s0_697 = __p0_697; \ + int16x8_t __s1_697 = __p1_697; \ + int16x4_t __s2_697 = __p2_697; \ + int32x4_t __ret_697; \ + __ret_697 = vqdmlal_s16(__s0_697, vget_high_s16(__s1_697), splat_lane_s16(__s2_697, __p3_697)); \ + __ret_697; \ }) #else -#define vqdmlal_high_lane_s16(__p0_606, __p1_606, __p2_606, __p3_606) __extension__ ({ \ - int32x4_t __s0_606 = __p0_606; \ - int16x8_t __s1_606 = __p1_606; \ - int16x4_t __s2_606 = __p2_606; \ - int32x4_t __rev0_606; __rev0_606 = __builtin_shufflevector(__s0_606, __s0_606, 3, 2, 1, 0); \ - int16x8_t __rev1_606; __rev1_606 = __builtin_shufflevector(__s1_606, __s1_606, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev2_606; __rev2_606 = __builtin_shufflevector(__s2_606, __s2_606, 3, 2, 1, 0); \ - int32x4_t __ret_606; \ - __ret_606 = __noswap_vqdmlal_s16(__rev0_606, __noswap_vget_high_s16(__rev1_606), __noswap_splat_lane_s16(__rev2_606, __p3_606)); \ - __ret_606 = __builtin_shufflevector(__ret_606, __ret_606, 3, 2, 1, 0); \ - __ret_606; \ +#define vqdmlal_high_lane_s16(__p0_698, __p1_698, __p2_698, __p3_698) __extension__ ({ \ + int32x4_t __s0_698 = __p0_698; \ + int16x8_t __s1_698 = __p1_698; \ + int16x4_t __s2_698 = __p2_698; \ + int32x4_t __rev0_698; __rev0_698 = __builtin_shufflevector(__s0_698, __s0_698, 3, 2, 1, 0); \ + int16x8_t __rev1_698; __rev1_698 = __builtin_shufflevector(__s1_698, __s1_698, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __rev2_698; __rev2_698 = __builtin_shufflevector(__s2_698, __s2_698, 3, 2, 1, 0); \ + int32x4_t __ret_698; \ + __ret_698 = __noswap_vqdmlal_s16(__rev0_698, __noswap_vget_high_s16(__rev1_698), __noswap_splat_lane_s16(__rev2_698, __p3_698)); \ + __ret_698 = __builtin_shufflevector(__ret_698, __ret_698, 3, 2, 1, 0); \ + __ret_698; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlal_high_laneq_s32(__p0_607, __p1_607, __p2_607, __p3_607) __extension__ ({ \ - int64x2_t __s0_607 = __p0_607; \ - int32x4_t __s1_607 = __p1_607; \ - int32x4_t __s2_607 = __p2_607; \ - int64x2_t __ret_607; \ - __ret_607 = vqdmlal_s32(__s0_607, vget_high_s32(__s1_607), splat_laneq_s32(__s2_607, __p3_607)); \ - __ret_607; \ +#define vqdmlal_high_laneq_s32(__p0_699, __p1_699, __p2_699, __p3_699) __extension__ ({ \ + int64x2_t __s0_699 = __p0_699; \ + int32x4_t __s1_699 = __p1_699; \ + int32x4_t __s2_699 = __p2_699; \ + int64x2_t __ret_699; \ + __ret_699 = vqdmlal_s32(__s0_699, vget_high_s32(__s1_699), splat_laneq_s32(__s2_699, __p3_699)); \ + __ret_699; \ }) #else -#define vqdmlal_high_laneq_s32(__p0_608, __p1_608, __p2_608, __p3_608) __extension__ ({ \ - int64x2_t __s0_608 = __p0_608; \ - int32x4_t __s1_608 = __p1_608; \ - int32x4_t __s2_608 = __p2_608; \ - int64x2_t __rev0_608; __rev0_608 = __builtin_shufflevector(__s0_608, __s0_608, 1, 0); \ - int32x4_t __rev1_608; __rev1_608 = __builtin_shufflevector(__s1_608, __s1_608, 3, 2, 1, 0); \ - int32x4_t __rev2_608; __rev2_608 = __builtin_shufflevector(__s2_608, __s2_608, 3, 
2, 1, 0); \ - int64x2_t __ret_608; \ - __ret_608 = __noswap_vqdmlal_s32(__rev0_608, __noswap_vget_high_s32(__rev1_608), __noswap_splat_laneq_s32(__rev2_608, __p3_608)); \ - __ret_608 = __builtin_shufflevector(__ret_608, __ret_608, 1, 0); \ - __ret_608; \ +#define vqdmlal_high_laneq_s32(__p0_700, __p1_700, __p2_700, __p3_700) __extension__ ({ \ + int64x2_t __s0_700 = __p0_700; \ + int32x4_t __s1_700 = __p1_700; \ + int32x4_t __s2_700 = __p2_700; \ + int64x2_t __rev0_700; __rev0_700 = __builtin_shufflevector(__s0_700, __s0_700, 1, 0); \ + int32x4_t __rev1_700; __rev1_700 = __builtin_shufflevector(__s1_700, __s1_700, 3, 2, 1, 0); \ + int32x4_t __rev2_700; __rev2_700 = __builtin_shufflevector(__s2_700, __s2_700, 3, 2, 1, 0); \ + int64x2_t __ret_700; \ + __ret_700 = __noswap_vqdmlal_s32(__rev0_700, __noswap_vget_high_s32(__rev1_700), __noswap_splat_laneq_s32(__rev2_700, __p3_700)); \ + __ret_700 = __builtin_shufflevector(__ret_700, __ret_700, 1, 0); \ + __ret_700; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlal_high_laneq_s16(__p0_609, __p1_609, __p2_609, __p3_609) __extension__ ({ \ - int32x4_t __s0_609 = __p0_609; \ - int16x8_t __s1_609 = __p1_609; \ - int16x8_t __s2_609 = __p2_609; \ - int32x4_t __ret_609; \ - __ret_609 = vqdmlal_s16(__s0_609, vget_high_s16(__s1_609), splat_laneq_s16(__s2_609, __p3_609)); \ - __ret_609; \ +#define vqdmlal_high_laneq_s16(__p0_701, __p1_701, __p2_701, __p3_701) __extension__ ({ \ + int32x4_t __s0_701 = __p0_701; \ + int16x8_t __s1_701 = __p1_701; \ + int16x8_t __s2_701 = __p2_701; \ + int32x4_t __ret_701; \ + __ret_701 = vqdmlal_s16(__s0_701, vget_high_s16(__s1_701), splat_laneq_s16(__s2_701, __p3_701)); \ + __ret_701; \ }) #else -#define vqdmlal_high_laneq_s16(__p0_610, __p1_610, __p2_610, __p3_610) __extension__ ({ \ - int32x4_t __s0_610 = __p0_610; \ - int16x8_t __s1_610 = __p1_610; \ - int16x8_t __s2_610 = __p2_610; \ - int32x4_t __rev0_610; __rev0_610 = __builtin_shufflevector(__s0_610, __s0_610, 3, 2, 1, 0); \ - int16x8_t __rev1_610; __rev1_610 = __builtin_shufflevector(__s1_610, __s1_610, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev2_610; __rev2_610 = __builtin_shufflevector(__s2_610, __s2_610, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x4_t __ret_610; \ - __ret_610 = __noswap_vqdmlal_s16(__rev0_610, __noswap_vget_high_s16(__rev1_610), __noswap_splat_laneq_s16(__rev2_610, __p3_610)); \ - __ret_610 = __builtin_shufflevector(__ret_610, __ret_610, 3, 2, 1, 0); \ - __ret_610; \ +#define vqdmlal_high_laneq_s16(__p0_702, __p1_702, __p2_702, __p3_702) __extension__ ({ \ + int32x4_t __s0_702 = __p0_702; \ + int16x8_t __s1_702 = __p1_702; \ + int16x8_t __s2_702 = __p2_702; \ + int32x4_t __rev0_702; __rev0_702 = __builtin_shufflevector(__s0_702, __s0_702, 3, 2, 1, 0); \ + int16x8_t __rev1_702; __rev1_702 = __builtin_shufflevector(__s1_702, __s1_702, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev2_702; __rev2_702 = __builtin_shufflevector(__s2_702, __s2_702, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x4_t __ret_702; \ + __ret_702 = __noswap_vqdmlal_s16(__rev0_702, __noswap_vget_high_s16(__rev1_702), __noswap_splat_laneq_s16(__rev2_702, __p3_702)); \ + __ret_702 = __builtin_shufflevector(__ret_702, __ret_702, 3, 2, 1, 0); \ + __ret_702; \ }) #endif @@ -56169,50 +57839,50 @@ __ai int32x4_t vqdmlal_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlal_laneq_s32(__p0_611, __p1_611, __p2_611, __p3_611) __extension__ ({ \ - int64x2_t __s0_611 = __p0_611; \ - int32x2_t __s1_611 = __p1_611; \ - int32x4_t __s2_611 = __p2_611; \ - 
int64x2_t __ret_611; \ - __ret_611 = vqdmlal_s32(__s0_611, __s1_611, splat_laneq_s32(__s2_611, __p3_611)); \ - __ret_611; \ +#define vqdmlal_laneq_s32(__p0_703, __p1_703, __p2_703, __p3_703) __extension__ ({ \ + int64x2_t __s0_703 = __p0_703; \ + int32x2_t __s1_703 = __p1_703; \ + int32x4_t __s2_703 = __p2_703; \ + int64x2_t __ret_703; \ + __ret_703 = vqdmlal_s32(__s0_703, __s1_703, splat_laneq_s32(__s2_703, __p3_703)); \ + __ret_703; \ }) #else -#define vqdmlal_laneq_s32(__p0_612, __p1_612, __p2_612, __p3_612) __extension__ ({ \ - int64x2_t __s0_612 = __p0_612; \ - int32x2_t __s1_612 = __p1_612; \ - int32x4_t __s2_612 = __p2_612; \ - int64x2_t __rev0_612; __rev0_612 = __builtin_shufflevector(__s0_612, __s0_612, 1, 0); \ - int32x2_t __rev1_612; __rev1_612 = __builtin_shufflevector(__s1_612, __s1_612, 1, 0); \ - int32x4_t __rev2_612; __rev2_612 = __builtin_shufflevector(__s2_612, __s2_612, 3, 2, 1, 0); \ - int64x2_t __ret_612; \ - __ret_612 = __noswap_vqdmlal_s32(__rev0_612, __rev1_612, __noswap_splat_laneq_s32(__rev2_612, __p3_612)); \ - __ret_612 = __builtin_shufflevector(__ret_612, __ret_612, 1, 0); \ - __ret_612; \ +#define vqdmlal_laneq_s32(__p0_704, __p1_704, __p2_704, __p3_704) __extension__ ({ \ + int64x2_t __s0_704 = __p0_704; \ + int32x2_t __s1_704 = __p1_704; \ + int32x4_t __s2_704 = __p2_704; \ + int64x2_t __rev0_704; __rev0_704 = __builtin_shufflevector(__s0_704, __s0_704, 1, 0); \ + int32x2_t __rev1_704; __rev1_704 = __builtin_shufflevector(__s1_704, __s1_704, 1, 0); \ + int32x4_t __rev2_704; __rev2_704 = __builtin_shufflevector(__s2_704, __s2_704, 3, 2, 1, 0); \ + int64x2_t __ret_704; \ + __ret_704 = __noswap_vqdmlal_s32(__rev0_704, __rev1_704, __noswap_splat_laneq_s32(__rev2_704, __p3_704)); \ + __ret_704 = __builtin_shufflevector(__ret_704, __ret_704, 1, 0); \ + __ret_704; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlal_laneq_s16(__p0_613, __p1_613, __p2_613, __p3_613) __extension__ ({ \ - int32x4_t __s0_613 = __p0_613; \ - int16x4_t __s1_613 = __p1_613; \ - int16x8_t __s2_613 = __p2_613; \ - int32x4_t __ret_613; \ - __ret_613 = vqdmlal_s16(__s0_613, __s1_613, splat_laneq_s16(__s2_613, __p3_613)); \ - __ret_613; \ +#define vqdmlal_laneq_s16(__p0_705, __p1_705, __p2_705, __p3_705) __extension__ ({ \ + int32x4_t __s0_705 = __p0_705; \ + int16x4_t __s1_705 = __p1_705; \ + int16x8_t __s2_705 = __p2_705; \ + int32x4_t __ret_705; \ + __ret_705 = vqdmlal_s16(__s0_705, __s1_705, splat_laneq_s16(__s2_705, __p3_705)); \ + __ret_705; \ }) #else -#define vqdmlal_laneq_s16(__p0_614, __p1_614, __p2_614, __p3_614) __extension__ ({ \ - int32x4_t __s0_614 = __p0_614; \ - int16x4_t __s1_614 = __p1_614; \ - int16x8_t __s2_614 = __p2_614; \ - int32x4_t __rev0_614; __rev0_614 = __builtin_shufflevector(__s0_614, __s0_614, 3, 2, 1, 0); \ - int16x4_t __rev1_614; __rev1_614 = __builtin_shufflevector(__s1_614, __s1_614, 3, 2, 1, 0); \ - int16x8_t __rev2_614; __rev2_614 = __builtin_shufflevector(__s2_614, __s2_614, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x4_t __ret_614; \ - __ret_614 = __noswap_vqdmlal_s16(__rev0_614, __rev1_614, __noswap_splat_laneq_s16(__rev2_614, __p3_614)); \ - __ret_614 = __builtin_shufflevector(__ret_614, __ret_614, 3, 2, 1, 0); \ - __ret_614; \ +#define vqdmlal_laneq_s16(__p0_706, __p1_706, __p2_706, __p3_706) __extension__ ({ \ + int32x4_t __s0_706 = __p0_706; \ + int16x4_t __s1_706 = __p1_706; \ + int16x8_t __s2_706 = __p2_706; \ + int32x4_t __rev0_706; __rev0_706 = __builtin_shufflevector(__s0_706, __s0_706, 3, 2, 1, 0); \ + int16x4_t __rev1_706; __rev1_706 = 
__builtin_shufflevector(__s1_706, __s1_706, 3, 2, 1, 0); \ + int16x8_t __rev2_706; __rev2_706 = __builtin_shufflevector(__s2_706, __s2_706, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x4_t __ret_706; \ + __ret_706 = __noswap_vqdmlal_s16(__rev0_706, __rev1_706, __noswap_splat_laneq_s16(__rev2_706, __p3_706)); \ + __ret_706 = __builtin_shufflevector(__ret_706, __ret_706, 3, 2, 1, 0); \ + __ret_706; \ }) #endif @@ -56263,98 +57933,98 @@ __ai int32x4_t vqdmlsl_high_s16(int32x4_t __p0, int16x8_t __p1, int16x8_t __p2) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlsl_high_lane_s32(__p0_615, __p1_615, __p2_615, __p3_615) __extension__ ({ \ - int64x2_t __s0_615 = __p0_615; \ - int32x4_t __s1_615 = __p1_615; \ - int32x2_t __s2_615 = __p2_615; \ - int64x2_t __ret_615; \ - __ret_615 = vqdmlsl_s32(__s0_615, vget_high_s32(__s1_615), splat_lane_s32(__s2_615, __p3_615)); \ - __ret_615; \ +#define vqdmlsl_high_lane_s32(__p0_707, __p1_707, __p2_707, __p3_707) __extension__ ({ \ + int64x2_t __s0_707 = __p0_707; \ + int32x4_t __s1_707 = __p1_707; \ + int32x2_t __s2_707 = __p2_707; \ + int64x2_t __ret_707; \ + __ret_707 = vqdmlsl_s32(__s0_707, vget_high_s32(__s1_707), splat_lane_s32(__s2_707, __p3_707)); \ + __ret_707; \ }) #else -#define vqdmlsl_high_lane_s32(__p0_616, __p1_616, __p2_616, __p3_616) __extension__ ({ \ - int64x2_t __s0_616 = __p0_616; \ - int32x4_t __s1_616 = __p1_616; \ - int32x2_t __s2_616 = __p2_616; \ - int64x2_t __rev0_616; __rev0_616 = __builtin_shufflevector(__s0_616, __s0_616, 1, 0); \ - int32x4_t __rev1_616; __rev1_616 = __builtin_shufflevector(__s1_616, __s1_616, 3, 2, 1, 0); \ - int32x2_t __rev2_616; __rev2_616 = __builtin_shufflevector(__s2_616, __s2_616, 1, 0); \ - int64x2_t __ret_616; \ - __ret_616 = __noswap_vqdmlsl_s32(__rev0_616, __noswap_vget_high_s32(__rev1_616), __noswap_splat_lane_s32(__rev2_616, __p3_616)); \ - __ret_616 = __builtin_shufflevector(__ret_616, __ret_616, 1, 0); \ - __ret_616; \ +#define vqdmlsl_high_lane_s32(__p0_708, __p1_708, __p2_708, __p3_708) __extension__ ({ \ + int64x2_t __s0_708 = __p0_708; \ + int32x4_t __s1_708 = __p1_708; \ + int32x2_t __s2_708 = __p2_708; \ + int64x2_t __rev0_708; __rev0_708 = __builtin_shufflevector(__s0_708, __s0_708, 1, 0); \ + int32x4_t __rev1_708; __rev1_708 = __builtin_shufflevector(__s1_708, __s1_708, 3, 2, 1, 0); \ + int32x2_t __rev2_708; __rev2_708 = __builtin_shufflevector(__s2_708, __s2_708, 1, 0); \ + int64x2_t __ret_708; \ + __ret_708 = __noswap_vqdmlsl_s32(__rev0_708, __noswap_vget_high_s32(__rev1_708), __noswap_splat_lane_s32(__rev2_708, __p3_708)); \ + __ret_708 = __builtin_shufflevector(__ret_708, __ret_708, 1, 0); \ + __ret_708; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlsl_high_lane_s16(__p0_617, __p1_617, __p2_617, __p3_617) __extension__ ({ \ - int32x4_t __s0_617 = __p0_617; \ - int16x8_t __s1_617 = __p1_617; \ - int16x4_t __s2_617 = __p2_617; \ - int32x4_t __ret_617; \ - __ret_617 = vqdmlsl_s16(__s0_617, vget_high_s16(__s1_617), splat_lane_s16(__s2_617, __p3_617)); \ - __ret_617; \ +#define vqdmlsl_high_lane_s16(__p0_709, __p1_709, __p2_709, __p3_709) __extension__ ({ \ + int32x4_t __s0_709 = __p0_709; \ + int16x8_t __s1_709 = __p1_709; \ + int16x4_t __s2_709 = __p2_709; \ + int32x4_t __ret_709; \ + __ret_709 = vqdmlsl_s16(__s0_709, vget_high_s16(__s1_709), splat_lane_s16(__s2_709, __p3_709)); \ + __ret_709; \ }) #else -#define vqdmlsl_high_lane_s16(__p0_618, __p1_618, __p2_618, __p3_618) __extension__ ({ \ - int32x4_t __s0_618 = __p0_618; \ - int16x8_t __s1_618 = __p1_618; \ - int16x4_t __s2_618 = __p2_618; \ 
- int32x4_t __rev0_618; __rev0_618 = __builtin_shufflevector(__s0_618, __s0_618, 3, 2, 1, 0); \ - int16x8_t __rev1_618; __rev1_618 = __builtin_shufflevector(__s1_618, __s1_618, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev2_618; __rev2_618 = __builtin_shufflevector(__s2_618, __s2_618, 3, 2, 1, 0); \ - int32x4_t __ret_618; \ - __ret_618 = __noswap_vqdmlsl_s16(__rev0_618, __noswap_vget_high_s16(__rev1_618), __noswap_splat_lane_s16(__rev2_618, __p3_618)); \ - __ret_618 = __builtin_shufflevector(__ret_618, __ret_618, 3, 2, 1, 0); \ - __ret_618; \ +#define vqdmlsl_high_lane_s16(__p0_710, __p1_710, __p2_710, __p3_710) __extension__ ({ \ + int32x4_t __s0_710 = __p0_710; \ + int16x8_t __s1_710 = __p1_710; \ + int16x4_t __s2_710 = __p2_710; \ + int32x4_t __rev0_710; __rev0_710 = __builtin_shufflevector(__s0_710, __s0_710, 3, 2, 1, 0); \ + int16x8_t __rev1_710; __rev1_710 = __builtin_shufflevector(__s1_710, __s1_710, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __rev2_710; __rev2_710 = __builtin_shufflevector(__s2_710, __s2_710, 3, 2, 1, 0); \ + int32x4_t __ret_710; \ + __ret_710 = __noswap_vqdmlsl_s16(__rev0_710, __noswap_vget_high_s16(__rev1_710), __noswap_splat_lane_s16(__rev2_710, __p3_710)); \ + __ret_710 = __builtin_shufflevector(__ret_710, __ret_710, 3, 2, 1, 0); \ + __ret_710; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlsl_high_laneq_s32(__p0_619, __p1_619, __p2_619, __p3_619) __extension__ ({ \ - int64x2_t __s0_619 = __p0_619; \ - int32x4_t __s1_619 = __p1_619; \ - int32x4_t __s2_619 = __p2_619; \ - int64x2_t __ret_619; \ - __ret_619 = vqdmlsl_s32(__s0_619, vget_high_s32(__s1_619), splat_laneq_s32(__s2_619, __p3_619)); \ - __ret_619; \ +#define vqdmlsl_high_laneq_s32(__p0_711, __p1_711, __p2_711, __p3_711) __extension__ ({ \ + int64x2_t __s0_711 = __p0_711; \ + int32x4_t __s1_711 = __p1_711; \ + int32x4_t __s2_711 = __p2_711; \ + int64x2_t __ret_711; \ + __ret_711 = vqdmlsl_s32(__s0_711, vget_high_s32(__s1_711), splat_laneq_s32(__s2_711, __p3_711)); \ + __ret_711; \ }) #else -#define vqdmlsl_high_laneq_s32(__p0_620, __p1_620, __p2_620, __p3_620) __extension__ ({ \ - int64x2_t __s0_620 = __p0_620; \ - int32x4_t __s1_620 = __p1_620; \ - int32x4_t __s2_620 = __p2_620; \ - int64x2_t __rev0_620; __rev0_620 = __builtin_shufflevector(__s0_620, __s0_620, 1, 0); \ - int32x4_t __rev1_620; __rev1_620 = __builtin_shufflevector(__s1_620, __s1_620, 3, 2, 1, 0); \ - int32x4_t __rev2_620; __rev2_620 = __builtin_shufflevector(__s2_620, __s2_620, 3, 2, 1, 0); \ - int64x2_t __ret_620; \ - __ret_620 = __noswap_vqdmlsl_s32(__rev0_620, __noswap_vget_high_s32(__rev1_620), __noswap_splat_laneq_s32(__rev2_620, __p3_620)); \ - __ret_620 = __builtin_shufflevector(__ret_620, __ret_620, 1, 0); \ - __ret_620; \ +#define vqdmlsl_high_laneq_s32(__p0_712, __p1_712, __p2_712, __p3_712) __extension__ ({ \ + int64x2_t __s0_712 = __p0_712; \ + int32x4_t __s1_712 = __p1_712; \ + int32x4_t __s2_712 = __p2_712; \ + int64x2_t __rev0_712; __rev0_712 = __builtin_shufflevector(__s0_712, __s0_712, 1, 0); \ + int32x4_t __rev1_712; __rev1_712 = __builtin_shufflevector(__s1_712, __s1_712, 3, 2, 1, 0); \ + int32x4_t __rev2_712; __rev2_712 = __builtin_shufflevector(__s2_712, __s2_712, 3, 2, 1, 0); \ + int64x2_t __ret_712; \ + __ret_712 = __noswap_vqdmlsl_s32(__rev0_712, __noswap_vget_high_s32(__rev1_712), __noswap_splat_laneq_s32(__rev2_712, __p3_712)); \ + __ret_712 = __builtin_shufflevector(__ret_712, __ret_712, 1, 0); \ + __ret_712; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlsl_high_laneq_s16(__p0_621, __p1_621, __p2_621, 
__p3_621) __extension__ ({ \ - int32x4_t __s0_621 = __p0_621; \ - int16x8_t __s1_621 = __p1_621; \ - int16x8_t __s2_621 = __p2_621; \ - int32x4_t __ret_621; \ - __ret_621 = vqdmlsl_s16(__s0_621, vget_high_s16(__s1_621), splat_laneq_s16(__s2_621, __p3_621)); \ - __ret_621; \ +#define vqdmlsl_high_laneq_s16(__p0_713, __p1_713, __p2_713, __p3_713) __extension__ ({ \ + int32x4_t __s0_713 = __p0_713; \ + int16x8_t __s1_713 = __p1_713; \ + int16x8_t __s2_713 = __p2_713; \ + int32x4_t __ret_713; \ + __ret_713 = vqdmlsl_s16(__s0_713, vget_high_s16(__s1_713), splat_laneq_s16(__s2_713, __p3_713)); \ + __ret_713; \ }) #else -#define vqdmlsl_high_laneq_s16(__p0_622, __p1_622, __p2_622, __p3_622) __extension__ ({ \ - int32x4_t __s0_622 = __p0_622; \ - int16x8_t __s1_622 = __p1_622; \ - int16x8_t __s2_622 = __p2_622; \ - int32x4_t __rev0_622; __rev0_622 = __builtin_shufflevector(__s0_622, __s0_622, 3, 2, 1, 0); \ - int16x8_t __rev1_622; __rev1_622 = __builtin_shufflevector(__s1_622, __s1_622, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev2_622; __rev2_622 = __builtin_shufflevector(__s2_622, __s2_622, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x4_t __ret_622; \ - __ret_622 = __noswap_vqdmlsl_s16(__rev0_622, __noswap_vget_high_s16(__rev1_622), __noswap_splat_laneq_s16(__rev2_622, __p3_622)); \ - __ret_622 = __builtin_shufflevector(__ret_622, __ret_622, 3, 2, 1, 0); \ - __ret_622; \ +#define vqdmlsl_high_laneq_s16(__p0_714, __p1_714, __p2_714, __p3_714) __extension__ ({ \ + int32x4_t __s0_714 = __p0_714; \ + int16x8_t __s1_714 = __p1_714; \ + int16x8_t __s2_714 = __p2_714; \ + int32x4_t __rev0_714; __rev0_714 = __builtin_shufflevector(__s0_714, __s0_714, 3, 2, 1, 0); \ + int16x8_t __rev1_714; __rev1_714 = __builtin_shufflevector(__s1_714, __s1_714, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev2_714; __rev2_714 = __builtin_shufflevector(__s2_714, __s2_714, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x4_t __ret_714; \ + __ret_714 = __noswap_vqdmlsl_s16(__rev0_714, __noswap_vget_high_s16(__rev1_714), __noswap_splat_laneq_s16(__rev2_714, __p3_714)); \ + __ret_714 = __builtin_shufflevector(__ret_714, __ret_714, 3, 2, 1, 0); \ + __ret_714; \ }) #endif @@ -56477,50 +58147,50 @@ __ai int32x4_t vqdmlsl_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlsl_laneq_s32(__p0_623, __p1_623, __p2_623, __p3_623) __extension__ ({ \ - int64x2_t __s0_623 = __p0_623; \ - int32x2_t __s1_623 = __p1_623; \ - int32x4_t __s2_623 = __p2_623; \ - int64x2_t __ret_623; \ - __ret_623 = vqdmlsl_s32(__s0_623, __s1_623, splat_laneq_s32(__s2_623, __p3_623)); \ - __ret_623; \ +#define vqdmlsl_laneq_s32(__p0_715, __p1_715, __p2_715, __p3_715) __extension__ ({ \ + int64x2_t __s0_715 = __p0_715; \ + int32x2_t __s1_715 = __p1_715; \ + int32x4_t __s2_715 = __p2_715; \ + int64x2_t __ret_715; \ + __ret_715 = vqdmlsl_s32(__s0_715, __s1_715, splat_laneq_s32(__s2_715, __p3_715)); \ + __ret_715; \ }) #else -#define vqdmlsl_laneq_s32(__p0_624, __p1_624, __p2_624, __p3_624) __extension__ ({ \ - int64x2_t __s0_624 = __p0_624; \ - int32x2_t __s1_624 = __p1_624; \ - int32x4_t __s2_624 = __p2_624; \ - int64x2_t __rev0_624; __rev0_624 = __builtin_shufflevector(__s0_624, __s0_624, 1, 0); \ - int32x2_t __rev1_624; __rev1_624 = __builtin_shufflevector(__s1_624, __s1_624, 1, 0); \ - int32x4_t __rev2_624; __rev2_624 = __builtin_shufflevector(__s2_624, __s2_624, 3, 2, 1, 0); \ - int64x2_t __ret_624; \ - __ret_624 = __noswap_vqdmlsl_s32(__rev0_624, __rev1_624, __noswap_splat_laneq_s32(__rev2_624, __p3_624)); \ - __ret_624 = 
__builtin_shufflevector(__ret_624, __ret_624, 1, 0); \ - __ret_624; \ +#define vqdmlsl_laneq_s32(__p0_716, __p1_716, __p2_716, __p3_716) __extension__ ({ \ + int64x2_t __s0_716 = __p0_716; \ + int32x2_t __s1_716 = __p1_716; \ + int32x4_t __s2_716 = __p2_716; \ + int64x2_t __rev0_716; __rev0_716 = __builtin_shufflevector(__s0_716, __s0_716, 1, 0); \ + int32x2_t __rev1_716; __rev1_716 = __builtin_shufflevector(__s1_716, __s1_716, 1, 0); \ + int32x4_t __rev2_716; __rev2_716 = __builtin_shufflevector(__s2_716, __s2_716, 3, 2, 1, 0); \ + int64x2_t __ret_716; \ + __ret_716 = __noswap_vqdmlsl_s32(__rev0_716, __rev1_716, __noswap_splat_laneq_s32(__rev2_716, __p3_716)); \ + __ret_716 = __builtin_shufflevector(__ret_716, __ret_716, 1, 0); \ + __ret_716; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmlsl_laneq_s16(__p0_625, __p1_625, __p2_625, __p3_625) __extension__ ({ \ - int32x4_t __s0_625 = __p0_625; \ - int16x4_t __s1_625 = __p1_625; \ - int16x8_t __s2_625 = __p2_625; \ - int32x4_t __ret_625; \ - __ret_625 = vqdmlsl_s16(__s0_625, __s1_625, splat_laneq_s16(__s2_625, __p3_625)); \ - __ret_625; \ +#define vqdmlsl_laneq_s16(__p0_717, __p1_717, __p2_717, __p3_717) __extension__ ({ \ + int32x4_t __s0_717 = __p0_717; \ + int16x4_t __s1_717 = __p1_717; \ + int16x8_t __s2_717 = __p2_717; \ + int32x4_t __ret_717; \ + __ret_717 = vqdmlsl_s16(__s0_717, __s1_717, splat_laneq_s16(__s2_717, __p3_717)); \ + __ret_717; \ }) #else -#define vqdmlsl_laneq_s16(__p0_626, __p1_626, __p2_626, __p3_626) __extension__ ({ \ - int32x4_t __s0_626 = __p0_626; \ - int16x4_t __s1_626 = __p1_626; \ - int16x8_t __s2_626 = __p2_626; \ - int32x4_t __rev0_626; __rev0_626 = __builtin_shufflevector(__s0_626, __s0_626, 3, 2, 1, 0); \ - int16x4_t __rev1_626; __rev1_626 = __builtin_shufflevector(__s1_626, __s1_626, 3, 2, 1, 0); \ - int16x8_t __rev2_626; __rev2_626 = __builtin_shufflevector(__s2_626, __s2_626, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x4_t __ret_626; \ - __ret_626 = __noswap_vqdmlsl_s16(__rev0_626, __rev1_626, __noswap_splat_laneq_s16(__rev2_626, __p3_626)); \ - __ret_626 = __builtin_shufflevector(__ret_626, __ret_626, 3, 2, 1, 0); \ - __ret_626; \ +#define vqdmlsl_laneq_s16(__p0_718, __p1_718, __p2_718, __p3_718) __extension__ ({ \ + int32x4_t __s0_718 = __p0_718; \ + int16x4_t __s1_718 = __p1_718; \ + int16x8_t __s2_718 = __p2_718; \ + int32x4_t __rev0_718; __rev0_718 = __builtin_shufflevector(__s0_718, __s0_718, 3, 2, 1, 0); \ + int16x4_t __rev1_718; __rev1_718 = __builtin_shufflevector(__s1_718, __s1_718, 3, 2, 1, 0); \ + int16x8_t __rev2_718; __rev2_718 = __builtin_shufflevector(__s2_718, __s2_718, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x4_t __ret_718; \ + __ret_718 = __noswap_vqdmlsl_s16(__rev0_718, __rev1_718, __noswap_splat_laneq_s16(__rev2_718, __p3_718)); \ + __ret_718 = __builtin_shufflevector(__ret_718, __ret_718, 3, 2, 1, 0); \ + __ret_718; \ }) #endif @@ -56619,78 +58289,78 @@ __ai int16_t vqdmulhh_s16(int16_t __p0, int16_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmulhs_lane_s32(__p0_627, __p1_627, __p2_627) __extension__ ({ \ - int32_t __s0_627 = __p0_627; \ - int32x2_t __s1_627 = __p1_627; \ - int32_t __ret_627; \ - __ret_627 = vqdmulhs_s32(__s0_627, vget_lane_s32(__s1_627, __p2_627)); \ - __ret_627; \ +#define vqdmulhs_lane_s32(__p0_719, __p1_719, __p2_719) __extension__ ({ \ + int32_t __s0_719 = __p0_719; \ + int32x2_t __s1_719 = __p1_719; \ + int32_t __ret_719; \ + __ret_719 = vqdmulhs_s32(__s0_719, vget_lane_s32(__s1_719, __p2_719)); \ + __ret_719; \ }) #else -#define vqdmulhs_lane_s32(__p0_628, 
__p1_628, __p2_628) __extension__ ({ \ - int32_t __s0_628 = __p0_628; \ - int32x2_t __s1_628 = __p1_628; \ - int32x2_t __rev1_628; __rev1_628 = __builtin_shufflevector(__s1_628, __s1_628, 1, 0); \ - int32_t __ret_628; \ - __ret_628 = vqdmulhs_s32(__s0_628, __noswap_vget_lane_s32(__rev1_628, __p2_628)); \ - __ret_628; \ +#define vqdmulhs_lane_s32(__p0_720, __p1_720, __p2_720) __extension__ ({ \ + int32_t __s0_720 = __p0_720; \ + int32x2_t __s1_720 = __p1_720; \ + int32x2_t __rev1_720; __rev1_720 = __builtin_shufflevector(__s1_720, __s1_720, 1, 0); \ + int32_t __ret_720; \ + __ret_720 = vqdmulhs_s32(__s0_720, __noswap_vget_lane_s32(__rev1_720, __p2_720)); \ + __ret_720; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmulhh_lane_s16(__p0_629, __p1_629, __p2_629) __extension__ ({ \ - int16_t __s0_629 = __p0_629; \ - int16x4_t __s1_629 = __p1_629; \ - int16_t __ret_629; \ - __ret_629 = vqdmulhh_s16(__s0_629, vget_lane_s16(__s1_629, __p2_629)); \ - __ret_629; \ +#define vqdmulhh_lane_s16(__p0_721, __p1_721, __p2_721) __extension__ ({ \ + int16_t __s0_721 = __p0_721; \ + int16x4_t __s1_721 = __p1_721; \ + int16_t __ret_721; \ + __ret_721 = vqdmulhh_s16(__s0_721, vget_lane_s16(__s1_721, __p2_721)); \ + __ret_721; \ }) #else -#define vqdmulhh_lane_s16(__p0_630, __p1_630, __p2_630) __extension__ ({ \ - int16_t __s0_630 = __p0_630; \ - int16x4_t __s1_630 = __p1_630; \ - int16x4_t __rev1_630; __rev1_630 = __builtin_shufflevector(__s1_630, __s1_630, 3, 2, 1, 0); \ - int16_t __ret_630; \ - __ret_630 = vqdmulhh_s16(__s0_630, __noswap_vget_lane_s16(__rev1_630, __p2_630)); \ - __ret_630; \ +#define vqdmulhh_lane_s16(__p0_722, __p1_722, __p2_722) __extension__ ({ \ + int16_t __s0_722 = __p0_722; \ + int16x4_t __s1_722 = __p1_722; \ + int16x4_t __rev1_722; __rev1_722 = __builtin_shufflevector(__s1_722, __s1_722, 3, 2, 1, 0); \ + int16_t __ret_722; \ + __ret_722 = vqdmulhh_s16(__s0_722, __noswap_vget_lane_s16(__rev1_722, __p2_722)); \ + __ret_722; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmulhs_laneq_s32(__p0_631, __p1_631, __p2_631) __extension__ ({ \ - int32_t __s0_631 = __p0_631; \ - int32x4_t __s1_631 = __p1_631; \ - int32_t __ret_631; \ - __ret_631 = vqdmulhs_s32(__s0_631, vgetq_lane_s32(__s1_631, __p2_631)); \ - __ret_631; \ +#define vqdmulhs_laneq_s32(__p0_723, __p1_723, __p2_723) __extension__ ({ \ + int32_t __s0_723 = __p0_723; \ + int32x4_t __s1_723 = __p1_723; \ + int32_t __ret_723; \ + __ret_723 = vqdmulhs_s32(__s0_723, vgetq_lane_s32(__s1_723, __p2_723)); \ + __ret_723; \ }) #else -#define vqdmulhs_laneq_s32(__p0_632, __p1_632, __p2_632) __extension__ ({ \ - int32_t __s0_632 = __p0_632; \ - int32x4_t __s1_632 = __p1_632; \ - int32x4_t __rev1_632; __rev1_632 = __builtin_shufflevector(__s1_632, __s1_632, 3, 2, 1, 0); \ - int32_t __ret_632; \ - __ret_632 = vqdmulhs_s32(__s0_632, __noswap_vgetq_lane_s32(__rev1_632, __p2_632)); \ - __ret_632; \ +#define vqdmulhs_laneq_s32(__p0_724, __p1_724, __p2_724) __extension__ ({ \ + int32_t __s0_724 = __p0_724; \ + int32x4_t __s1_724 = __p1_724; \ + int32x4_t __rev1_724; __rev1_724 = __builtin_shufflevector(__s1_724, __s1_724, 3, 2, 1, 0); \ + int32_t __ret_724; \ + __ret_724 = vqdmulhs_s32(__s0_724, __noswap_vgetq_lane_s32(__rev1_724, __p2_724)); \ + __ret_724; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmulhh_laneq_s16(__p0_633, __p1_633, __p2_633) __extension__ ({ \ - int16_t __s0_633 = __p0_633; \ - int16x8_t __s1_633 = __p1_633; \ - int16_t __ret_633; \ - __ret_633 = vqdmulhh_s16(__s0_633, vgetq_lane_s16(__s1_633, __p2_633)); \ - __ret_633; \ 
+#define vqdmulhh_laneq_s16(__p0_725, __p1_725, __p2_725) __extension__ ({ \ + int16_t __s0_725 = __p0_725; \ + int16x8_t __s1_725 = __p1_725; \ + int16_t __ret_725; \ + __ret_725 = vqdmulhh_s16(__s0_725, vgetq_lane_s16(__s1_725, __p2_725)); \ + __ret_725; \ }) #else -#define vqdmulhh_laneq_s16(__p0_634, __p1_634, __p2_634) __extension__ ({ \ - int16_t __s0_634 = __p0_634; \ - int16x8_t __s1_634 = __p1_634; \ - int16x8_t __rev1_634; __rev1_634 = __builtin_shufflevector(__s1_634, __s1_634, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16_t __ret_634; \ - __ret_634 = vqdmulhh_s16(__s0_634, __noswap_vgetq_lane_s16(__rev1_634, __p2_634)); \ - __ret_634; \ +#define vqdmulhh_laneq_s16(__p0_726, __p1_726, __p2_726) __extension__ ({ \ + int16_t __s0_726 = __p0_726; \ + int16x8_t __s1_726 = __p1_726; \ + int16x8_t __rev1_726; __rev1_726 = __builtin_shufflevector(__s1_726, __s1_726, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16_t __ret_726; \ + __ret_726 = vqdmulhh_s16(__s0_726, __noswap_vgetq_lane_s16(__rev1_726, __p2_726)); \ + __ret_726; \ }) #endif @@ -56823,86 +58493,86 @@ __ai int32x4_t vqdmull_high_s16(int16x8_t __p0, int16x8_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmull_high_lane_s32(__p0_635, __p1_635, __p2_635) __extension__ ({ \ - int32x4_t __s0_635 = __p0_635; \ - int32x2_t __s1_635 = __p1_635; \ - int64x2_t __ret_635; \ - __ret_635 = vqdmull_s32(vget_high_s32(__s0_635), splat_lane_s32(__s1_635, __p2_635)); \ - __ret_635; \ +#define vqdmull_high_lane_s32(__p0_727, __p1_727, __p2_727) __extension__ ({ \ + int32x4_t __s0_727 = __p0_727; \ + int32x2_t __s1_727 = __p1_727; \ + int64x2_t __ret_727; \ + __ret_727 = vqdmull_s32(vget_high_s32(__s0_727), splat_lane_s32(__s1_727, __p2_727)); \ + __ret_727; \ }) #else -#define vqdmull_high_lane_s32(__p0_636, __p1_636, __p2_636) __extension__ ({ \ - int32x4_t __s0_636 = __p0_636; \ - int32x2_t __s1_636 = __p1_636; \ - int32x4_t __rev0_636; __rev0_636 = __builtin_shufflevector(__s0_636, __s0_636, 3, 2, 1, 0); \ - int32x2_t __rev1_636; __rev1_636 = __builtin_shufflevector(__s1_636, __s1_636, 1, 0); \ - int64x2_t __ret_636; \ - __ret_636 = __noswap_vqdmull_s32(__noswap_vget_high_s32(__rev0_636), __noswap_splat_lane_s32(__rev1_636, __p2_636)); \ - __ret_636 = __builtin_shufflevector(__ret_636, __ret_636, 1, 0); \ - __ret_636; \ +#define vqdmull_high_lane_s32(__p0_728, __p1_728, __p2_728) __extension__ ({ \ + int32x4_t __s0_728 = __p0_728; \ + int32x2_t __s1_728 = __p1_728; \ + int32x4_t __rev0_728; __rev0_728 = __builtin_shufflevector(__s0_728, __s0_728, 3, 2, 1, 0); \ + int32x2_t __rev1_728; __rev1_728 = __builtin_shufflevector(__s1_728, __s1_728, 1, 0); \ + int64x2_t __ret_728; \ + __ret_728 = __noswap_vqdmull_s32(__noswap_vget_high_s32(__rev0_728), __noswap_splat_lane_s32(__rev1_728, __p2_728)); \ + __ret_728 = __builtin_shufflevector(__ret_728, __ret_728, 1, 0); \ + __ret_728; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmull_high_lane_s16(__p0_637, __p1_637, __p2_637) __extension__ ({ \ - int16x8_t __s0_637 = __p0_637; \ - int16x4_t __s1_637 = __p1_637; \ - int32x4_t __ret_637; \ - __ret_637 = vqdmull_s16(vget_high_s16(__s0_637), splat_lane_s16(__s1_637, __p2_637)); \ - __ret_637; \ +#define vqdmull_high_lane_s16(__p0_729, __p1_729, __p2_729) __extension__ ({ \ + int16x8_t __s0_729 = __p0_729; \ + int16x4_t __s1_729 = __p1_729; \ + int32x4_t __ret_729; \ + __ret_729 = vqdmull_s16(vget_high_s16(__s0_729), splat_lane_s16(__s1_729, __p2_729)); \ + __ret_729; \ }) #else -#define vqdmull_high_lane_s16(__p0_638, __p1_638, __p2_638) __extension__ ({ \ - int16x8_t 
__s0_638 = __p0_638; \ - int16x4_t __s1_638 = __p1_638; \ - int16x8_t __rev0_638; __rev0_638 = __builtin_shufflevector(__s0_638, __s0_638, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x4_t __rev1_638; __rev1_638 = __builtin_shufflevector(__s1_638, __s1_638, 3, 2, 1, 0); \ - int32x4_t __ret_638; \ - __ret_638 = __noswap_vqdmull_s16(__noswap_vget_high_s16(__rev0_638), __noswap_splat_lane_s16(__rev1_638, __p2_638)); \ - __ret_638 = __builtin_shufflevector(__ret_638, __ret_638, 3, 2, 1, 0); \ - __ret_638; \ +#define vqdmull_high_lane_s16(__p0_730, __p1_730, __p2_730) __extension__ ({ \ + int16x8_t __s0_730 = __p0_730; \ + int16x4_t __s1_730 = __p1_730; \ + int16x8_t __rev0_730; __rev0_730 = __builtin_shufflevector(__s0_730, __s0_730, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x4_t __rev1_730; __rev1_730 = __builtin_shufflevector(__s1_730, __s1_730, 3, 2, 1, 0); \ + int32x4_t __ret_730; \ + __ret_730 = __noswap_vqdmull_s16(__noswap_vget_high_s16(__rev0_730), __noswap_splat_lane_s16(__rev1_730, __p2_730)); \ + __ret_730 = __builtin_shufflevector(__ret_730, __ret_730, 3, 2, 1, 0); \ + __ret_730; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmull_high_laneq_s32(__p0_639, __p1_639, __p2_639) __extension__ ({ \ - int32x4_t __s0_639 = __p0_639; \ - int32x4_t __s1_639 = __p1_639; \ - int64x2_t __ret_639; \ - __ret_639 = vqdmull_s32(vget_high_s32(__s0_639), splat_laneq_s32(__s1_639, __p2_639)); \ - __ret_639; \ +#define vqdmull_high_laneq_s32(__p0_731, __p1_731, __p2_731) __extension__ ({ \ + int32x4_t __s0_731 = __p0_731; \ + int32x4_t __s1_731 = __p1_731; \ + int64x2_t __ret_731; \ + __ret_731 = vqdmull_s32(vget_high_s32(__s0_731), splat_laneq_s32(__s1_731, __p2_731)); \ + __ret_731; \ }) #else -#define vqdmull_high_laneq_s32(__p0_640, __p1_640, __p2_640) __extension__ ({ \ - int32x4_t __s0_640 = __p0_640; \ - int32x4_t __s1_640 = __p1_640; \ - int32x4_t __rev0_640; __rev0_640 = __builtin_shufflevector(__s0_640, __s0_640, 3, 2, 1, 0); \ - int32x4_t __rev1_640; __rev1_640 = __builtin_shufflevector(__s1_640, __s1_640, 3, 2, 1, 0); \ - int64x2_t __ret_640; \ - __ret_640 = __noswap_vqdmull_s32(__noswap_vget_high_s32(__rev0_640), __noswap_splat_laneq_s32(__rev1_640, __p2_640)); \ - __ret_640 = __builtin_shufflevector(__ret_640, __ret_640, 1, 0); \ - __ret_640; \ +#define vqdmull_high_laneq_s32(__p0_732, __p1_732, __p2_732) __extension__ ({ \ + int32x4_t __s0_732 = __p0_732; \ + int32x4_t __s1_732 = __p1_732; \ + int32x4_t __rev0_732; __rev0_732 = __builtin_shufflevector(__s0_732, __s0_732, 3, 2, 1, 0); \ + int32x4_t __rev1_732; __rev1_732 = __builtin_shufflevector(__s1_732, __s1_732, 3, 2, 1, 0); \ + int64x2_t __ret_732; \ + __ret_732 = __noswap_vqdmull_s32(__noswap_vget_high_s32(__rev0_732), __noswap_splat_laneq_s32(__rev1_732, __p2_732)); \ + __ret_732 = __builtin_shufflevector(__ret_732, __ret_732, 1, 0); \ + __ret_732; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmull_high_laneq_s16(__p0_641, __p1_641, __p2_641) __extension__ ({ \ - int16x8_t __s0_641 = __p0_641; \ - int16x8_t __s1_641 = __p1_641; \ - int32x4_t __ret_641; \ - __ret_641 = vqdmull_s16(vget_high_s16(__s0_641), splat_laneq_s16(__s1_641, __p2_641)); \ - __ret_641; \ +#define vqdmull_high_laneq_s16(__p0_733, __p1_733, __p2_733) __extension__ ({ \ + int16x8_t __s0_733 = __p0_733; \ + int16x8_t __s1_733 = __p1_733; \ + int32x4_t __ret_733; \ + __ret_733 = vqdmull_s16(vget_high_s16(__s0_733), splat_laneq_s16(__s1_733, __p2_733)); \ + __ret_733; \ }) #else -#define vqdmull_high_laneq_s16(__p0_642, __p1_642, __p2_642) __extension__ ({ \ - int16x8_t __s0_642 
= __p0_642; \ - int16x8_t __s1_642 = __p1_642; \ - int16x8_t __rev0_642; __rev0_642 = __builtin_shufflevector(__s0_642, __s0_642, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_642; __rev1_642 = __builtin_shufflevector(__s1_642, __s1_642, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x4_t __ret_642; \ - __ret_642 = __noswap_vqdmull_s16(__noswap_vget_high_s16(__rev0_642), __noswap_splat_laneq_s16(__rev1_642, __p2_642)); \ - __ret_642 = __builtin_shufflevector(__ret_642, __ret_642, 3, 2, 1, 0); \ - __ret_642; \ +#define vqdmull_high_laneq_s16(__p0_734, __p1_734, __p2_734) __extension__ ({ \ + int16x8_t __s0_734 = __p0_734; \ + int16x8_t __s1_734 = __p1_734; \ + int16x8_t __rev0_734; __rev0_734 = __builtin_shufflevector(__s0_734, __s0_734, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1_734; __rev1_734 = __builtin_shufflevector(__s1_734, __s1_734, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x4_t __ret_734; \ + __ret_734 = __noswap_vqdmull_s16(__noswap_vget_high_s16(__rev0_734), __noswap_splat_laneq_s16(__rev1_734, __p2_734)); \ + __ret_734 = __builtin_shufflevector(__ret_734, __ret_734, 3, 2, 1, 0); \ + __ret_734; \ }) #endif @@ -56939,120 +58609,120 @@ __ai int32x4_t vqdmull_high_n_s16(int16x8_t __p0, int16_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmulls_lane_s32(__p0_643, __p1_643, __p2_643) __extension__ ({ \ - int32_t __s0_643 = __p0_643; \ - int32x2_t __s1_643 = __p1_643; \ - int64_t __ret_643; \ - __ret_643 = vqdmulls_s32(__s0_643, vget_lane_s32(__s1_643, __p2_643)); \ - __ret_643; \ +#define vqdmulls_lane_s32(__p0_735, __p1_735, __p2_735) __extension__ ({ \ + int32_t __s0_735 = __p0_735; \ + int32x2_t __s1_735 = __p1_735; \ + int64_t __ret_735; \ + __ret_735 = vqdmulls_s32(__s0_735, vget_lane_s32(__s1_735, __p2_735)); \ + __ret_735; \ }) #else -#define vqdmulls_lane_s32(__p0_644, __p1_644, __p2_644) __extension__ ({ \ - int32_t __s0_644 = __p0_644; \ - int32x2_t __s1_644 = __p1_644; \ - int32x2_t __rev1_644; __rev1_644 = __builtin_shufflevector(__s1_644, __s1_644, 1, 0); \ - int64_t __ret_644; \ - __ret_644 = vqdmulls_s32(__s0_644, __noswap_vget_lane_s32(__rev1_644, __p2_644)); \ - __ret_644; \ +#define vqdmulls_lane_s32(__p0_736, __p1_736, __p2_736) __extension__ ({ \ + int32_t __s0_736 = __p0_736; \ + int32x2_t __s1_736 = __p1_736; \ + int32x2_t __rev1_736; __rev1_736 = __builtin_shufflevector(__s1_736, __s1_736, 1, 0); \ + int64_t __ret_736; \ + __ret_736 = vqdmulls_s32(__s0_736, __noswap_vget_lane_s32(__rev1_736, __p2_736)); \ + __ret_736; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmullh_lane_s16(__p0_645, __p1_645, __p2_645) __extension__ ({ \ - int16_t __s0_645 = __p0_645; \ - int16x4_t __s1_645 = __p1_645; \ - int32_t __ret_645; \ - __ret_645 = vqdmullh_s16(__s0_645, vget_lane_s16(__s1_645, __p2_645)); \ - __ret_645; \ +#define vqdmullh_lane_s16(__p0_737, __p1_737, __p2_737) __extension__ ({ \ + int16_t __s0_737 = __p0_737; \ + int16x4_t __s1_737 = __p1_737; \ + int32_t __ret_737; \ + __ret_737 = vqdmullh_s16(__s0_737, vget_lane_s16(__s1_737, __p2_737)); \ + __ret_737; \ }) #else -#define vqdmullh_lane_s16(__p0_646, __p1_646, __p2_646) __extension__ ({ \ - int16_t __s0_646 = __p0_646; \ - int16x4_t __s1_646 = __p1_646; \ - int16x4_t __rev1_646; __rev1_646 = __builtin_shufflevector(__s1_646, __s1_646, 3, 2, 1, 0); \ - int32_t __ret_646; \ - __ret_646 = vqdmullh_s16(__s0_646, __noswap_vget_lane_s16(__rev1_646, __p2_646)); \ - __ret_646; \ +#define vqdmullh_lane_s16(__p0_738, __p1_738, __p2_738) __extension__ ({ \ + int16_t __s0_738 = __p0_738; \ + int16x4_t __s1_738 = __p1_738; \ + 
int16x4_t __rev1_738; __rev1_738 = __builtin_shufflevector(__s1_738, __s1_738, 3, 2, 1, 0); \ + int32_t __ret_738; \ + __ret_738 = vqdmullh_s16(__s0_738, __noswap_vget_lane_s16(__rev1_738, __p2_738)); \ + __ret_738; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmulls_laneq_s32(__p0_647, __p1_647, __p2_647) __extension__ ({ \ - int32_t __s0_647 = __p0_647; \ - int32x4_t __s1_647 = __p1_647; \ - int64_t __ret_647; \ - __ret_647 = vqdmulls_s32(__s0_647, vgetq_lane_s32(__s1_647, __p2_647)); \ - __ret_647; \ +#define vqdmulls_laneq_s32(__p0_739, __p1_739, __p2_739) __extension__ ({ \ + int32_t __s0_739 = __p0_739; \ + int32x4_t __s1_739 = __p1_739; \ + int64_t __ret_739; \ + __ret_739 = vqdmulls_s32(__s0_739, vgetq_lane_s32(__s1_739, __p2_739)); \ + __ret_739; \ }) #else -#define vqdmulls_laneq_s32(__p0_648, __p1_648, __p2_648) __extension__ ({ \ - int32_t __s0_648 = __p0_648; \ - int32x4_t __s1_648 = __p1_648; \ - int32x4_t __rev1_648; __rev1_648 = __builtin_shufflevector(__s1_648, __s1_648, 3, 2, 1, 0); \ - int64_t __ret_648; \ - __ret_648 = vqdmulls_s32(__s0_648, __noswap_vgetq_lane_s32(__rev1_648, __p2_648)); \ - __ret_648; \ +#define vqdmulls_laneq_s32(__p0_740, __p1_740, __p2_740) __extension__ ({ \ + int32_t __s0_740 = __p0_740; \ + int32x4_t __s1_740 = __p1_740; \ + int32x4_t __rev1_740; __rev1_740 = __builtin_shufflevector(__s1_740, __s1_740, 3, 2, 1, 0); \ + int64_t __ret_740; \ + __ret_740 = vqdmulls_s32(__s0_740, __noswap_vgetq_lane_s32(__rev1_740, __p2_740)); \ + __ret_740; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmullh_laneq_s16(__p0_649, __p1_649, __p2_649) __extension__ ({ \ - int16_t __s0_649 = __p0_649; \ - int16x8_t __s1_649 = __p1_649; \ - int32_t __ret_649; \ - __ret_649 = vqdmullh_s16(__s0_649, vgetq_lane_s16(__s1_649, __p2_649)); \ - __ret_649; \ +#define vqdmullh_laneq_s16(__p0_741, __p1_741, __p2_741) __extension__ ({ \ + int16_t __s0_741 = __p0_741; \ + int16x8_t __s1_741 = __p1_741; \ + int32_t __ret_741; \ + __ret_741 = vqdmullh_s16(__s0_741, vgetq_lane_s16(__s1_741, __p2_741)); \ + __ret_741; \ }) #else -#define vqdmullh_laneq_s16(__p0_650, __p1_650, __p2_650) __extension__ ({ \ - int16_t __s0_650 = __p0_650; \ - int16x8_t __s1_650 = __p1_650; \ - int16x8_t __rev1_650; __rev1_650 = __builtin_shufflevector(__s1_650, __s1_650, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32_t __ret_650; \ - __ret_650 = vqdmullh_s16(__s0_650, __noswap_vgetq_lane_s16(__rev1_650, __p2_650)); \ - __ret_650; \ +#define vqdmullh_laneq_s16(__p0_742, __p1_742, __p2_742) __extension__ ({ \ + int16_t __s0_742 = __p0_742; \ + int16x8_t __s1_742 = __p1_742; \ + int16x8_t __rev1_742; __rev1_742 = __builtin_shufflevector(__s1_742, __s1_742, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32_t __ret_742; \ + __ret_742 = vqdmullh_s16(__s0_742, __noswap_vgetq_lane_s16(__rev1_742, __p2_742)); \ + __ret_742; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmull_laneq_s32(__p0_651, __p1_651, __p2_651) __extension__ ({ \ - int32x2_t __s0_651 = __p0_651; \ - int32x4_t __s1_651 = __p1_651; \ - int64x2_t __ret_651; \ - __ret_651 = vqdmull_s32(__s0_651, splat_laneq_s32(__s1_651, __p2_651)); \ - __ret_651; \ +#define vqdmull_laneq_s32(__p0_743, __p1_743, __p2_743) __extension__ ({ \ + int32x2_t __s0_743 = __p0_743; \ + int32x4_t __s1_743 = __p1_743; \ + int64x2_t __ret_743; \ + __ret_743 = vqdmull_s32(__s0_743, splat_laneq_s32(__s1_743, __p2_743)); \ + __ret_743; \ }) #else -#define vqdmull_laneq_s32(__p0_652, __p1_652, __p2_652) __extension__ ({ \ - int32x2_t __s0_652 = __p0_652; \ - int32x4_t __s1_652 = __p1_652; \ - 
int32x2_t __rev0_652; __rev0_652 = __builtin_shufflevector(__s0_652, __s0_652, 1, 0); \ - int32x4_t __rev1_652; __rev1_652 = __builtin_shufflevector(__s1_652, __s1_652, 3, 2, 1, 0); \ - int64x2_t __ret_652; \ - __ret_652 = __noswap_vqdmull_s32(__rev0_652, __noswap_splat_laneq_s32(__rev1_652, __p2_652)); \ - __ret_652 = __builtin_shufflevector(__ret_652, __ret_652, 1, 0); \ - __ret_652; \ +#define vqdmull_laneq_s32(__p0_744, __p1_744, __p2_744) __extension__ ({ \ + int32x2_t __s0_744 = __p0_744; \ + int32x4_t __s1_744 = __p1_744; \ + int32x2_t __rev0_744; __rev0_744 = __builtin_shufflevector(__s0_744, __s0_744, 1, 0); \ + int32x4_t __rev1_744; __rev1_744 = __builtin_shufflevector(__s1_744, __s1_744, 3, 2, 1, 0); \ + int64x2_t __ret_744; \ + __ret_744 = __noswap_vqdmull_s32(__rev0_744, __noswap_splat_laneq_s32(__rev1_744, __p2_744)); \ + __ret_744 = __builtin_shufflevector(__ret_744, __ret_744, 1, 0); \ + __ret_744; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqdmull_laneq_s16(__p0_653, __p1_653, __p2_653) __extension__ ({ \ - int16x4_t __s0_653 = __p0_653; \ - int16x8_t __s1_653 = __p1_653; \ - int32x4_t __ret_653; \ - __ret_653 = vqdmull_s16(__s0_653, splat_laneq_s16(__s1_653, __p2_653)); \ - __ret_653; \ +#define vqdmull_laneq_s16(__p0_745, __p1_745, __p2_745) __extension__ ({ \ + int16x4_t __s0_745 = __p0_745; \ + int16x8_t __s1_745 = __p1_745; \ + int32x4_t __ret_745; \ + __ret_745 = vqdmull_s16(__s0_745, splat_laneq_s16(__s1_745, __p2_745)); \ + __ret_745; \ }) #else -#define vqdmull_laneq_s16(__p0_654, __p1_654, __p2_654) __extension__ ({ \ - int16x4_t __s0_654 = __p0_654; \ - int16x8_t __s1_654 = __p1_654; \ - int16x4_t __rev0_654; __rev0_654 = __builtin_shufflevector(__s0_654, __s0_654, 3, 2, 1, 0); \ - int16x8_t __rev1_654; __rev1_654 = __builtin_shufflevector(__s1_654, __s1_654, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x4_t __ret_654; \ - __ret_654 = __noswap_vqdmull_s16(__rev0_654, __noswap_splat_laneq_s16(__rev1_654, __p2_654)); \ - __ret_654 = __builtin_shufflevector(__ret_654, __ret_654, 3, 2, 1, 0); \ - __ret_654; \ +#define vqdmull_laneq_s16(__p0_746, __p1_746, __p2_746) __extension__ ({ \ + int16x4_t __s0_746 = __p0_746; \ + int16x8_t __s1_746 = __p1_746; \ + int16x4_t __rev0_746; __rev0_746 = __builtin_shufflevector(__s0_746, __s0_746, 3, 2, 1, 0); \ + int16x8_t __rev1_746; __rev1_746 = __builtin_shufflevector(__s1_746, __s1_746, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x4_t __ret_746; \ + __ret_746 = __noswap_vqdmull_s16(__rev0_746, __noswap_splat_laneq_s16(__rev1_746, __p2_746)); \ + __ret_746 = __builtin_shufflevector(__ret_746, __ret_746, 3, 2, 1, 0); \ + __ret_746; \ }) #endif @@ -57188,30 +58858,30 @@ __ai int8x16_t vqmovn_high_s16(int8x8_t __p0, int16x8_t __p1) { } #endif -__ai int16_t vqmovuns_s32(int32_t __p0) { - int16_t __ret; - __ret = (int16_t) __builtin_neon_vqmovuns_s32(__p0); +__ai uint16_t vqmovuns_s32(int32_t __p0) { + uint16_t __ret; + __ret = (uint16_t) __builtin_neon_vqmovuns_s32(__p0); return __ret; } -__ai int32_t vqmovund_s64(int64_t __p0) { - int32_t __ret; - __ret = (int32_t) __builtin_neon_vqmovund_s64(__p0); +__ai uint32_t vqmovund_s64(int64_t __p0) { + uint32_t __ret; + __ret = (uint32_t) __builtin_neon_vqmovund_s64(__p0); return __ret; } -__ai int8_t vqmovunh_s16(int16_t __p0) { - int8_t __ret; - __ret = (int8_t) __builtin_neon_vqmovunh_s16(__p0); +__ai uint8_t vqmovunh_s16(int16_t __p0) { + uint8_t __ret; + __ret = (uint8_t) __builtin_neon_vqmovunh_s16(__p0); return __ret; } #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vqmovun_high_s32(int16x4_t __p0, 
int32x4_t __p1) { +__ai uint16x8_t vqmovun_high_s32(uint16x4_t __p0, int32x4_t __p1) { uint16x8_t __ret; __ret = vcombine_u16((uint16x4_t)(__p0), vqmovun_s32(__p1)); return __ret; } #else -__ai uint16x8_t vqmovun_high_s32(int16x4_t __p0, int32x4_t __p1) { - int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); +__ai uint16x8_t vqmovun_high_s32(uint16x4_t __p0, int32x4_t __p1) { + uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint16x8_t __ret; __ret = __noswap_vcombine_u16((uint16x4_t)(__rev0), __noswap_vqmovun_s32(__rev1)); @@ -57221,14 +58891,14 @@ __ai uint16x8_t vqmovun_high_s32(int16x4_t __p0, int32x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vqmovun_high_s64(int32x2_t __p0, int64x2_t __p1) { +__ai uint32x4_t vqmovun_high_s64(uint32x2_t __p0, int64x2_t __p1) { uint32x4_t __ret; __ret = vcombine_u32((uint32x2_t)(__p0), vqmovun_s64(__p1)); return __ret; } #else -__ai uint32x4_t vqmovun_high_s64(int32x2_t __p0, int64x2_t __p1) { - int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); +__ai uint32x4_t vqmovun_high_s64(uint32x2_t __p0, int64x2_t __p1) { + uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); uint32x4_t __ret; __ret = __noswap_vcombine_u32((uint32x2_t)(__rev0), __noswap_vqmovun_s64(__rev1)); @@ -57238,14 +58908,14 @@ __ai uint32x4_t vqmovun_high_s64(int32x2_t __p0, int64x2_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vqmovun_high_s16(int8x8_t __p0, int16x8_t __p1) { +__ai uint8x16_t vqmovun_high_s16(uint8x8_t __p0, int16x8_t __p1) { uint8x16_t __ret; __ret = vcombine_u8((uint8x8_t)(__p0), vqmovun_s16(__p1)); return __ret; } #else -__ai uint8x16_t vqmovun_high_s16(int8x8_t __p0, int16x8_t __p1) { - int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); +__ai uint8x16_t vqmovun_high_s16(uint8x8_t __p0, int16x8_t __p1) { + uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __ret; __ret = __noswap_vcombine_u8((uint8x8_t)(__rev0), __noswap_vqmovun_s16(__rev1)); @@ -57390,78 +59060,78 @@ __ai int16_t vqrdmulhh_s16(int16_t __p0, int16_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmulhs_lane_s32(__p0_655, __p1_655, __p2_655) __extension__ ({ \ - int32_t __s0_655 = __p0_655; \ - int32x2_t __s1_655 = __p1_655; \ - int32_t __ret_655; \ - __ret_655 = vqrdmulhs_s32(__s0_655, vget_lane_s32(__s1_655, __p2_655)); \ - __ret_655; \ +#define vqrdmulhs_lane_s32(__p0_747, __p1_747, __p2_747) __extension__ ({ \ + int32_t __s0_747 = __p0_747; \ + int32x2_t __s1_747 = __p1_747; \ + int32_t __ret_747; \ + __ret_747 = vqrdmulhs_s32(__s0_747, vget_lane_s32(__s1_747, __p2_747)); \ + __ret_747; \ }) #else -#define vqrdmulhs_lane_s32(__p0_656, __p1_656, __p2_656) __extension__ ({ \ - int32_t __s0_656 = __p0_656; \ - int32x2_t __s1_656 = __p1_656; \ - int32x2_t __rev1_656; __rev1_656 = __builtin_shufflevector(__s1_656, __s1_656, 1, 0); \ - int32_t __ret_656; \ - __ret_656 = vqrdmulhs_s32(__s0_656, __noswap_vget_lane_s32(__rev1_656, __p2_656)); \ - __ret_656; \ +#define vqrdmulhs_lane_s32(__p0_748, __p1_748, __p2_748) __extension__ ({ \ + int32_t __s0_748 = __p0_748; \ + int32x2_t __s1_748 = __p1_748; \ + int32x2_t __rev1_748; __rev1_748 = __builtin_shufflevector(__s1_748, 
__s1_748, 1, 0); \ + int32_t __ret_748; \ + __ret_748 = vqrdmulhs_s32(__s0_748, __noswap_vget_lane_s32(__rev1_748, __p2_748)); \ + __ret_748; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmulhh_lane_s16(__p0_657, __p1_657, __p2_657) __extension__ ({ \ - int16_t __s0_657 = __p0_657; \ - int16x4_t __s1_657 = __p1_657; \ - int16_t __ret_657; \ - __ret_657 = vqrdmulhh_s16(__s0_657, vget_lane_s16(__s1_657, __p2_657)); \ - __ret_657; \ +#define vqrdmulhh_lane_s16(__p0_749, __p1_749, __p2_749) __extension__ ({ \ + int16_t __s0_749 = __p0_749; \ + int16x4_t __s1_749 = __p1_749; \ + int16_t __ret_749; \ + __ret_749 = vqrdmulhh_s16(__s0_749, vget_lane_s16(__s1_749, __p2_749)); \ + __ret_749; \ }) #else -#define vqrdmulhh_lane_s16(__p0_658, __p1_658, __p2_658) __extension__ ({ \ - int16_t __s0_658 = __p0_658; \ - int16x4_t __s1_658 = __p1_658; \ - int16x4_t __rev1_658; __rev1_658 = __builtin_shufflevector(__s1_658, __s1_658, 3, 2, 1, 0); \ - int16_t __ret_658; \ - __ret_658 = vqrdmulhh_s16(__s0_658, __noswap_vget_lane_s16(__rev1_658, __p2_658)); \ - __ret_658; \ +#define vqrdmulhh_lane_s16(__p0_750, __p1_750, __p2_750) __extension__ ({ \ + int16_t __s0_750 = __p0_750; \ + int16x4_t __s1_750 = __p1_750; \ + int16x4_t __rev1_750; __rev1_750 = __builtin_shufflevector(__s1_750, __s1_750, 3, 2, 1, 0); \ + int16_t __ret_750; \ + __ret_750 = vqrdmulhh_s16(__s0_750, __noswap_vget_lane_s16(__rev1_750, __p2_750)); \ + __ret_750; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmulhs_laneq_s32(__p0_659, __p1_659, __p2_659) __extension__ ({ \ - int32_t __s0_659 = __p0_659; \ - int32x4_t __s1_659 = __p1_659; \ - int32_t __ret_659; \ - __ret_659 = vqrdmulhs_s32(__s0_659, vgetq_lane_s32(__s1_659, __p2_659)); \ - __ret_659; \ +#define vqrdmulhs_laneq_s32(__p0_751, __p1_751, __p2_751) __extension__ ({ \ + int32_t __s0_751 = __p0_751; \ + int32x4_t __s1_751 = __p1_751; \ + int32_t __ret_751; \ + __ret_751 = vqrdmulhs_s32(__s0_751, vgetq_lane_s32(__s1_751, __p2_751)); \ + __ret_751; \ }) #else -#define vqrdmulhs_laneq_s32(__p0_660, __p1_660, __p2_660) __extension__ ({ \ - int32_t __s0_660 = __p0_660; \ - int32x4_t __s1_660 = __p1_660; \ - int32x4_t __rev1_660; __rev1_660 = __builtin_shufflevector(__s1_660, __s1_660, 3, 2, 1, 0); \ - int32_t __ret_660; \ - __ret_660 = vqrdmulhs_s32(__s0_660, __noswap_vgetq_lane_s32(__rev1_660, __p2_660)); \ - __ret_660; \ +#define vqrdmulhs_laneq_s32(__p0_752, __p1_752, __p2_752) __extension__ ({ \ + int32_t __s0_752 = __p0_752; \ + int32x4_t __s1_752 = __p1_752; \ + int32x4_t __rev1_752; __rev1_752 = __builtin_shufflevector(__s1_752, __s1_752, 3, 2, 1, 0); \ + int32_t __ret_752; \ + __ret_752 = vqrdmulhs_s32(__s0_752, __noswap_vgetq_lane_s32(__rev1_752, __p2_752)); \ + __ret_752; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmulhh_laneq_s16(__p0_661, __p1_661, __p2_661) __extension__ ({ \ - int16_t __s0_661 = __p0_661; \ - int16x8_t __s1_661 = __p1_661; \ - int16_t __ret_661; \ - __ret_661 = vqrdmulhh_s16(__s0_661, vgetq_lane_s16(__s1_661, __p2_661)); \ - __ret_661; \ +#define vqrdmulhh_laneq_s16(__p0_753, __p1_753, __p2_753) __extension__ ({ \ + int16_t __s0_753 = __p0_753; \ + int16x8_t __s1_753 = __p1_753; \ + int16_t __ret_753; \ + __ret_753 = vqrdmulhh_s16(__s0_753, vgetq_lane_s16(__s1_753, __p2_753)); \ + __ret_753; \ }) #else -#define vqrdmulhh_laneq_s16(__p0_662, __p1_662, __p2_662) __extension__ ({ \ - int16_t __s0_662 = __p0_662; \ - int16x8_t __s1_662 = __p1_662; \ - int16x8_t __rev1_662; __rev1_662 = __builtin_shufflevector(__s1_662, __s1_662, 7, 6, 5, 4, 3, 2, 1, 
0); \ - int16_t __ret_662; \ - __ret_662 = vqrdmulhh_s16(__s0_662, __noswap_vgetq_lane_s16(__rev1_662, __p2_662)); \ - __ret_662; \ +#define vqrdmulhh_laneq_s16(__p0_754, __p1_754, __p2_754) __extension__ ({ \ + int16_t __s0_754 = __p0_754; \ + int16x8_t __s1_754 = __p1_754; \ + int16x8_t __rev1_754; __rev1_754 = __builtin_shufflevector(__s1_754, __s1_754, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16_t __ret_754; \ + __ret_754 = vqrdmulhh_s16(__s0_754, __noswap_vgetq_lane_s16(__rev1_754, __p2_754)); \ + __ret_754; \ }) #endif @@ -57549,22 +59219,22 @@ __ai int16_t vqrdmulhh_s16(int16_t __p0, int16_t __p1) { }) #endif -__ai uint8_t vqrshlb_u8(uint8_t __p0, uint8_t __p1) { +__ai uint8_t vqrshlb_u8(uint8_t __p0, int8_t __p1) { uint8_t __ret; __ret = (uint8_t) __builtin_neon_vqrshlb_u8(__p0, __p1); return __ret; } -__ai uint32_t vqrshls_u32(uint32_t __p0, uint32_t __p1) { +__ai uint32_t vqrshls_u32(uint32_t __p0, int32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vqrshls_u32(__p0, __p1); return __ret; } -__ai uint64_t vqrshld_u64(uint64_t __p0, uint64_t __p1) { +__ai uint64_t vqrshld_u64(uint64_t __p0, int64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vqrshld_u64(__p0, __p1); return __ret; } -__ai uint16_t vqrshlh_u16(uint16_t __p0, uint16_t __p1) { +__ai uint16_t vqrshlh_u16(uint16_t __p0, int16_t __p1) { uint16_t __ret; __ret = (uint16_t) __builtin_neon_vqrshlh_u16(__p0, __p1); return __ret; @@ -57590,128 +59260,128 @@ __ai int16_t vqrshlh_s16(int16_t __p0, int16_t __p1) { return __ret; } #ifdef __LITTLE_ENDIAN__ -#define vqrshrn_high_n_u32(__p0_663, __p1_663, __p2_663) __extension__ ({ \ - uint16x4_t __s0_663 = __p0_663; \ - uint32x4_t __s1_663 = __p1_663; \ - uint16x8_t __ret_663; \ - __ret_663 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_663), (uint16x4_t)(vqrshrn_n_u32(__s1_663, __p2_663)))); \ - __ret_663; \ +#define vqrshrn_high_n_u32(__p0_755, __p1_755, __p2_755) __extension__ ({ \ + uint16x4_t __s0_755 = __p0_755; \ + uint32x4_t __s1_755 = __p1_755; \ + uint16x8_t __ret_755; \ + __ret_755 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_755), (uint16x4_t)(vqrshrn_n_u32(__s1_755, __p2_755)))); \ + __ret_755; \ }) #else -#define vqrshrn_high_n_u32(__p0_664, __p1_664, __p2_664) __extension__ ({ \ - uint16x4_t __s0_664 = __p0_664; \ - uint32x4_t __s1_664 = __p1_664; \ - uint16x4_t __rev0_664; __rev0_664 = __builtin_shufflevector(__s0_664, __s0_664, 3, 2, 1, 0); \ - uint32x4_t __rev1_664; __rev1_664 = __builtin_shufflevector(__s1_664, __s1_664, 3, 2, 1, 0); \ - uint16x8_t __ret_664; \ - __ret_664 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_664), (uint16x4_t)(__noswap_vqrshrn_n_u32(__rev1_664, __p2_664)))); \ - __ret_664 = __builtin_shufflevector(__ret_664, __ret_664, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_664; \ +#define vqrshrn_high_n_u32(__p0_756, __p1_756, __p2_756) __extension__ ({ \ + uint16x4_t __s0_756 = __p0_756; \ + uint32x4_t __s1_756 = __p1_756; \ + uint16x4_t __rev0_756; __rev0_756 = __builtin_shufflevector(__s0_756, __s0_756, 3, 2, 1, 0); \ + uint32x4_t __rev1_756; __rev1_756 = __builtin_shufflevector(__s1_756, __s1_756, 3, 2, 1, 0); \ + uint16x8_t __ret_756; \ + __ret_756 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_756), (uint16x4_t)(__noswap_vqrshrn_n_u32(__rev1_756, __p2_756)))); \ + __ret_756 = __builtin_shufflevector(__ret_756, __ret_756, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_756; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrn_high_n_u64(__p0_665, __p1_665, __p2_665) __extension__ ({ \ - uint32x2_t __s0_665 = __p0_665; \ - 
uint64x2_t __s1_665 = __p1_665; \ - uint32x4_t __ret_665; \ - __ret_665 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_665), (uint32x2_t)(vqrshrn_n_u64(__s1_665, __p2_665)))); \ - __ret_665; \ +#define vqrshrn_high_n_u64(__p0_757, __p1_757, __p2_757) __extension__ ({ \ + uint32x2_t __s0_757 = __p0_757; \ + uint64x2_t __s1_757 = __p1_757; \ + uint32x4_t __ret_757; \ + __ret_757 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_757), (uint32x2_t)(vqrshrn_n_u64(__s1_757, __p2_757)))); \ + __ret_757; \ }) #else -#define vqrshrn_high_n_u64(__p0_666, __p1_666, __p2_666) __extension__ ({ \ - uint32x2_t __s0_666 = __p0_666; \ - uint64x2_t __s1_666 = __p1_666; \ - uint32x2_t __rev0_666; __rev0_666 = __builtin_shufflevector(__s0_666, __s0_666, 1, 0); \ - uint64x2_t __rev1_666; __rev1_666 = __builtin_shufflevector(__s1_666, __s1_666, 1, 0); \ - uint32x4_t __ret_666; \ - __ret_666 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_666), (uint32x2_t)(__noswap_vqrshrn_n_u64(__rev1_666, __p2_666)))); \ - __ret_666 = __builtin_shufflevector(__ret_666, __ret_666, 3, 2, 1, 0); \ - __ret_666; \ +#define vqrshrn_high_n_u64(__p0_758, __p1_758, __p2_758) __extension__ ({ \ + uint32x2_t __s0_758 = __p0_758; \ + uint64x2_t __s1_758 = __p1_758; \ + uint32x2_t __rev0_758; __rev0_758 = __builtin_shufflevector(__s0_758, __s0_758, 1, 0); \ + uint64x2_t __rev1_758; __rev1_758 = __builtin_shufflevector(__s1_758, __s1_758, 1, 0); \ + uint32x4_t __ret_758; \ + __ret_758 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_758), (uint32x2_t)(__noswap_vqrshrn_n_u64(__rev1_758, __p2_758)))); \ + __ret_758 = __builtin_shufflevector(__ret_758, __ret_758, 3, 2, 1, 0); \ + __ret_758; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrn_high_n_u16(__p0_667, __p1_667, __p2_667) __extension__ ({ \ - uint8x8_t __s0_667 = __p0_667; \ - uint16x8_t __s1_667 = __p1_667; \ - uint8x16_t __ret_667; \ - __ret_667 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_667), (uint8x8_t)(vqrshrn_n_u16(__s1_667, __p2_667)))); \ - __ret_667; \ +#define vqrshrn_high_n_u16(__p0_759, __p1_759, __p2_759) __extension__ ({ \ + uint8x8_t __s0_759 = __p0_759; \ + uint16x8_t __s1_759 = __p1_759; \ + uint8x16_t __ret_759; \ + __ret_759 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_759), (uint8x8_t)(vqrshrn_n_u16(__s1_759, __p2_759)))); \ + __ret_759; \ }) #else -#define vqrshrn_high_n_u16(__p0_668, __p1_668, __p2_668) __extension__ ({ \ - uint8x8_t __s0_668 = __p0_668; \ - uint16x8_t __s1_668 = __p1_668; \ - uint8x8_t __rev0_668; __rev0_668 = __builtin_shufflevector(__s0_668, __s0_668, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1_668; __rev1_668 = __builtin_shufflevector(__s1_668, __s1_668, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __ret_668; \ - __ret_668 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_668), (uint8x8_t)(__noswap_vqrshrn_n_u16(__rev1_668, __p2_668)))); \ - __ret_668 = __builtin_shufflevector(__ret_668, __ret_668, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_668; \ +#define vqrshrn_high_n_u16(__p0_760, __p1_760, __p2_760) __extension__ ({ \ + uint8x8_t __s0_760 = __p0_760; \ + uint16x8_t __s1_760 = __p1_760; \ + uint8x8_t __rev0_760; __rev0_760 = __builtin_shufflevector(__s0_760, __s0_760, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev1_760; __rev1_760 = __builtin_shufflevector(__s1_760, __s1_760, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __ret_760; \ + __ret_760 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_760), (uint8x8_t)(__noswap_vqrshrn_n_u16(__rev1_760, __p2_760)))); \ + __ret_760 = 
__builtin_shufflevector(__ret_760, __ret_760, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_760; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrn_high_n_s32(__p0_669, __p1_669, __p2_669) __extension__ ({ \ - int16x4_t __s0_669 = __p0_669; \ - int32x4_t __s1_669 = __p1_669; \ - int16x8_t __ret_669; \ - __ret_669 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_669), (int16x4_t)(vqrshrn_n_s32(__s1_669, __p2_669)))); \ - __ret_669; \ +#define vqrshrn_high_n_s32(__p0_761, __p1_761, __p2_761) __extension__ ({ \ + int16x4_t __s0_761 = __p0_761; \ + int32x4_t __s1_761 = __p1_761; \ + int16x8_t __ret_761; \ + __ret_761 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_761), (int16x4_t)(vqrshrn_n_s32(__s1_761, __p2_761)))); \ + __ret_761; \ }) #else -#define vqrshrn_high_n_s32(__p0_670, __p1_670, __p2_670) __extension__ ({ \ - int16x4_t __s0_670 = __p0_670; \ - int32x4_t __s1_670 = __p1_670; \ - int16x4_t __rev0_670; __rev0_670 = __builtin_shufflevector(__s0_670, __s0_670, 3, 2, 1, 0); \ - int32x4_t __rev1_670; __rev1_670 = __builtin_shufflevector(__s1_670, __s1_670, 3, 2, 1, 0); \ - int16x8_t __ret_670; \ - __ret_670 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_670), (int16x4_t)(__noswap_vqrshrn_n_s32(__rev1_670, __p2_670)))); \ - __ret_670 = __builtin_shufflevector(__ret_670, __ret_670, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_670; \ +#define vqrshrn_high_n_s32(__p0_762, __p1_762, __p2_762) __extension__ ({ \ + int16x4_t __s0_762 = __p0_762; \ + int32x4_t __s1_762 = __p1_762; \ + int16x4_t __rev0_762; __rev0_762 = __builtin_shufflevector(__s0_762, __s0_762, 3, 2, 1, 0); \ + int32x4_t __rev1_762; __rev1_762 = __builtin_shufflevector(__s1_762, __s1_762, 3, 2, 1, 0); \ + int16x8_t __ret_762; \ + __ret_762 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_762), (int16x4_t)(__noswap_vqrshrn_n_s32(__rev1_762, __p2_762)))); \ + __ret_762 = __builtin_shufflevector(__ret_762, __ret_762, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_762; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrn_high_n_s64(__p0_671, __p1_671, __p2_671) __extension__ ({ \ - int32x2_t __s0_671 = __p0_671; \ - int64x2_t __s1_671 = __p1_671; \ - int32x4_t __ret_671; \ - __ret_671 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_671), (int32x2_t)(vqrshrn_n_s64(__s1_671, __p2_671)))); \ - __ret_671; \ +#define vqrshrn_high_n_s64(__p0_763, __p1_763, __p2_763) __extension__ ({ \ + int32x2_t __s0_763 = __p0_763; \ + int64x2_t __s1_763 = __p1_763; \ + int32x4_t __ret_763; \ + __ret_763 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_763), (int32x2_t)(vqrshrn_n_s64(__s1_763, __p2_763)))); \ + __ret_763; \ }) #else -#define vqrshrn_high_n_s64(__p0_672, __p1_672, __p2_672) __extension__ ({ \ - int32x2_t __s0_672 = __p0_672; \ - int64x2_t __s1_672 = __p1_672; \ - int32x2_t __rev0_672; __rev0_672 = __builtin_shufflevector(__s0_672, __s0_672, 1, 0); \ - int64x2_t __rev1_672; __rev1_672 = __builtin_shufflevector(__s1_672, __s1_672, 1, 0); \ - int32x4_t __ret_672; \ - __ret_672 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_672), (int32x2_t)(__noswap_vqrshrn_n_s64(__rev1_672, __p2_672)))); \ - __ret_672 = __builtin_shufflevector(__ret_672, __ret_672, 3, 2, 1, 0); \ - __ret_672; \ +#define vqrshrn_high_n_s64(__p0_764, __p1_764, __p2_764) __extension__ ({ \ + int32x2_t __s0_764 = __p0_764; \ + int64x2_t __s1_764 = __p1_764; \ + int32x2_t __rev0_764; __rev0_764 = __builtin_shufflevector(__s0_764, __s0_764, 1, 0); \ + int64x2_t __rev1_764; __rev1_764 = __builtin_shufflevector(__s1_764, __s1_764, 1, 0); \ + int32x4_t __ret_764; \ + 
__ret_764 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_764), (int32x2_t)(__noswap_vqrshrn_n_s64(__rev1_764, __p2_764)))); \ + __ret_764 = __builtin_shufflevector(__ret_764, __ret_764, 3, 2, 1, 0); \ + __ret_764; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrn_high_n_s16(__p0_673, __p1_673, __p2_673) __extension__ ({ \ - int8x8_t __s0_673 = __p0_673; \ - int16x8_t __s1_673 = __p1_673; \ - int8x16_t __ret_673; \ - __ret_673 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_673), (int8x8_t)(vqrshrn_n_s16(__s1_673, __p2_673)))); \ - __ret_673; \ +#define vqrshrn_high_n_s16(__p0_765, __p1_765, __p2_765) __extension__ ({ \ + int8x8_t __s0_765 = __p0_765; \ + int16x8_t __s1_765 = __p1_765; \ + int8x16_t __ret_765; \ + __ret_765 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_765), (int8x8_t)(vqrshrn_n_s16(__s1_765, __p2_765)))); \ + __ret_765; \ }) #else -#define vqrshrn_high_n_s16(__p0_674, __p1_674, __p2_674) __extension__ ({ \ - int8x8_t __s0_674 = __p0_674; \ - int16x8_t __s1_674 = __p1_674; \ - int8x8_t __rev0_674; __rev0_674 = __builtin_shufflevector(__s0_674, __s0_674, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_674; __rev1_674 = __builtin_shufflevector(__s1_674, __s1_674, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __ret_674; \ - __ret_674 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_674), (int8x8_t)(__noswap_vqrshrn_n_s16(__rev1_674, __p2_674)))); \ - __ret_674 = __builtin_shufflevector(__ret_674, __ret_674, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_674; \ +#define vqrshrn_high_n_s16(__p0_766, __p1_766, __p2_766) __extension__ ({ \ + int8x8_t __s0_766 = __p0_766; \ + int16x8_t __s1_766 = __p1_766; \ + int8x8_t __rev0_766; __rev0_766 = __builtin_shufflevector(__s0_766, __s0_766, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1_766; __rev1_766 = __builtin_shufflevector(__s1_766, __s1_766, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __ret_766; \ + __ret_766 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_766), (int8x8_t)(__noswap_vqrshrn_n_s16(__rev1_766, __p2_766)))); \ + __ret_766 = __builtin_shufflevector(__ret_766, __ret_766, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_766; \ }) #endif @@ -57752,65 +59422,65 @@ __ai int16_t vqrshlh_s16(int16_t __p0, int16_t __p1) { __ret; \ }) #ifdef __LITTLE_ENDIAN__ -#define vqrshrun_high_n_s32(__p0_675, __p1_675, __p2_675) __extension__ ({ \ - int16x4_t __s0_675 = __p0_675; \ - int32x4_t __s1_675 = __p1_675; \ - int16x8_t __ret_675; \ - __ret_675 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_675), (int16x4_t)(vqrshrun_n_s32(__s1_675, __p2_675)))); \ - __ret_675; \ +#define vqrshrun_high_n_s32(__p0_767, __p1_767, __p2_767) __extension__ ({ \ + int16x4_t __s0_767 = __p0_767; \ + int32x4_t __s1_767 = __p1_767; \ + int16x8_t __ret_767; \ + __ret_767 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_767), (int16x4_t)(vqrshrun_n_s32(__s1_767, __p2_767)))); \ + __ret_767; \ }) #else -#define vqrshrun_high_n_s32(__p0_676, __p1_676, __p2_676) __extension__ ({ \ - int16x4_t __s0_676 = __p0_676; \ - int32x4_t __s1_676 = __p1_676; \ - int16x4_t __rev0_676; __rev0_676 = __builtin_shufflevector(__s0_676, __s0_676, 3, 2, 1, 0); \ - int32x4_t __rev1_676; __rev1_676 = __builtin_shufflevector(__s1_676, __s1_676, 3, 2, 1, 0); \ - int16x8_t __ret_676; \ - __ret_676 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_676), (int16x4_t)(__noswap_vqrshrun_n_s32(__rev1_676, __p2_676)))); \ - __ret_676 = __builtin_shufflevector(__ret_676, __ret_676, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_676; \ +#define vqrshrun_high_n_s32(__p0_768, 
__p1_768, __p2_768) __extension__ ({ \ + int16x4_t __s0_768 = __p0_768; \ + int32x4_t __s1_768 = __p1_768; \ + int16x4_t __rev0_768; __rev0_768 = __builtin_shufflevector(__s0_768, __s0_768, 3, 2, 1, 0); \ + int32x4_t __rev1_768; __rev1_768 = __builtin_shufflevector(__s1_768, __s1_768, 3, 2, 1, 0); \ + int16x8_t __ret_768; \ + __ret_768 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_768), (int16x4_t)(__noswap_vqrshrun_n_s32(__rev1_768, __p2_768)))); \ + __ret_768 = __builtin_shufflevector(__ret_768, __ret_768, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_768; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrun_high_n_s64(__p0_677, __p1_677, __p2_677) __extension__ ({ \ - int32x2_t __s0_677 = __p0_677; \ - int64x2_t __s1_677 = __p1_677; \ - int32x4_t __ret_677; \ - __ret_677 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_677), (int32x2_t)(vqrshrun_n_s64(__s1_677, __p2_677)))); \ - __ret_677; \ +#define vqrshrun_high_n_s64(__p0_769, __p1_769, __p2_769) __extension__ ({ \ + int32x2_t __s0_769 = __p0_769; \ + int64x2_t __s1_769 = __p1_769; \ + int32x4_t __ret_769; \ + __ret_769 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_769), (int32x2_t)(vqrshrun_n_s64(__s1_769, __p2_769)))); \ + __ret_769; \ }) #else -#define vqrshrun_high_n_s64(__p0_678, __p1_678, __p2_678) __extension__ ({ \ - int32x2_t __s0_678 = __p0_678; \ - int64x2_t __s1_678 = __p1_678; \ - int32x2_t __rev0_678; __rev0_678 = __builtin_shufflevector(__s0_678, __s0_678, 1, 0); \ - int64x2_t __rev1_678; __rev1_678 = __builtin_shufflevector(__s1_678, __s1_678, 1, 0); \ - int32x4_t __ret_678; \ - __ret_678 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_678), (int32x2_t)(__noswap_vqrshrun_n_s64(__rev1_678, __p2_678)))); \ - __ret_678 = __builtin_shufflevector(__ret_678, __ret_678, 3, 2, 1, 0); \ - __ret_678; \ +#define vqrshrun_high_n_s64(__p0_770, __p1_770, __p2_770) __extension__ ({ \ + int32x2_t __s0_770 = __p0_770; \ + int64x2_t __s1_770 = __p1_770; \ + int32x2_t __rev0_770; __rev0_770 = __builtin_shufflevector(__s0_770, __s0_770, 1, 0); \ + int64x2_t __rev1_770; __rev1_770 = __builtin_shufflevector(__s1_770, __s1_770, 1, 0); \ + int32x4_t __ret_770; \ + __ret_770 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_770), (int32x2_t)(__noswap_vqrshrun_n_s64(__rev1_770, __p2_770)))); \ + __ret_770 = __builtin_shufflevector(__ret_770, __ret_770, 3, 2, 1, 0); \ + __ret_770; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrshrun_high_n_s16(__p0_679, __p1_679, __p2_679) __extension__ ({ \ - int8x8_t __s0_679 = __p0_679; \ - int16x8_t __s1_679 = __p1_679; \ - int8x16_t __ret_679; \ - __ret_679 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_679), (int8x8_t)(vqrshrun_n_s16(__s1_679, __p2_679)))); \ - __ret_679; \ +#define vqrshrun_high_n_s16(__p0_771, __p1_771, __p2_771) __extension__ ({ \ + int8x8_t __s0_771 = __p0_771; \ + int16x8_t __s1_771 = __p1_771; \ + int8x16_t __ret_771; \ + __ret_771 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_771), (int8x8_t)(vqrshrun_n_s16(__s1_771, __p2_771)))); \ + __ret_771; \ }) #else -#define vqrshrun_high_n_s16(__p0_680, __p1_680, __p2_680) __extension__ ({ \ - int8x8_t __s0_680 = __p0_680; \ - int16x8_t __s1_680 = __p1_680; \ - int8x8_t __rev0_680; __rev0_680 = __builtin_shufflevector(__s0_680, __s0_680, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_680; __rev1_680 = __builtin_shufflevector(__s1_680, __s1_680, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __ret_680; \ - __ret_680 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_680), (int8x8_t)(__noswap_vqrshrun_n_s16(__rev1_680, __p2_680)))); \ - __ret_680 = 
__builtin_shufflevector(__ret_680, __ret_680, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_680; \ +#define vqrshrun_high_n_s16(__p0_772, __p1_772, __p2_772) __extension__ ({ \ + int8x8_t __s0_772 = __p0_772; \ + int16x8_t __s1_772 = __p1_772; \ + int8x8_t __rev0_772; __rev0_772 = __builtin_shufflevector(__s0_772, __s0_772, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1_772; __rev1_772 = __builtin_shufflevector(__s1_772, __s1_772, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __ret_772; \ + __ret_772 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_772), (int8x8_t)(__noswap_vqrshrun_n_s16(__rev1_772, __p2_772)))); \ + __ret_772 = __builtin_shufflevector(__ret_772, __ret_772, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_772; \ }) #endif @@ -57832,22 +59502,22 @@ __ai int16_t vqrshlh_s16(int16_t __p0, int16_t __p1) { __ret = (int8_t) __builtin_neon_vqrshrunh_n_s16(__s0, __p1); \ __ret; \ }) -__ai uint8_t vqshlb_u8(uint8_t __p0, uint8_t __p1) { +__ai uint8_t vqshlb_u8(uint8_t __p0, int8_t __p1) { uint8_t __ret; __ret = (uint8_t) __builtin_neon_vqshlb_u8(__p0, __p1); return __ret; } -__ai uint32_t vqshls_u32(uint32_t __p0, uint32_t __p1) { +__ai uint32_t vqshls_u32(uint32_t __p0, int32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vqshls_u32(__p0, __p1); return __ret; } -__ai uint64_t vqshld_u64(uint64_t __p0, uint64_t __p1) { +__ai uint64_t vqshld_u64(uint64_t __p0, int64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vqshld_u64(__p0, __p1); return __ret; } -__ai uint16_t vqshlh_u16(uint16_t __p0, uint16_t __p1) { +__ai uint16_t vqshlh_u16(uint16_t __p0, int16_t __p1) { uint16_t __ret; __ret = (uint16_t) __builtin_neon_vqshlh_u16(__p0, __p1); return __ret; @@ -57945,128 +59615,128 @@ __ai int16_t vqshlh_s16(int16_t __p0, int16_t __p1) { __ret; \ }) #ifdef __LITTLE_ENDIAN__ -#define vqshrn_high_n_u32(__p0_681, __p1_681, __p2_681) __extension__ ({ \ - uint16x4_t __s0_681 = __p0_681; \ - uint32x4_t __s1_681 = __p1_681; \ - uint16x8_t __ret_681; \ - __ret_681 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_681), (uint16x4_t)(vqshrn_n_u32(__s1_681, __p2_681)))); \ - __ret_681; \ +#define vqshrn_high_n_u32(__p0_773, __p1_773, __p2_773) __extension__ ({ \ + uint16x4_t __s0_773 = __p0_773; \ + uint32x4_t __s1_773 = __p1_773; \ + uint16x8_t __ret_773; \ + __ret_773 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_773), (uint16x4_t)(vqshrn_n_u32(__s1_773, __p2_773)))); \ + __ret_773; \ }) #else -#define vqshrn_high_n_u32(__p0_682, __p1_682, __p2_682) __extension__ ({ \ - uint16x4_t __s0_682 = __p0_682; \ - uint32x4_t __s1_682 = __p1_682; \ - uint16x4_t __rev0_682; __rev0_682 = __builtin_shufflevector(__s0_682, __s0_682, 3, 2, 1, 0); \ - uint32x4_t __rev1_682; __rev1_682 = __builtin_shufflevector(__s1_682, __s1_682, 3, 2, 1, 0); \ - uint16x8_t __ret_682; \ - __ret_682 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_682), (uint16x4_t)(__noswap_vqshrn_n_u32(__rev1_682, __p2_682)))); \ - __ret_682 = __builtin_shufflevector(__ret_682, __ret_682, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_682; \ +#define vqshrn_high_n_u32(__p0_774, __p1_774, __p2_774) __extension__ ({ \ + uint16x4_t __s0_774 = __p0_774; \ + uint32x4_t __s1_774 = __p1_774; \ + uint16x4_t __rev0_774; __rev0_774 = __builtin_shufflevector(__s0_774, __s0_774, 3, 2, 1, 0); \ + uint32x4_t __rev1_774; __rev1_774 = __builtin_shufflevector(__s1_774, __s1_774, 3, 2, 1, 0); \ + uint16x8_t __ret_774; \ + __ret_774 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_774), 
(uint16x4_t)(__noswap_vqshrn_n_u32(__rev1_774, __p2_774)))); \ + __ret_774 = __builtin_shufflevector(__ret_774, __ret_774, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_774; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrn_high_n_u64(__p0_683, __p1_683, __p2_683) __extension__ ({ \ - uint32x2_t __s0_683 = __p0_683; \ - uint64x2_t __s1_683 = __p1_683; \ - uint32x4_t __ret_683; \ - __ret_683 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_683), (uint32x2_t)(vqshrn_n_u64(__s1_683, __p2_683)))); \ - __ret_683; \ +#define vqshrn_high_n_u64(__p0_775, __p1_775, __p2_775) __extension__ ({ \ + uint32x2_t __s0_775 = __p0_775; \ + uint64x2_t __s1_775 = __p1_775; \ + uint32x4_t __ret_775; \ + __ret_775 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_775), (uint32x2_t)(vqshrn_n_u64(__s1_775, __p2_775)))); \ + __ret_775; \ }) #else -#define vqshrn_high_n_u64(__p0_684, __p1_684, __p2_684) __extension__ ({ \ - uint32x2_t __s0_684 = __p0_684; \ - uint64x2_t __s1_684 = __p1_684; \ - uint32x2_t __rev0_684; __rev0_684 = __builtin_shufflevector(__s0_684, __s0_684, 1, 0); \ - uint64x2_t __rev1_684; __rev1_684 = __builtin_shufflevector(__s1_684, __s1_684, 1, 0); \ - uint32x4_t __ret_684; \ - __ret_684 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_684), (uint32x2_t)(__noswap_vqshrn_n_u64(__rev1_684, __p2_684)))); \ - __ret_684 = __builtin_shufflevector(__ret_684, __ret_684, 3, 2, 1, 0); \ - __ret_684; \ +#define vqshrn_high_n_u64(__p0_776, __p1_776, __p2_776) __extension__ ({ \ + uint32x2_t __s0_776 = __p0_776; \ + uint64x2_t __s1_776 = __p1_776; \ + uint32x2_t __rev0_776; __rev0_776 = __builtin_shufflevector(__s0_776, __s0_776, 1, 0); \ + uint64x2_t __rev1_776; __rev1_776 = __builtin_shufflevector(__s1_776, __s1_776, 1, 0); \ + uint32x4_t __ret_776; \ + __ret_776 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_776), (uint32x2_t)(__noswap_vqshrn_n_u64(__rev1_776, __p2_776)))); \ + __ret_776 = __builtin_shufflevector(__ret_776, __ret_776, 3, 2, 1, 0); \ + __ret_776; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrn_high_n_u16(__p0_685, __p1_685, __p2_685) __extension__ ({ \ - uint8x8_t __s0_685 = __p0_685; \ - uint16x8_t __s1_685 = __p1_685; \ - uint8x16_t __ret_685; \ - __ret_685 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_685), (uint8x8_t)(vqshrn_n_u16(__s1_685, __p2_685)))); \ - __ret_685; \ +#define vqshrn_high_n_u16(__p0_777, __p1_777, __p2_777) __extension__ ({ \ + uint8x8_t __s0_777 = __p0_777; \ + uint16x8_t __s1_777 = __p1_777; \ + uint8x16_t __ret_777; \ + __ret_777 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_777), (uint8x8_t)(vqshrn_n_u16(__s1_777, __p2_777)))); \ + __ret_777; \ }) #else -#define vqshrn_high_n_u16(__p0_686, __p1_686, __p2_686) __extension__ ({ \ - uint8x8_t __s0_686 = __p0_686; \ - uint16x8_t __s1_686 = __p1_686; \ - uint8x8_t __rev0_686; __rev0_686 = __builtin_shufflevector(__s0_686, __s0_686, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1_686; __rev1_686 = __builtin_shufflevector(__s1_686, __s1_686, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __ret_686; \ - __ret_686 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_686), (uint8x8_t)(__noswap_vqshrn_n_u16(__rev1_686, __p2_686)))); \ - __ret_686 = __builtin_shufflevector(__ret_686, __ret_686, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_686; \ +#define vqshrn_high_n_u16(__p0_778, __p1_778, __p2_778) __extension__ ({ \ + uint8x8_t __s0_778 = __p0_778; \ + uint16x8_t __s1_778 = __p1_778; \ + uint8x8_t __rev0_778; __rev0_778 = __builtin_shufflevector(__s0_778, __s0_778, 7, 6, 5, 4, 3, 2, 1, 0); \ + 
uint16x8_t __rev1_778; __rev1_778 = __builtin_shufflevector(__s1_778, __s1_778, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __ret_778; \ + __ret_778 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_778), (uint8x8_t)(__noswap_vqshrn_n_u16(__rev1_778, __p2_778)))); \ + __ret_778 = __builtin_shufflevector(__ret_778, __ret_778, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_778; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrn_high_n_s32(__p0_687, __p1_687, __p2_687) __extension__ ({ \ - int16x4_t __s0_687 = __p0_687; \ - int32x4_t __s1_687 = __p1_687; \ - int16x8_t __ret_687; \ - __ret_687 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_687), (int16x4_t)(vqshrn_n_s32(__s1_687, __p2_687)))); \ - __ret_687; \ +#define vqshrn_high_n_s32(__p0_779, __p1_779, __p2_779) __extension__ ({ \ + int16x4_t __s0_779 = __p0_779; \ + int32x4_t __s1_779 = __p1_779; \ + int16x8_t __ret_779; \ + __ret_779 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_779), (int16x4_t)(vqshrn_n_s32(__s1_779, __p2_779)))); \ + __ret_779; \ }) #else -#define vqshrn_high_n_s32(__p0_688, __p1_688, __p2_688) __extension__ ({ \ - int16x4_t __s0_688 = __p0_688; \ - int32x4_t __s1_688 = __p1_688; \ - int16x4_t __rev0_688; __rev0_688 = __builtin_shufflevector(__s0_688, __s0_688, 3, 2, 1, 0); \ - int32x4_t __rev1_688; __rev1_688 = __builtin_shufflevector(__s1_688, __s1_688, 3, 2, 1, 0); \ - int16x8_t __ret_688; \ - __ret_688 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_688), (int16x4_t)(__noswap_vqshrn_n_s32(__rev1_688, __p2_688)))); \ - __ret_688 = __builtin_shufflevector(__ret_688, __ret_688, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_688; \ +#define vqshrn_high_n_s32(__p0_780, __p1_780, __p2_780) __extension__ ({ \ + int16x4_t __s0_780 = __p0_780; \ + int32x4_t __s1_780 = __p1_780; \ + int16x4_t __rev0_780; __rev0_780 = __builtin_shufflevector(__s0_780, __s0_780, 3, 2, 1, 0); \ + int32x4_t __rev1_780; __rev1_780 = __builtin_shufflevector(__s1_780, __s1_780, 3, 2, 1, 0); \ + int16x8_t __ret_780; \ + __ret_780 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_780), (int16x4_t)(__noswap_vqshrn_n_s32(__rev1_780, __p2_780)))); \ + __ret_780 = __builtin_shufflevector(__ret_780, __ret_780, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_780; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrn_high_n_s64(__p0_689, __p1_689, __p2_689) __extension__ ({ \ - int32x2_t __s0_689 = __p0_689; \ - int64x2_t __s1_689 = __p1_689; \ - int32x4_t __ret_689; \ - __ret_689 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_689), (int32x2_t)(vqshrn_n_s64(__s1_689, __p2_689)))); \ - __ret_689; \ +#define vqshrn_high_n_s64(__p0_781, __p1_781, __p2_781) __extension__ ({ \ + int32x2_t __s0_781 = __p0_781; \ + int64x2_t __s1_781 = __p1_781; \ + int32x4_t __ret_781; \ + __ret_781 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_781), (int32x2_t)(vqshrn_n_s64(__s1_781, __p2_781)))); \ + __ret_781; \ }) #else -#define vqshrn_high_n_s64(__p0_690, __p1_690, __p2_690) __extension__ ({ \ - int32x2_t __s0_690 = __p0_690; \ - int64x2_t __s1_690 = __p1_690; \ - int32x2_t __rev0_690; __rev0_690 = __builtin_shufflevector(__s0_690, __s0_690, 1, 0); \ - int64x2_t __rev1_690; __rev1_690 = __builtin_shufflevector(__s1_690, __s1_690, 1, 0); \ - int32x4_t __ret_690; \ - __ret_690 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_690), (int32x2_t)(__noswap_vqshrn_n_s64(__rev1_690, __p2_690)))); \ - __ret_690 = __builtin_shufflevector(__ret_690, __ret_690, 3, 2, 1, 0); \ - __ret_690; \ +#define vqshrn_high_n_s64(__p0_782, __p1_782, __p2_782) __extension__ ({ \ + int32x2_t 
__s0_782 = __p0_782; \ + int64x2_t __s1_782 = __p1_782; \ + int32x2_t __rev0_782; __rev0_782 = __builtin_shufflevector(__s0_782, __s0_782, 1, 0); \ + int64x2_t __rev1_782; __rev1_782 = __builtin_shufflevector(__s1_782, __s1_782, 1, 0); \ + int32x4_t __ret_782; \ + __ret_782 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_782), (int32x2_t)(__noswap_vqshrn_n_s64(__rev1_782, __p2_782)))); \ + __ret_782 = __builtin_shufflevector(__ret_782, __ret_782, 3, 2, 1, 0); \ + __ret_782; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrn_high_n_s16(__p0_691, __p1_691, __p2_691) __extension__ ({ \ - int8x8_t __s0_691 = __p0_691; \ - int16x8_t __s1_691 = __p1_691; \ - int8x16_t __ret_691; \ - __ret_691 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_691), (int8x8_t)(vqshrn_n_s16(__s1_691, __p2_691)))); \ - __ret_691; \ +#define vqshrn_high_n_s16(__p0_783, __p1_783, __p2_783) __extension__ ({ \ + int8x8_t __s0_783 = __p0_783; \ + int16x8_t __s1_783 = __p1_783; \ + int8x16_t __ret_783; \ + __ret_783 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_783), (int8x8_t)(vqshrn_n_s16(__s1_783, __p2_783)))); \ + __ret_783; \ }) #else -#define vqshrn_high_n_s16(__p0_692, __p1_692, __p2_692) __extension__ ({ \ - int8x8_t __s0_692 = __p0_692; \ - int16x8_t __s1_692 = __p1_692; \ - int8x8_t __rev0_692; __rev0_692 = __builtin_shufflevector(__s0_692, __s0_692, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_692; __rev1_692 = __builtin_shufflevector(__s1_692, __s1_692, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __ret_692; \ - __ret_692 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_692), (int8x8_t)(__noswap_vqshrn_n_s16(__rev1_692, __p2_692)))); \ - __ret_692 = __builtin_shufflevector(__ret_692, __ret_692, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_692; \ +#define vqshrn_high_n_s16(__p0_784, __p1_784, __p2_784) __extension__ ({ \ + int8x8_t __s0_784 = __p0_784; \ + int16x8_t __s1_784 = __p1_784; \ + int8x8_t __rev0_784; __rev0_784 = __builtin_shufflevector(__s0_784, __s0_784, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1_784; __rev1_784 = __builtin_shufflevector(__s1_784, __s1_784, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __ret_784; \ + __ret_784 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_784), (int8x8_t)(__noswap_vqshrn_n_s16(__rev1_784, __p2_784)))); \ + __ret_784 = __builtin_shufflevector(__ret_784, __ret_784, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_784; \ }) #endif @@ -58107,65 +59777,65 @@ __ai int16_t vqshlh_s16(int16_t __p0, int16_t __p1) { __ret; \ }) #ifdef __LITTLE_ENDIAN__ -#define vqshrun_high_n_s32(__p0_693, __p1_693, __p2_693) __extension__ ({ \ - int16x4_t __s0_693 = __p0_693; \ - int32x4_t __s1_693 = __p1_693; \ - int16x8_t __ret_693; \ - __ret_693 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_693), (int16x4_t)(vqshrun_n_s32(__s1_693, __p2_693)))); \ - __ret_693; \ +#define vqshrun_high_n_s32(__p0_785, __p1_785, __p2_785) __extension__ ({ \ + int16x4_t __s0_785 = __p0_785; \ + int32x4_t __s1_785 = __p1_785; \ + int16x8_t __ret_785; \ + __ret_785 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_785), (int16x4_t)(vqshrun_n_s32(__s1_785, __p2_785)))); \ + __ret_785; \ }) #else -#define vqshrun_high_n_s32(__p0_694, __p1_694, __p2_694) __extension__ ({ \ - int16x4_t __s0_694 = __p0_694; \ - int32x4_t __s1_694 = __p1_694; \ - int16x4_t __rev0_694; __rev0_694 = __builtin_shufflevector(__s0_694, __s0_694, 3, 2, 1, 0); \ - int32x4_t __rev1_694; __rev1_694 = __builtin_shufflevector(__s1_694, __s1_694, 3, 2, 1, 0); \ - int16x8_t __ret_694; \ - __ret_694 = 
(int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_694), (int16x4_t)(__noswap_vqshrun_n_s32(__rev1_694, __p2_694)))); \ - __ret_694 = __builtin_shufflevector(__ret_694, __ret_694, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_694; \ +#define vqshrun_high_n_s32(__p0_786, __p1_786, __p2_786) __extension__ ({ \ + int16x4_t __s0_786 = __p0_786; \ + int32x4_t __s1_786 = __p1_786; \ + int16x4_t __rev0_786; __rev0_786 = __builtin_shufflevector(__s0_786, __s0_786, 3, 2, 1, 0); \ + int32x4_t __rev1_786; __rev1_786 = __builtin_shufflevector(__s1_786, __s1_786, 3, 2, 1, 0); \ + int16x8_t __ret_786; \ + __ret_786 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_786), (int16x4_t)(__noswap_vqshrun_n_s32(__rev1_786, __p2_786)))); \ + __ret_786 = __builtin_shufflevector(__ret_786, __ret_786, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_786; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrun_high_n_s64(__p0_695, __p1_695, __p2_695) __extension__ ({ \ - int32x2_t __s0_695 = __p0_695; \ - int64x2_t __s1_695 = __p1_695; \ - int32x4_t __ret_695; \ - __ret_695 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_695), (int32x2_t)(vqshrun_n_s64(__s1_695, __p2_695)))); \ - __ret_695; \ +#define vqshrun_high_n_s64(__p0_787, __p1_787, __p2_787) __extension__ ({ \ + int32x2_t __s0_787 = __p0_787; \ + int64x2_t __s1_787 = __p1_787; \ + int32x4_t __ret_787; \ + __ret_787 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_787), (int32x2_t)(vqshrun_n_s64(__s1_787, __p2_787)))); \ + __ret_787; \ }) #else -#define vqshrun_high_n_s64(__p0_696, __p1_696, __p2_696) __extension__ ({ \ - int32x2_t __s0_696 = __p0_696; \ - int64x2_t __s1_696 = __p1_696; \ - int32x2_t __rev0_696; __rev0_696 = __builtin_shufflevector(__s0_696, __s0_696, 1, 0); \ - int64x2_t __rev1_696; __rev1_696 = __builtin_shufflevector(__s1_696, __s1_696, 1, 0); \ - int32x4_t __ret_696; \ - __ret_696 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_696), (int32x2_t)(__noswap_vqshrun_n_s64(__rev1_696, __p2_696)))); \ - __ret_696 = __builtin_shufflevector(__ret_696, __ret_696, 3, 2, 1, 0); \ - __ret_696; \ +#define vqshrun_high_n_s64(__p0_788, __p1_788, __p2_788) __extension__ ({ \ + int32x2_t __s0_788 = __p0_788; \ + int64x2_t __s1_788 = __p1_788; \ + int32x2_t __rev0_788; __rev0_788 = __builtin_shufflevector(__s0_788, __s0_788, 1, 0); \ + int64x2_t __rev1_788; __rev1_788 = __builtin_shufflevector(__s1_788, __s1_788, 1, 0); \ + int32x4_t __ret_788; \ + __ret_788 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_788), (int32x2_t)(__noswap_vqshrun_n_s64(__rev1_788, __p2_788)))); \ + __ret_788 = __builtin_shufflevector(__ret_788, __ret_788, 3, 2, 1, 0); \ + __ret_788; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqshrun_high_n_s16(__p0_697, __p1_697, __p2_697) __extension__ ({ \ - int8x8_t __s0_697 = __p0_697; \ - int16x8_t __s1_697 = __p1_697; \ - int8x16_t __ret_697; \ - __ret_697 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_697), (int8x8_t)(vqshrun_n_s16(__s1_697, __p2_697)))); \ - __ret_697; \ +#define vqshrun_high_n_s16(__p0_789, __p1_789, __p2_789) __extension__ ({ \ + int8x8_t __s0_789 = __p0_789; \ + int16x8_t __s1_789 = __p1_789; \ + int8x16_t __ret_789; \ + __ret_789 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_789), (int8x8_t)(vqshrun_n_s16(__s1_789, __p2_789)))); \ + __ret_789; \ }) #else -#define vqshrun_high_n_s16(__p0_698, __p1_698, __p2_698) __extension__ ({ \ - int8x8_t __s0_698 = __p0_698; \ - int16x8_t __s1_698 = __p1_698; \ - int8x8_t __rev0_698; __rev0_698 = __builtin_shufflevector(__s0_698, __s0_698, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_698; __rev1_698 
= __builtin_shufflevector(__s1_698, __s1_698, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __ret_698; \ - __ret_698 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_698), (int8x8_t)(__noswap_vqshrun_n_s16(__rev1_698, __p2_698)))); \ - __ret_698 = __builtin_shufflevector(__ret_698, __ret_698, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_698; \ +#define vqshrun_high_n_s16(__p0_790, __p1_790, __p2_790) __extension__ ({ \ + int8x8_t __s0_790 = __p0_790; \ + int16x8_t __s1_790 = __p1_790; \ + int8x8_t __rev0_790; __rev0_790 = __builtin_shufflevector(__s0_790, __s0_790, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1_790; __rev1_790 = __builtin_shufflevector(__s1_790, __s1_790, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __ret_790; \ + __ret_790 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_790), (int8x8_t)(__noswap_vqshrun_n_s16(__rev1_790, __p2_790)))); \ + __ret_790 = __builtin_shufflevector(__ret_790, __ret_790, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_790; \ }) #endif @@ -59452,7 +61122,7 @@ __ai float32_t vrecpxs_f32(float32_t __p0) { __ret = (float32_t) __builtin_neon_vrecpxs_f32(__p0); return __ret; } -__ai uint64_t vrshld_u64(uint64_t __p0, uint64_t __p1) { +__ai uint64_t vrshld_u64(uint64_t __p0, int64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vrshld_u64(__p0, __p1); return __ret; @@ -59475,128 +61145,128 @@ __ai int64_t vrshld_s64(int64_t __p0, int64_t __p1) { __ret; \ }) #ifdef __LITTLE_ENDIAN__ -#define vrshrn_high_n_u32(__p0_699, __p1_699, __p2_699) __extension__ ({ \ - uint16x4_t __s0_699 = __p0_699; \ - uint32x4_t __s1_699 = __p1_699; \ - uint16x8_t __ret_699; \ - __ret_699 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_699), (uint16x4_t)(vrshrn_n_u32(__s1_699, __p2_699)))); \ - __ret_699; \ +#define vrshrn_high_n_u32(__p0_791, __p1_791, __p2_791) __extension__ ({ \ + uint16x4_t __s0_791 = __p0_791; \ + uint32x4_t __s1_791 = __p1_791; \ + uint16x8_t __ret_791; \ + __ret_791 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_791), (uint16x4_t)(vrshrn_n_u32(__s1_791, __p2_791)))); \ + __ret_791; \ }) #else -#define vrshrn_high_n_u32(__p0_700, __p1_700, __p2_700) __extension__ ({ \ - uint16x4_t __s0_700 = __p0_700; \ - uint32x4_t __s1_700 = __p1_700; \ - uint16x4_t __rev0_700; __rev0_700 = __builtin_shufflevector(__s0_700, __s0_700, 3, 2, 1, 0); \ - uint32x4_t __rev1_700; __rev1_700 = __builtin_shufflevector(__s1_700, __s1_700, 3, 2, 1, 0); \ - uint16x8_t __ret_700; \ - __ret_700 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_700), (uint16x4_t)(__noswap_vrshrn_n_u32(__rev1_700, __p2_700)))); \ - __ret_700 = __builtin_shufflevector(__ret_700, __ret_700, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_700; \ +#define vrshrn_high_n_u32(__p0_792, __p1_792, __p2_792) __extension__ ({ \ + uint16x4_t __s0_792 = __p0_792; \ + uint32x4_t __s1_792 = __p1_792; \ + uint16x4_t __rev0_792; __rev0_792 = __builtin_shufflevector(__s0_792, __s0_792, 3, 2, 1, 0); \ + uint32x4_t __rev1_792; __rev1_792 = __builtin_shufflevector(__s1_792, __s1_792, 3, 2, 1, 0); \ + uint16x8_t __ret_792; \ + __ret_792 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_792), (uint16x4_t)(__noswap_vrshrn_n_u32(__rev1_792, __p2_792)))); \ + __ret_792 = __builtin_shufflevector(__ret_792, __ret_792, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_792; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrn_high_n_u64(__p0_701, __p1_701, __p2_701) __extension__ ({ \ - uint32x2_t __s0_701 = __p0_701; \ - uint64x2_t __s1_701 = __p1_701; \ - uint32x4_t __ret_701; \ - __ret_701 = 
(uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_701), (uint32x2_t)(vrshrn_n_u64(__s1_701, __p2_701)))); \ - __ret_701; \ +#define vrshrn_high_n_u64(__p0_793, __p1_793, __p2_793) __extension__ ({ \ + uint32x2_t __s0_793 = __p0_793; \ + uint64x2_t __s1_793 = __p1_793; \ + uint32x4_t __ret_793; \ + __ret_793 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_793), (uint32x2_t)(vrshrn_n_u64(__s1_793, __p2_793)))); \ + __ret_793; \ }) #else -#define vrshrn_high_n_u64(__p0_702, __p1_702, __p2_702) __extension__ ({ \ - uint32x2_t __s0_702 = __p0_702; \ - uint64x2_t __s1_702 = __p1_702; \ - uint32x2_t __rev0_702; __rev0_702 = __builtin_shufflevector(__s0_702, __s0_702, 1, 0); \ - uint64x2_t __rev1_702; __rev1_702 = __builtin_shufflevector(__s1_702, __s1_702, 1, 0); \ - uint32x4_t __ret_702; \ - __ret_702 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_702), (uint32x2_t)(__noswap_vrshrn_n_u64(__rev1_702, __p2_702)))); \ - __ret_702 = __builtin_shufflevector(__ret_702, __ret_702, 3, 2, 1, 0); \ - __ret_702; \ +#define vrshrn_high_n_u64(__p0_794, __p1_794, __p2_794) __extension__ ({ \ + uint32x2_t __s0_794 = __p0_794; \ + uint64x2_t __s1_794 = __p1_794; \ + uint32x2_t __rev0_794; __rev0_794 = __builtin_shufflevector(__s0_794, __s0_794, 1, 0); \ + uint64x2_t __rev1_794; __rev1_794 = __builtin_shufflevector(__s1_794, __s1_794, 1, 0); \ + uint32x4_t __ret_794; \ + __ret_794 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_794), (uint32x2_t)(__noswap_vrshrn_n_u64(__rev1_794, __p2_794)))); \ + __ret_794 = __builtin_shufflevector(__ret_794, __ret_794, 3, 2, 1, 0); \ + __ret_794; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrn_high_n_u16(__p0_703, __p1_703, __p2_703) __extension__ ({ \ - uint8x8_t __s0_703 = __p0_703; \ - uint16x8_t __s1_703 = __p1_703; \ - uint8x16_t __ret_703; \ - __ret_703 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_703), (uint8x8_t)(vrshrn_n_u16(__s1_703, __p2_703)))); \ - __ret_703; \ +#define vrshrn_high_n_u16(__p0_795, __p1_795, __p2_795) __extension__ ({ \ + uint8x8_t __s0_795 = __p0_795; \ + uint16x8_t __s1_795 = __p1_795; \ + uint8x16_t __ret_795; \ + __ret_795 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_795), (uint8x8_t)(vrshrn_n_u16(__s1_795, __p2_795)))); \ + __ret_795; \ }) #else -#define vrshrn_high_n_u16(__p0_704, __p1_704, __p2_704) __extension__ ({ \ - uint8x8_t __s0_704 = __p0_704; \ - uint16x8_t __s1_704 = __p1_704; \ - uint8x8_t __rev0_704; __rev0_704 = __builtin_shufflevector(__s0_704, __s0_704, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1_704; __rev1_704 = __builtin_shufflevector(__s1_704, __s1_704, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __ret_704; \ - __ret_704 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_704), (uint8x8_t)(__noswap_vrshrn_n_u16(__rev1_704, __p2_704)))); \ - __ret_704 = __builtin_shufflevector(__ret_704, __ret_704, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_704; \ +#define vrshrn_high_n_u16(__p0_796, __p1_796, __p2_796) __extension__ ({ \ + uint8x8_t __s0_796 = __p0_796; \ + uint16x8_t __s1_796 = __p1_796; \ + uint8x8_t __rev0_796; __rev0_796 = __builtin_shufflevector(__s0_796, __s0_796, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev1_796; __rev1_796 = __builtin_shufflevector(__s1_796, __s1_796, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __ret_796; \ + __ret_796 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_796), (uint8x8_t)(__noswap_vrshrn_n_u16(__rev1_796, __p2_796)))); \ + __ret_796 = __builtin_shufflevector(__ret_796, __ret_796, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + 
__ret_796; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrn_high_n_s32(__p0_705, __p1_705, __p2_705) __extension__ ({ \ - int16x4_t __s0_705 = __p0_705; \ - int32x4_t __s1_705 = __p1_705; \ - int16x8_t __ret_705; \ - __ret_705 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_705), (int16x4_t)(vrshrn_n_s32(__s1_705, __p2_705)))); \ - __ret_705; \ +#define vrshrn_high_n_s32(__p0_797, __p1_797, __p2_797) __extension__ ({ \ + int16x4_t __s0_797 = __p0_797; \ + int32x4_t __s1_797 = __p1_797; \ + int16x8_t __ret_797; \ + __ret_797 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_797), (int16x4_t)(vrshrn_n_s32(__s1_797, __p2_797)))); \ + __ret_797; \ }) #else -#define vrshrn_high_n_s32(__p0_706, __p1_706, __p2_706) __extension__ ({ \ - int16x4_t __s0_706 = __p0_706; \ - int32x4_t __s1_706 = __p1_706; \ - int16x4_t __rev0_706; __rev0_706 = __builtin_shufflevector(__s0_706, __s0_706, 3, 2, 1, 0); \ - int32x4_t __rev1_706; __rev1_706 = __builtin_shufflevector(__s1_706, __s1_706, 3, 2, 1, 0); \ - int16x8_t __ret_706; \ - __ret_706 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_706), (int16x4_t)(__noswap_vrshrn_n_s32(__rev1_706, __p2_706)))); \ - __ret_706 = __builtin_shufflevector(__ret_706, __ret_706, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_706; \ +#define vrshrn_high_n_s32(__p0_798, __p1_798, __p2_798) __extension__ ({ \ + int16x4_t __s0_798 = __p0_798; \ + int32x4_t __s1_798 = __p1_798; \ + int16x4_t __rev0_798; __rev0_798 = __builtin_shufflevector(__s0_798, __s0_798, 3, 2, 1, 0); \ + int32x4_t __rev1_798; __rev1_798 = __builtin_shufflevector(__s1_798, __s1_798, 3, 2, 1, 0); \ + int16x8_t __ret_798; \ + __ret_798 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_798), (int16x4_t)(__noswap_vrshrn_n_s32(__rev1_798, __p2_798)))); \ + __ret_798 = __builtin_shufflevector(__ret_798, __ret_798, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_798; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrn_high_n_s64(__p0_707, __p1_707, __p2_707) __extension__ ({ \ - int32x2_t __s0_707 = __p0_707; \ - int64x2_t __s1_707 = __p1_707; \ - int32x4_t __ret_707; \ - __ret_707 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_707), (int32x2_t)(vrshrn_n_s64(__s1_707, __p2_707)))); \ - __ret_707; \ +#define vrshrn_high_n_s64(__p0_799, __p1_799, __p2_799) __extension__ ({ \ + int32x2_t __s0_799 = __p0_799; \ + int64x2_t __s1_799 = __p1_799; \ + int32x4_t __ret_799; \ + __ret_799 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_799), (int32x2_t)(vrshrn_n_s64(__s1_799, __p2_799)))); \ + __ret_799; \ }) #else -#define vrshrn_high_n_s64(__p0_708, __p1_708, __p2_708) __extension__ ({ \ - int32x2_t __s0_708 = __p0_708; \ - int64x2_t __s1_708 = __p1_708; \ - int32x2_t __rev0_708; __rev0_708 = __builtin_shufflevector(__s0_708, __s0_708, 1, 0); \ - int64x2_t __rev1_708; __rev1_708 = __builtin_shufflevector(__s1_708, __s1_708, 1, 0); \ - int32x4_t __ret_708; \ - __ret_708 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_708), (int32x2_t)(__noswap_vrshrn_n_s64(__rev1_708, __p2_708)))); \ - __ret_708 = __builtin_shufflevector(__ret_708, __ret_708, 3, 2, 1, 0); \ - __ret_708; \ +#define vrshrn_high_n_s64(__p0_800, __p1_800, __p2_800) __extension__ ({ \ + int32x2_t __s0_800 = __p0_800; \ + int64x2_t __s1_800 = __p1_800; \ + int32x2_t __rev0_800; __rev0_800 = __builtin_shufflevector(__s0_800, __s0_800, 1, 0); \ + int64x2_t __rev1_800; __rev1_800 = __builtin_shufflevector(__s1_800, __s1_800, 1, 0); \ + int32x4_t __ret_800; \ + __ret_800 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_800), (int32x2_t)(__noswap_vrshrn_n_s64(__rev1_800, 
__p2_800)))); \ + __ret_800 = __builtin_shufflevector(__ret_800, __ret_800, 3, 2, 1, 0); \ + __ret_800; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vrshrn_high_n_s16(__p0_709, __p1_709, __p2_709) __extension__ ({ \ - int8x8_t __s0_709 = __p0_709; \ - int16x8_t __s1_709 = __p1_709; \ - int8x16_t __ret_709; \ - __ret_709 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_709), (int8x8_t)(vrshrn_n_s16(__s1_709, __p2_709)))); \ - __ret_709; \ +#define vrshrn_high_n_s16(__p0_801, __p1_801, __p2_801) __extension__ ({ \ + int8x8_t __s0_801 = __p0_801; \ + int16x8_t __s1_801 = __p1_801; \ + int8x16_t __ret_801; \ + __ret_801 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_801), (int8x8_t)(vrshrn_n_s16(__s1_801, __p2_801)))); \ + __ret_801; \ }) #else -#define vrshrn_high_n_s16(__p0_710, __p1_710, __p2_710) __extension__ ({ \ - int8x8_t __s0_710 = __p0_710; \ - int16x8_t __s1_710 = __p1_710; \ - int8x8_t __rev0_710; __rev0_710 = __builtin_shufflevector(__s0_710, __s0_710, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_710; __rev1_710 = __builtin_shufflevector(__s1_710, __s1_710, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __ret_710; \ - __ret_710 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_710), (int8x8_t)(__noswap_vrshrn_n_s16(__rev1_710, __p2_710)))); \ - __ret_710 = __builtin_shufflevector(__ret_710, __ret_710, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_710; \ +#define vrshrn_high_n_s16(__p0_802, __p1_802, __p2_802) __extension__ ({ \ + int8x8_t __s0_802 = __p0_802; \ + int16x8_t __s1_802 = __p1_802; \ + int8x8_t __rev0_802; __rev0_802 = __builtin_shufflevector(__s0_802, __s0_802, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1_802; __rev1_802 = __builtin_shufflevector(__s1_802, __s1_802, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __ret_802; \ + __ret_802 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_802), (int8x8_t)(__noswap_vrshrn_n_s16(__rev1_802, __p2_802)))); \ + __ret_802 = __builtin_shufflevector(__ret_802, __ret_802, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_802; \ }) #endif @@ -59853,7 +61523,7 @@ __ai int8x16_t vrsubhn_high_s16(int8x8_t __p0, int16x8_t __p1, int16x8_t __p2) { __ret = (float64x1_t) __builtin_neon_vset_lane_f64(__s0, (float64x1_t)__s1, __p2); \ __ret; \ }) -__ai uint64_t vshld_u64(uint64_t __p0, uint64_t __p1) { +__ai uint64_t vshld_u64(uint64_t __p0, int64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vshld_u64(__p0, __p1); return __ret; @@ -59876,110 +61546,110 @@ __ai int64_t vshld_s64(int64_t __p0, int64_t __p1) { __ret; \ }) #ifdef __LITTLE_ENDIAN__ -#define vshll_high_n_u8(__p0_711, __p1_711) __extension__ ({ \ - uint8x16_t __s0_711 = __p0_711; \ - uint16x8_t __ret_711; \ - __ret_711 = (uint16x8_t)(vshll_n_u8(vget_high_u8(__s0_711), __p1_711)); \ - __ret_711; \ +#define vshll_high_n_u8(__p0_803, __p1_803) __extension__ ({ \ + uint8x16_t __s0_803 = __p0_803; \ + uint16x8_t __ret_803; \ + __ret_803 = (uint16x8_t)(vshll_n_u8(vget_high_u8(__s0_803), __p1_803)); \ + __ret_803; \ }) #else -#define vshll_high_n_u8(__p0_712, __p1_712) __extension__ ({ \ - uint8x16_t __s0_712 = __p0_712; \ - uint8x16_t __rev0_712; __rev0_712 = __builtin_shufflevector(__s0_712, __s0_712, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __ret_712; \ - __ret_712 = (uint16x8_t)(__noswap_vshll_n_u8(__noswap_vget_high_u8(__rev0_712), __p1_712)); \ - __ret_712 = __builtin_shufflevector(__ret_712, __ret_712, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_712; \ +#define vshll_high_n_u8(__p0_804, __p1_804) __extension__ ({ \ + uint8x16_t 
__s0_804 = __p0_804; \ + uint8x16_t __rev0_804; __rev0_804 = __builtin_shufflevector(__s0_804, __s0_804, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __ret_804; \ + __ret_804 = (uint16x8_t)(__noswap_vshll_n_u8(__noswap_vget_high_u8(__rev0_804), __p1_804)); \ + __ret_804 = __builtin_shufflevector(__ret_804, __ret_804, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_804; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshll_high_n_u32(__p0_713, __p1_713) __extension__ ({ \ - uint32x4_t __s0_713 = __p0_713; \ - uint64x2_t __ret_713; \ - __ret_713 = (uint64x2_t)(vshll_n_u32(vget_high_u32(__s0_713), __p1_713)); \ - __ret_713; \ +#define vshll_high_n_u32(__p0_805, __p1_805) __extension__ ({ \ + uint32x4_t __s0_805 = __p0_805; \ + uint64x2_t __ret_805; \ + __ret_805 = (uint64x2_t)(vshll_n_u32(vget_high_u32(__s0_805), __p1_805)); \ + __ret_805; \ }) #else -#define vshll_high_n_u32(__p0_714, __p1_714) __extension__ ({ \ - uint32x4_t __s0_714 = __p0_714; \ - uint32x4_t __rev0_714; __rev0_714 = __builtin_shufflevector(__s0_714, __s0_714, 3, 2, 1, 0); \ - uint64x2_t __ret_714; \ - __ret_714 = (uint64x2_t)(__noswap_vshll_n_u32(__noswap_vget_high_u32(__rev0_714), __p1_714)); \ - __ret_714 = __builtin_shufflevector(__ret_714, __ret_714, 1, 0); \ - __ret_714; \ +#define vshll_high_n_u32(__p0_806, __p1_806) __extension__ ({ \ + uint32x4_t __s0_806 = __p0_806; \ + uint32x4_t __rev0_806; __rev0_806 = __builtin_shufflevector(__s0_806, __s0_806, 3, 2, 1, 0); \ + uint64x2_t __ret_806; \ + __ret_806 = (uint64x2_t)(__noswap_vshll_n_u32(__noswap_vget_high_u32(__rev0_806), __p1_806)); \ + __ret_806 = __builtin_shufflevector(__ret_806, __ret_806, 1, 0); \ + __ret_806; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshll_high_n_u16(__p0_715, __p1_715) __extension__ ({ \ - uint16x8_t __s0_715 = __p0_715; \ - uint32x4_t __ret_715; \ - __ret_715 = (uint32x4_t)(vshll_n_u16(vget_high_u16(__s0_715), __p1_715)); \ - __ret_715; \ +#define vshll_high_n_u16(__p0_807, __p1_807) __extension__ ({ \ + uint16x8_t __s0_807 = __p0_807; \ + uint32x4_t __ret_807; \ + __ret_807 = (uint32x4_t)(vshll_n_u16(vget_high_u16(__s0_807), __p1_807)); \ + __ret_807; \ }) #else -#define vshll_high_n_u16(__p0_716, __p1_716) __extension__ ({ \ - uint16x8_t __s0_716 = __p0_716; \ - uint16x8_t __rev0_716; __rev0_716 = __builtin_shufflevector(__s0_716, __s0_716, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint32x4_t __ret_716; \ - __ret_716 = (uint32x4_t)(__noswap_vshll_n_u16(__noswap_vget_high_u16(__rev0_716), __p1_716)); \ - __ret_716 = __builtin_shufflevector(__ret_716, __ret_716, 3, 2, 1, 0); \ - __ret_716; \ +#define vshll_high_n_u16(__p0_808, __p1_808) __extension__ ({ \ + uint16x8_t __s0_808 = __p0_808; \ + uint16x8_t __rev0_808; __rev0_808 = __builtin_shufflevector(__s0_808, __s0_808, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint32x4_t __ret_808; \ + __ret_808 = (uint32x4_t)(__noswap_vshll_n_u16(__noswap_vget_high_u16(__rev0_808), __p1_808)); \ + __ret_808 = __builtin_shufflevector(__ret_808, __ret_808, 3, 2, 1, 0); \ + __ret_808; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshll_high_n_s8(__p0_717, __p1_717) __extension__ ({ \ - int8x16_t __s0_717 = __p0_717; \ - int16x8_t __ret_717; \ - __ret_717 = (int16x8_t)(vshll_n_s8(vget_high_s8(__s0_717), __p1_717)); \ - __ret_717; \ +#define vshll_high_n_s8(__p0_809, __p1_809) __extension__ ({ \ + int8x16_t __s0_809 = __p0_809; \ + int16x8_t __ret_809; \ + __ret_809 = (int16x8_t)(vshll_n_s8(vget_high_s8(__s0_809), __p1_809)); \ + __ret_809; \ }) #else -#define vshll_high_n_s8(__p0_718, __p1_718) __extension__ ({ \ - 
int8x16_t __s0_718 = __p0_718; \ - int8x16_t __rev0_718; __rev0_718 = __builtin_shufflevector(__s0_718, __s0_718, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __ret_718; \ - __ret_718 = (int16x8_t)(__noswap_vshll_n_s8(__noswap_vget_high_s8(__rev0_718), __p1_718)); \ - __ret_718 = __builtin_shufflevector(__ret_718, __ret_718, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_718; \ +#define vshll_high_n_s8(__p0_810, __p1_810) __extension__ ({ \ + int8x16_t __s0_810 = __p0_810; \ + int8x16_t __rev0_810; __rev0_810 = __builtin_shufflevector(__s0_810, __s0_810, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __ret_810; \ + __ret_810 = (int16x8_t)(__noswap_vshll_n_s8(__noswap_vget_high_s8(__rev0_810), __p1_810)); \ + __ret_810 = __builtin_shufflevector(__ret_810, __ret_810, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_810; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshll_high_n_s32(__p0_719, __p1_719) __extension__ ({ \ - int32x4_t __s0_719 = __p0_719; \ - int64x2_t __ret_719; \ - __ret_719 = (int64x2_t)(vshll_n_s32(vget_high_s32(__s0_719), __p1_719)); \ - __ret_719; \ +#define vshll_high_n_s32(__p0_811, __p1_811) __extension__ ({ \ + int32x4_t __s0_811 = __p0_811; \ + int64x2_t __ret_811; \ + __ret_811 = (int64x2_t)(vshll_n_s32(vget_high_s32(__s0_811), __p1_811)); \ + __ret_811; \ }) #else -#define vshll_high_n_s32(__p0_720, __p1_720) __extension__ ({ \ - int32x4_t __s0_720 = __p0_720; \ - int32x4_t __rev0_720; __rev0_720 = __builtin_shufflevector(__s0_720, __s0_720, 3, 2, 1, 0); \ - int64x2_t __ret_720; \ - __ret_720 = (int64x2_t)(__noswap_vshll_n_s32(__noswap_vget_high_s32(__rev0_720), __p1_720)); \ - __ret_720 = __builtin_shufflevector(__ret_720, __ret_720, 1, 0); \ - __ret_720; \ +#define vshll_high_n_s32(__p0_812, __p1_812) __extension__ ({ \ + int32x4_t __s0_812 = __p0_812; \ + int32x4_t __rev0_812; __rev0_812 = __builtin_shufflevector(__s0_812, __s0_812, 3, 2, 1, 0); \ + int64x2_t __ret_812; \ + __ret_812 = (int64x2_t)(__noswap_vshll_n_s32(__noswap_vget_high_s32(__rev0_812), __p1_812)); \ + __ret_812 = __builtin_shufflevector(__ret_812, __ret_812, 1, 0); \ + __ret_812; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshll_high_n_s16(__p0_721, __p1_721) __extension__ ({ \ - int16x8_t __s0_721 = __p0_721; \ - int32x4_t __ret_721; \ - __ret_721 = (int32x4_t)(vshll_n_s16(vget_high_s16(__s0_721), __p1_721)); \ - __ret_721; \ +#define vshll_high_n_s16(__p0_813, __p1_813) __extension__ ({ \ + int16x8_t __s0_813 = __p0_813; \ + int32x4_t __ret_813; \ + __ret_813 = (int32x4_t)(vshll_n_s16(vget_high_s16(__s0_813), __p1_813)); \ + __ret_813; \ }) #else -#define vshll_high_n_s16(__p0_722, __p1_722) __extension__ ({ \ - int16x8_t __s0_722 = __p0_722; \ - int16x8_t __rev0_722; __rev0_722 = __builtin_shufflevector(__s0_722, __s0_722, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x4_t __ret_722; \ - __ret_722 = (int32x4_t)(__noswap_vshll_n_s16(__noswap_vget_high_s16(__rev0_722), __p1_722)); \ - __ret_722 = __builtin_shufflevector(__ret_722, __ret_722, 3, 2, 1, 0); \ - __ret_722; \ +#define vshll_high_n_s16(__p0_814, __p1_814) __extension__ ({ \ + int16x8_t __s0_814 = __p0_814; \ + int16x8_t __rev0_814; __rev0_814 = __builtin_shufflevector(__s0_814, __s0_814, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x4_t __ret_814; \ + __ret_814 = (int32x4_t)(__noswap_vshll_n_s16(__noswap_vget_high_s16(__rev0_814), __p1_814)); \ + __ret_814 = __builtin_shufflevector(__ret_814, __ret_814, 3, 2, 1, 0); \ + __ret_814; \ }) #endif @@ -59996,128 +61666,128 @@ __ai int64_t vshld_s64(int64_t __p0, int64_t __p1) { __ret; \ }) 
#ifdef __LITTLE_ENDIAN__ -#define vshrn_high_n_u32(__p0_723, __p1_723, __p2_723) __extension__ ({ \ - uint16x4_t __s0_723 = __p0_723; \ - uint32x4_t __s1_723 = __p1_723; \ - uint16x8_t __ret_723; \ - __ret_723 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_723), (uint16x4_t)(vshrn_n_u32(__s1_723, __p2_723)))); \ - __ret_723; \ +#define vshrn_high_n_u32(__p0_815, __p1_815, __p2_815) __extension__ ({ \ + uint16x4_t __s0_815 = __p0_815; \ + uint32x4_t __s1_815 = __p1_815; \ + uint16x8_t __ret_815; \ + __ret_815 = (uint16x8_t)(vcombine_u16((uint16x4_t)(__s0_815), (uint16x4_t)(vshrn_n_u32(__s1_815, __p2_815)))); \ + __ret_815; \ }) #else -#define vshrn_high_n_u32(__p0_724, __p1_724, __p2_724) __extension__ ({ \ - uint16x4_t __s0_724 = __p0_724; \ - uint32x4_t __s1_724 = __p1_724; \ - uint16x4_t __rev0_724; __rev0_724 = __builtin_shufflevector(__s0_724, __s0_724, 3, 2, 1, 0); \ - uint32x4_t __rev1_724; __rev1_724 = __builtin_shufflevector(__s1_724, __s1_724, 3, 2, 1, 0); \ - uint16x8_t __ret_724; \ - __ret_724 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_724), (uint16x4_t)(__noswap_vshrn_n_u32(__rev1_724, __p2_724)))); \ - __ret_724 = __builtin_shufflevector(__ret_724, __ret_724, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_724; \ +#define vshrn_high_n_u32(__p0_816, __p1_816, __p2_816) __extension__ ({ \ + uint16x4_t __s0_816 = __p0_816; \ + uint32x4_t __s1_816 = __p1_816; \ + uint16x4_t __rev0_816; __rev0_816 = __builtin_shufflevector(__s0_816, __s0_816, 3, 2, 1, 0); \ + uint32x4_t __rev1_816; __rev1_816 = __builtin_shufflevector(__s1_816, __s1_816, 3, 2, 1, 0); \ + uint16x8_t __ret_816; \ + __ret_816 = (uint16x8_t)(__noswap_vcombine_u16((uint16x4_t)(__rev0_816), (uint16x4_t)(__noswap_vshrn_n_u32(__rev1_816, __p2_816)))); \ + __ret_816 = __builtin_shufflevector(__ret_816, __ret_816, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_816; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshrn_high_n_u64(__p0_725, __p1_725, __p2_725) __extension__ ({ \ - uint32x2_t __s0_725 = __p0_725; \ - uint64x2_t __s1_725 = __p1_725; \ - uint32x4_t __ret_725; \ - __ret_725 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_725), (uint32x2_t)(vshrn_n_u64(__s1_725, __p2_725)))); \ - __ret_725; \ +#define vshrn_high_n_u64(__p0_817, __p1_817, __p2_817) __extension__ ({ \ + uint32x2_t __s0_817 = __p0_817; \ + uint64x2_t __s1_817 = __p1_817; \ + uint32x4_t __ret_817; \ + __ret_817 = (uint32x4_t)(vcombine_u32((uint32x2_t)(__s0_817), (uint32x2_t)(vshrn_n_u64(__s1_817, __p2_817)))); \ + __ret_817; \ }) #else -#define vshrn_high_n_u64(__p0_726, __p1_726, __p2_726) __extension__ ({ \ - uint32x2_t __s0_726 = __p0_726; \ - uint64x2_t __s1_726 = __p1_726; \ - uint32x2_t __rev0_726; __rev0_726 = __builtin_shufflevector(__s0_726, __s0_726, 1, 0); \ - uint64x2_t __rev1_726; __rev1_726 = __builtin_shufflevector(__s1_726, __s1_726, 1, 0); \ - uint32x4_t __ret_726; \ - __ret_726 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_726), (uint32x2_t)(__noswap_vshrn_n_u64(__rev1_726, __p2_726)))); \ - __ret_726 = __builtin_shufflevector(__ret_726, __ret_726, 3, 2, 1, 0); \ - __ret_726; \ +#define vshrn_high_n_u64(__p0_818, __p1_818, __p2_818) __extension__ ({ \ + uint32x2_t __s0_818 = __p0_818; \ + uint64x2_t __s1_818 = __p1_818; \ + uint32x2_t __rev0_818; __rev0_818 = __builtin_shufflevector(__s0_818, __s0_818, 1, 0); \ + uint64x2_t __rev1_818; __rev1_818 = __builtin_shufflevector(__s1_818, __s1_818, 1, 0); \ + uint32x4_t __ret_818; \ + __ret_818 = (uint32x4_t)(__noswap_vcombine_u32((uint32x2_t)(__rev0_818), 
(uint32x2_t)(__noswap_vshrn_n_u64(__rev1_818, __p2_818)))); \ + __ret_818 = __builtin_shufflevector(__ret_818, __ret_818, 3, 2, 1, 0); \ + __ret_818; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshrn_high_n_u16(__p0_727, __p1_727, __p2_727) __extension__ ({ \ - uint8x8_t __s0_727 = __p0_727; \ - uint16x8_t __s1_727 = __p1_727; \ - uint8x16_t __ret_727; \ - __ret_727 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_727), (uint8x8_t)(vshrn_n_u16(__s1_727, __p2_727)))); \ - __ret_727; \ +#define vshrn_high_n_u16(__p0_819, __p1_819, __p2_819) __extension__ ({ \ + uint8x8_t __s0_819 = __p0_819; \ + uint16x8_t __s1_819 = __p1_819; \ + uint8x16_t __ret_819; \ + __ret_819 = (uint8x16_t)(vcombine_u8((uint8x8_t)(__s0_819), (uint8x8_t)(vshrn_n_u16(__s1_819, __p2_819)))); \ + __ret_819; \ }) #else -#define vshrn_high_n_u16(__p0_728, __p1_728, __p2_728) __extension__ ({ \ - uint8x8_t __s0_728 = __p0_728; \ - uint16x8_t __s1_728 = __p1_728; \ - uint8x8_t __rev0_728; __rev0_728 = __builtin_shufflevector(__s0_728, __s0_728, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint16x8_t __rev1_728; __rev1_728 = __builtin_shufflevector(__s1_728, __s1_728, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __ret_728; \ - __ret_728 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_728), (uint8x8_t)(__noswap_vshrn_n_u16(__rev1_728, __p2_728)))); \ - __ret_728 = __builtin_shufflevector(__ret_728, __ret_728, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_728; \ +#define vshrn_high_n_u16(__p0_820, __p1_820, __p2_820) __extension__ ({ \ + uint8x8_t __s0_820 = __p0_820; \ + uint16x8_t __s1_820 = __p1_820; \ + uint8x8_t __rev0_820; __rev0_820 = __builtin_shufflevector(__s0_820, __s0_820, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint16x8_t __rev1_820; __rev1_820 = __builtin_shufflevector(__s1_820, __s1_820, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __ret_820; \ + __ret_820 = (uint8x16_t)(__noswap_vcombine_u8((uint8x8_t)(__rev0_820), (uint8x8_t)(__noswap_vshrn_n_u16(__rev1_820, __p2_820)))); \ + __ret_820 = __builtin_shufflevector(__ret_820, __ret_820, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_820; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshrn_high_n_s32(__p0_729, __p1_729, __p2_729) __extension__ ({ \ - int16x4_t __s0_729 = __p0_729; \ - int32x4_t __s1_729 = __p1_729; \ - int16x8_t __ret_729; \ - __ret_729 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_729), (int16x4_t)(vshrn_n_s32(__s1_729, __p2_729)))); \ - __ret_729; \ +#define vshrn_high_n_s32(__p0_821, __p1_821, __p2_821) __extension__ ({ \ + int16x4_t __s0_821 = __p0_821; \ + int32x4_t __s1_821 = __p1_821; \ + int16x8_t __ret_821; \ + __ret_821 = (int16x8_t)(vcombine_s16((int16x4_t)(__s0_821), (int16x4_t)(vshrn_n_s32(__s1_821, __p2_821)))); \ + __ret_821; \ }) #else -#define vshrn_high_n_s32(__p0_730, __p1_730, __p2_730) __extension__ ({ \ - int16x4_t __s0_730 = __p0_730; \ - int32x4_t __s1_730 = __p1_730; \ - int16x4_t __rev0_730; __rev0_730 = __builtin_shufflevector(__s0_730, __s0_730, 3, 2, 1, 0); \ - int32x4_t __rev1_730; __rev1_730 = __builtin_shufflevector(__s1_730, __s1_730, 3, 2, 1, 0); \ - int16x8_t __ret_730; \ - __ret_730 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_730), (int16x4_t)(__noswap_vshrn_n_s32(__rev1_730, __p2_730)))); \ - __ret_730 = __builtin_shufflevector(__ret_730, __ret_730, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_730; \ +#define vshrn_high_n_s32(__p0_822, __p1_822, __p2_822) __extension__ ({ \ + int16x4_t __s0_822 = __p0_822; \ + int32x4_t __s1_822 = __p1_822; \ + int16x4_t __rev0_822; __rev0_822 = 
__builtin_shufflevector(__s0_822, __s0_822, 3, 2, 1, 0); \ + int32x4_t __rev1_822; __rev1_822 = __builtin_shufflevector(__s1_822, __s1_822, 3, 2, 1, 0); \ + int16x8_t __ret_822; \ + __ret_822 = (int16x8_t)(__noswap_vcombine_s16((int16x4_t)(__rev0_822), (int16x4_t)(__noswap_vshrn_n_s32(__rev1_822, __p2_822)))); \ + __ret_822 = __builtin_shufflevector(__ret_822, __ret_822, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_822; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshrn_high_n_s64(__p0_731, __p1_731, __p2_731) __extension__ ({ \ - int32x2_t __s0_731 = __p0_731; \ - int64x2_t __s1_731 = __p1_731; \ - int32x4_t __ret_731; \ - __ret_731 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_731), (int32x2_t)(vshrn_n_s64(__s1_731, __p2_731)))); \ - __ret_731; \ +#define vshrn_high_n_s64(__p0_823, __p1_823, __p2_823) __extension__ ({ \ + int32x2_t __s0_823 = __p0_823; \ + int64x2_t __s1_823 = __p1_823; \ + int32x4_t __ret_823; \ + __ret_823 = (int32x4_t)(vcombine_s32((int32x2_t)(__s0_823), (int32x2_t)(vshrn_n_s64(__s1_823, __p2_823)))); \ + __ret_823; \ }) #else -#define vshrn_high_n_s64(__p0_732, __p1_732, __p2_732) __extension__ ({ \ - int32x2_t __s0_732 = __p0_732; \ - int64x2_t __s1_732 = __p1_732; \ - int32x2_t __rev0_732; __rev0_732 = __builtin_shufflevector(__s0_732, __s0_732, 1, 0); \ - int64x2_t __rev1_732; __rev1_732 = __builtin_shufflevector(__s1_732, __s1_732, 1, 0); \ - int32x4_t __ret_732; \ - __ret_732 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_732), (int32x2_t)(__noswap_vshrn_n_s64(__rev1_732, __p2_732)))); \ - __ret_732 = __builtin_shufflevector(__ret_732, __ret_732, 3, 2, 1, 0); \ - __ret_732; \ +#define vshrn_high_n_s64(__p0_824, __p1_824, __p2_824) __extension__ ({ \ + int32x2_t __s0_824 = __p0_824; \ + int64x2_t __s1_824 = __p1_824; \ + int32x2_t __rev0_824; __rev0_824 = __builtin_shufflevector(__s0_824, __s0_824, 1, 0); \ + int64x2_t __rev1_824; __rev1_824 = __builtin_shufflevector(__s1_824, __s1_824, 1, 0); \ + int32x4_t __ret_824; \ + __ret_824 = (int32x4_t)(__noswap_vcombine_s32((int32x2_t)(__rev0_824), (int32x2_t)(__noswap_vshrn_n_s64(__rev1_824, __p2_824)))); \ + __ret_824 = __builtin_shufflevector(__ret_824, __ret_824, 3, 2, 1, 0); \ + __ret_824; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vshrn_high_n_s16(__p0_733, __p1_733, __p2_733) __extension__ ({ \ - int8x8_t __s0_733 = __p0_733; \ - int16x8_t __s1_733 = __p1_733; \ - int8x16_t __ret_733; \ - __ret_733 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_733), (int8x8_t)(vshrn_n_s16(__s1_733, __p2_733)))); \ - __ret_733; \ +#define vshrn_high_n_s16(__p0_825, __p1_825, __p2_825) __extension__ ({ \ + int8x8_t __s0_825 = __p0_825; \ + int16x8_t __s1_825 = __p1_825; \ + int8x16_t __ret_825; \ + __ret_825 = (int8x16_t)(vcombine_s8((int8x8_t)(__s0_825), (int8x8_t)(vshrn_n_s16(__s1_825, __p2_825)))); \ + __ret_825; \ }) #else -#define vshrn_high_n_s16(__p0_734, __p1_734, __p2_734) __extension__ ({ \ - int8x8_t __s0_734 = __p0_734; \ - int16x8_t __s1_734 = __p1_734; \ - int8x8_t __rev0_734; __rev0_734 = __builtin_shufflevector(__s0_734, __s0_734, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16x8_t __rev1_734; __rev1_734 = __builtin_shufflevector(__s1_734, __s1_734, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __ret_734; \ - __ret_734 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_734), (int8x8_t)(__noswap_vshrn_n_s16(__rev1_734, __p2_734)))); \ - __ret_734 = __builtin_shufflevector(__ret_734, __ret_734, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_734; \ +#define vshrn_high_n_s16(__p0_826, __p1_826, __p2_826) __extension__ ({ \ 
+ int8x8_t __s0_826 = __p0_826; \ + int16x8_t __s1_826 = __p1_826; \ + int8x8_t __rev0_826; __rev0_826 = __builtin_shufflevector(__s0_826, __s0_826, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16x8_t __rev1_826; __rev1_826 = __builtin_shufflevector(__s1_826, __s1_826, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __ret_826; \ + __ret_826 = (int8x16_t)(__noswap_vcombine_s8((int8x8_t)(__rev0_826), (int8x8_t)(__noswap_vshrn_n_s16(__rev1_826, __p2_826)))); \ + __ret_826 = __builtin_shufflevector(__ret_826, __ret_826, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_826; \ }) #endif @@ -61553,54 +63223,54 @@ __ai int32x4_t vsubw_high_s16(int32x4_t __p0, int16x8_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vsudotq_laneq_s32(__p0_735, __p1_735, __p2_735, __p3_735) __extension__ ({ \ - int32x4_t __s0_735 = __p0_735; \ - int8x16_t __s1_735 = __p1_735; \ - uint8x16_t __s2_735 = __p2_735; \ - int32x4_t __ret_735; \ -uint8x16_t __reint_735 = __s2_735; \ - __ret_735 = vusdotq_s32(__s0_735, (uint8x16_t)(splatq_laneq_s32(*(int32x4_t *) &__reint_735, __p3_735)), __s1_735); \ - __ret_735; \ +#define vsudotq_laneq_s32(__p0_827, __p1_827, __p2_827, __p3_827) __extension__ ({ \ + int32x4_t __s0_827 = __p0_827; \ + int8x16_t __s1_827 = __p1_827; \ + uint8x16_t __s2_827 = __p2_827; \ + int32x4_t __ret_827; \ +uint8x16_t __reint_827 = __s2_827; \ + __ret_827 = vusdotq_s32(__s0_827, (uint8x16_t)(splatq_laneq_s32(*(int32x4_t *) &__reint_827, __p3_827)), __s1_827); \ + __ret_827; \ }) #else -#define vsudotq_laneq_s32(__p0_736, __p1_736, __p2_736, __p3_736) __extension__ ({ \ - int32x4_t __s0_736 = __p0_736; \ - int8x16_t __s1_736 = __p1_736; \ - uint8x16_t __s2_736 = __p2_736; \ - int32x4_t __rev0_736; __rev0_736 = __builtin_shufflevector(__s0_736, __s0_736, 3, 2, 1, 0); \ - int8x16_t __rev1_736; __rev1_736 = __builtin_shufflevector(__s1_736, __s1_736, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev2_736; __rev2_736 = __builtin_shufflevector(__s2_736, __s2_736, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x4_t __ret_736; \ -uint8x16_t __reint_736 = __rev2_736; \ - __ret_736 = __noswap_vusdotq_s32(__rev0_736, (uint8x16_t)(__noswap_splatq_laneq_s32(*(int32x4_t *) &__reint_736, __p3_736)), __rev1_736); \ - __ret_736 = __builtin_shufflevector(__ret_736, __ret_736, 3, 2, 1, 0); \ - __ret_736; \ +#define vsudotq_laneq_s32(__p0_828, __p1_828, __p2_828, __p3_828) __extension__ ({ \ + int32x4_t __s0_828 = __p0_828; \ + int8x16_t __s1_828 = __p1_828; \ + uint8x16_t __s2_828 = __p2_828; \ + int32x4_t __rev0_828; __rev0_828 = __builtin_shufflevector(__s0_828, __s0_828, 3, 2, 1, 0); \ + int8x16_t __rev1_828; __rev1_828 = __builtin_shufflevector(__s1_828, __s1_828, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev2_828; __rev2_828 = __builtin_shufflevector(__s2_828, __s2_828, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x4_t __ret_828; \ +uint8x16_t __reint_828 = __rev2_828; \ + __ret_828 = __noswap_vusdotq_s32(__rev0_828, (uint8x16_t)(__noswap_splatq_laneq_s32(*(int32x4_t *) &__reint_828, __p3_828)), __rev1_828); \ + __ret_828 = __builtin_shufflevector(__ret_828, __ret_828, 3, 2, 1, 0); \ + __ret_828; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsudot_laneq_s32(__p0_737, __p1_737, __p2_737, __p3_737) __extension__ ({ \ - int32x2_t __s0_737 = __p0_737; \ - int8x8_t __s1_737 = __p1_737; \ - uint8x16_t __s2_737 = __p2_737; \ - int32x2_t __ret_737; \ -uint8x16_t __reint_737 = __s2_737; \ - __ret_737 = vusdot_s32(__s0_737, 
(uint8x8_t)(splat_laneq_s32(*(int32x4_t *) &__reint_737, __p3_737)), __s1_737); \ - __ret_737; \ +#define vsudot_laneq_s32(__p0_829, __p1_829, __p2_829, __p3_829) __extension__ ({ \ + int32x2_t __s0_829 = __p0_829; \ + int8x8_t __s1_829 = __p1_829; \ + uint8x16_t __s2_829 = __p2_829; \ + int32x2_t __ret_829; \ +uint8x16_t __reint_829 = __s2_829; \ + __ret_829 = vusdot_s32(__s0_829, (uint8x8_t)(splat_laneq_s32(*(int32x4_t *) &__reint_829, __p3_829)), __s1_829); \ + __ret_829; \ }) #else -#define vsudot_laneq_s32(__p0_738, __p1_738, __p2_738, __p3_738) __extension__ ({ \ - int32x2_t __s0_738 = __p0_738; \ - int8x8_t __s1_738 = __p1_738; \ - uint8x16_t __s2_738 = __p2_738; \ - int32x2_t __rev0_738; __rev0_738 = __builtin_shufflevector(__s0_738, __s0_738, 1, 0); \ - int8x8_t __rev1_738; __rev1_738 = __builtin_shufflevector(__s1_738, __s1_738, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x16_t __rev2_738; __rev2_738 = __builtin_shufflevector(__s2_738, __s2_738, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x2_t __ret_738; \ -uint8x16_t __reint_738 = __rev2_738; \ - __ret_738 = __noswap_vusdot_s32(__rev0_738, (uint8x8_t)(__noswap_splat_laneq_s32(*(int32x4_t *) &__reint_738, __p3_738)), __rev1_738); \ - __ret_738 = __builtin_shufflevector(__ret_738, __ret_738, 1, 0); \ - __ret_738; \ +#define vsudot_laneq_s32(__p0_830, __p1_830, __p2_830, __p3_830) __extension__ ({ \ + int32x2_t __s0_830 = __p0_830; \ + int8x8_t __s1_830 = __p1_830; \ + uint8x16_t __s2_830 = __p2_830; \ + int32x2_t __rev0_830; __rev0_830 = __builtin_shufflevector(__s0_830, __s0_830, 1, 0); \ + int8x8_t __rev1_830; __rev1_830 = __builtin_shufflevector(__s1_830, __s1_830, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x16_t __rev2_830; __rev2_830 = __builtin_shufflevector(__s2_830, __s2_830, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x2_t __ret_830; \ +uint8x16_t __reint_830 = __rev2_830; \ + __ret_830 = __noswap_vusdot_s32(__rev0_830, (uint8x8_t)(__noswap_splat_laneq_s32(*(int32x4_t *) &__reint_830, __p3_830)), __rev1_830); \ + __ret_830 = __builtin_shufflevector(__ret_830, __ret_830, 1, 0); \ + __ret_830; \ }) #endif @@ -62423,9 +64093,9 @@ __ai uint64_t vtstd_u64(uint64_t __p0, uint64_t __p1) { __ret = (uint64_t) __builtin_neon_vtstd_u64(__p0, __p1); return __ret; } -__ai int64_t vtstd_s64(int64_t __p0, int64_t __p1) { - int64_t __ret; - __ret = (int64_t) __builtin_neon_vtstd_s64(__p0, __p1); +__ai uint64_t vtstd_s64(int64_t __p0, int64_t __p1) { + uint64_t __ret; + __ret = (uint64_t) __builtin_neon_vtstd_s64(__p0, __p1); return __ret; } __ai int8_t vuqaddb_s8(int8_t __p0, uint8_t __p1) { @@ -62573,54 +64243,54 @@ __ai int16x4_t vuqadd_s16(int16x4_t __p0, uint16x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vusdotq_laneq_s32(__p0_739, __p1_739, __p2_739, __p3_739) __extension__ ({ \ - int32x4_t __s0_739 = __p0_739; \ - uint8x16_t __s1_739 = __p1_739; \ - int8x16_t __s2_739 = __p2_739; \ - int32x4_t __ret_739; \ -int8x16_t __reint_739 = __s2_739; \ - __ret_739 = vusdotq_s32(__s0_739, __s1_739, (int8x16_t)(splatq_laneq_s32(*(int32x4_t *) &__reint_739, __p3_739))); \ - __ret_739; \ +#define vusdotq_laneq_s32(__p0_831, __p1_831, __p2_831, __p3_831) __extension__ ({ \ + int32x4_t __s0_831 = __p0_831; \ + uint8x16_t __s1_831 = __p1_831; \ + int8x16_t __s2_831 = __p2_831; \ + int32x4_t __ret_831; \ +int8x16_t __reint_831 = __s2_831; \ + __ret_831 = vusdotq_s32(__s0_831, __s1_831, (int8x16_t)(splatq_laneq_s32(*(int32x4_t *) &__reint_831, __p3_831))); \ + __ret_831; \ }) #else -#define vusdotq_laneq_s32(__p0_740, 
__p1_740, __p2_740, __p3_740) __extension__ ({ \ - int32x4_t __s0_740 = __p0_740; \ - uint8x16_t __s1_740 = __p1_740; \ - int8x16_t __s2_740 = __p2_740; \ - int32x4_t __rev0_740; __rev0_740 = __builtin_shufflevector(__s0_740, __s0_740, 3, 2, 1, 0); \ - uint8x16_t __rev1_740; __rev1_740 = __builtin_shufflevector(__s1_740, __s1_740, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __rev2_740; __rev2_740 = __builtin_shufflevector(__s2_740, __s2_740, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x4_t __ret_740; \ -int8x16_t __reint_740 = __rev2_740; \ - __ret_740 = __noswap_vusdotq_s32(__rev0_740, __rev1_740, (int8x16_t)(__noswap_splatq_laneq_s32(*(int32x4_t *) &__reint_740, __p3_740))); \ - __ret_740 = __builtin_shufflevector(__ret_740, __ret_740, 3, 2, 1, 0); \ - __ret_740; \ +#define vusdotq_laneq_s32(__p0_832, __p1_832, __p2_832, __p3_832) __extension__ ({ \ + int32x4_t __s0_832 = __p0_832; \ + uint8x16_t __s1_832 = __p1_832; \ + int8x16_t __s2_832 = __p2_832; \ + int32x4_t __rev0_832; __rev0_832 = __builtin_shufflevector(__s0_832, __s0_832, 3, 2, 1, 0); \ + uint8x16_t __rev1_832; __rev1_832 = __builtin_shufflevector(__s1_832, __s1_832, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev2_832; __rev2_832 = __builtin_shufflevector(__s2_832, __s2_832, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x4_t __ret_832; \ +int8x16_t __reint_832 = __rev2_832; \ + __ret_832 = __noswap_vusdotq_s32(__rev0_832, __rev1_832, (int8x16_t)(__noswap_splatq_laneq_s32(*(int32x4_t *) &__reint_832, __p3_832))); \ + __ret_832 = __builtin_shufflevector(__ret_832, __ret_832, 3, 2, 1, 0); \ + __ret_832; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vusdot_laneq_s32(__p0_741, __p1_741, __p2_741, __p3_741) __extension__ ({ \ - int32x2_t __s0_741 = __p0_741; \ - uint8x8_t __s1_741 = __p1_741; \ - int8x16_t __s2_741 = __p2_741; \ - int32x2_t __ret_741; \ -int8x16_t __reint_741 = __s2_741; \ - __ret_741 = vusdot_s32(__s0_741, __s1_741, (int8x8_t)(splat_laneq_s32(*(int32x4_t *) &__reint_741, __p3_741))); \ - __ret_741; \ +#define vusdot_laneq_s32(__p0_833, __p1_833, __p2_833, __p3_833) __extension__ ({ \ + int32x2_t __s0_833 = __p0_833; \ + uint8x8_t __s1_833 = __p1_833; \ + int8x16_t __s2_833 = __p2_833; \ + int32x2_t __ret_833; \ +int8x16_t __reint_833 = __s2_833; \ + __ret_833 = vusdot_s32(__s0_833, __s1_833, (int8x8_t)(splat_laneq_s32(*(int32x4_t *) &__reint_833, __p3_833))); \ + __ret_833; \ }) #else -#define vusdot_laneq_s32(__p0_742, __p1_742, __p2_742, __p3_742) __extension__ ({ \ - int32x2_t __s0_742 = __p0_742; \ - uint8x8_t __s1_742 = __p1_742; \ - int8x16_t __s2_742 = __p2_742; \ - int32x2_t __rev0_742; __rev0_742 = __builtin_shufflevector(__s0_742, __s0_742, 1, 0); \ - uint8x8_t __rev1_742; __rev1_742 = __builtin_shufflevector(__s1_742, __s1_742, 7, 6, 5, 4, 3, 2, 1, 0); \ - int8x16_t __rev2_742; __rev2_742 = __builtin_shufflevector(__s2_742, __s2_742, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x2_t __ret_742; \ -int8x16_t __reint_742 = __rev2_742; \ - __ret_742 = __noswap_vusdot_s32(__rev0_742, __rev1_742, (int8x8_t)(__noswap_splat_laneq_s32(*(int32x4_t *) &__reint_742, __p3_742))); \ - __ret_742 = __builtin_shufflevector(__ret_742, __ret_742, 1, 0); \ - __ret_742; \ +#define vusdot_laneq_s32(__p0_834, __p1_834, __p2_834, __p3_834) __extension__ ({ \ + int32x2_t __s0_834 = __p0_834; \ + uint8x8_t __s1_834 = __p1_834; \ + int8x16_t __s2_834 = __p2_834; \ + int32x2_t __rev0_834; __rev0_834 = 
__builtin_shufflevector(__s0_834, __s0_834, 1, 0); \ + uint8x8_t __rev1_834; __rev1_834 = __builtin_shufflevector(__s1_834, __s1_834, 7, 6, 5, 4, 3, 2, 1, 0); \ + int8x16_t __rev2_834; __rev2_834 = __builtin_shufflevector(__s2_834, __s2_834, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x2_t __ret_834; \ +int8x16_t __reint_834 = __rev2_834; \ + __ret_834 = __noswap_vusdot_s32(__rev0_834, __rev1_834, (int8x8_t)(__noswap_splat_laneq_s32(*(int32x4_t *) &__reint_834, __p3_834))); \ + __ret_834 = __builtin_shufflevector(__ret_834, __ret_834, 1, 0); \ + __ret_834; \ }) #endif @@ -64674,60 +66344,60 @@ __ai int32x4_t vaddw_s16(int32x4_t __p0, int16x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vget_lane_f16(__p0_743, __p1_743) __extension__ ({ \ - float16x4_t __s0_743 = __p0_743; \ - float16_t __ret_743; \ -float16x4_t __reint_743 = __s0_743; \ -int16_t __reint1_743 = vget_lane_s16(*(int16x4_t *) &__reint_743, __p1_743); \ - __ret_743 = *(float16_t *) &__reint1_743; \ - __ret_743; \ +#define vget_lane_f16(__p0_835, __p1_835) __extension__ ({ \ + float16x4_t __s0_835 = __p0_835; \ + float16_t __ret_835; \ +float16x4_t __reint_835 = __s0_835; \ +int16_t __reint1_835 = vget_lane_s16(*(int16x4_t *) &__reint_835, __p1_835); \ + __ret_835 = *(float16_t *) &__reint1_835; \ + __ret_835; \ }) #else -#define vget_lane_f16(__p0_744, __p1_744) __extension__ ({ \ - float16x4_t __s0_744 = __p0_744; \ - float16x4_t __rev0_744; __rev0_744 = __builtin_shufflevector(__s0_744, __s0_744, 3, 2, 1, 0); \ - float16_t __ret_744; \ -float16x4_t __reint_744 = __rev0_744; \ -int16_t __reint1_744 = __noswap_vget_lane_s16(*(int16x4_t *) &__reint_744, __p1_744); \ - __ret_744 = *(float16_t *) &__reint1_744; \ - __ret_744; \ +#define vget_lane_f16(__p0_836, __p1_836) __extension__ ({ \ + float16x4_t __s0_836 = __p0_836; \ + float16x4_t __rev0_836; __rev0_836 = __builtin_shufflevector(__s0_836, __s0_836, 3, 2, 1, 0); \ + float16_t __ret_836; \ +float16x4_t __reint_836 = __rev0_836; \ +int16_t __reint1_836 = __noswap_vget_lane_s16(*(int16x4_t *) &__reint_836, __p1_836); \ + __ret_836 = *(float16_t *) &__reint1_836; \ + __ret_836; \ }) -#define __noswap_vget_lane_f16(__p0_745, __p1_745) __extension__ ({ \ - float16x4_t __s0_745 = __p0_745; \ - float16_t __ret_745; \ -float16x4_t __reint_745 = __s0_745; \ -int16_t __reint1_745 = __noswap_vget_lane_s16(*(int16x4_t *) &__reint_745, __p1_745); \ - __ret_745 = *(float16_t *) &__reint1_745; \ - __ret_745; \ +#define __noswap_vget_lane_f16(__p0_837, __p1_837) __extension__ ({ \ + float16x4_t __s0_837 = __p0_837; \ + float16_t __ret_837; \ +float16x4_t __reint_837 = __s0_837; \ +int16_t __reint1_837 = __noswap_vget_lane_s16(*(int16x4_t *) &__reint_837, __p1_837); \ + __ret_837 = *(float16_t *) &__reint1_837; \ + __ret_837; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vgetq_lane_f16(__p0_746, __p1_746) __extension__ ({ \ - float16x8_t __s0_746 = __p0_746; \ - float16_t __ret_746; \ -float16x8_t __reint_746 = __s0_746; \ -int16_t __reint1_746 = vgetq_lane_s16(*(int16x8_t *) &__reint_746, __p1_746); \ - __ret_746 = *(float16_t *) &__reint1_746; \ - __ret_746; \ +#define vgetq_lane_f16(__p0_838, __p1_838) __extension__ ({ \ + float16x8_t __s0_838 = __p0_838; \ + float16_t __ret_838; \ +float16x8_t __reint_838 = __s0_838; \ +int16_t __reint1_838 = vgetq_lane_s16(*(int16x8_t *) &__reint_838, __p1_838); \ + __ret_838 = *(float16_t *) &__reint1_838; \ + __ret_838; \ }) #else -#define vgetq_lane_f16(__p0_747, __p1_747) __extension__ ({ \ - float16x8_t __s0_747 = 
__p0_747; \ - float16x8_t __rev0_747; __rev0_747 = __builtin_shufflevector(__s0_747, __s0_747, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16_t __ret_747; \ -float16x8_t __reint_747 = __rev0_747; \ -int16_t __reint1_747 = __noswap_vgetq_lane_s16(*(int16x8_t *) &__reint_747, __p1_747); \ - __ret_747 = *(float16_t *) &__reint1_747; \ - __ret_747; \ +#define vgetq_lane_f16(__p0_839, __p1_839) __extension__ ({ \ + float16x8_t __s0_839 = __p0_839; \ + float16x8_t __rev0_839; __rev0_839 = __builtin_shufflevector(__s0_839, __s0_839, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16_t __ret_839; \ +float16x8_t __reint_839 = __rev0_839; \ +int16_t __reint1_839 = __noswap_vgetq_lane_s16(*(int16x8_t *) &__reint_839, __p1_839); \ + __ret_839 = *(float16_t *) &__reint1_839; \ + __ret_839; \ }) -#define __noswap_vgetq_lane_f16(__p0_748, __p1_748) __extension__ ({ \ - float16x8_t __s0_748 = __p0_748; \ - float16_t __ret_748; \ -float16x8_t __reint_748 = __s0_748; \ -int16_t __reint1_748 = __noswap_vgetq_lane_s16(*(int16x8_t *) &__reint_748, __p1_748); \ - __ret_748 = *(float16_t *) &__reint1_748; \ - __ret_748; \ +#define __noswap_vgetq_lane_f16(__p0_840, __p1_840) __extension__ ({ \ + float16x8_t __s0_840 = __p0_840; \ + float16_t __ret_840; \ +float16x8_t __reint_840 = __s0_840; \ +int16_t __reint1_840 = __noswap_vgetq_lane_s16(*(int16x8_t *) &__reint_840, __p1_840); \ + __ret_840 = *(float16_t *) &__reint1_840; \ + __ret_840; \ }) #endif @@ -64870,98 +66540,98 @@ __ai int32x4_t __noswap_vmlal_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2 #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_lane_u32(__p0_749, __p1_749, __p2_749, __p3_749) __extension__ ({ \ - uint64x2_t __s0_749 = __p0_749; \ - uint32x2_t __s1_749 = __p1_749; \ - uint32x2_t __s2_749 = __p2_749; \ - uint64x2_t __ret_749; \ - __ret_749 = __s0_749 + vmull_u32(__s1_749, splat_lane_u32(__s2_749, __p3_749)); \ - __ret_749; \ +#define vmlal_lane_u32(__p0_841, __p1_841, __p2_841, __p3_841) __extension__ ({ \ + uint64x2_t __s0_841 = __p0_841; \ + uint32x2_t __s1_841 = __p1_841; \ + uint32x2_t __s2_841 = __p2_841; \ + uint64x2_t __ret_841; \ + __ret_841 = __s0_841 + vmull_u32(__s1_841, splat_lane_u32(__s2_841, __p3_841)); \ + __ret_841; \ }) #else -#define vmlal_lane_u32(__p0_750, __p1_750, __p2_750, __p3_750) __extension__ ({ \ - uint64x2_t __s0_750 = __p0_750; \ - uint32x2_t __s1_750 = __p1_750; \ - uint32x2_t __s2_750 = __p2_750; \ - uint64x2_t __rev0_750; __rev0_750 = __builtin_shufflevector(__s0_750, __s0_750, 1, 0); \ - uint32x2_t __rev1_750; __rev1_750 = __builtin_shufflevector(__s1_750, __s1_750, 1, 0); \ - uint32x2_t __rev2_750; __rev2_750 = __builtin_shufflevector(__s2_750, __s2_750, 1, 0); \ - uint64x2_t __ret_750; \ - __ret_750 = __rev0_750 + __noswap_vmull_u32(__rev1_750, __noswap_splat_lane_u32(__rev2_750, __p3_750)); \ - __ret_750 = __builtin_shufflevector(__ret_750, __ret_750, 1, 0); \ - __ret_750; \ +#define vmlal_lane_u32(__p0_842, __p1_842, __p2_842, __p3_842) __extension__ ({ \ + uint64x2_t __s0_842 = __p0_842; \ + uint32x2_t __s1_842 = __p1_842; \ + uint32x2_t __s2_842 = __p2_842; \ + uint64x2_t __rev0_842; __rev0_842 = __builtin_shufflevector(__s0_842, __s0_842, 1, 0); \ + uint32x2_t __rev1_842; __rev1_842 = __builtin_shufflevector(__s1_842, __s1_842, 1, 0); \ + uint32x2_t __rev2_842; __rev2_842 = __builtin_shufflevector(__s2_842, __s2_842, 1, 0); \ + uint64x2_t __ret_842; \ + __ret_842 = __rev0_842 + __noswap_vmull_u32(__rev1_842, __noswap_splat_lane_u32(__rev2_842, __p3_842)); \ + __ret_842 = __builtin_shufflevector(__ret_842, __ret_842, 1, 0); 
\ + __ret_842; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_lane_u16(__p0_751, __p1_751, __p2_751, __p3_751) __extension__ ({ \ - uint32x4_t __s0_751 = __p0_751; \ - uint16x4_t __s1_751 = __p1_751; \ - uint16x4_t __s2_751 = __p2_751; \ - uint32x4_t __ret_751; \ - __ret_751 = __s0_751 + vmull_u16(__s1_751, splat_lane_u16(__s2_751, __p3_751)); \ - __ret_751; \ +#define vmlal_lane_u16(__p0_843, __p1_843, __p2_843, __p3_843) __extension__ ({ \ + uint32x4_t __s0_843 = __p0_843; \ + uint16x4_t __s1_843 = __p1_843; \ + uint16x4_t __s2_843 = __p2_843; \ + uint32x4_t __ret_843; \ + __ret_843 = __s0_843 + vmull_u16(__s1_843, splat_lane_u16(__s2_843, __p3_843)); \ + __ret_843; \ }) #else -#define vmlal_lane_u16(__p0_752, __p1_752, __p2_752, __p3_752) __extension__ ({ \ - uint32x4_t __s0_752 = __p0_752; \ - uint16x4_t __s1_752 = __p1_752; \ - uint16x4_t __s2_752 = __p2_752; \ - uint32x4_t __rev0_752; __rev0_752 = __builtin_shufflevector(__s0_752, __s0_752, 3, 2, 1, 0); \ - uint16x4_t __rev1_752; __rev1_752 = __builtin_shufflevector(__s1_752, __s1_752, 3, 2, 1, 0); \ - uint16x4_t __rev2_752; __rev2_752 = __builtin_shufflevector(__s2_752, __s2_752, 3, 2, 1, 0); \ - uint32x4_t __ret_752; \ - __ret_752 = __rev0_752 + __noswap_vmull_u16(__rev1_752, __noswap_splat_lane_u16(__rev2_752, __p3_752)); \ - __ret_752 = __builtin_shufflevector(__ret_752, __ret_752, 3, 2, 1, 0); \ - __ret_752; \ +#define vmlal_lane_u16(__p0_844, __p1_844, __p2_844, __p3_844) __extension__ ({ \ + uint32x4_t __s0_844 = __p0_844; \ + uint16x4_t __s1_844 = __p1_844; \ + uint16x4_t __s2_844 = __p2_844; \ + uint32x4_t __rev0_844; __rev0_844 = __builtin_shufflevector(__s0_844, __s0_844, 3, 2, 1, 0); \ + uint16x4_t __rev1_844; __rev1_844 = __builtin_shufflevector(__s1_844, __s1_844, 3, 2, 1, 0); \ + uint16x4_t __rev2_844; __rev2_844 = __builtin_shufflevector(__s2_844, __s2_844, 3, 2, 1, 0); \ + uint32x4_t __ret_844; \ + __ret_844 = __rev0_844 + __noswap_vmull_u16(__rev1_844, __noswap_splat_lane_u16(__rev2_844, __p3_844)); \ + __ret_844 = __builtin_shufflevector(__ret_844, __ret_844, 3, 2, 1, 0); \ + __ret_844; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_lane_s32(__p0_753, __p1_753, __p2_753, __p3_753) __extension__ ({ \ - int64x2_t __s0_753 = __p0_753; \ - int32x2_t __s1_753 = __p1_753; \ - int32x2_t __s2_753 = __p2_753; \ - int64x2_t __ret_753; \ - __ret_753 = __s0_753 + vmull_s32(__s1_753, splat_lane_s32(__s2_753, __p3_753)); \ - __ret_753; \ +#define vmlal_lane_s32(__p0_845, __p1_845, __p2_845, __p3_845) __extension__ ({ \ + int64x2_t __s0_845 = __p0_845; \ + int32x2_t __s1_845 = __p1_845; \ + int32x2_t __s2_845 = __p2_845; \ + int64x2_t __ret_845; \ + __ret_845 = __s0_845 + vmull_s32(__s1_845, splat_lane_s32(__s2_845, __p3_845)); \ + __ret_845; \ }) #else -#define vmlal_lane_s32(__p0_754, __p1_754, __p2_754, __p3_754) __extension__ ({ \ - int64x2_t __s0_754 = __p0_754; \ - int32x2_t __s1_754 = __p1_754; \ - int32x2_t __s2_754 = __p2_754; \ - int64x2_t __rev0_754; __rev0_754 = __builtin_shufflevector(__s0_754, __s0_754, 1, 0); \ - int32x2_t __rev1_754; __rev1_754 = __builtin_shufflevector(__s1_754, __s1_754, 1, 0); \ - int32x2_t __rev2_754; __rev2_754 = __builtin_shufflevector(__s2_754, __s2_754, 1, 0); \ - int64x2_t __ret_754; \ - __ret_754 = __rev0_754 + __noswap_vmull_s32(__rev1_754, __noswap_splat_lane_s32(__rev2_754, __p3_754)); \ - __ret_754 = __builtin_shufflevector(__ret_754, __ret_754, 1, 0); \ - __ret_754; \ +#define vmlal_lane_s32(__p0_846, __p1_846, __p2_846, __p3_846) __extension__ ({ \ + int64x2_t __s0_846 = 
__p0_846; \ + int32x2_t __s1_846 = __p1_846; \ + int32x2_t __s2_846 = __p2_846; \ + int64x2_t __rev0_846; __rev0_846 = __builtin_shufflevector(__s0_846, __s0_846, 1, 0); \ + int32x2_t __rev1_846; __rev1_846 = __builtin_shufflevector(__s1_846, __s1_846, 1, 0); \ + int32x2_t __rev2_846; __rev2_846 = __builtin_shufflevector(__s2_846, __s2_846, 1, 0); \ + int64x2_t __ret_846; \ + __ret_846 = __rev0_846 + __noswap_vmull_s32(__rev1_846, __noswap_splat_lane_s32(__rev2_846, __p3_846)); \ + __ret_846 = __builtin_shufflevector(__ret_846, __ret_846, 1, 0); \ + __ret_846; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlal_lane_s16(__p0_755, __p1_755, __p2_755, __p3_755) __extension__ ({ \ - int32x4_t __s0_755 = __p0_755; \ - int16x4_t __s1_755 = __p1_755; \ - int16x4_t __s2_755 = __p2_755; \ - int32x4_t __ret_755; \ - __ret_755 = __s0_755 + vmull_s16(__s1_755, splat_lane_s16(__s2_755, __p3_755)); \ - __ret_755; \ +#define vmlal_lane_s16(__p0_847, __p1_847, __p2_847, __p3_847) __extension__ ({ \ + int32x4_t __s0_847 = __p0_847; \ + int16x4_t __s1_847 = __p1_847; \ + int16x4_t __s2_847 = __p2_847; \ + int32x4_t __ret_847; \ + __ret_847 = __s0_847 + vmull_s16(__s1_847, splat_lane_s16(__s2_847, __p3_847)); \ + __ret_847; \ }) #else -#define vmlal_lane_s16(__p0_756, __p1_756, __p2_756, __p3_756) __extension__ ({ \ - int32x4_t __s0_756 = __p0_756; \ - int16x4_t __s1_756 = __p1_756; \ - int16x4_t __s2_756 = __p2_756; \ - int32x4_t __rev0_756; __rev0_756 = __builtin_shufflevector(__s0_756, __s0_756, 3, 2, 1, 0); \ - int16x4_t __rev1_756; __rev1_756 = __builtin_shufflevector(__s1_756, __s1_756, 3, 2, 1, 0); \ - int16x4_t __rev2_756; __rev2_756 = __builtin_shufflevector(__s2_756, __s2_756, 3, 2, 1, 0); \ - int32x4_t __ret_756; \ - __ret_756 = __rev0_756 + __noswap_vmull_s16(__rev1_756, __noswap_splat_lane_s16(__rev2_756, __p3_756)); \ - __ret_756 = __builtin_shufflevector(__ret_756, __ret_756, 3, 2, 1, 0); \ - __ret_756; \ +#define vmlal_lane_s16(__p0_848, __p1_848, __p2_848, __p3_848) __extension__ ({ \ + int32x4_t __s0_848 = __p0_848; \ + int16x4_t __s1_848 = __p1_848; \ + int16x4_t __s2_848 = __p2_848; \ + int32x4_t __rev0_848; __rev0_848 = __builtin_shufflevector(__s0_848, __s0_848, 3, 2, 1, 0); \ + int16x4_t __rev1_848; __rev1_848 = __builtin_shufflevector(__s1_848, __s1_848, 3, 2, 1, 0); \ + int16x4_t __rev2_848; __rev2_848 = __builtin_shufflevector(__s2_848, __s2_848, 3, 2, 1, 0); \ + int32x4_t __ret_848; \ + __ret_848 = __rev0_848 + __noswap_vmull_s16(__rev1_848, __noswap_splat_lane_s16(__rev2_848, __p3_848)); \ + __ret_848 = __builtin_shufflevector(__ret_848, __ret_848, 3, 2, 1, 0); \ + __ret_848; \ }) #endif @@ -65192,98 +66862,98 @@ __ai int32x4_t __noswap_vmlsl_s16(int32x4_t __p0, int16x4_t __p1, int16x4_t __p2 #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_lane_u32(__p0_757, __p1_757, __p2_757, __p3_757) __extension__ ({ \ - uint64x2_t __s0_757 = __p0_757; \ - uint32x2_t __s1_757 = __p1_757; \ - uint32x2_t __s2_757 = __p2_757; \ - uint64x2_t __ret_757; \ - __ret_757 = __s0_757 - vmull_u32(__s1_757, splat_lane_u32(__s2_757, __p3_757)); \ - __ret_757; \ +#define vmlsl_lane_u32(__p0_849, __p1_849, __p2_849, __p3_849) __extension__ ({ \ + uint64x2_t __s0_849 = __p0_849; \ + uint32x2_t __s1_849 = __p1_849; \ + uint32x2_t __s2_849 = __p2_849; \ + uint64x2_t __ret_849; \ + __ret_849 = __s0_849 - vmull_u32(__s1_849, splat_lane_u32(__s2_849, __p3_849)); \ + __ret_849; \ }) #else -#define vmlsl_lane_u32(__p0_758, __p1_758, __p2_758, __p3_758) __extension__ ({ \ - uint64x2_t __s0_758 = __p0_758; \ - 
uint32x2_t __s1_758 = __p1_758; \ - uint32x2_t __s2_758 = __p2_758; \ - uint64x2_t __rev0_758; __rev0_758 = __builtin_shufflevector(__s0_758, __s0_758, 1, 0); \ - uint32x2_t __rev1_758; __rev1_758 = __builtin_shufflevector(__s1_758, __s1_758, 1, 0); \ - uint32x2_t __rev2_758; __rev2_758 = __builtin_shufflevector(__s2_758, __s2_758, 1, 0); \ - uint64x2_t __ret_758; \ - __ret_758 = __rev0_758 - __noswap_vmull_u32(__rev1_758, __noswap_splat_lane_u32(__rev2_758, __p3_758)); \ - __ret_758 = __builtin_shufflevector(__ret_758, __ret_758, 1, 0); \ - __ret_758; \ +#define vmlsl_lane_u32(__p0_850, __p1_850, __p2_850, __p3_850) __extension__ ({ \ + uint64x2_t __s0_850 = __p0_850; \ + uint32x2_t __s1_850 = __p1_850; \ + uint32x2_t __s2_850 = __p2_850; \ + uint64x2_t __rev0_850; __rev0_850 = __builtin_shufflevector(__s0_850, __s0_850, 1, 0); \ + uint32x2_t __rev1_850; __rev1_850 = __builtin_shufflevector(__s1_850, __s1_850, 1, 0); \ + uint32x2_t __rev2_850; __rev2_850 = __builtin_shufflevector(__s2_850, __s2_850, 1, 0); \ + uint64x2_t __ret_850; \ + __ret_850 = __rev0_850 - __noswap_vmull_u32(__rev1_850, __noswap_splat_lane_u32(__rev2_850, __p3_850)); \ + __ret_850 = __builtin_shufflevector(__ret_850, __ret_850, 1, 0); \ + __ret_850; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_lane_u16(__p0_759, __p1_759, __p2_759, __p3_759) __extension__ ({ \ - uint32x4_t __s0_759 = __p0_759; \ - uint16x4_t __s1_759 = __p1_759; \ - uint16x4_t __s2_759 = __p2_759; \ - uint32x4_t __ret_759; \ - __ret_759 = __s0_759 - vmull_u16(__s1_759, splat_lane_u16(__s2_759, __p3_759)); \ - __ret_759; \ +#define vmlsl_lane_u16(__p0_851, __p1_851, __p2_851, __p3_851) __extension__ ({ \ + uint32x4_t __s0_851 = __p0_851; \ + uint16x4_t __s1_851 = __p1_851; \ + uint16x4_t __s2_851 = __p2_851; \ + uint32x4_t __ret_851; \ + __ret_851 = __s0_851 - vmull_u16(__s1_851, splat_lane_u16(__s2_851, __p3_851)); \ + __ret_851; \ }) #else -#define vmlsl_lane_u16(__p0_760, __p1_760, __p2_760, __p3_760) __extension__ ({ \ - uint32x4_t __s0_760 = __p0_760; \ - uint16x4_t __s1_760 = __p1_760; \ - uint16x4_t __s2_760 = __p2_760; \ - uint32x4_t __rev0_760; __rev0_760 = __builtin_shufflevector(__s0_760, __s0_760, 3, 2, 1, 0); \ - uint16x4_t __rev1_760; __rev1_760 = __builtin_shufflevector(__s1_760, __s1_760, 3, 2, 1, 0); \ - uint16x4_t __rev2_760; __rev2_760 = __builtin_shufflevector(__s2_760, __s2_760, 3, 2, 1, 0); \ - uint32x4_t __ret_760; \ - __ret_760 = __rev0_760 - __noswap_vmull_u16(__rev1_760, __noswap_splat_lane_u16(__rev2_760, __p3_760)); \ - __ret_760 = __builtin_shufflevector(__ret_760, __ret_760, 3, 2, 1, 0); \ - __ret_760; \ +#define vmlsl_lane_u16(__p0_852, __p1_852, __p2_852, __p3_852) __extension__ ({ \ + uint32x4_t __s0_852 = __p0_852; \ + uint16x4_t __s1_852 = __p1_852; \ + uint16x4_t __s2_852 = __p2_852; \ + uint32x4_t __rev0_852; __rev0_852 = __builtin_shufflevector(__s0_852, __s0_852, 3, 2, 1, 0); \ + uint16x4_t __rev1_852; __rev1_852 = __builtin_shufflevector(__s1_852, __s1_852, 3, 2, 1, 0); \ + uint16x4_t __rev2_852; __rev2_852 = __builtin_shufflevector(__s2_852, __s2_852, 3, 2, 1, 0); \ + uint32x4_t __ret_852; \ + __ret_852 = __rev0_852 - __noswap_vmull_u16(__rev1_852, __noswap_splat_lane_u16(__rev2_852, __p3_852)); \ + __ret_852 = __builtin_shufflevector(__ret_852, __ret_852, 3, 2, 1, 0); \ + __ret_852; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_lane_s32(__p0_761, __p1_761, __p2_761, __p3_761) __extension__ ({ \ - int64x2_t __s0_761 = __p0_761; \ - int32x2_t __s1_761 = __p1_761; \ - int32x2_t __s2_761 = __p2_761; \ 
- int64x2_t __ret_761; \ - __ret_761 = __s0_761 - vmull_s32(__s1_761, splat_lane_s32(__s2_761, __p3_761)); \ - __ret_761; \ +#define vmlsl_lane_s32(__p0_853, __p1_853, __p2_853, __p3_853) __extension__ ({ \ + int64x2_t __s0_853 = __p0_853; \ + int32x2_t __s1_853 = __p1_853; \ + int32x2_t __s2_853 = __p2_853; \ + int64x2_t __ret_853; \ + __ret_853 = __s0_853 - vmull_s32(__s1_853, splat_lane_s32(__s2_853, __p3_853)); \ + __ret_853; \ }) #else -#define vmlsl_lane_s32(__p0_762, __p1_762, __p2_762, __p3_762) __extension__ ({ \ - int64x2_t __s0_762 = __p0_762; \ - int32x2_t __s1_762 = __p1_762; \ - int32x2_t __s2_762 = __p2_762; \ - int64x2_t __rev0_762; __rev0_762 = __builtin_shufflevector(__s0_762, __s0_762, 1, 0); \ - int32x2_t __rev1_762; __rev1_762 = __builtin_shufflevector(__s1_762, __s1_762, 1, 0); \ - int32x2_t __rev2_762; __rev2_762 = __builtin_shufflevector(__s2_762, __s2_762, 1, 0); \ - int64x2_t __ret_762; \ - __ret_762 = __rev0_762 - __noswap_vmull_s32(__rev1_762, __noswap_splat_lane_s32(__rev2_762, __p3_762)); \ - __ret_762 = __builtin_shufflevector(__ret_762, __ret_762, 1, 0); \ - __ret_762; \ +#define vmlsl_lane_s32(__p0_854, __p1_854, __p2_854, __p3_854) __extension__ ({ \ + int64x2_t __s0_854 = __p0_854; \ + int32x2_t __s1_854 = __p1_854; \ + int32x2_t __s2_854 = __p2_854; \ + int64x2_t __rev0_854; __rev0_854 = __builtin_shufflevector(__s0_854, __s0_854, 1, 0); \ + int32x2_t __rev1_854; __rev1_854 = __builtin_shufflevector(__s1_854, __s1_854, 1, 0); \ + int32x2_t __rev2_854; __rev2_854 = __builtin_shufflevector(__s2_854, __s2_854, 1, 0); \ + int64x2_t __ret_854; \ + __ret_854 = __rev0_854 - __noswap_vmull_s32(__rev1_854, __noswap_splat_lane_s32(__rev2_854, __p3_854)); \ + __ret_854 = __builtin_shufflevector(__ret_854, __ret_854, 1, 0); \ + __ret_854; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmlsl_lane_s16(__p0_763, __p1_763, __p2_763, __p3_763) __extension__ ({ \ - int32x4_t __s0_763 = __p0_763; \ - int16x4_t __s1_763 = __p1_763; \ - int16x4_t __s2_763 = __p2_763; \ - int32x4_t __ret_763; \ - __ret_763 = __s0_763 - vmull_s16(__s1_763, splat_lane_s16(__s2_763, __p3_763)); \ - __ret_763; \ +#define vmlsl_lane_s16(__p0_855, __p1_855, __p2_855, __p3_855) __extension__ ({ \ + int32x4_t __s0_855 = __p0_855; \ + int16x4_t __s1_855 = __p1_855; \ + int16x4_t __s2_855 = __p2_855; \ + int32x4_t __ret_855; \ + __ret_855 = __s0_855 - vmull_s16(__s1_855, splat_lane_s16(__s2_855, __p3_855)); \ + __ret_855; \ }) #else -#define vmlsl_lane_s16(__p0_764, __p1_764, __p2_764, __p3_764) __extension__ ({ \ - int32x4_t __s0_764 = __p0_764; \ - int16x4_t __s1_764 = __p1_764; \ - int16x4_t __s2_764 = __p2_764; \ - int32x4_t __rev0_764; __rev0_764 = __builtin_shufflevector(__s0_764, __s0_764, 3, 2, 1, 0); \ - int16x4_t __rev1_764; __rev1_764 = __builtin_shufflevector(__s1_764, __s1_764, 3, 2, 1, 0); \ - int16x4_t __rev2_764; __rev2_764 = __builtin_shufflevector(__s2_764, __s2_764, 3, 2, 1, 0); \ - int32x4_t __ret_764; \ - __ret_764 = __rev0_764 - __noswap_vmull_s16(__rev1_764, __noswap_splat_lane_s16(__rev2_764, __p3_764)); \ - __ret_764 = __builtin_shufflevector(__ret_764, __ret_764, 3, 2, 1, 0); \ - __ret_764; \ +#define vmlsl_lane_s16(__p0_856, __p1_856, __p2_856, __p3_856) __extension__ ({ \ + int32x4_t __s0_856 = __p0_856; \ + int16x4_t __s1_856 = __p1_856; \ + int16x4_t __s2_856 = __p2_856; \ + int32x4_t __rev0_856; __rev0_856 = __builtin_shufflevector(__s0_856, __s0_856, 3, 2, 1, 0); \ + int16x4_t __rev1_856; __rev1_856 = __builtin_shufflevector(__s1_856, __s1_856, 3, 2, 1, 0); \ + int16x4_t 
__rev2_856; __rev2_856 = __builtin_shufflevector(__s2_856, __s2_856, 3, 2, 1, 0); \ + int32x4_t __ret_856; \ + __ret_856 = __rev0_856 - __noswap_vmull_s16(__rev1_856, __noswap_splat_lane_s16(__rev2_856, __p3_856)); \ + __ret_856 = __builtin_shufflevector(__ret_856, __ret_856, 3, 2, 1, 0); \ + __ret_856; \ }) #endif @@ -65376,151 +67046,151 @@ __ai int32x4_t __noswap_vmlsl_n_s16(int32x4_t __p0, int16x4_t __p1, int16_t __p2 #endif #ifdef __LITTLE_ENDIAN__ -#define vset_lane_f16(__p0_765, __p1_765, __p2_765) __extension__ ({ \ - float16_t __s0_765 = __p0_765; \ - float16x4_t __s1_765 = __p1_765; \ - float16x4_t __ret_765; \ -float16_t __reint_765 = __s0_765; \ -float16x4_t __reint1_765 = __s1_765; \ -int16x4_t __reint2_765 = vset_lane_s16(*(int16_t *) &__reint_765, *(int16x4_t *) &__reint1_765, __p2_765); \ - __ret_765 = *(float16x4_t *) &__reint2_765; \ - __ret_765; \ +#define vset_lane_f16(__p0_857, __p1_857, __p2_857) __extension__ ({ \ + float16_t __s0_857 = __p0_857; \ + float16x4_t __s1_857 = __p1_857; \ + float16x4_t __ret_857; \ +float16_t __reint_857 = __s0_857; \ +float16x4_t __reint1_857 = __s1_857; \ +int16x4_t __reint2_857 = vset_lane_s16(*(int16_t *) &__reint_857, *(int16x4_t *) &__reint1_857, __p2_857); \ + __ret_857 = *(float16x4_t *) &__reint2_857; \ + __ret_857; \ }) #else -#define vset_lane_f16(__p0_766, __p1_766, __p2_766) __extension__ ({ \ - float16_t __s0_766 = __p0_766; \ - float16x4_t __s1_766 = __p1_766; \ - float16x4_t __rev1_766; __rev1_766 = __builtin_shufflevector(__s1_766, __s1_766, 3, 2, 1, 0); \ - float16x4_t __ret_766; \ -float16_t __reint_766 = __s0_766; \ -float16x4_t __reint1_766 = __rev1_766; \ -int16x4_t __reint2_766 = __noswap_vset_lane_s16(*(int16_t *) &__reint_766, *(int16x4_t *) &__reint1_766, __p2_766); \ - __ret_766 = *(float16x4_t *) &__reint2_766; \ - __ret_766 = __builtin_shufflevector(__ret_766, __ret_766, 3, 2, 1, 0); \ - __ret_766; \ +#define vset_lane_f16(__p0_858, __p1_858, __p2_858) __extension__ ({ \ + float16_t __s0_858 = __p0_858; \ + float16x4_t __s1_858 = __p1_858; \ + float16x4_t __rev1_858; __rev1_858 = __builtin_shufflevector(__s1_858, __s1_858, 3, 2, 1, 0); \ + float16x4_t __ret_858; \ +float16_t __reint_858 = __s0_858; \ +float16x4_t __reint1_858 = __rev1_858; \ +int16x4_t __reint2_858 = __noswap_vset_lane_s16(*(int16_t *) &__reint_858, *(int16x4_t *) &__reint1_858, __p2_858); \ + __ret_858 = *(float16x4_t *) &__reint2_858; \ + __ret_858 = __builtin_shufflevector(__ret_858, __ret_858, 3, 2, 1, 0); \ + __ret_858; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsetq_lane_f16(__p0_767, __p1_767, __p2_767) __extension__ ({ \ - float16_t __s0_767 = __p0_767; \ - float16x8_t __s1_767 = __p1_767; \ - float16x8_t __ret_767; \ -float16_t __reint_767 = __s0_767; \ -float16x8_t __reint1_767 = __s1_767; \ -int16x8_t __reint2_767 = vsetq_lane_s16(*(int16_t *) &__reint_767, *(int16x8_t *) &__reint1_767, __p2_767); \ - __ret_767 = *(float16x8_t *) &__reint2_767; \ - __ret_767; \ +#define vsetq_lane_f16(__p0_859, __p1_859, __p2_859) __extension__ ({ \ + float16_t __s0_859 = __p0_859; \ + float16x8_t __s1_859 = __p1_859; \ + float16x8_t __ret_859; \ +float16_t __reint_859 = __s0_859; \ +float16x8_t __reint1_859 = __s1_859; \ +int16x8_t __reint2_859 = vsetq_lane_s16(*(int16_t *) &__reint_859, *(int16x8_t *) &__reint1_859, __p2_859); \ + __ret_859 = *(float16x8_t *) &__reint2_859; \ + __ret_859; \ }) #else -#define vsetq_lane_f16(__p0_768, __p1_768, __p2_768) __extension__ ({ \ - float16_t __s0_768 = __p0_768; \ - float16x8_t __s1_768 = __p1_768; \ 
- float16x8_t __rev1_768; __rev1_768 = __builtin_shufflevector(__s1_768, __s1_768, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __ret_768; \ -float16_t __reint_768 = __s0_768; \ -float16x8_t __reint1_768 = __rev1_768; \ -int16x8_t __reint2_768 = __noswap_vsetq_lane_s16(*(int16_t *) &__reint_768, *(int16x8_t *) &__reint1_768, __p2_768); \ - __ret_768 = *(float16x8_t *) &__reint2_768; \ - __ret_768 = __builtin_shufflevector(__ret_768, __ret_768, 7, 6, 5, 4, 3, 2, 1, 0); \ - __ret_768; \ +#define vsetq_lane_f16(__p0_860, __p1_860, __p2_860) __extension__ ({ \ + float16_t __s0_860 = __p0_860; \ + float16x8_t __s1_860 = __p1_860; \ + float16x8_t __rev1_860; __rev1_860 = __builtin_shufflevector(__s1_860, __s1_860, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __ret_860; \ +float16_t __reint_860 = __s0_860; \ +float16x8_t __reint1_860 = __rev1_860; \ +int16x8_t __reint2_860 = __noswap_vsetq_lane_s16(*(int16_t *) &__reint_860, *(int16x8_t *) &__reint1_860, __p2_860); \ + __ret_860 = *(float16x8_t *) &__reint2_860; \ + __ret_860 = __builtin_shufflevector(__ret_860, __ret_860, 7, 6, 5, 4, 3, 2, 1, 0); \ + __ret_860; \ }) #endif #if defined(__ARM_FEATURE_BF16_VECTOR_ARITHMETIC) #ifdef __LITTLE_ENDIAN__ -#define vbfmlalbq_lane_f32(__p0_769, __p1_769, __p2_769, __p3_769) __extension__ ({ \ - float32x4_t __s0_769 = __p0_769; \ - bfloat16x8_t __s1_769 = __p1_769; \ - bfloat16x4_t __s2_769 = __p2_769; \ - float32x4_t __ret_769; \ - __ret_769 = vbfmlalbq_f32(__s0_769, __s1_769, (bfloat16x8_t) {vget_lane_bf16(__s2_769, __p3_769), vget_lane_bf16(__s2_769, __p3_769), vget_lane_bf16(__s2_769, __p3_769), vget_lane_bf16(__s2_769, __p3_769), vget_lane_bf16(__s2_769, __p3_769), vget_lane_bf16(__s2_769, __p3_769), vget_lane_bf16(__s2_769, __p3_769), vget_lane_bf16(__s2_769, __p3_769)}); \ - __ret_769; \ +#define vbfmlalbq_lane_f32(__p0_861, __p1_861, __p2_861, __p3_861) __extension__ ({ \ + float32x4_t __s0_861 = __p0_861; \ + bfloat16x8_t __s1_861 = __p1_861; \ + bfloat16x4_t __s2_861 = __p2_861; \ + float32x4_t __ret_861; \ + __ret_861 = vbfmlalbq_f32(__s0_861, __s1_861, (bfloat16x8_t) {vget_lane_bf16(__s2_861, __p3_861), vget_lane_bf16(__s2_861, __p3_861), vget_lane_bf16(__s2_861, __p3_861), vget_lane_bf16(__s2_861, __p3_861), vget_lane_bf16(__s2_861, __p3_861), vget_lane_bf16(__s2_861, __p3_861), vget_lane_bf16(__s2_861, __p3_861), vget_lane_bf16(__s2_861, __p3_861)}); \ + __ret_861; \ }) #else -#define vbfmlalbq_lane_f32(__p0_770, __p1_770, __p2_770, __p3_770) __extension__ ({ \ - float32x4_t __s0_770 = __p0_770; \ - bfloat16x8_t __s1_770 = __p1_770; \ - bfloat16x4_t __s2_770 = __p2_770; \ - float32x4_t __rev0_770; __rev0_770 = __builtin_shufflevector(__s0_770, __s0_770, 3, 2, 1, 0); \ - bfloat16x8_t __rev1_770; __rev1_770 = __builtin_shufflevector(__s1_770, __s1_770, 7, 6, 5, 4, 3, 2, 1, 0); \ - bfloat16x4_t __rev2_770; __rev2_770 = __builtin_shufflevector(__s2_770, __s2_770, 3, 2, 1, 0); \ - float32x4_t __ret_770; \ - __ret_770 = __noswap_vbfmlalbq_f32(__rev0_770, __rev1_770, (bfloat16x8_t) {__noswap_vget_lane_bf16(__rev2_770, __p3_770), __noswap_vget_lane_bf16(__rev2_770, __p3_770), __noswap_vget_lane_bf16(__rev2_770, __p3_770), __noswap_vget_lane_bf16(__rev2_770, __p3_770), __noswap_vget_lane_bf16(__rev2_770, __p3_770), __noswap_vget_lane_bf16(__rev2_770, __p3_770), __noswap_vget_lane_bf16(__rev2_770, __p3_770), __noswap_vget_lane_bf16(__rev2_770, __p3_770)}); \ - __ret_770 = __builtin_shufflevector(__ret_770, __ret_770, 3, 2, 1, 0); \ - __ret_770; \ +#define vbfmlalbq_lane_f32(__p0_862, __p1_862, __p2_862, 
__p3_862) __extension__ ({ \ + float32x4_t __s0_862 = __p0_862; \ + bfloat16x8_t __s1_862 = __p1_862; \ + bfloat16x4_t __s2_862 = __p2_862; \ + float32x4_t __rev0_862; __rev0_862 = __builtin_shufflevector(__s0_862, __s0_862, 3, 2, 1, 0); \ + bfloat16x8_t __rev1_862; __rev1_862 = __builtin_shufflevector(__s1_862, __s1_862, 7, 6, 5, 4, 3, 2, 1, 0); \ + bfloat16x4_t __rev2_862; __rev2_862 = __builtin_shufflevector(__s2_862, __s2_862, 3, 2, 1, 0); \ + float32x4_t __ret_862; \ + __ret_862 = __noswap_vbfmlalbq_f32(__rev0_862, __rev1_862, (bfloat16x8_t) {__noswap_vget_lane_bf16(__rev2_862, __p3_862), __noswap_vget_lane_bf16(__rev2_862, __p3_862), __noswap_vget_lane_bf16(__rev2_862, __p3_862), __noswap_vget_lane_bf16(__rev2_862, __p3_862), __noswap_vget_lane_bf16(__rev2_862, __p3_862), __noswap_vget_lane_bf16(__rev2_862, __p3_862), __noswap_vget_lane_bf16(__rev2_862, __p3_862), __noswap_vget_lane_bf16(__rev2_862, __p3_862)}); \ + __ret_862 = __builtin_shufflevector(__ret_862, __ret_862, 3, 2, 1, 0); \ + __ret_862; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vbfmlalbq_laneq_f32(__p0_771, __p1_771, __p2_771, __p3_771) __extension__ ({ \ - float32x4_t __s0_771 = __p0_771; \ - bfloat16x8_t __s1_771 = __p1_771; \ - bfloat16x8_t __s2_771 = __p2_771; \ - float32x4_t __ret_771; \ - __ret_771 = vbfmlalbq_f32(__s0_771, __s1_771, (bfloat16x8_t) {vgetq_lane_bf16(__s2_771, __p3_771), vgetq_lane_bf16(__s2_771, __p3_771), vgetq_lane_bf16(__s2_771, __p3_771), vgetq_lane_bf16(__s2_771, __p3_771), vgetq_lane_bf16(__s2_771, __p3_771), vgetq_lane_bf16(__s2_771, __p3_771), vgetq_lane_bf16(__s2_771, __p3_771), vgetq_lane_bf16(__s2_771, __p3_771)}); \ - __ret_771; \ +#define vbfmlalbq_laneq_f32(__p0_863, __p1_863, __p2_863, __p3_863) __extension__ ({ \ + float32x4_t __s0_863 = __p0_863; \ + bfloat16x8_t __s1_863 = __p1_863; \ + bfloat16x8_t __s2_863 = __p2_863; \ + float32x4_t __ret_863; \ + __ret_863 = vbfmlalbq_f32(__s0_863, __s1_863, (bfloat16x8_t) {vgetq_lane_bf16(__s2_863, __p3_863), vgetq_lane_bf16(__s2_863, __p3_863), vgetq_lane_bf16(__s2_863, __p3_863), vgetq_lane_bf16(__s2_863, __p3_863), vgetq_lane_bf16(__s2_863, __p3_863), vgetq_lane_bf16(__s2_863, __p3_863), vgetq_lane_bf16(__s2_863, __p3_863), vgetq_lane_bf16(__s2_863, __p3_863)}); \ + __ret_863; \ }) #else -#define vbfmlalbq_laneq_f32(__p0_772, __p1_772, __p2_772, __p3_772) __extension__ ({ \ - float32x4_t __s0_772 = __p0_772; \ - bfloat16x8_t __s1_772 = __p1_772; \ - bfloat16x8_t __s2_772 = __p2_772; \ - float32x4_t __rev0_772; __rev0_772 = __builtin_shufflevector(__s0_772, __s0_772, 3, 2, 1, 0); \ - bfloat16x8_t __rev1_772; __rev1_772 = __builtin_shufflevector(__s1_772, __s1_772, 7, 6, 5, 4, 3, 2, 1, 0); \ - bfloat16x8_t __rev2_772; __rev2_772 = __builtin_shufflevector(__s2_772, __s2_772, 7, 6, 5, 4, 3, 2, 1, 0); \ - float32x4_t __ret_772; \ - __ret_772 = __noswap_vbfmlalbq_f32(__rev0_772, __rev1_772, (bfloat16x8_t) {__noswap_vgetq_lane_bf16(__rev2_772, __p3_772), __noswap_vgetq_lane_bf16(__rev2_772, __p3_772), __noswap_vgetq_lane_bf16(__rev2_772, __p3_772), __noswap_vgetq_lane_bf16(__rev2_772, __p3_772), __noswap_vgetq_lane_bf16(__rev2_772, __p3_772), __noswap_vgetq_lane_bf16(__rev2_772, __p3_772), __noswap_vgetq_lane_bf16(__rev2_772, __p3_772), __noswap_vgetq_lane_bf16(__rev2_772, __p3_772)}); \ - __ret_772 = __builtin_shufflevector(__ret_772, __ret_772, 3, 2, 1, 0); \ - __ret_772; \ +#define vbfmlalbq_laneq_f32(__p0_864, __p1_864, __p2_864, __p3_864) __extension__ ({ \ + float32x4_t __s0_864 = __p0_864; \ + bfloat16x8_t __s1_864 = __p1_864; \ + 
bfloat16x8_t __s2_864 = __p2_864; \ + float32x4_t __rev0_864; __rev0_864 = __builtin_shufflevector(__s0_864, __s0_864, 3, 2, 1, 0); \ + bfloat16x8_t __rev1_864; __rev1_864 = __builtin_shufflevector(__s1_864, __s1_864, 7, 6, 5, 4, 3, 2, 1, 0); \ + bfloat16x8_t __rev2_864; __rev2_864 = __builtin_shufflevector(__s2_864, __s2_864, 7, 6, 5, 4, 3, 2, 1, 0); \ + float32x4_t __ret_864; \ + __ret_864 = __noswap_vbfmlalbq_f32(__rev0_864, __rev1_864, (bfloat16x8_t) {__noswap_vgetq_lane_bf16(__rev2_864, __p3_864), __noswap_vgetq_lane_bf16(__rev2_864, __p3_864), __noswap_vgetq_lane_bf16(__rev2_864, __p3_864), __noswap_vgetq_lane_bf16(__rev2_864, __p3_864), __noswap_vgetq_lane_bf16(__rev2_864, __p3_864), __noswap_vgetq_lane_bf16(__rev2_864, __p3_864), __noswap_vgetq_lane_bf16(__rev2_864, __p3_864), __noswap_vgetq_lane_bf16(__rev2_864, __p3_864)}); \ + __ret_864 = __builtin_shufflevector(__ret_864, __ret_864, 3, 2, 1, 0); \ + __ret_864; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vbfmlaltq_lane_f32(__p0_773, __p1_773, __p2_773, __p3_773) __extension__ ({ \ - float32x4_t __s0_773 = __p0_773; \ - bfloat16x8_t __s1_773 = __p1_773; \ - bfloat16x4_t __s2_773 = __p2_773; \ - float32x4_t __ret_773; \ - __ret_773 = vbfmlaltq_f32(__s0_773, __s1_773, (bfloat16x8_t) {vget_lane_bf16(__s2_773, __p3_773), vget_lane_bf16(__s2_773, __p3_773), vget_lane_bf16(__s2_773, __p3_773), vget_lane_bf16(__s2_773, __p3_773), vget_lane_bf16(__s2_773, __p3_773), vget_lane_bf16(__s2_773, __p3_773), vget_lane_bf16(__s2_773, __p3_773), vget_lane_bf16(__s2_773, __p3_773)}); \ - __ret_773; \ +#define vbfmlaltq_lane_f32(__p0_865, __p1_865, __p2_865, __p3_865) __extension__ ({ \ + float32x4_t __s0_865 = __p0_865; \ + bfloat16x8_t __s1_865 = __p1_865; \ + bfloat16x4_t __s2_865 = __p2_865; \ + float32x4_t __ret_865; \ + __ret_865 = vbfmlaltq_f32(__s0_865, __s1_865, (bfloat16x8_t) {vget_lane_bf16(__s2_865, __p3_865), vget_lane_bf16(__s2_865, __p3_865), vget_lane_bf16(__s2_865, __p3_865), vget_lane_bf16(__s2_865, __p3_865), vget_lane_bf16(__s2_865, __p3_865), vget_lane_bf16(__s2_865, __p3_865), vget_lane_bf16(__s2_865, __p3_865), vget_lane_bf16(__s2_865, __p3_865)}); \ + __ret_865; \ }) #else -#define vbfmlaltq_lane_f32(__p0_774, __p1_774, __p2_774, __p3_774) __extension__ ({ \ - float32x4_t __s0_774 = __p0_774; \ - bfloat16x8_t __s1_774 = __p1_774; \ - bfloat16x4_t __s2_774 = __p2_774; \ - float32x4_t __rev0_774; __rev0_774 = __builtin_shufflevector(__s0_774, __s0_774, 3, 2, 1, 0); \ - bfloat16x8_t __rev1_774; __rev1_774 = __builtin_shufflevector(__s1_774, __s1_774, 7, 6, 5, 4, 3, 2, 1, 0); \ - bfloat16x4_t __rev2_774; __rev2_774 = __builtin_shufflevector(__s2_774, __s2_774, 3, 2, 1, 0); \ - float32x4_t __ret_774; \ - __ret_774 = __noswap_vbfmlaltq_f32(__rev0_774, __rev1_774, (bfloat16x8_t) {__noswap_vget_lane_bf16(__rev2_774, __p3_774), __noswap_vget_lane_bf16(__rev2_774, __p3_774), __noswap_vget_lane_bf16(__rev2_774, __p3_774), __noswap_vget_lane_bf16(__rev2_774, __p3_774), __noswap_vget_lane_bf16(__rev2_774, __p3_774), __noswap_vget_lane_bf16(__rev2_774, __p3_774), __noswap_vget_lane_bf16(__rev2_774, __p3_774), __noswap_vget_lane_bf16(__rev2_774, __p3_774)}); \ - __ret_774 = __builtin_shufflevector(__ret_774, __ret_774, 3, 2, 1, 0); \ - __ret_774; \ +#define vbfmlaltq_lane_f32(__p0_866, __p1_866, __p2_866, __p3_866) __extension__ ({ \ + float32x4_t __s0_866 = __p0_866; \ + bfloat16x8_t __s1_866 = __p1_866; \ + bfloat16x4_t __s2_866 = __p2_866; \ + float32x4_t __rev0_866; __rev0_866 = __builtin_shufflevector(__s0_866, __s0_866, 3, 2, 1, 
0); \ + bfloat16x8_t __rev1_866; __rev1_866 = __builtin_shufflevector(__s1_866, __s1_866, 7, 6, 5, 4, 3, 2, 1, 0); \ + bfloat16x4_t __rev2_866; __rev2_866 = __builtin_shufflevector(__s2_866, __s2_866, 3, 2, 1, 0); \ + float32x4_t __ret_866; \ + __ret_866 = __noswap_vbfmlaltq_f32(__rev0_866, __rev1_866, (bfloat16x8_t) {__noswap_vget_lane_bf16(__rev2_866, __p3_866), __noswap_vget_lane_bf16(__rev2_866, __p3_866), __noswap_vget_lane_bf16(__rev2_866, __p3_866), __noswap_vget_lane_bf16(__rev2_866, __p3_866), __noswap_vget_lane_bf16(__rev2_866, __p3_866), __noswap_vget_lane_bf16(__rev2_866, __p3_866), __noswap_vget_lane_bf16(__rev2_866, __p3_866), __noswap_vget_lane_bf16(__rev2_866, __p3_866)}); \ + __ret_866 = __builtin_shufflevector(__ret_866, __ret_866, 3, 2, 1, 0); \ + __ret_866; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vbfmlaltq_laneq_f32(__p0_775, __p1_775, __p2_775, __p3_775) __extension__ ({ \ - float32x4_t __s0_775 = __p0_775; \ - bfloat16x8_t __s1_775 = __p1_775; \ - bfloat16x8_t __s2_775 = __p2_775; \ - float32x4_t __ret_775; \ - __ret_775 = vbfmlaltq_f32(__s0_775, __s1_775, (bfloat16x8_t) {vgetq_lane_bf16(__s2_775, __p3_775), vgetq_lane_bf16(__s2_775, __p3_775), vgetq_lane_bf16(__s2_775, __p3_775), vgetq_lane_bf16(__s2_775, __p3_775), vgetq_lane_bf16(__s2_775, __p3_775), vgetq_lane_bf16(__s2_775, __p3_775), vgetq_lane_bf16(__s2_775, __p3_775), vgetq_lane_bf16(__s2_775, __p3_775)}); \ - __ret_775; \ +#define vbfmlaltq_laneq_f32(__p0_867, __p1_867, __p2_867, __p3_867) __extension__ ({ \ + float32x4_t __s0_867 = __p0_867; \ + bfloat16x8_t __s1_867 = __p1_867; \ + bfloat16x8_t __s2_867 = __p2_867; \ + float32x4_t __ret_867; \ + __ret_867 = vbfmlaltq_f32(__s0_867, __s1_867, (bfloat16x8_t) {vgetq_lane_bf16(__s2_867, __p3_867), vgetq_lane_bf16(__s2_867, __p3_867), vgetq_lane_bf16(__s2_867, __p3_867), vgetq_lane_bf16(__s2_867, __p3_867), vgetq_lane_bf16(__s2_867, __p3_867), vgetq_lane_bf16(__s2_867, __p3_867), vgetq_lane_bf16(__s2_867, __p3_867), vgetq_lane_bf16(__s2_867, __p3_867)}); \ + __ret_867; \ }) #else -#define vbfmlaltq_laneq_f32(__p0_776, __p1_776, __p2_776, __p3_776) __extension__ ({ \ - float32x4_t __s0_776 = __p0_776; \ - bfloat16x8_t __s1_776 = __p1_776; \ - bfloat16x8_t __s2_776 = __p2_776; \ - float32x4_t __rev0_776; __rev0_776 = __builtin_shufflevector(__s0_776, __s0_776, 3, 2, 1, 0); \ - bfloat16x8_t __rev1_776; __rev1_776 = __builtin_shufflevector(__s1_776, __s1_776, 7, 6, 5, 4, 3, 2, 1, 0); \ - bfloat16x8_t __rev2_776; __rev2_776 = __builtin_shufflevector(__s2_776, __s2_776, 7, 6, 5, 4, 3, 2, 1, 0); \ - float32x4_t __ret_776; \ - __ret_776 = __noswap_vbfmlaltq_f32(__rev0_776, __rev1_776, (bfloat16x8_t) {__noswap_vgetq_lane_bf16(__rev2_776, __p3_776), __noswap_vgetq_lane_bf16(__rev2_776, __p3_776), __noswap_vgetq_lane_bf16(__rev2_776, __p3_776), __noswap_vgetq_lane_bf16(__rev2_776, __p3_776), __noswap_vgetq_lane_bf16(__rev2_776, __p3_776), __noswap_vgetq_lane_bf16(__rev2_776, __p3_776), __noswap_vgetq_lane_bf16(__rev2_776, __p3_776), __noswap_vgetq_lane_bf16(__rev2_776, __p3_776)}); \ - __ret_776 = __builtin_shufflevector(__ret_776, __ret_776, 3, 2, 1, 0); \ - __ret_776; \ +#define vbfmlaltq_laneq_f32(__p0_868, __p1_868, __p2_868, __p3_868) __extension__ ({ \ + float32x4_t __s0_868 = __p0_868; \ + bfloat16x8_t __s1_868 = __p1_868; \ + bfloat16x8_t __s2_868 = __p2_868; \ + float32x4_t __rev0_868; __rev0_868 = __builtin_shufflevector(__s0_868, __s0_868, 3, 2, 1, 0); \ + bfloat16x8_t __rev1_868; __rev1_868 = __builtin_shufflevector(__s1_868, __s1_868, 7, 6, 5, 4, 3, 2, 1, 
0); \ + bfloat16x8_t __rev2_868; __rev2_868 = __builtin_shufflevector(__s2_868, __s2_868, 7, 6, 5, 4, 3, 2, 1, 0); \ + float32x4_t __ret_868; \ + __ret_868 = __noswap_vbfmlaltq_f32(__rev0_868, __rev1_868, (bfloat16x8_t) {__noswap_vgetq_lane_bf16(__rev2_868, __p3_868), __noswap_vgetq_lane_bf16(__rev2_868, __p3_868), __noswap_vgetq_lane_bf16(__rev2_868, __p3_868), __noswap_vgetq_lane_bf16(__rev2_868, __p3_868), __noswap_vgetq_lane_bf16(__rev2_868, __p3_868), __noswap_vgetq_lane_bf16(__rev2_868, __p3_868), __noswap_vgetq_lane_bf16(__rev2_868, __p3_868), __noswap_vgetq_lane_bf16(__rev2_868, __p3_868)}); \ + __ret_868 = __builtin_shufflevector(__ret_868, __ret_868, 3, 2, 1, 0); \ + __ret_868; \ }) #endif @@ -65559,480 +67229,480 @@ __ai float32x4_t vcvtq_low_f32_bf16(bfloat16x8_t __p0) { #endif #if defined(__ARM_FEATURE_FP16FML) && defined(__aarch64__) #ifdef __LITTLE_ENDIAN__ -#define vfmlalq_lane_high_f16(__p0_777, __p1_777, __p2_777, __p3_777) __extension__ ({ \ - float32x4_t __s0_777 = __p0_777; \ - float16x8_t __s1_777 = __p1_777; \ - float16x4_t __s2_777 = __p2_777; \ - float32x4_t __ret_777; \ - __ret_777 = vfmlalq_high_f16(__s0_777, __s1_777, (float16x8_t) {vget_lane_f16(__s2_777, __p3_777), vget_lane_f16(__s2_777, __p3_777), vget_lane_f16(__s2_777, __p3_777), vget_lane_f16(__s2_777, __p3_777), vget_lane_f16(__s2_777, __p3_777), vget_lane_f16(__s2_777, __p3_777), vget_lane_f16(__s2_777, __p3_777), vget_lane_f16(__s2_777, __p3_777)}); \ - __ret_777; \ +#define vfmlalq_lane_high_f16(__p0_869, __p1_869, __p2_869, __p3_869) __extension__ ({ \ + float32x4_t __s0_869 = __p0_869; \ + float16x8_t __s1_869 = __p1_869; \ + float16x4_t __s2_869 = __p2_869; \ + float32x4_t __ret_869; \ + __ret_869 = vfmlalq_high_f16(__s0_869, __s1_869, (float16x8_t) {vget_lane_f16(__s2_869, __p3_869), vget_lane_f16(__s2_869, __p3_869), vget_lane_f16(__s2_869, __p3_869), vget_lane_f16(__s2_869, __p3_869), vget_lane_f16(__s2_869, __p3_869), vget_lane_f16(__s2_869, __p3_869), vget_lane_f16(__s2_869, __p3_869), vget_lane_f16(__s2_869, __p3_869)}); \ + __ret_869; \ }) #else -#define vfmlalq_lane_high_f16(__p0_778, __p1_778, __p2_778, __p3_778) __extension__ ({ \ - float32x4_t __s0_778 = __p0_778; \ - float16x8_t __s1_778 = __p1_778; \ - float16x4_t __s2_778 = __p2_778; \ - float32x4_t __rev0_778; __rev0_778 = __builtin_shufflevector(__s0_778, __s0_778, 3, 2, 1, 0); \ - float16x8_t __rev1_778; __rev1_778 = __builtin_shufflevector(__s1_778, __s1_778, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev2_778; __rev2_778 = __builtin_shufflevector(__s2_778, __s2_778, 3, 2, 1, 0); \ - float32x4_t __ret_778; \ - __ret_778 = __noswap_vfmlalq_high_f16(__rev0_778, __rev1_778, (float16x8_t) {__noswap_vget_lane_f16(__rev2_778, __p3_778), __noswap_vget_lane_f16(__rev2_778, __p3_778), __noswap_vget_lane_f16(__rev2_778, __p3_778), __noswap_vget_lane_f16(__rev2_778, __p3_778), __noswap_vget_lane_f16(__rev2_778, __p3_778), __noswap_vget_lane_f16(__rev2_778, __p3_778), __noswap_vget_lane_f16(__rev2_778, __p3_778), __noswap_vget_lane_f16(__rev2_778, __p3_778)}); \ - __ret_778 = __builtin_shufflevector(__ret_778, __ret_778, 3, 2, 1, 0); \ - __ret_778; \ +#define vfmlalq_lane_high_f16(__p0_870, __p1_870, __p2_870, __p3_870) __extension__ ({ \ + float32x4_t __s0_870 = __p0_870; \ + float16x8_t __s1_870 = __p1_870; \ + float16x4_t __s2_870 = __p2_870; \ + float32x4_t __rev0_870; __rev0_870 = __builtin_shufflevector(__s0_870, __s0_870, 3, 2, 1, 0); \ + float16x8_t __rev1_870; __rev1_870 = __builtin_shufflevector(__s1_870, __s1_870, 7, 6, 5, 4, 3, 2, 
1, 0); \ + float16x4_t __rev2_870; __rev2_870 = __builtin_shufflevector(__s2_870, __s2_870, 3, 2, 1, 0); \ + float32x4_t __ret_870; \ + __ret_870 = __noswap_vfmlalq_high_f16(__rev0_870, __rev1_870, (float16x8_t) {__noswap_vget_lane_f16(__rev2_870, __p3_870), __noswap_vget_lane_f16(__rev2_870, __p3_870), __noswap_vget_lane_f16(__rev2_870, __p3_870), __noswap_vget_lane_f16(__rev2_870, __p3_870), __noswap_vget_lane_f16(__rev2_870, __p3_870), __noswap_vget_lane_f16(__rev2_870, __p3_870), __noswap_vget_lane_f16(__rev2_870, __p3_870), __noswap_vget_lane_f16(__rev2_870, __p3_870)}); \ + __ret_870 = __builtin_shufflevector(__ret_870, __ret_870, 3, 2, 1, 0); \ + __ret_870; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlal_lane_high_f16(__p0_779, __p1_779, __p2_779, __p3_779) __extension__ ({ \ - float32x2_t __s0_779 = __p0_779; \ - float16x4_t __s1_779 = __p1_779; \ - float16x4_t __s2_779 = __p2_779; \ - float32x2_t __ret_779; \ - __ret_779 = vfmlal_high_f16(__s0_779, __s1_779, (float16x4_t) {vget_lane_f16(__s2_779, __p3_779), vget_lane_f16(__s2_779, __p3_779), vget_lane_f16(__s2_779, __p3_779), vget_lane_f16(__s2_779, __p3_779)}); \ - __ret_779; \ +#define vfmlal_lane_high_f16(__p0_871, __p1_871, __p2_871, __p3_871) __extension__ ({ \ + float32x2_t __s0_871 = __p0_871; \ + float16x4_t __s1_871 = __p1_871; \ + float16x4_t __s2_871 = __p2_871; \ + float32x2_t __ret_871; \ + __ret_871 = vfmlal_high_f16(__s0_871, __s1_871, (float16x4_t) {vget_lane_f16(__s2_871, __p3_871), vget_lane_f16(__s2_871, __p3_871), vget_lane_f16(__s2_871, __p3_871), vget_lane_f16(__s2_871, __p3_871)}); \ + __ret_871; \ }) #else -#define vfmlal_lane_high_f16(__p0_780, __p1_780, __p2_780, __p3_780) __extension__ ({ \ - float32x2_t __s0_780 = __p0_780; \ - float16x4_t __s1_780 = __p1_780; \ - float16x4_t __s2_780 = __p2_780; \ - float32x2_t __rev0_780; __rev0_780 = __builtin_shufflevector(__s0_780, __s0_780, 1, 0); \ - float16x4_t __rev1_780; __rev1_780 = __builtin_shufflevector(__s1_780, __s1_780, 3, 2, 1, 0); \ - float16x4_t __rev2_780; __rev2_780 = __builtin_shufflevector(__s2_780, __s2_780, 3, 2, 1, 0); \ - float32x2_t __ret_780; \ - __ret_780 = __noswap_vfmlal_high_f16(__rev0_780, __rev1_780, (float16x4_t) {__noswap_vget_lane_f16(__rev2_780, __p3_780), __noswap_vget_lane_f16(__rev2_780, __p3_780), __noswap_vget_lane_f16(__rev2_780, __p3_780), __noswap_vget_lane_f16(__rev2_780, __p3_780)}); \ - __ret_780 = __builtin_shufflevector(__ret_780, __ret_780, 1, 0); \ - __ret_780; \ +#define vfmlal_lane_high_f16(__p0_872, __p1_872, __p2_872, __p3_872) __extension__ ({ \ + float32x2_t __s0_872 = __p0_872; \ + float16x4_t __s1_872 = __p1_872; \ + float16x4_t __s2_872 = __p2_872; \ + float32x2_t __rev0_872; __rev0_872 = __builtin_shufflevector(__s0_872, __s0_872, 1, 0); \ + float16x4_t __rev1_872; __rev1_872 = __builtin_shufflevector(__s1_872, __s1_872, 3, 2, 1, 0); \ + float16x4_t __rev2_872; __rev2_872 = __builtin_shufflevector(__s2_872, __s2_872, 3, 2, 1, 0); \ + float32x2_t __ret_872; \ + __ret_872 = __noswap_vfmlal_high_f16(__rev0_872, __rev1_872, (float16x4_t) {__noswap_vget_lane_f16(__rev2_872, __p3_872), __noswap_vget_lane_f16(__rev2_872, __p3_872), __noswap_vget_lane_f16(__rev2_872, __p3_872), __noswap_vget_lane_f16(__rev2_872, __p3_872)}); \ + __ret_872 = __builtin_shufflevector(__ret_872, __ret_872, 1, 0); \ + __ret_872; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlalq_lane_low_f16(__p0_781, __p1_781, __p2_781, __p3_781) __extension__ ({ \ - float32x4_t __s0_781 = __p0_781; \ - float16x8_t __s1_781 = __p1_781; \ - 
float16x4_t __s2_781 = __p2_781; \ - float32x4_t __ret_781; \ - __ret_781 = vfmlalq_low_f16(__s0_781, __s1_781, (float16x8_t) {vget_lane_f16(__s2_781, __p3_781), vget_lane_f16(__s2_781, __p3_781), vget_lane_f16(__s2_781, __p3_781), vget_lane_f16(__s2_781, __p3_781), vget_lane_f16(__s2_781, __p3_781), vget_lane_f16(__s2_781, __p3_781), vget_lane_f16(__s2_781, __p3_781), vget_lane_f16(__s2_781, __p3_781)}); \ - __ret_781; \ +#define vfmlalq_lane_low_f16(__p0_873, __p1_873, __p2_873, __p3_873) __extension__ ({ \ + float32x4_t __s0_873 = __p0_873; \ + float16x8_t __s1_873 = __p1_873; \ + float16x4_t __s2_873 = __p2_873; \ + float32x4_t __ret_873; \ + __ret_873 = vfmlalq_low_f16(__s0_873, __s1_873, (float16x8_t) {vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873), vget_lane_f16(__s2_873, __p3_873)}); \ + __ret_873; \ }) #else -#define vfmlalq_lane_low_f16(__p0_782, __p1_782, __p2_782, __p3_782) __extension__ ({ \ - float32x4_t __s0_782 = __p0_782; \ - float16x8_t __s1_782 = __p1_782; \ - float16x4_t __s2_782 = __p2_782; \ - float32x4_t __rev0_782; __rev0_782 = __builtin_shufflevector(__s0_782, __s0_782, 3, 2, 1, 0); \ - float16x8_t __rev1_782; __rev1_782 = __builtin_shufflevector(__s1_782, __s1_782, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev2_782; __rev2_782 = __builtin_shufflevector(__s2_782, __s2_782, 3, 2, 1, 0); \ - float32x4_t __ret_782; \ - __ret_782 = __noswap_vfmlalq_low_f16(__rev0_782, __rev1_782, (float16x8_t) {__noswap_vget_lane_f16(__rev2_782, __p3_782), __noswap_vget_lane_f16(__rev2_782, __p3_782), __noswap_vget_lane_f16(__rev2_782, __p3_782), __noswap_vget_lane_f16(__rev2_782, __p3_782), __noswap_vget_lane_f16(__rev2_782, __p3_782), __noswap_vget_lane_f16(__rev2_782, __p3_782), __noswap_vget_lane_f16(__rev2_782, __p3_782), __noswap_vget_lane_f16(__rev2_782, __p3_782)}); \ - __ret_782 = __builtin_shufflevector(__ret_782, __ret_782, 3, 2, 1, 0); \ - __ret_782; \ +#define vfmlalq_lane_low_f16(__p0_874, __p1_874, __p2_874, __p3_874) __extension__ ({ \ + float32x4_t __s0_874 = __p0_874; \ + float16x8_t __s1_874 = __p1_874; \ + float16x4_t __s2_874 = __p2_874; \ + float32x4_t __rev0_874; __rev0_874 = __builtin_shufflevector(__s0_874, __s0_874, 3, 2, 1, 0); \ + float16x8_t __rev1_874; __rev1_874 = __builtin_shufflevector(__s1_874, __s1_874, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev2_874; __rev2_874 = __builtin_shufflevector(__s2_874, __s2_874, 3, 2, 1, 0); \ + float32x4_t __ret_874; \ + __ret_874 = __noswap_vfmlalq_low_f16(__rev0_874, __rev1_874, (float16x8_t) {__noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874), __noswap_vget_lane_f16(__rev2_874, __p3_874)}); \ + __ret_874 = __builtin_shufflevector(__ret_874, __ret_874, 3, 2, 1, 0); \ + __ret_874; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlal_lane_low_f16(__p0_783, __p1_783, __p2_783, __p3_783) __extension__ ({ \ - float32x2_t __s0_783 = __p0_783; \ - float16x4_t __s1_783 = __p1_783; \ - float16x4_t __s2_783 = __p2_783; \ - float32x2_t __ret_783; \ - __ret_783 = vfmlal_low_f16(__s0_783, __s1_783, (float16x4_t) {vget_lane_f16(__s2_783, __p3_783), 
vget_lane_f16(__s2_783, __p3_783), vget_lane_f16(__s2_783, __p3_783), vget_lane_f16(__s2_783, __p3_783)}); \ - __ret_783; \ +#define vfmlal_lane_low_f16(__p0_875, __p1_875, __p2_875, __p3_875) __extension__ ({ \ + float32x2_t __s0_875 = __p0_875; \ + float16x4_t __s1_875 = __p1_875; \ + float16x4_t __s2_875 = __p2_875; \ + float32x2_t __ret_875; \ + __ret_875 = vfmlal_low_f16(__s0_875, __s1_875, (float16x4_t) {vget_lane_f16(__s2_875, __p3_875), vget_lane_f16(__s2_875, __p3_875), vget_lane_f16(__s2_875, __p3_875), vget_lane_f16(__s2_875, __p3_875)}); \ + __ret_875; \ }) #else -#define vfmlal_lane_low_f16(__p0_784, __p1_784, __p2_784, __p3_784) __extension__ ({ \ - float32x2_t __s0_784 = __p0_784; \ - float16x4_t __s1_784 = __p1_784; \ - float16x4_t __s2_784 = __p2_784; \ - float32x2_t __rev0_784; __rev0_784 = __builtin_shufflevector(__s0_784, __s0_784, 1, 0); \ - float16x4_t __rev1_784; __rev1_784 = __builtin_shufflevector(__s1_784, __s1_784, 3, 2, 1, 0); \ - float16x4_t __rev2_784; __rev2_784 = __builtin_shufflevector(__s2_784, __s2_784, 3, 2, 1, 0); \ - float32x2_t __ret_784; \ - __ret_784 = __noswap_vfmlal_low_f16(__rev0_784, __rev1_784, (float16x4_t) {__noswap_vget_lane_f16(__rev2_784, __p3_784), __noswap_vget_lane_f16(__rev2_784, __p3_784), __noswap_vget_lane_f16(__rev2_784, __p3_784), __noswap_vget_lane_f16(__rev2_784, __p3_784)}); \ - __ret_784 = __builtin_shufflevector(__ret_784, __ret_784, 1, 0); \ - __ret_784; \ +#define vfmlal_lane_low_f16(__p0_876, __p1_876, __p2_876, __p3_876) __extension__ ({ \ + float32x2_t __s0_876 = __p0_876; \ + float16x4_t __s1_876 = __p1_876; \ + float16x4_t __s2_876 = __p2_876; \ + float32x2_t __rev0_876; __rev0_876 = __builtin_shufflevector(__s0_876, __s0_876, 1, 0); \ + float16x4_t __rev1_876; __rev1_876 = __builtin_shufflevector(__s1_876, __s1_876, 3, 2, 1, 0); \ + float16x4_t __rev2_876; __rev2_876 = __builtin_shufflevector(__s2_876, __s2_876, 3, 2, 1, 0); \ + float32x2_t __ret_876; \ + __ret_876 = __noswap_vfmlal_low_f16(__rev0_876, __rev1_876, (float16x4_t) {__noswap_vget_lane_f16(__rev2_876, __p3_876), __noswap_vget_lane_f16(__rev2_876, __p3_876), __noswap_vget_lane_f16(__rev2_876, __p3_876), __noswap_vget_lane_f16(__rev2_876, __p3_876)}); \ + __ret_876 = __builtin_shufflevector(__ret_876, __ret_876, 1, 0); \ + __ret_876; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlalq_laneq_high_f16(__p0_785, __p1_785, __p2_785, __p3_785) __extension__ ({ \ - float32x4_t __s0_785 = __p0_785; \ - float16x8_t __s1_785 = __p1_785; \ - float16x8_t __s2_785 = __p2_785; \ - float32x4_t __ret_785; \ - __ret_785 = vfmlalq_high_f16(__s0_785, __s1_785, (float16x8_t) {vgetq_lane_f16(__s2_785, __p3_785), vgetq_lane_f16(__s2_785, __p3_785), vgetq_lane_f16(__s2_785, __p3_785), vgetq_lane_f16(__s2_785, __p3_785), vgetq_lane_f16(__s2_785, __p3_785), vgetq_lane_f16(__s2_785, __p3_785), vgetq_lane_f16(__s2_785, __p3_785), vgetq_lane_f16(__s2_785, __p3_785)}); \ - __ret_785; \ +#define vfmlalq_laneq_high_f16(__p0_877, __p1_877, __p2_877, __p3_877) __extension__ ({ \ + float32x4_t __s0_877 = __p0_877; \ + float16x8_t __s1_877 = __p1_877; \ + float16x8_t __s2_877 = __p2_877; \ + float32x4_t __ret_877; \ + __ret_877 = vfmlalq_high_f16(__s0_877, __s1_877, (float16x8_t) {vgetq_lane_f16(__s2_877, __p3_877), vgetq_lane_f16(__s2_877, __p3_877), vgetq_lane_f16(__s2_877, __p3_877), vgetq_lane_f16(__s2_877, __p3_877), vgetq_lane_f16(__s2_877, __p3_877), vgetq_lane_f16(__s2_877, __p3_877), vgetq_lane_f16(__s2_877, __p3_877), vgetq_lane_f16(__s2_877, __p3_877)}); \ + __ret_877; \ }) 
#else -#define vfmlalq_laneq_high_f16(__p0_786, __p1_786, __p2_786, __p3_786) __extension__ ({ \ - float32x4_t __s0_786 = __p0_786; \ - float16x8_t __s1_786 = __p1_786; \ - float16x8_t __s2_786 = __p2_786; \ - float32x4_t __rev0_786; __rev0_786 = __builtin_shufflevector(__s0_786, __s0_786, 3, 2, 1, 0); \ - float16x8_t __rev1_786; __rev1_786 = __builtin_shufflevector(__s1_786, __s1_786, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev2_786; __rev2_786 = __builtin_shufflevector(__s2_786, __s2_786, 7, 6, 5, 4, 3, 2, 1, 0); \ - float32x4_t __ret_786; \ - __ret_786 = __noswap_vfmlalq_high_f16(__rev0_786, __rev1_786, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_786, __p3_786), __noswap_vgetq_lane_f16(__rev2_786, __p3_786), __noswap_vgetq_lane_f16(__rev2_786, __p3_786), __noswap_vgetq_lane_f16(__rev2_786, __p3_786), __noswap_vgetq_lane_f16(__rev2_786, __p3_786), __noswap_vgetq_lane_f16(__rev2_786, __p3_786), __noswap_vgetq_lane_f16(__rev2_786, __p3_786), __noswap_vgetq_lane_f16(__rev2_786, __p3_786)}); \ - __ret_786 = __builtin_shufflevector(__ret_786, __ret_786, 3, 2, 1, 0); \ - __ret_786; \ +#define vfmlalq_laneq_high_f16(__p0_878, __p1_878, __p2_878, __p3_878) __extension__ ({ \ + float32x4_t __s0_878 = __p0_878; \ + float16x8_t __s1_878 = __p1_878; \ + float16x8_t __s2_878 = __p2_878; \ + float32x4_t __rev0_878; __rev0_878 = __builtin_shufflevector(__s0_878, __s0_878, 3, 2, 1, 0); \ + float16x8_t __rev1_878; __rev1_878 = __builtin_shufflevector(__s1_878, __s1_878, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev2_878; __rev2_878 = __builtin_shufflevector(__s2_878, __s2_878, 7, 6, 5, 4, 3, 2, 1, 0); \ + float32x4_t __ret_878; \ + __ret_878 = __noswap_vfmlalq_high_f16(__rev0_878, __rev1_878, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_878, __p3_878), __noswap_vgetq_lane_f16(__rev2_878, __p3_878), __noswap_vgetq_lane_f16(__rev2_878, __p3_878), __noswap_vgetq_lane_f16(__rev2_878, __p3_878), __noswap_vgetq_lane_f16(__rev2_878, __p3_878), __noswap_vgetq_lane_f16(__rev2_878, __p3_878), __noswap_vgetq_lane_f16(__rev2_878, __p3_878), __noswap_vgetq_lane_f16(__rev2_878, __p3_878)}); \ + __ret_878 = __builtin_shufflevector(__ret_878, __ret_878, 3, 2, 1, 0); \ + __ret_878; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlal_laneq_high_f16(__p0_787, __p1_787, __p2_787, __p3_787) __extension__ ({ \ - float32x2_t __s0_787 = __p0_787; \ - float16x4_t __s1_787 = __p1_787; \ - float16x8_t __s2_787 = __p2_787; \ - float32x2_t __ret_787; \ - __ret_787 = vfmlal_high_f16(__s0_787, __s1_787, (float16x4_t) {vgetq_lane_f16(__s2_787, __p3_787), vgetq_lane_f16(__s2_787, __p3_787), vgetq_lane_f16(__s2_787, __p3_787), vgetq_lane_f16(__s2_787, __p3_787)}); \ - __ret_787; \ +#define vfmlal_laneq_high_f16(__p0_879, __p1_879, __p2_879, __p3_879) __extension__ ({ \ + float32x2_t __s0_879 = __p0_879; \ + float16x4_t __s1_879 = __p1_879; \ + float16x8_t __s2_879 = __p2_879; \ + float32x2_t __ret_879; \ + __ret_879 = vfmlal_high_f16(__s0_879, __s1_879, (float16x4_t) {vgetq_lane_f16(__s2_879, __p3_879), vgetq_lane_f16(__s2_879, __p3_879), vgetq_lane_f16(__s2_879, __p3_879), vgetq_lane_f16(__s2_879, __p3_879)}); \ + __ret_879; \ }) #else -#define vfmlal_laneq_high_f16(__p0_788, __p1_788, __p2_788, __p3_788) __extension__ ({ \ - float32x2_t __s0_788 = __p0_788; \ - float16x4_t __s1_788 = __p1_788; \ - float16x8_t __s2_788 = __p2_788; \ - float32x2_t __rev0_788; __rev0_788 = __builtin_shufflevector(__s0_788, __s0_788, 1, 0); \ - float16x4_t __rev1_788; __rev1_788 = __builtin_shufflevector(__s1_788, __s1_788, 3, 2, 1, 0); \ - 
float16x8_t __rev2_788; __rev2_788 = __builtin_shufflevector(__s2_788, __s2_788, 7, 6, 5, 4, 3, 2, 1, 0); \ - float32x2_t __ret_788; \ - __ret_788 = __noswap_vfmlal_high_f16(__rev0_788, __rev1_788, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_788, __p3_788), __noswap_vgetq_lane_f16(__rev2_788, __p3_788), __noswap_vgetq_lane_f16(__rev2_788, __p3_788), __noswap_vgetq_lane_f16(__rev2_788, __p3_788)}); \ - __ret_788 = __builtin_shufflevector(__ret_788, __ret_788, 1, 0); \ - __ret_788; \ +#define vfmlal_laneq_high_f16(__p0_880, __p1_880, __p2_880, __p3_880) __extension__ ({ \ + float32x2_t __s0_880 = __p0_880; \ + float16x4_t __s1_880 = __p1_880; \ + float16x8_t __s2_880 = __p2_880; \ + float32x2_t __rev0_880; __rev0_880 = __builtin_shufflevector(__s0_880, __s0_880, 1, 0); \ + float16x4_t __rev1_880; __rev1_880 = __builtin_shufflevector(__s1_880, __s1_880, 3, 2, 1, 0); \ + float16x8_t __rev2_880; __rev2_880 = __builtin_shufflevector(__s2_880, __s2_880, 7, 6, 5, 4, 3, 2, 1, 0); \ + float32x2_t __ret_880; \ + __ret_880 = __noswap_vfmlal_high_f16(__rev0_880, __rev1_880, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_880, __p3_880), __noswap_vgetq_lane_f16(__rev2_880, __p3_880), __noswap_vgetq_lane_f16(__rev2_880, __p3_880), __noswap_vgetq_lane_f16(__rev2_880, __p3_880)}); \ + __ret_880 = __builtin_shufflevector(__ret_880, __ret_880, 1, 0); \ + __ret_880; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlalq_laneq_low_f16(__p0_789, __p1_789, __p2_789, __p3_789) __extension__ ({ \ - float32x4_t __s0_789 = __p0_789; \ - float16x8_t __s1_789 = __p1_789; \ - float16x8_t __s2_789 = __p2_789; \ - float32x4_t __ret_789; \ - __ret_789 = vfmlalq_low_f16(__s0_789, __s1_789, (float16x8_t) {vgetq_lane_f16(__s2_789, __p3_789), vgetq_lane_f16(__s2_789, __p3_789), vgetq_lane_f16(__s2_789, __p3_789), vgetq_lane_f16(__s2_789, __p3_789), vgetq_lane_f16(__s2_789, __p3_789), vgetq_lane_f16(__s2_789, __p3_789), vgetq_lane_f16(__s2_789, __p3_789), vgetq_lane_f16(__s2_789, __p3_789)}); \ - __ret_789; \ +#define vfmlalq_laneq_low_f16(__p0_881, __p1_881, __p2_881, __p3_881) __extension__ ({ \ + float32x4_t __s0_881 = __p0_881; \ + float16x8_t __s1_881 = __p1_881; \ + float16x8_t __s2_881 = __p2_881; \ + float32x4_t __ret_881; \ + __ret_881 = vfmlalq_low_f16(__s0_881, __s1_881, (float16x8_t) {vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881), vgetq_lane_f16(__s2_881, __p3_881)}); \ + __ret_881; \ }) #else -#define vfmlalq_laneq_low_f16(__p0_790, __p1_790, __p2_790, __p3_790) __extension__ ({ \ - float32x4_t __s0_790 = __p0_790; \ - float16x8_t __s1_790 = __p1_790; \ - float16x8_t __s2_790 = __p2_790; \ - float32x4_t __rev0_790; __rev0_790 = __builtin_shufflevector(__s0_790, __s0_790, 3, 2, 1, 0); \ - float16x8_t __rev1_790; __rev1_790 = __builtin_shufflevector(__s1_790, __s1_790, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev2_790; __rev2_790 = __builtin_shufflevector(__s2_790, __s2_790, 7, 6, 5, 4, 3, 2, 1, 0); \ - float32x4_t __ret_790; \ - __ret_790 = __noswap_vfmlalq_low_f16(__rev0_790, __rev1_790, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_790, __p3_790), __noswap_vgetq_lane_f16(__rev2_790, __p3_790), __noswap_vgetq_lane_f16(__rev2_790, __p3_790), __noswap_vgetq_lane_f16(__rev2_790, __p3_790), __noswap_vgetq_lane_f16(__rev2_790, __p3_790), __noswap_vgetq_lane_f16(__rev2_790, __p3_790), 
__noswap_vgetq_lane_f16(__rev2_790, __p3_790), __noswap_vgetq_lane_f16(__rev2_790, __p3_790)}); \ - __ret_790 = __builtin_shufflevector(__ret_790, __ret_790, 3, 2, 1, 0); \ - __ret_790; \ +#define vfmlalq_laneq_low_f16(__p0_882, __p1_882, __p2_882, __p3_882) __extension__ ({ \ + float32x4_t __s0_882 = __p0_882; \ + float16x8_t __s1_882 = __p1_882; \ + float16x8_t __s2_882 = __p2_882; \ + float32x4_t __rev0_882; __rev0_882 = __builtin_shufflevector(__s0_882, __s0_882, 3, 2, 1, 0); \ + float16x8_t __rev1_882; __rev1_882 = __builtin_shufflevector(__s1_882, __s1_882, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev2_882; __rev2_882 = __builtin_shufflevector(__s2_882, __s2_882, 7, 6, 5, 4, 3, 2, 1, 0); \ + float32x4_t __ret_882; \ + __ret_882 = __noswap_vfmlalq_low_f16(__rev0_882, __rev1_882, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, __p3_882), __noswap_vgetq_lane_f16(__rev2_882, __p3_882)}); \ + __ret_882 = __builtin_shufflevector(__ret_882, __ret_882, 3, 2, 1, 0); \ + __ret_882; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlal_laneq_low_f16(__p0_791, __p1_791, __p2_791, __p3_791) __extension__ ({ \ - float32x2_t __s0_791 = __p0_791; \ - float16x4_t __s1_791 = __p1_791; \ - float16x8_t __s2_791 = __p2_791; \ - float32x2_t __ret_791; \ - __ret_791 = vfmlal_low_f16(__s0_791, __s1_791, (float16x4_t) {vgetq_lane_f16(__s2_791, __p3_791), vgetq_lane_f16(__s2_791, __p3_791), vgetq_lane_f16(__s2_791, __p3_791), vgetq_lane_f16(__s2_791, __p3_791)}); \ - __ret_791; \ +#define vfmlal_laneq_low_f16(__p0_883, __p1_883, __p2_883, __p3_883) __extension__ ({ \ + float32x2_t __s0_883 = __p0_883; \ + float16x4_t __s1_883 = __p1_883; \ + float16x8_t __s2_883 = __p2_883; \ + float32x2_t __ret_883; \ + __ret_883 = vfmlal_low_f16(__s0_883, __s1_883, (float16x4_t) {vgetq_lane_f16(__s2_883, __p3_883), vgetq_lane_f16(__s2_883, __p3_883), vgetq_lane_f16(__s2_883, __p3_883), vgetq_lane_f16(__s2_883, __p3_883)}); \ + __ret_883; \ }) #else -#define vfmlal_laneq_low_f16(__p0_792, __p1_792, __p2_792, __p3_792) __extension__ ({ \ - float32x2_t __s0_792 = __p0_792; \ - float16x4_t __s1_792 = __p1_792; \ - float16x8_t __s2_792 = __p2_792; \ - float32x2_t __rev0_792; __rev0_792 = __builtin_shufflevector(__s0_792, __s0_792, 1, 0); \ - float16x4_t __rev1_792; __rev1_792 = __builtin_shufflevector(__s1_792, __s1_792, 3, 2, 1, 0); \ - float16x8_t __rev2_792; __rev2_792 = __builtin_shufflevector(__s2_792, __s2_792, 7, 6, 5, 4, 3, 2, 1, 0); \ - float32x2_t __ret_792; \ - __ret_792 = __noswap_vfmlal_low_f16(__rev0_792, __rev1_792, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_792, __p3_792), __noswap_vgetq_lane_f16(__rev2_792, __p3_792), __noswap_vgetq_lane_f16(__rev2_792, __p3_792), __noswap_vgetq_lane_f16(__rev2_792, __p3_792)}); \ - __ret_792 = __builtin_shufflevector(__ret_792, __ret_792, 1, 0); \ - __ret_792; \ +#define vfmlal_laneq_low_f16(__p0_884, __p1_884, __p2_884, __p3_884) __extension__ ({ \ + float32x2_t __s0_884 = __p0_884; \ + float16x4_t __s1_884 = __p1_884; \ + float16x8_t __s2_884 = __p2_884; \ + float32x2_t __rev0_884; __rev0_884 = __builtin_shufflevector(__s0_884, __s0_884, 1, 0); \ + float16x4_t __rev1_884; __rev1_884 = __builtin_shufflevector(__s1_884, __s1_884, 3, 2, 1, 0); \ + float16x8_t __rev2_884; __rev2_884 = 
__builtin_shufflevector(__s2_884, __s2_884, 7, 6, 5, 4, 3, 2, 1, 0); \ + float32x2_t __ret_884; \ + __ret_884 = __noswap_vfmlal_low_f16(__rev0_884, __rev1_884, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_884, __p3_884), __noswap_vgetq_lane_f16(__rev2_884, __p3_884), __noswap_vgetq_lane_f16(__rev2_884, __p3_884), __noswap_vgetq_lane_f16(__rev2_884, __p3_884)}); \ + __ret_884 = __builtin_shufflevector(__ret_884, __ret_884, 1, 0); \ + __ret_884; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlslq_lane_high_f16(__p0_793, __p1_793, __p2_793, __p3_793) __extension__ ({ \ - float32x4_t __s0_793 = __p0_793; \ - float16x8_t __s1_793 = __p1_793; \ - float16x4_t __s2_793 = __p2_793; \ - float32x4_t __ret_793; \ - __ret_793 = vfmlslq_high_f16(__s0_793, __s1_793, (float16x8_t) {vget_lane_f16(__s2_793, __p3_793), vget_lane_f16(__s2_793, __p3_793), vget_lane_f16(__s2_793, __p3_793), vget_lane_f16(__s2_793, __p3_793), vget_lane_f16(__s2_793, __p3_793), vget_lane_f16(__s2_793, __p3_793), vget_lane_f16(__s2_793, __p3_793), vget_lane_f16(__s2_793, __p3_793)}); \ - __ret_793; \ +#define vfmlslq_lane_high_f16(__p0_885, __p1_885, __p2_885, __p3_885) __extension__ ({ \ + float32x4_t __s0_885 = __p0_885; \ + float16x8_t __s1_885 = __p1_885; \ + float16x4_t __s2_885 = __p2_885; \ + float32x4_t __ret_885; \ + __ret_885 = vfmlslq_high_f16(__s0_885, __s1_885, (float16x8_t) {vget_lane_f16(__s2_885, __p3_885), vget_lane_f16(__s2_885, __p3_885), vget_lane_f16(__s2_885, __p3_885), vget_lane_f16(__s2_885, __p3_885), vget_lane_f16(__s2_885, __p3_885), vget_lane_f16(__s2_885, __p3_885), vget_lane_f16(__s2_885, __p3_885), vget_lane_f16(__s2_885, __p3_885)}); \ + __ret_885; \ }) #else -#define vfmlslq_lane_high_f16(__p0_794, __p1_794, __p2_794, __p3_794) __extension__ ({ \ - float32x4_t __s0_794 = __p0_794; \ - float16x8_t __s1_794 = __p1_794; \ - float16x4_t __s2_794 = __p2_794; \ - float32x4_t __rev0_794; __rev0_794 = __builtin_shufflevector(__s0_794, __s0_794, 3, 2, 1, 0); \ - float16x8_t __rev1_794; __rev1_794 = __builtin_shufflevector(__s1_794, __s1_794, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev2_794; __rev2_794 = __builtin_shufflevector(__s2_794, __s2_794, 3, 2, 1, 0); \ - float32x4_t __ret_794; \ - __ret_794 = __noswap_vfmlslq_high_f16(__rev0_794, __rev1_794, (float16x8_t) {__noswap_vget_lane_f16(__rev2_794, __p3_794), __noswap_vget_lane_f16(__rev2_794, __p3_794), __noswap_vget_lane_f16(__rev2_794, __p3_794), __noswap_vget_lane_f16(__rev2_794, __p3_794), __noswap_vget_lane_f16(__rev2_794, __p3_794), __noswap_vget_lane_f16(__rev2_794, __p3_794), __noswap_vget_lane_f16(__rev2_794, __p3_794), __noswap_vget_lane_f16(__rev2_794, __p3_794)}); \ - __ret_794 = __builtin_shufflevector(__ret_794, __ret_794, 3, 2, 1, 0); \ - __ret_794; \ +#define vfmlslq_lane_high_f16(__p0_886, __p1_886, __p2_886, __p3_886) __extension__ ({ \ + float32x4_t __s0_886 = __p0_886; \ + float16x8_t __s1_886 = __p1_886; \ + float16x4_t __s2_886 = __p2_886; \ + float32x4_t __rev0_886; __rev0_886 = __builtin_shufflevector(__s0_886, __s0_886, 3, 2, 1, 0); \ + float16x8_t __rev1_886; __rev1_886 = __builtin_shufflevector(__s1_886, __s1_886, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev2_886; __rev2_886 = __builtin_shufflevector(__s2_886, __s2_886, 3, 2, 1, 0); \ + float32x4_t __ret_886; \ + __ret_886 = __noswap_vfmlslq_high_f16(__rev0_886, __rev1_886, (float16x8_t) {__noswap_vget_lane_f16(__rev2_886, __p3_886), __noswap_vget_lane_f16(__rev2_886, __p3_886), __noswap_vget_lane_f16(__rev2_886, __p3_886), __noswap_vget_lane_f16(__rev2_886, __p3_886), 
__noswap_vget_lane_f16(__rev2_886, __p3_886), __noswap_vget_lane_f16(__rev2_886, __p3_886), __noswap_vget_lane_f16(__rev2_886, __p3_886), __noswap_vget_lane_f16(__rev2_886, __p3_886)}); \ + __ret_886 = __builtin_shufflevector(__ret_886, __ret_886, 3, 2, 1, 0); \ + __ret_886; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlsl_lane_high_f16(__p0_795, __p1_795, __p2_795, __p3_795) __extension__ ({ \ - float32x2_t __s0_795 = __p0_795; \ - float16x4_t __s1_795 = __p1_795; \ - float16x4_t __s2_795 = __p2_795; \ - float32x2_t __ret_795; \ - __ret_795 = vfmlsl_high_f16(__s0_795, __s1_795, (float16x4_t) {vget_lane_f16(__s2_795, __p3_795), vget_lane_f16(__s2_795, __p3_795), vget_lane_f16(__s2_795, __p3_795), vget_lane_f16(__s2_795, __p3_795)}); \ - __ret_795; \ +#define vfmlsl_lane_high_f16(__p0_887, __p1_887, __p2_887, __p3_887) __extension__ ({ \ + float32x2_t __s0_887 = __p0_887; \ + float16x4_t __s1_887 = __p1_887; \ + float16x4_t __s2_887 = __p2_887; \ + float32x2_t __ret_887; \ + __ret_887 = vfmlsl_high_f16(__s0_887, __s1_887, (float16x4_t) {vget_lane_f16(__s2_887, __p3_887), vget_lane_f16(__s2_887, __p3_887), vget_lane_f16(__s2_887, __p3_887), vget_lane_f16(__s2_887, __p3_887)}); \ + __ret_887; \ }) #else -#define vfmlsl_lane_high_f16(__p0_796, __p1_796, __p2_796, __p3_796) __extension__ ({ \ - float32x2_t __s0_796 = __p0_796; \ - float16x4_t __s1_796 = __p1_796; \ - float16x4_t __s2_796 = __p2_796; \ - float32x2_t __rev0_796; __rev0_796 = __builtin_shufflevector(__s0_796, __s0_796, 1, 0); \ - float16x4_t __rev1_796; __rev1_796 = __builtin_shufflevector(__s1_796, __s1_796, 3, 2, 1, 0); \ - float16x4_t __rev2_796; __rev2_796 = __builtin_shufflevector(__s2_796, __s2_796, 3, 2, 1, 0); \ - float32x2_t __ret_796; \ - __ret_796 = __noswap_vfmlsl_high_f16(__rev0_796, __rev1_796, (float16x4_t) {__noswap_vget_lane_f16(__rev2_796, __p3_796), __noswap_vget_lane_f16(__rev2_796, __p3_796), __noswap_vget_lane_f16(__rev2_796, __p3_796), __noswap_vget_lane_f16(__rev2_796, __p3_796)}); \ - __ret_796 = __builtin_shufflevector(__ret_796, __ret_796, 1, 0); \ - __ret_796; \ +#define vfmlsl_lane_high_f16(__p0_888, __p1_888, __p2_888, __p3_888) __extension__ ({ \ + float32x2_t __s0_888 = __p0_888; \ + float16x4_t __s1_888 = __p1_888; \ + float16x4_t __s2_888 = __p2_888; \ + float32x2_t __rev0_888; __rev0_888 = __builtin_shufflevector(__s0_888, __s0_888, 1, 0); \ + float16x4_t __rev1_888; __rev1_888 = __builtin_shufflevector(__s1_888, __s1_888, 3, 2, 1, 0); \ + float16x4_t __rev2_888; __rev2_888 = __builtin_shufflevector(__s2_888, __s2_888, 3, 2, 1, 0); \ + float32x2_t __ret_888; \ + __ret_888 = __noswap_vfmlsl_high_f16(__rev0_888, __rev1_888, (float16x4_t) {__noswap_vget_lane_f16(__rev2_888, __p3_888), __noswap_vget_lane_f16(__rev2_888, __p3_888), __noswap_vget_lane_f16(__rev2_888, __p3_888), __noswap_vget_lane_f16(__rev2_888, __p3_888)}); \ + __ret_888 = __builtin_shufflevector(__ret_888, __ret_888, 1, 0); \ + __ret_888; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlslq_lane_low_f16(__p0_797, __p1_797, __p2_797, __p3_797) __extension__ ({ \ - float32x4_t __s0_797 = __p0_797; \ - float16x8_t __s1_797 = __p1_797; \ - float16x4_t __s2_797 = __p2_797; \ - float32x4_t __ret_797; \ - __ret_797 = vfmlslq_low_f16(__s0_797, __s1_797, (float16x8_t) {vget_lane_f16(__s2_797, __p3_797), vget_lane_f16(__s2_797, __p3_797), vget_lane_f16(__s2_797, __p3_797), vget_lane_f16(__s2_797, __p3_797), vget_lane_f16(__s2_797, __p3_797), vget_lane_f16(__s2_797, __p3_797), vget_lane_f16(__s2_797, __p3_797), vget_lane_f16(__s2_797, 
__p3_797)}); \ - __ret_797; \ +#define vfmlslq_lane_low_f16(__p0_889, __p1_889, __p2_889, __p3_889) __extension__ ({ \ + float32x4_t __s0_889 = __p0_889; \ + float16x8_t __s1_889 = __p1_889; \ + float16x4_t __s2_889 = __p2_889; \ + float32x4_t __ret_889; \ + __ret_889 = vfmlslq_low_f16(__s0_889, __s1_889, (float16x8_t) {vget_lane_f16(__s2_889, __p3_889), vget_lane_f16(__s2_889, __p3_889), vget_lane_f16(__s2_889, __p3_889), vget_lane_f16(__s2_889, __p3_889), vget_lane_f16(__s2_889, __p3_889), vget_lane_f16(__s2_889, __p3_889), vget_lane_f16(__s2_889, __p3_889), vget_lane_f16(__s2_889, __p3_889)}); \ + __ret_889; \ }) #else -#define vfmlslq_lane_low_f16(__p0_798, __p1_798, __p2_798, __p3_798) __extension__ ({ \ - float32x4_t __s0_798 = __p0_798; \ - float16x8_t __s1_798 = __p1_798; \ - float16x4_t __s2_798 = __p2_798; \ - float32x4_t __rev0_798; __rev0_798 = __builtin_shufflevector(__s0_798, __s0_798, 3, 2, 1, 0); \ - float16x8_t __rev1_798; __rev1_798 = __builtin_shufflevector(__s1_798, __s1_798, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x4_t __rev2_798; __rev2_798 = __builtin_shufflevector(__s2_798, __s2_798, 3, 2, 1, 0); \ - float32x4_t __ret_798; \ - __ret_798 = __noswap_vfmlslq_low_f16(__rev0_798, __rev1_798, (float16x8_t) {__noswap_vget_lane_f16(__rev2_798, __p3_798), __noswap_vget_lane_f16(__rev2_798, __p3_798), __noswap_vget_lane_f16(__rev2_798, __p3_798), __noswap_vget_lane_f16(__rev2_798, __p3_798), __noswap_vget_lane_f16(__rev2_798, __p3_798), __noswap_vget_lane_f16(__rev2_798, __p3_798), __noswap_vget_lane_f16(__rev2_798, __p3_798), __noswap_vget_lane_f16(__rev2_798, __p3_798)}); \ - __ret_798 = __builtin_shufflevector(__ret_798, __ret_798, 3, 2, 1, 0); \ - __ret_798; \ +#define vfmlslq_lane_low_f16(__p0_890, __p1_890, __p2_890, __p3_890) __extension__ ({ \ + float32x4_t __s0_890 = __p0_890; \ + float16x8_t __s1_890 = __p1_890; \ + float16x4_t __s2_890 = __p2_890; \ + float32x4_t __rev0_890; __rev0_890 = __builtin_shufflevector(__s0_890, __s0_890, 3, 2, 1, 0); \ + float16x8_t __rev1_890; __rev1_890 = __builtin_shufflevector(__s1_890, __s1_890, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x4_t __rev2_890; __rev2_890 = __builtin_shufflevector(__s2_890, __s2_890, 3, 2, 1, 0); \ + float32x4_t __ret_890; \ + __ret_890 = __noswap_vfmlslq_low_f16(__rev0_890, __rev1_890, (float16x8_t) {__noswap_vget_lane_f16(__rev2_890, __p3_890), __noswap_vget_lane_f16(__rev2_890, __p3_890), __noswap_vget_lane_f16(__rev2_890, __p3_890), __noswap_vget_lane_f16(__rev2_890, __p3_890), __noswap_vget_lane_f16(__rev2_890, __p3_890), __noswap_vget_lane_f16(__rev2_890, __p3_890), __noswap_vget_lane_f16(__rev2_890, __p3_890), __noswap_vget_lane_f16(__rev2_890, __p3_890)}); \ + __ret_890 = __builtin_shufflevector(__ret_890, __ret_890, 3, 2, 1, 0); \ + __ret_890; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlsl_lane_low_f16(__p0_799, __p1_799, __p2_799, __p3_799) __extension__ ({ \ - float32x2_t __s0_799 = __p0_799; \ - float16x4_t __s1_799 = __p1_799; \ - float16x4_t __s2_799 = __p2_799; \ - float32x2_t __ret_799; \ - __ret_799 = vfmlsl_low_f16(__s0_799, __s1_799, (float16x4_t) {vget_lane_f16(__s2_799, __p3_799), vget_lane_f16(__s2_799, __p3_799), vget_lane_f16(__s2_799, __p3_799), vget_lane_f16(__s2_799, __p3_799)}); \ - __ret_799; \ +#define vfmlsl_lane_low_f16(__p0_891, __p1_891, __p2_891, __p3_891) __extension__ ({ \ + float32x2_t __s0_891 = __p0_891; \ + float16x4_t __s1_891 = __p1_891; \ + float16x4_t __s2_891 = __p2_891; \ + float32x2_t __ret_891; \ + __ret_891 = vfmlsl_low_f16(__s0_891, __s1_891, (float16x4_t) 
{vget_lane_f16(__s2_891, __p3_891), vget_lane_f16(__s2_891, __p3_891), vget_lane_f16(__s2_891, __p3_891), vget_lane_f16(__s2_891, __p3_891)}); \ + __ret_891; \ }) #else -#define vfmlsl_lane_low_f16(__p0_800, __p1_800, __p2_800, __p3_800) __extension__ ({ \ - float32x2_t __s0_800 = __p0_800; \ - float16x4_t __s1_800 = __p1_800; \ - float16x4_t __s2_800 = __p2_800; \ - float32x2_t __rev0_800; __rev0_800 = __builtin_shufflevector(__s0_800, __s0_800, 1, 0); \ - float16x4_t __rev1_800; __rev1_800 = __builtin_shufflevector(__s1_800, __s1_800, 3, 2, 1, 0); \ - float16x4_t __rev2_800; __rev2_800 = __builtin_shufflevector(__s2_800, __s2_800, 3, 2, 1, 0); \ - float32x2_t __ret_800; \ - __ret_800 = __noswap_vfmlsl_low_f16(__rev0_800, __rev1_800, (float16x4_t) {__noswap_vget_lane_f16(__rev2_800, __p3_800), __noswap_vget_lane_f16(__rev2_800, __p3_800), __noswap_vget_lane_f16(__rev2_800, __p3_800), __noswap_vget_lane_f16(__rev2_800, __p3_800)}); \ - __ret_800 = __builtin_shufflevector(__ret_800, __ret_800, 1, 0); \ - __ret_800; \ +#define vfmlsl_lane_low_f16(__p0_892, __p1_892, __p2_892, __p3_892) __extension__ ({ \ + float32x2_t __s0_892 = __p0_892; \ + float16x4_t __s1_892 = __p1_892; \ + float16x4_t __s2_892 = __p2_892; \ + float32x2_t __rev0_892; __rev0_892 = __builtin_shufflevector(__s0_892, __s0_892, 1, 0); \ + float16x4_t __rev1_892; __rev1_892 = __builtin_shufflevector(__s1_892, __s1_892, 3, 2, 1, 0); \ + float16x4_t __rev2_892; __rev2_892 = __builtin_shufflevector(__s2_892, __s2_892, 3, 2, 1, 0); \ + float32x2_t __ret_892; \ + __ret_892 = __noswap_vfmlsl_low_f16(__rev0_892, __rev1_892, (float16x4_t) {__noswap_vget_lane_f16(__rev2_892, __p3_892), __noswap_vget_lane_f16(__rev2_892, __p3_892), __noswap_vget_lane_f16(__rev2_892, __p3_892), __noswap_vget_lane_f16(__rev2_892, __p3_892)}); \ + __ret_892 = __builtin_shufflevector(__ret_892, __ret_892, 1, 0); \ + __ret_892; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlslq_laneq_high_f16(__p0_801, __p1_801, __p2_801, __p3_801) __extension__ ({ \ - float32x4_t __s0_801 = __p0_801; \ - float16x8_t __s1_801 = __p1_801; \ - float16x8_t __s2_801 = __p2_801; \ - float32x4_t __ret_801; \ - __ret_801 = vfmlslq_high_f16(__s0_801, __s1_801, (float16x8_t) {vgetq_lane_f16(__s2_801, __p3_801), vgetq_lane_f16(__s2_801, __p3_801), vgetq_lane_f16(__s2_801, __p3_801), vgetq_lane_f16(__s2_801, __p3_801), vgetq_lane_f16(__s2_801, __p3_801), vgetq_lane_f16(__s2_801, __p3_801), vgetq_lane_f16(__s2_801, __p3_801), vgetq_lane_f16(__s2_801, __p3_801)}); \ - __ret_801; \ +#define vfmlslq_laneq_high_f16(__p0_893, __p1_893, __p2_893, __p3_893) __extension__ ({ \ + float32x4_t __s0_893 = __p0_893; \ + float16x8_t __s1_893 = __p1_893; \ + float16x8_t __s2_893 = __p2_893; \ + float32x4_t __ret_893; \ + __ret_893 = vfmlslq_high_f16(__s0_893, __s1_893, (float16x8_t) {vgetq_lane_f16(__s2_893, __p3_893), vgetq_lane_f16(__s2_893, __p3_893), vgetq_lane_f16(__s2_893, __p3_893), vgetq_lane_f16(__s2_893, __p3_893), vgetq_lane_f16(__s2_893, __p3_893), vgetq_lane_f16(__s2_893, __p3_893), vgetq_lane_f16(__s2_893, __p3_893), vgetq_lane_f16(__s2_893, __p3_893)}); \ + __ret_893; \ }) #else -#define vfmlslq_laneq_high_f16(__p0_802, __p1_802, __p2_802, __p3_802) __extension__ ({ \ - float32x4_t __s0_802 = __p0_802; \ - float16x8_t __s1_802 = __p1_802; \ - float16x8_t __s2_802 = __p2_802; \ - float32x4_t __rev0_802; __rev0_802 = __builtin_shufflevector(__s0_802, __s0_802, 3, 2, 1, 0); \ - float16x8_t __rev1_802; __rev1_802 = __builtin_shufflevector(__s1_802, __s1_802, 7, 6, 5, 4, 3, 2, 1, 0); \ - 
float16x8_t __rev2_802; __rev2_802 = __builtin_shufflevector(__s2_802, __s2_802, 7, 6, 5, 4, 3, 2, 1, 0); \ - float32x4_t __ret_802; \ - __ret_802 = __noswap_vfmlslq_high_f16(__rev0_802, __rev1_802, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_802, __p3_802), __noswap_vgetq_lane_f16(__rev2_802, __p3_802), __noswap_vgetq_lane_f16(__rev2_802, __p3_802), __noswap_vgetq_lane_f16(__rev2_802, __p3_802), __noswap_vgetq_lane_f16(__rev2_802, __p3_802), __noswap_vgetq_lane_f16(__rev2_802, __p3_802), __noswap_vgetq_lane_f16(__rev2_802, __p3_802), __noswap_vgetq_lane_f16(__rev2_802, __p3_802)}); \ - __ret_802 = __builtin_shufflevector(__ret_802, __ret_802, 3, 2, 1, 0); \ - __ret_802; \ +#define vfmlslq_laneq_high_f16(__p0_894, __p1_894, __p2_894, __p3_894) __extension__ ({ \ + float32x4_t __s0_894 = __p0_894; \ + float16x8_t __s1_894 = __p1_894; \ + float16x8_t __s2_894 = __p2_894; \ + float32x4_t __rev0_894; __rev0_894 = __builtin_shufflevector(__s0_894, __s0_894, 3, 2, 1, 0); \ + float16x8_t __rev1_894; __rev1_894 = __builtin_shufflevector(__s1_894, __s1_894, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev2_894; __rev2_894 = __builtin_shufflevector(__s2_894, __s2_894, 7, 6, 5, 4, 3, 2, 1, 0); \ + float32x4_t __ret_894; \ + __ret_894 = __noswap_vfmlslq_high_f16(__rev0_894, __rev1_894, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_894, __p3_894), __noswap_vgetq_lane_f16(__rev2_894, __p3_894), __noswap_vgetq_lane_f16(__rev2_894, __p3_894), __noswap_vgetq_lane_f16(__rev2_894, __p3_894), __noswap_vgetq_lane_f16(__rev2_894, __p3_894), __noswap_vgetq_lane_f16(__rev2_894, __p3_894), __noswap_vgetq_lane_f16(__rev2_894, __p3_894), __noswap_vgetq_lane_f16(__rev2_894, __p3_894)}); \ + __ret_894 = __builtin_shufflevector(__ret_894, __ret_894, 3, 2, 1, 0); \ + __ret_894; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlsl_laneq_high_f16(__p0_803, __p1_803, __p2_803, __p3_803) __extension__ ({ \ - float32x2_t __s0_803 = __p0_803; \ - float16x4_t __s1_803 = __p1_803; \ - float16x8_t __s2_803 = __p2_803; \ - float32x2_t __ret_803; \ - __ret_803 = vfmlsl_high_f16(__s0_803, __s1_803, (float16x4_t) {vgetq_lane_f16(__s2_803, __p3_803), vgetq_lane_f16(__s2_803, __p3_803), vgetq_lane_f16(__s2_803, __p3_803), vgetq_lane_f16(__s2_803, __p3_803)}); \ - __ret_803; \ +#define vfmlsl_laneq_high_f16(__p0_895, __p1_895, __p2_895, __p3_895) __extension__ ({ \ + float32x2_t __s0_895 = __p0_895; \ + float16x4_t __s1_895 = __p1_895; \ + float16x8_t __s2_895 = __p2_895; \ + float32x2_t __ret_895; \ + __ret_895 = vfmlsl_high_f16(__s0_895, __s1_895, (float16x4_t) {vgetq_lane_f16(__s2_895, __p3_895), vgetq_lane_f16(__s2_895, __p3_895), vgetq_lane_f16(__s2_895, __p3_895), vgetq_lane_f16(__s2_895, __p3_895)}); \ + __ret_895; \ }) #else -#define vfmlsl_laneq_high_f16(__p0_804, __p1_804, __p2_804, __p3_804) __extension__ ({ \ - float32x2_t __s0_804 = __p0_804; \ - float16x4_t __s1_804 = __p1_804; \ - float16x8_t __s2_804 = __p2_804; \ - float32x2_t __rev0_804; __rev0_804 = __builtin_shufflevector(__s0_804, __s0_804, 1, 0); \ - float16x4_t __rev1_804; __rev1_804 = __builtin_shufflevector(__s1_804, __s1_804, 3, 2, 1, 0); \ - float16x8_t __rev2_804; __rev2_804 = __builtin_shufflevector(__s2_804, __s2_804, 7, 6, 5, 4, 3, 2, 1, 0); \ - float32x2_t __ret_804; \ - __ret_804 = __noswap_vfmlsl_high_f16(__rev0_804, __rev1_804, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_804, __p3_804), __noswap_vgetq_lane_f16(__rev2_804, __p3_804), __noswap_vgetq_lane_f16(__rev2_804, __p3_804), __noswap_vgetq_lane_f16(__rev2_804, __p3_804)}); \ - __ret_804 = 
__builtin_shufflevector(__ret_804, __ret_804, 1, 0); \ - __ret_804; \ +#define vfmlsl_laneq_high_f16(__p0_896, __p1_896, __p2_896, __p3_896) __extension__ ({ \ + float32x2_t __s0_896 = __p0_896; \ + float16x4_t __s1_896 = __p1_896; \ + float16x8_t __s2_896 = __p2_896; \ + float32x2_t __rev0_896; __rev0_896 = __builtin_shufflevector(__s0_896, __s0_896, 1, 0); \ + float16x4_t __rev1_896; __rev1_896 = __builtin_shufflevector(__s1_896, __s1_896, 3, 2, 1, 0); \ + float16x8_t __rev2_896; __rev2_896 = __builtin_shufflevector(__s2_896, __s2_896, 7, 6, 5, 4, 3, 2, 1, 0); \ + float32x2_t __ret_896; \ + __ret_896 = __noswap_vfmlsl_high_f16(__rev0_896, __rev1_896, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_896, __p3_896), __noswap_vgetq_lane_f16(__rev2_896, __p3_896), __noswap_vgetq_lane_f16(__rev2_896, __p3_896), __noswap_vgetq_lane_f16(__rev2_896, __p3_896)}); \ + __ret_896 = __builtin_shufflevector(__ret_896, __ret_896, 1, 0); \ + __ret_896; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlslq_laneq_low_f16(__p0_805, __p1_805, __p2_805, __p3_805) __extension__ ({ \ - float32x4_t __s0_805 = __p0_805; \ - float16x8_t __s1_805 = __p1_805; \ - float16x8_t __s2_805 = __p2_805; \ - float32x4_t __ret_805; \ - __ret_805 = vfmlslq_low_f16(__s0_805, __s1_805, (float16x8_t) {vgetq_lane_f16(__s2_805, __p3_805), vgetq_lane_f16(__s2_805, __p3_805), vgetq_lane_f16(__s2_805, __p3_805), vgetq_lane_f16(__s2_805, __p3_805), vgetq_lane_f16(__s2_805, __p3_805), vgetq_lane_f16(__s2_805, __p3_805), vgetq_lane_f16(__s2_805, __p3_805), vgetq_lane_f16(__s2_805, __p3_805)}); \ - __ret_805; \ +#define vfmlslq_laneq_low_f16(__p0_897, __p1_897, __p2_897, __p3_897) __extension__ ({ \ + float32x4_t __s0_897 = __p0_897; \ + float16x8_t __s1_897 = __p1_897; \ + float16x8_t __s2_897 = __p2_897; \ + float32x4_t __ret_897; \ + __ret_897 = vfmlslq_low_f16(__s0_897, __s1_897, (float16x8_t) {vgetq_lane_f16(__s2_897, __p3_897), vgetq_lane_f16(__s2_897, __p3_897), vgetq_lane_f16(__s2_897, __p3_897), vgetq_lane_f16(__s2_897, __p3_897), vgetq_lane_f16(__s2_897, __p3_897), vgetq_lane_f16(__s2_897, __p3_897), vgetq_lane_f16(__s2_897, __p3_897), vgetq_lane_f16(__s2_897, __p3_897)}); \ + __ret_897; \ }) #else -#define vfmlslq_laneq_low_f16(__p0_806, __p1_806, __p2_806, __p3_806) __extension__ ({ \ - float32x4_t __s0_806 = __p0_806; \ - float16x8_t __s1_806 = __p1_806; \ - float16x8_t __s2_806 = __p2_806; \ - float32x4_t __rev0_806; __rev0_806 = __builtin_shufflevector(__s0_806, __s0_806, 3, 2, 1, 0); \ - float16x8_t __rev1_806; __rev1_806 = __builtin_shufflevector(__s1_806, __s1_806, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16x8_t __rev2_806; __rev2_806 = __builtin_shufflevector(__s2_806, __s2_806, 7, 6, 5, 4, 3, 2, 1, 0); \ - float32x4_t __ret_806; \ - __ret_806 = __noswap_vfmlslq_low_f16(__rev0_806, __rev1_806, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_806, __p3_806), __noswap_vgetq_lane_f16(__rev2_806, __p3_806), __noswap_vgetq_lane_f16(__rev2_806, __p3_806), __noswap_vgetq_lane_f16(__rev2_806, __p3_806), __noswap_vgetq_lane_f16(__rev2_806, __p3_806), __noswap_vgetq_lane_f16(__rev2_806, __p3_806), __noswap_vgetq_lane_f16(__rev2_806, __p3_806), __noswap_vgetq_lane_f16(__rev2_806, __p3_806)}); \ - __ret_806 = __builtin_shufflevector(__ret_806, __ret_806, 3, 2, 1, 0); \ - __ret_806; \ +#define vfmlslq_laneq_low_f16(__p0_898, __p1_898, __p2_898, __p3_898) __extension__ ({ \ + float32x4_t __s0_898 = __p0_898; \ + float16x8_t __s1_898 = __p1_898; \ + float16x8_t __s2_898 = __p2_898; \ + float32x4_t __rev0_898; __rev0_898 = 
__builtin_shufflevector(__s0_898, __s0_898, 3, 2, 1, 0); \ + float16x8_t __rev1_898; __rev1_898 = __builtin_shufflevector(__s1_898, __s1_898, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16x8_t __rev2_898; __rev2_898 = __builtin_shufflevector(__s2_898, __s2_898, 7, 6, 5, 4, 3, 2, 1, 0); \ + float32x4_t __ret_898; \ + __ret_898 = __noswap_vfmlslq_low_f16(__rev0_898, __rev1_898, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_898, __p3_898), __noswap_vgetq_lane_f16(__rev2_898, __p3_898), __noswap_vgetq_lane_f16(__rev2_898, __p3_898), __noswap_vgetq_lane_f16(__rev2_898, __p3_898), __noswap_vgetq_lane_f16(__rev2_898, __p3_898), __noswap_vgetq_lane_f16(__rev2_898, __p3_898), __noswap_vgetq_lane_f16(__rev2_898, __p3_898), __noswap_vgetq_lane_f16(__rev2_898, __p3_898)}); \ + __ret_898 = __builtin_shufflevector(__ret_898, __ret_898, 3, 2, 1, 0); \ + __ret_898; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlsl_laneq_low_f16(__p0_807, __p1_807, __p2_807, __p3_807) __extension__ ({ \ - float32x2_t __s0_807 = __p0_807; \ - float16x4_t __s1_807 = __p1_807; \ - float16x8_t __s2_807 = __p2_807; \ - float32x2_t __ret_807; \ - __ret_807 = vfmlsl_low_f16(__s0_807, __s1_807, (float16x4_t) {vgetq_lane_f16(__s2_807, __p3_807), vgetq_lane_f16(__s2_807, __p3_807), vgetq_lane_f16(__s2_807, __p3_807), vgetq_lane_f16(__s2_807, __p3_807)}); \ - __ret_807; \ +#define vfmlsl_laneq_low_f16(__p0_899, __p1_899, __p2_899, __p3_899) __extension__ ({ \ + float32x2_t __s0_899 = __p0_899; \ + float16x4_t __s1_899 = __p1_899; \ + float16x8_t __s2_899 = __p2_899; \ + float32x2_t __ret_899; \ + __ret_899 = vfmlsl_low_f16(__s0_899, __s1_899, (float16x4_t) {vgetq_lane_f16(__s2_899, __p3_899), vgetq_lane_f16(__s2_899, __p3_899), vgetq_lane_f16(__s2_899, __p3_899), vgetq_lane_f16(__s2_899, __p3_899)}); \ + __ret_899; \ }) #else -#define vfmlsl_laneq_low_f16(__p0_808, __p1_808, __p2_808, __p3_808) __extension__ ({ \ - float32x2_t __s0_808 = __p0_808; \ - float16x4_t __s1_808 = __p1_808; \ - float16x8_t __s2_808 = __p2_808; \ - float32x2_t __rev0_808; __rev0_808 = __builtin_shufflevector(__s0_808, __s0_808, 1, 0); \ - float16x4_t __rev1_808; __rev1_808 = __builtin_shufflevector(__s1_808, __s1_808, 3, 2, 1, 0); \ - float16x8_t __rev2_808; __rev2_808 = __builtin_shufflevector(__s2_808, __s2_808, 7, 6, 5, 4, 3, 2, 1, 0); \ - float32x2_t __ret_808; \ - __ret_808 = __noswap_vfmlsl_low_f16(__rev0_808, __rev1_808, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_808, __p3_808), __noswap_vgetq_lane_f16(__rev2_808, __p3_808), __noswap_vgetq_lane_f16(__rev2_808, __p3_808), __noswap_vgetq_lane_f16(__rev2_808, __p3_808)}); \ - __ret_808 = __builtin_shufflevector(__ret_808, __ret_808, 1, 0); \ - __ret_808; \ +#define vfmlsl_laneq_low_f16(__p0_900, __p1_900, __p2_900, __p3_900) __extension__ ({ \ + float32x2_t __s0_900 = __p0_900; \ + float16x4_t __s1_900 = __p1_900; \ + float16x8_t __s2_900 = __p2_900; \ + float32x2_t __rev0_900; __rev0_900 = __builtin_shufflevector(__s0_900, __s0_900, 1, 0); \ + float16x4_t __rev1_900; __rev1_900 = __builtin_shufflevector(__s1_900, __s1_900, 3, 2, 1, 0); \ + float16x8_t __rev2_900; __rev2_900 = __builtin_shufflevector(__s2_900, __s2_900, 7, 6, 5, 4, 3, 2, 1, 0); \ + float32x2_t __ret_900; \ + __ret_900 = __noswap_vfmlsl_low_f16(__rev0_900, __rev1_900, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_900, __p3_900), __noswap_vgetq_lane_f16(__rev2_900, __p3_900), __noswap_vgetq_lane_f16(__rev2_900, __p3_900), __noswap_vgetq_lane_f16(__rev2_900, __p3_900)}); \ + __ret_900 = __builtin_shufflevector(__ret_900, __ret_900, 1, 0); \ + 
__ret_900; \ }) #endif #endif #if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(__aarch64__) #ifdef __LITTLE_ENDIAN__ -#define vmulh_lane_f16(__p0_809, __p1_809, __p2_809) __extension__ ({ \ - float16_t __s0_809 = __p0_809; \ - float16x4_t __s1_809 = __p1_809; \ - float16_t __ret_809; \ - __ret_809 = __s0_809 * vget_lane_f16(__s1_809, __p2_809); \ - __ret_809; \ +#define vmulh_lane_f16(__p0_901, __p1_901, __p2_901) __extension__ ({ \ + float16_t __s0_901 = __p0_901; \ + float16x4_t __s1_901 = __p1_901; \ + float16_t __ret_901; \ + __ret_901 = __s0_901 * vget_lane_f16(__s1_901, __p2_901); \ + __ret_901; \ }) #else -#define vmulh_lane_f16(__p0_810, __p1_810, __p2_810) __extension__ ({ \ - float16_t __s0_810 = __p0_810; \ - float16x4_t __s1_810 = __p1_810; \ - float16x4_t __rev1_810; __rev1_810 = __builtin_shufflevector(__s1_810, __s1_810, 3, 2, 1, 0); \ - float16_t __ret_810; \ - __ret_810 = __s0_810 * __noswap_vget_lane_f16(__rev1_810, __p2_810); \ - __ret_810; \ +#define vmulh_lane_f16(__p0_902, __p1_902, __p2_902) __extension__ ({ \ + float16_t __s0_902 = __p0_902; \ + float16x4_t __s1_902 = __p1_902; \ + float16x4_t __rev1_902; __rev1_902 = __builtin_shufflevector(__s1_902, __s1_902, 3, 2, 1, 0); \ + float16_t __ret_902; \ + __ret_902 = __s0_902 * __noswap_vget_lane_f16(__rev1_902, __p2_902); \ + __ret_902; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vmulh_laneq_f16(__p0_811, __p1_811, __p2_811) __extension__ ({ \ - float16_t __s0_811 = __p0_811; \ - float16x8_t __s1_811 = __p1_811; \ - float16_t __ret_811; \ - __ret_811 = __s0_811 * vgetq_lane_f16(__s1_811, __p2_811); \ - __ret_811; \ +#define vmulh_laneq_f16(__p0_903, __p1_903, __p2_903) __extension__ ({ \ + float16_t __s0_903 = __p0_903; \ + float16x8_t __s1_903 = __p1_903; \ + float16_t __ret_903; \ + __ret_903 = __s0_903 * vgetq_lane_f16(__s1_903, __p2_903); \ + __ret_903; \ }) #else -#define vmulh_laneq_f16(__p0_812, __p1_812, __p2_812) __extension__ ({ \ - float16_t __s0_812 = __p0_812; \ - float16x8_t __s1_812 = __p1_812; \ - float16x8_t __rev1_812; __rev1_812 = __builtin_shufflevector(__s1_812, __s1_812, 7, 6, 5, 4, 3, 2, 1, 0); \ - float16_t __ret_812; \ - __ret_812 = __s0_812 * __noswap_vgetq_lane_f16(__rev1_812, __p2_812); \ - __ret_812; \ +#define vmulh_laneq_f16(__p0_904, __p1_904, __p2_904) __extension__ ({ \ + float16_t __s0_904 = __p0_904; \ + float16x8_t __s1_904 = __p1_904; \ + float16x8_t __rev1_904; __rev1_904 = __builtin_shufflevector(__s1_904, __s1_904, 7, 6, 5, 4, 3, 2, 1, 0); \ + float16_t __ret_904; \ + __ret_904 = __s0_904 * __noswap_vgetq_lane_f16(__rev1_904, __p2_904); \ + __ret_904; \ }) #endif #endif #if defined(__ARM_FEATURE_MATMUL_INT8) #ifdef __LITTLE_ENDIAN__ -#define vsudotq_lane_s32(__p0_813, __p1_813, __p2_813, __p3_813) __extension__ ({ \ - int32x4_t __s0_813 = __p0_813; \ - int8x16_t __s1_813 = __p1_813; \ - uint8x8_t __s2_813 = __p2_813; \ - int32x4_t __ret_813; \ -uint8x8_t __reint_813 = __s2_813; \ - __ret_813 = vusdotq_s32(__s0_813, (uint8x16_t)(splatq_lane_s32(*(int32x2_t *) &__reint_813, __p3_813)), __s1_813); \ - __ret_813; \ +#define vsudotq_lane_s32(__p0_905, __p1_905, __p2_905, __p3_905) __extension__ ({ \ + int32x4_t __s0_905 = __p0_905; \ + int8x16_t __s1_905 = __p1_905; \ + uint8x8_t __s2_905 = __p2_905; \ + int32x4_t __ret_905; \ +uint8x8_t __reint_905 = __s2_905; \ + __ret_905 = vusdotq_s32(__s0_905, (uint8x16_t)(splatq_lane_s32(*(int32x2_t *) &__reint_905, __p3_905)), __s1_905); \ + __ret_905; \ }) #else -#define vsudotq_lane_s32(__p0_814, __p1_814, __p2_814, __p3_814) 
__extension__ ({ \ - int32x4_t __s0_814 = __p0_814; \ - int8x16_t __s1_814 = __p1_814; \ - uint8x8_t __s2_814 = __p2_814; \ - int32x4_t __rev0_814; __rev0_814 = __builtin_shufflevector(__s0_814, __s0_814, 3, 2, 1, 0); \ - int8x16_t __rev1_814; __rev1_814 = __builtin_shufflevector(__s1_814, __s1_814, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev2_814; __rev2_814 = __builtin_shufflevector(__s2_814, __s2_814, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x4_t __ret_814; \ -uint8x8_t __reint_814 = __rev2_814; \ - __ret_814 = __noswap_vusdotq_s32(__rev0_814, (uint8x16_t)(__noswap_splatq_lane_s32(*(int32x2_t *) &__reint_814, __p3_814)), __rev1_814); \ - __ret_814 = __builtin_shufflevector(__ret_814, __ret_814, 3, 2, 1, 0); \ - __ret_814; \ +#define vsudotq_lane_s32(__p0_906, __p1_906, __p2_906, __p3_906) __extension__ ({ \ + int32x4_t __s0_906 = __p0_906; \ + int8x16_t __s1_906 = __p1_906; \ + uint8x8_t __s2_906 = __p2_906; \ + int32x4_t __rev0_906; __rev0_906 = __builtin_shufflevector(__s0_906, __s0_906, 3, 2, 1, 0); \ + int8x16_t __rev1_906; __rev1_906 = __builtin_shufflevector(__s1_906, __s1_906, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \ + uint8x8_t __rev2_906; __rev2_906 = __builtin_shufflevector(__s2_906, __s2_906, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x4_t __ret_906; \ +uint8x8_t __reint_906 = __rev2_906; \ + __ret_906 = __noswap_vusdotq_s32(__rev0_906, (uint8x16_t)(__noswap_splatq_lane_s32(*(int32x2_t *) &__reint_906, __p3_906)), __rev1_906); \ + __ret_906 = __builtin_shufflevector(__ret_906, __ret_906, 3, 2, 1, 0); \ + __ret_906; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vsudot_lane_s32(__p0_815, __p1_815, __p2_815, __p3_815) __extension__ ({ \ - int32x2_t __s0_815 = __p0_815; \ - int8x8_t __s1_815 = __p1_815; \ - uint8x8_t __s2_815 = __p2_815; \ - int32x2_t __ret_815; \ -uint8x8_t __reint_815 = __s2_815; \ - __ret_815 = vusdot_s32(__s0_815, (uint8x8_t)(splat_lane_s32(*(int32x2_t *) &__reint_815, __p3_815)), __s1_815); \ - __ret_815; \ +#define vsudot_lane_s32(__p0_907, __p1_907, __p2_907, __p3_907) __extension__ ({ \ + int32x2_t __s0_907 = __p0_907; \ + int8x8_t __s1_907 = __p1_907; \ + uint8x8_t __s2_907 = __p2_907; \ + int32x2_t __ret_907; \ +uint8x8_t __reint_907 = __s2_907; \ + __ret_907 = vusdot_s32(__s0_907, (uint8x8_t)(splat_lane_s32(*(int32x2_t *) &__reint_907, __p3_907)), __s1_907); \ + __ret_907; \ }) #else -#define vsudot_lane_s32(__p0_816, __p1_816, __p2_816, __p3_816) __extension__ ({ \ - int32x2_t __s0_816 = __p0_816; \ - int8x8_t __s1_816 = __p1_816; \ - uint8x8_t __s2_816 = __p2_816; \ - int32x2_t __rev0_816; __rev0_816 = __builtin_shufflevector(__s0_816, __s0_816, 1, 0); \ - int8x8_t __rev1_816; __rev1_816 = __builtin_shufflevector(__s1_816, __s1_816, 7, 6, 5, 4, 3, 2, 1, 0); \ - uint8x8_t __rev2_816; __rev2_816 = __builtin_shufflevector(__s2_816, __s2_816, 7, 6, 5, 4, 3, 2, 1, 0); \ - int32x2_t __ret_816; \ -uint8x8_t __reint_816 = __rev2_816; \ - __ret_816 = __noswap_vusdot_s32(__rev0_816, (uint8x8_t)(__noswap_splat_lane_s32(*(int32x2_t *) &__reint_816, __p3_816)), __rev1_816); \ - __ret_816 = __builtin_shufflevector(__ret_816, __ret_816, 1, 0); \ - __ret_816; \ +#define vsudot_lane_s32(__p0_908, __p1_908, __p2_908, __p3_908) __extension__ ({ \ + int32x2_t __s0_908 = __p0_908; \ + int8x8_t __s1_908 = __p1_908; \ + uint8x8_t __s2_908 = __p2_908; \ + int32x2_t __rev0_908; __rev0_908 = __builtin_shufflevector(__s0_908, __s0_908, 1, 0); \ + int8x8_t __rev1_908; __rev1_908 = __builtin_shufflevector(__s1_908, __s1_908, 7, 6, 5, 4, 3, 2, 1, 
0); \ + uint8x8_t __rev2_908; __rev2_908 = __builtin_shufflevector(__s2_908, __s2_908, 7, 6, 5, 4, 3, 2, 1, 0); \ + int32x2_t __ret_908; \ +uint8x8_t __reint_908 = __rev2_908; \ + __ret_908 = __noswap_vusdot_s32(__rev0_908, (uint8x8_t)(__noswap_splat_lane_s32(*(int32x2_t *) &__reint_908, __p3_908)), __rev1_908); \ + __ret_908 = __builtin_shufflevector(__ret_908, __ret_908, 1, 0); \ + __ret_908; \ }) #endif @@ -66049,86 +67719,86 @@ __ai int16_t vqrdmlahh_s16(int16_t __p0, int16_t __p1, int16_t __p2) { return __ret; } #ifdef __LITTLE_ENDIAN__ -#define vqrdmlahs_lane_s32(__p0_817, __p1_817, __p2_817, __p3_817) __extension__ ({ \ - int32_t __s0_817 = __p0_817; \ - int32_t __s1_817 = __p1_817; \ - int32x2_t __s2_817 = __p2_817; \ - int32_t __ret_817; \ - __ret_817 = vqadds_s32(__s0_817, vqrdmulhs_s32(__s1_817, vget_lane_s32(__s2_817, __p3_817))); \ - __ret_817; \ +#define vqrdmlahs_lane_s32(__p0_909, __p1_909, __p2_909, __p3_909) __extension__ ({ \ + int32_t __s0_909 = __p0_909; \ + int32_t __s1_909 = __p1_909; \ + int32x2_t __s2_909 = __p2_909; \ + int32_t __ret_909; \ + __ret_909 = vqadds_s32(__s0_909, vqrdmulhs_s32(__s1_909, vget_lane_s32(__s2_909, __p3_909))); \ + __ret_909; \ }) #else -#define vqrdmlahs_lane_s32(__p0_818, __p1_818, __p2_818, __p3_818) __extension__ ({ \ - int32_t __s0_818 = __p0_818; \ - int32_t __s1_818 = __p1_818; \ - int32x2_t __s2_818 = __p2_818; \ - int32x2_t __rev2_818; __rev2_818 = __builtin_shufflevector(__s2_818, __s2_818, 1, 0); \ - int32_t __ret_818; \ - __ret_818 = vqadds_s32(__s0_818, vqrdmulhs_s32(__s1_818, __noswap_vget_lane_s32(__rev2_818, __p3_818))); \ - __ret_818; \ +#define vqrdmlahs_lane_s32(__p0_910, __p1_910, __p2_910, __p3_910) __extension__ ({ \ + int32_t __s0_910 = __p0_910; \ + int32_t __s1_910 = __p1_910; \ + int32x2_t __s2_910 = __p2_910; \ + int32x2_t __rev2_910; __rev2_910 = __builtin_shufflevector(__s2_910, __s2_910, 1, 0); \ + int32_t __ret_910; \ + __ret_910 = vqadds_s32(__s0_910, vqrdmulhs_s32(__s1_910, __noswap_vget_lane_s32(__rev2_910, __p3_910))); \ + __ret_910; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlahh_lane_s16(__p0_819, __p1_819, __p2_819, __p3_819) __extension__ ({ \ - int16_t __s0_819 = __p0_819; \ - int16_t __s1_819 = __p1_819; \ - int16x4_t __s2_819 = __p2_819; \ - int16_t __ret_819; \ - __ret_819 = vqaddh_s16(__s0_819, vqrdmulhh_s16(__s1_819, vget_lane_s16(__s2_819, __p3_819))); \ - __ret_819; \ +#define vqrdmlahh_lane_s16(__p0_911, __p1_911, __p2_911, __p3_911) __extension__ ({ \ + int16_t __s0_911 = __p0_911; \ + int16_t __s1_911 = __p1_911; \ + int16x4_t __s2_911 = __p2_911; \ + int16_t __ret_911; \ + __ret_911 = vqaddh_s16(__s0_911, vqrdmulhh_s16(__s1_911, vget_lane_s16(__s2_911, __p3_911))); \ + __ret_911; \ }) #else -#define vqrdmlahh_lane_s16(__p0_820, __p1_820, __p2_820, __p3_820) __extension__ ({ \ - int16_t __s0_820 = __p0_820; \ - int16_t __s1_820 = __p1_820; \ - int16x4_t __s2_820 = __p2_820; \ - int16x4_t __rev2_820; __rev2_820 = __builtin_shufflevector(__s2_820, __s2_820, 3, 2, 1, 0); \ - int16_t __ret_820; \ - __ret_820 = vqaddh_s16(__s0_820, vqrdmulhh_s16(__s1_820, __noswap_vget_lane_s16(__rev2_820, __p3_820))); \ - __ret_820; \ +#define vqrdmlahh_lane_s16(__p0_912, __p1_912, __p2_912, __p3_912) __extension__ ({ \ + int16_t __s0_912 = __p0_912; \ + int16_t __s1_912 = __p1_912; \ + int16x4_t __s2_912 = __p2_912; \ + int16x4_t __rev2_912; __rev2_912 = __builtin_shufflevector(__s2_912, __s2_912, 3, 2, 1, 0); \ + int16_t __ret_912; \ + __ret_912 = vqaddh_s16(__s0_912, vqrdmulhh_s16(__s1_912, 
__noswap_vget_lane_s16(__rev2_912, __p3_912))); \ + __ret_912; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlahs_laneq_s32(__p0_821, __p1_821, __p2_821, __p3_821) __extension__ ({ \ - int32_t __s0_821 = __p0_821; \ - int32_t __s1_821 = __p1_821; \ - int32x4_t __s2_821 = __p2_821; \ - int32_t __ret_821; \ - __ret_821 = vqadds_s32(__s0_821, vqrdmulhs_s32(__s1_821, vgetq_lane_s32(__s2_821, __p3_821))); \ - __ret_821; \ +#define vqrdmlahs_laneq_s32(__p0_913, __p1_913, __p2_913, __p3_913) __extension__ ({ \ + int32_t __s0_913 = __p0_913; \ + int32_t __s1_913 = __p1_913; \ + int32x4_t __s2_913 = __p2_913; \ + int32_t __ret_913; \ + __ret_913 = vqadds_s32(__s0_913, vqrdmulhs_s32(__s1_913, vgetq_lane_s32(__s2_913, __p3_913))); \ + __ret_913; \ }) #else -#define vqrdmlahs_laneq_s32(__p0_822, __p1_822, __p2_822, __p3_822) __extension__ ({ \ - int32_t __s0_822 = __p0_822; \ - int32_t __s1_822 = __p1_822; \ - int32x4_t __s2_822 = __p2_822; \ - int32x4_t __rev2_822; __rev2_822 = __builtin_shufflevector(__s2_822, __s2_822, 3, 2, 1, 0); \ - int32_t __ret_822; \ - __ret_822 = vqadds_s32(__s0_822, vqrdmulhs_s32(__s1_822, __noswap_vgetq_lane_s32(__rev2_822, __p3_822))); \ - __ret_822; \ +#define vqrdmlahs_laneq_s32(__p0_914, __p1_914, __p2_914, __p3_914) __extension__ ({ \ + int32_t __s0_914 = __p0_914; \ + int32_t __s1_914 = __p1_914; \ + int32x4_t __s2_914 = __p2_914; \ + int32x4_t __rev2_914; __rev2_914 = __builtin_shufflevector(__s2_914, __s2_914, 3, 2, 1, 0); \ + int32_t __ret_914; \ + __ret_914 = vqadds_s32(__s0_914, vqrdmulhs_s32(__s1_914, __noswap_vgetq_lane_s32(__rev2_914, __p3_914))); \ + __ret_914; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlahh_laneq_s16(__p0_823, __p1_823, __p2_823, __p3_823) __extension__ ({ \ - int16_t __s0_823 = __p0_823; \ - int16_t __s1_823 = __p1_823; \ - int16x8_t __s2_823 = __p2_823; \ - int16_t __ret_823; \ - __ret_823 = vqaddh_s16(__s0_823, vqrdmulhh_s16(__s1_823, vgetq_lane_s16(__s2_823, __p3_823))); \ - __ret_823; \ +#define vqrdmlahh_laneq_s16(__p0_915, __p1_915, __p2_915, __p3_915) __extension__ ({ \ + int16_t __s0_915 = __p0_915; \ + int16_t __s1_915 = __p1_915; \ + int16x8_t __s2_915 = __p2_915; \ + int16_t __ret_915; \ + __ret_915 = vqaddh_s16(__s0_915, vqrdmulhh_s16(__s1_915, vgetq_lane_s16(__s2_915, __p3_915))); \ + __ret_915; \ }) #else -#define vqrdmlahh_laneq_s16(__p0_824, __p1_824, __p2_824, __p3_824) __extension__ ({ \ - int16_t __s0_824 = __p0_824; \ - int16_t __s1_824 = __p1_824; \ - int16x8_t __s2_824 = __p2_824; \ - int16x8_t __rev2_824; __rev2_824 = __builtin_shufflevector(__s2_824, __s2_824, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16_t __ret_824; \ - __ret_824 = vqaddh_s16(__s0_824, vqrdmulhh_s16(__s1_824, __noswap_vgetq_lane_s16(__rev2_824, __p3_824))); \ - __ret_824; \ +#define vqrdmlahh_laneq_s16(__p0_916, __p1_916, __p2_916, __p3_916) __extension__ ({ \ + int16_t __s0_916 = __p0_916; \ + int16_t __s1_916 = __p1_916; \ + int16x8_t __s2_916 = __p2_916; \ + int16x8_t __rev2_916; __rev2_916 = __builtin_shufflevector(__s2_916, __s2_916, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16_t __ret_916; \ + __ret_916 = vqaddh_s16(__s0_916, vqrdmulhh_s16(__s1_916, __noswap_vgetq_lane_s16(__rev2_916, __p3_916))); \ + __ret_916; \ }) #endif @@ -66143,86 +67813,86 @@ __ai int16_t vqrdmlshh_s16(int16_t __p0, int16_t __p1, int16_t __p2) { return __ret; } #ifdef __LITTLE_ENDIAN__ -#define vqrdmlshs_lane_s32(__p0_825, __p1_825, __p2_825, __p3_825) __extension__ ({ \ - int32_t __s0_825 = __p0_825; \ - int32_t __s1_825 = __p1_825; \ - int32x2_t __s2_825 = __p2_825; \ - 
int32_t __ret_825; \ - __ret_825 = vqsubs_s32(__s0_825, vqrdmulhs_s32(__s1_825, vget_lane_s32(__s2_825, __p3_825))); \ - __ret_825; \ +#define vqrdmlshs_lane_s32(__p0_917, __p1_917, __p2_917, __p3_917) __extension__ ({ \ + int32_t __s0_917 = __p0_917; \ + int32_t __s1_917 = __p1_917; \ + int32x2_t __s2_917 = __p2_917; \ + int32_t __ret_917; \ + __ret_917 = vqsubs_s32(__s0_917, vqrdmulhs_s32(__s1_917, vget_lane_s32(__s2_917, __p3_917))); \ + __ret_917; \ }) #else -#define vqrdmlshs_lane_s32(__p0_826, __p1_826, __p2_826, __p3_826) __extension__ ({ \ - int32_t __s0_826 = __p0_826; \ - int32_t __s1_826 = __p1_826; \ - int32x2_t __s2_826 = __p2_826; \ - int32x2_t __rev2_826; __rev2_826 = __builtin_shufflevector(__s2_826, __s2_826, 1, 0); \ - int32_t __ret_826; \ - __ret_826 = vqsubs_s32(__s0_826, vqrdmulhs_s32(__s1_826, __noswap_vget_lane_s32(__rev2_826, __p3_826))); \ - __ret_826; \ +#define vqrdmlshs_lane_s32(__p0_918, __p1_918, __p2_918, __p3_918) __extension__ ({ \ + int32_t __s0_918 = __p0_918; \ + int32_t __s1_918 = __p1_918; \ + int32x2_t __s2_918 = __p2_918; \ + int32x2_t __rev2_918; __rev2_918 = __builtin_shufflevector(__s2_918, __s2_918, 1, 0); \ + int32_t __ret_918; \ + __ret_918 = vqsubs_s32(__s0_918, vqrdmulhs_s32(__s1_918, __noswap_vget_lane_s32(__rev2_918, __p3_918))); \ + __ret_918; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlshh_lane_s16(__p0_827, __p1_827, __p2_827, __p3_827) __extension__ ({ \ - int16_t __s0_827 = __p0_827; \ - int16_t __s1_827 = __p1_827; \ - int16x4_t __s2_827 = __p2_827; \ - int16_t __ret_827; \ - __ret_827 = vqsubh_s16(__s0_827, vqrdmulhh_s16(__s1_827, vget_lane_s16(__s2_827, __p3_827))); \ - __ret_827; \ +#define vqrdmlshh_lane_s16(__p0_919, __p1_919, __p2_919, __p3_919) __extension__ ({ \ + int16_t __s0_919 = __p0_919; \ + int16_t __s1_919 = __p1_919; \ + int16x4_t __s2_919 = __p2_919; \ + int16_t __ret_919; \ + __ret_919 = vqsubh_s16(__s0_919, vqrdmulhh_s16(__s1_919, vget_lane_s16(__s2_919, __p3_919))); \ + __ret_919; \ }) #else -#define vqrdmlshh_lane_s16(__p0_828, __p1_828, __p2_828, __p3_828) __extension__ ({ \ - int16_t __s0_828 = __p0_828; \ - int16_t __s1_828 = __p1_828; \ - int16x4_t __s2_828 = __p2_828; \ - int16x4_t __rev2_828; __rev2_828 = __builtin_shufflevector(__s2_828, __s2_828, 3, 2, 1, 0); \ - int16_t __ret_828; \ - __ret_828 = vqsubh_s16(__s0_828, vqrdmulhh_s16(__s1_828, __noswap_vget_lane_s16(__rev2_828, __p3_828))); \ - __ret_828; \ +#define vqrdmlshh_lane_s16(__p0_920, __p1_920, __p2_920, __p3_920) __extension__ ({ \ + int16_t __s0_920 = __p0_920; \ + int16_t __s1_920 = __p1_920; \ + int16x4_t __s2_920 = __p2_920; \ + int16x4_t __rev2_920; __rev2_920 = __builtin_shufflevector(__s2_920, __s2_920, 3, 2, 1, 0); \ + int16_t __ret_920; \ + __ret_920 = vqsubh_s16(__s0_920, vqrdmulhh_s16(__s1_920, __noswap_vget_lane_s16(__rev2_920, __p3_920))); \ + __ret_920; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlshs_laneq_s32(__p0_829, __p1_829, __p2_829, __p3_829) __extension__ ({ \ - int32_t __s0_829 = __p0_829; \ - int32_t __s1_829 = __p1_829; \ - int32x4_t __s2_829 = __p2_829; \ - int32_t __ret_829; \ - __ret_829 = vqsubs_s32(__s0_829, vqrdmulhs_s32(__s1_829, vgetq_lane_s32(__s2_829, __p3_829))); \ - __ret_829; \ +#define vqrdmlshs_laneq_s32(__p0_921, __p1_921, __p2_921, __p3_921) __extension__ ({ \ + int32_t __s0_921 = __p0_921; \ + int32_t __s1_921 = __p1_921; \ + int32x4_t __s2_921 = __p2_921; \ + int32_t __ret_921; \ + __ret_921 = vqsubs_s32(__s0_921, vqrdmulhs_s32(__s1_921, vgetq_lane_s32(__s2_921, __p3_921))); \ + 
__ret_921; \ }) #else -#define vqrdmlshs_laneq_s32(__p0_830, __p1_830, __p2_830, __p3_830) __extension__ ({ \ - int32_t __s0_830 = __p0_830; \ - int32_t __s1_830 = __p1_830; \ - int32x4_t __s2_830 = __p2_830; \ - int32x4_t __rev2_830; __rev2_830 = __builtin_shufflevector(__s2_830, __s2_830, 3, 2, 1, 0); \ - int32_t __ret_830; \ - __ret_830 = vqsubs_s32(__s0_830, vqrdmulhs_s32(__s1_830, __noswap_vgetq_lane_s32(__rev2_830, __p3_830))); \ - __ret_830; \ +#define vqrdmlshs_laneq_s32(__p0_922, __p1_922, __p2_922, __p3_922) __extension__ ({ \ + int32_t __s0_922 = __p0_922; \ + int32_t __s1_922 = __p1_922; \ + int32x4_t __s2_922 = __p2_922; \ + int32x4_t __rev2_922; __rev2_922 = __builtin_shufflevector(__s2_922, __s2_922, 3, 2, 1, 0); \ + int32_t __ret_922; \ + __ret_922 = vqsubs_s32(__s0_922, vqrdmulhs_s32(__s1_922, __noswap_vgetq_lane_s32(__rev2_922, __p3_922))); \ + __ret_922; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vqrdmlshh_laneq_s16(__p0_831, __p1_831, __p2_831, __p3_831) __extension__ ({ \ - int16_t __s0_831 = __p0_831; \ - int16_t __s1_831 = __p1_831; \ - int16x8_t __s2_831 = __p2_831; \ - int16_t __ret_831; \ - __ret_831 = vqsubh_s16(__s0_831, vqrdmulhh_s16(__s1_831, vgetq_lane_s16(__s2_831, __p3_831))); \ - __ret_831; \ +#define vqrdmlshh_laneq_s16(__p0_923, __p1_923, __p2_923, __p3_923) __extension__ ({ \ + int16_t __s0_923 = __p0_923; \ + int16_t __s1_923 = __p1_923; \ + int16x8_t __s2_923 = __p2_923; \ + int16_t __ret_923; \ + __ret_923 = vqsubh_s16(__s0_923, vqrdmulhh_s16(__s1_923, vgetq_lane_s16(__s2_923, __p3_923))); \ + __ret_923; \ }) #else -#define vqrdmlshh_laneq_s16(__p0_832, __p1_832, __p2_832, __p3_832) __extension__ ({ \ - int16_t __s0_832 = __p0_832; \ - int16_t __s1_832 = __p1_832; \ - int16x8_t __s2_832 = __p2_832; \ - int16x8_t __rev2_832; __rev2_832 = __builtin_shufflevector(__s2_832, __s2_832, 7, 6, 5, 4, 3, 2, 1, 0); \ - int16_t __ret_832; \ - __ret_832 = vqsubh_s16(__s0_832, vqrdmulhh_s16(__s1_832, __noswap_vgetq_lane_s16(__rev2_832, __p3_832))); \ - __ret_832; \ +#define vqrdmlshh_laneq_s16(__p0_924, __p1_924, __p2_924, __p3_924) __extension__ ({ \ + int16_t __s0_924 = __p0_924; \ + int16_t __s1_924 = __p1_924; \ + int16x8_t __s2_924 = __p2_924; \ + int16x8_t __rev2_924; __rev2_924 = __builtin_shufflevector(__s2_924, __s2_924, 7, 6, 5, 4, 3, 2, 1, 0); \ + int16_t __ret_924; \ + __ret_924 = vqsubh_s16(__s0_924, vqrdmulhh_s16(__s1_924, __noswap_vgetq_lane_s16(__rev2_924, __p3_924))); \ + __ret_924; \ }) #endif @@ -66535,136 +68205,136 @@ __ai int32x4_t vaddw_high_s16(int32x4_t __p0, int16x8_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_p64(__p0_833, __p1_833, __p2_833, __p3_833) __extension__ ({ \ - poly64x2_t __s0_833 = __p0_833; \ - poly64x1_t __s2_833 = __p2_833; \ - poly64x2_t __ret_833; \ - __ret_833 = vsetq_lane_p64(vget_lane_p64(__s2_833, __p3_833), __s0_833, __p1_833); \ - __ret_833; \ +#define vcopyq_lane_p64(__p0_925, __p1_925, __p2_925, __p3_925) __extension__ ({ \ + poly64x2_t __s0_925 = __p0_925; \ + poly64x1_t __s2_925 = __p2_925; \ + poly64x2_t __ret_925; \ + __ret_925 = vsetq_lane_p64(vget_lane_p64(__s2_925, __p3_925), __s0_925, __p1_925); \ + __ret_925; \ }) #else -#define vcopyq_lane_p64(__p0_834, __p1_834, __p2_834, __p3_834) __extension__ ({ \ - poly64x2_t __s0_834 = __p0_834; \ - poly64x1_t __s2_834 = __p2_834; \ - poly64x2_t __rev0_834; __rev0_834 = __builtin_shufflevector(__s0_834, __s0_834, 1, 0); \ - poly64x2_t __ret_834; \ - __ret_834 = __noswap_vsetq_lane_p64(vget_lane_p64(__s2_834, __p3_834), __rev0_834, __p1_834); 
\ - __ret_834 = __builtin_shufflevector(__ret_834, __ret_834, 1, 0); \ - __ret_834; \ +#define vcopyq_lane_p64(__p0_926, __p1_926, __p2_926, __p3_926) __extension__ ({ \ + poly64x2_t __s0_926 = __p0_926; \ + poly64x1_t __s2_926 = __p2_926; \ + poly64x2_t __rev0_926; __rev0_926 = __builtin_shufflevector(__s0_926, __s0_926, 1, 0); \ + poly64x2_t __ret_926; \ + __ret_926 = __noswap_vsetq_lane_p64(vget_lane_p64(__s2_926, __p3_926), __rev0_926, __p1_926); \ + __ret_926 = __builtin_shufflevector(__ret_926, __ret_926, 1, 0); \ + __ret_926; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_lane_f64(__p0_835, __p1_835, __p2_835, __p3_835) __extension__ ({ \ - float64x2_t __s0_835 = __p0_835; \ - float64x1_t __s2_835 = __p2_835; \ - float64x2_t __ret_835; \ - __ret_835 = vsetq_lane_f64(vget_lane_f64(__s2_835, __p3_835), __s0_835, __p1_835); \ - __ret_835; \ +#define vcopyq_lane_f64(__p0_927, __p1_927, __p2_927, __p3_927) __extension__ ({ \ + float64x2_t __s0_927 = __p0_927; \ + float64x1_t __s2_927 = __p2_927; \ + float64x2_t __ret_927; \ + __ret_927 = vsetq_lane_f64(vget_lane_f64(__s2_927, __p3_927), __s0_927, __p1_927); \ + __ret_927; \ }) #else -#define vcopyq_lane_f64(__p0_836, __p1_836, __p2_836, __p3_836) __extension__ ({ \ - float64x2_t __s0_836 = __p0_836; \ - float64x1_t __s2_836 = __p2_836; \ - float64x2_t __rev0_836; __rev0_836 = __builtin_shufflevector(__s0_836, __s0_836, 1, 0); \ - float64x2_t __ret_836; \ - __ret_836 = __noswap_vsetq_lane_f64(vget_lane_f64(__s2_836, __p3_836), __rev0_836, __p1_836); \ - __ret_836 = __builtin_shufflevector(__ret_836, __ret_836, 1, 0); \ - __ret_836; \ +#define vcopyq_lane_f64(__p0_928, __p1_928, __p2_928, __p3_928) __extension__ ({ \ + float64x2_t __s0_928 = __p0_928; \ + float64x1_t __s2_928 = __p2_928; \ + float64x2_t __rev0_928; __rev0_928 = __builtin_shufflevector(__s0_928, __s0_928, 1, 0); \ + float64x2_t __ret_928; \ + __ret_928 = __noswap_vsetq_lane_f64(vget_lane_f64(__s2_928, __p3_928), __rev0_928, __p1_928); \ + __ret_928 = __builtin_shufflevector(__ret_928, __ret_928, 1, 0); \ + __ret_928; \ }) #endif -#define vcopy_lane_p64(__p0_837, __p1_837, __p2_837, __p3_837) __extension__ ({ \ - poly64x1_t __s0_837 = __p0_837; \ - poly64x1_t __s2_837 = __p2_837; \ - poly64x1_t __ret_837; \ - __ret_837 = vset_lane_p64(vget_lane_p64(__s2_837, __p3_837), __s0_837, __p1_837); \ - __ret_837; \ +#define vcopy_lane_p64(__p0_929, __p1_929, __p2_929, __p3_929) __extension__ ({ \ + poly64x1_t __s0_929 = __p0_929; \ + poly64x1_t __s2_929 = __p2_929; \ + poly64x1_t __ret_929; \ + __ret_929 = vset_lane_p64(vget_lane_p64(__s2_929, __p3_929), __s0_929, __p1_929); \ + __ret_929; \ }) -#define vcopy_lane_f64(__p0_838, __p1_838, __p2_838, __p3_838) __extension__ ({ \ - float64x1_t __s0_838 = __p0_838; \ - float64x1_t __s2_838 = __p2_838; \ - float64x1_t __ret_838; \ - __ret_838 = vset_lane_f64(vget_lane_f64(__s2_838, __p3_838), __s0_838, __p1_838); \ - __ret_838; \ +#define vcopy_lane_f64(__p0_930, __p1_930, __p2_930, __p3_930) __extension__ ({ \ + float64x1_t __s0_930 = __p0_930; \ + float64x1_t __s2_930 = __p2_930; \ + float64x1_t __ret_930; \ + __ret_930 = vset_lane_f64(vget_lane_f64(__s2_930, __p3_930), __s0_930, __p1_930); \ + __ret_930; \ }) #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_p64(__p0_839, __p1_839, __p2_839, __p3_839) __extension__ ({ \ - poly64x2_t __s0_839 = __p0_839; \ - poly64x2_t __s2_839 = __p2_839; \ - poly64x2_t __ret_839; \ - __ret_839 = vsetq_lane_p64(vgetq_lane_p64(__s2_839, __p3_839), __s0_839, __p1_839); \ - __ret_839; \ +#define 
vcopyq_laneq_p64(__p0_931, __p1_931, __p2_931, __p3_931) __extension__ ({ \ + poly64x2_t __s0_931 = __p0_931; \ + poly64x2_t __s2_931 = __p2_931; \ + poly64x2_t __ret_931; \ + __ret_931 = vsetq_lane_p64(vgetq_lane_p64(__s2_931, __p3_931), __s0_931, __p1_931); \ + __ret_931; \ }) #else -#define vcopyq_laneq_p64(__p0_840, __p1_840, __p2_840, __p3_840) __extension__ ({ \ - poly64x2_t __s0_840 = __p0_840; \ - poly64x2_t __s2_840 = __p2_840; \ - poly64x2_t __rev0_840; __rev0_840 = __builtin_shufflevector(__s0_840, __s0_840, 1, 0); \ - poly64x2_t __rev2_840; __rev2_840 = __builtin_shufflevector(__s2_840, __s2_840, 1, 0); \ - poly64x2_t __ret_840; \ - __ret_840 = __noswap_vsetq_lane_p64(__noswap_vgetq_lane_p64(__rev2_840, __p3_840), __rev0_840, __p1_840); \ - __ret_840 = __builtin_shufflevector(__ret_840, __ret_840, 1, 0); \ - __ret_840; \ +#define vcopyq_laneq_p64(__p0_932, __p1_932, __p2_932, __p3_932) __extension__ ({ \ + poly64x2_t __s0_932 = __p0_932; \ + poly64x2_t __s2_932 = __p2_932; \ + poly64x2_t __rev0_932; __rev0_932 = __builtin_shufflevector(__s0_932, __s0_932, 1, 0); \ + poly64x2_t __rev2_932; __rev2_932 = __builtin_shufflevector(__s2_932, __s2_932, 1, 0); \ + poly64x2_t __ret_932; \ + __ret_932 = __noswap_vsetq_lane_p64(__noswap_vgetq_lane_p64(__rev2_932, __p3_932), __rev0_932, __p1_932); \ + __ret_932 = __builtin_shufflevector(__ret_932, __ret_932, 1, 0); \ + __ret_932; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopyq_laneq_f64(__p0_841, __p1_841, __p2_841, __p3_841) __extension__ ({ \ - float64x2_t __s0_841 = __p0_841; \ - float64x2_t __s2_841 = __p2_841; \ - float64x2_t __ret_841; \ - __ret_841 = vsetq_lane_f64(vgetq_lane_f64(__s2_841, __p3_841), __s0_841, __p1_841); \ - __ret_841; \ +#define vcopyq_laneq_f64(__p0_933, __p1_933, __p2_933, __p3_933) __extension__ ({ \ + float64x2_t __s0_933 = __p0_933; \ + float64x2_t __s2_933 = __p2_933; \ + float64x2_t __ret_933; \ + __ret_933 = vsetq_lane_f64(vgetq_lane_f64(__s2_933, __p3_933), __s0_933, __p1_933); \ + __ret_933; \ }) #else -#define vcopyq_laneq_f64(__p0_842, __p1_842, __p2_842, __p3_842) __extension__ ({ \ - float64x2_t __s0_842 = __p0_842; \ - float64x2_t __s2_842 = __p2_842; \ - float64x2_t __rev0_842; __rev0_842 = __builtin_shufflevector(__s0_842, __s0_842, 1, 0); \ - float64x2_t __rev2_842; __rev2_842 = __builtin_shufflevector(__s2_842, __s2_842, 1, 0); \ - float64x2_t __ret_842; \ - __ret_842 = __noswap_vsetq_lane_f64(__noswap_vgetq_lane_f64(__rev2_842, __p3_842), __rev0_842, __p1_842); \ - __ret_842 = __builtin_shufflevector(__ret_842, __ret_842, 1, 0); \ - __ret_842; \ +#define vcopyq_laneq_f64(__p0_934, __p1_934, __p2_934, __p3_934) __extension__ ({ \ + float64x2_t __s0_934 = __p0_934; \ + float64x2_t __s2_934 = __p2_934; \ + float64x2_t __rev0_934; __rev0_934 = __builtin_shufflevector(__s0_934, __s0_934, 1, 0); \ + float64x2_t __rev2_934; __rev2_934 = __builtin_shufflevector(__s2_934, __s2_934, 1, 0); \ + float64x2_t __ret_934; \ + __ret_934 = __noswap_vsetq_lane_f64(__noswap_vgetq_lane_f64(__rev2_934, __p3_934), __rev0_934, __p1_934); \ + __ret_934 = __builtin_shufflevector(__ret_934, __ret_934, 1, 0); \ + __ret_934; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_p64(__p0_843, __p1_843, __p2_843, __p3_843) __extension__ ({ \ - poly64x1_t __s0_843 = __p0_843; \ - poly64x2_t __s2_843 = __p2_843; \ - poly64x1_t __ret_843; \ - __ret_843 = vset_lane_p64(vgetq_lane_p64(__s2_843, __p3_843), __s0_843, __p1_843); \ - __ret_843; \ +#define vcopy_laneq_p64(__p0_935, __p1_935, __p2_935, __p3_935) __extension__ ({ 
\ + poly64x1_t __s0_935 = __p0_935; \ + poly64x2_t __s2_935 = __p2_935; \ + poly64x1_t __ret_935; \ + __ret_935 = vset_lane_p64(vgetq_lane_p64(__s2_935, __p3_935), __s0_935, __p1_935); \ + __ret_935; \ }) #else -#define vcopy_laneq_p64(__p0_844, __p1_844, __p2_844, __p3_844) __extension__ ({ \ - poly64x1_t __s0_844 = __p0_844; \ - poly64x2_t __s2_844 = __p2_844; \ - poly64x2_t __rev2_844; __rev2_844 = __builtin_shufflevector(__s2_844, __s2_844, 1, 0); \ - poly64x1_t __ret_844; \ - __ret_844 = vset_lane_p64(__noswap_vgetq_lane_p64(__rev2_844, __p3_844), __s0_844, __p1_844); \ - __ret_844; \ +#define vcopy_laneq_p64(__p0_936, __p1_936, __p2_936, __p3_936) __extension__ ({ \ + poly64x1_t __s0_936 = __p0_936; \ + poly64x2_t __s2_936 = __p2_936; \ + poly64x2_t __rev2_936; __rev2_936 = __builtin_shufflevector(__s2_936, __s2_936, 1, 0); \ + poly64x1_t __ret_936; \ + __ret_936 = vset_lane_p64(__noswap_vgetq_lane_p64(__rev2_936, __p3_936), __s0_936, __p1_936); \ + __ret_936; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vcopy_laneq_f64(__p0_845, __p1_845, __p2_845, __p3_845) __extension__ ({ \ - float64x1_t __s0_845 = __p0_845; \ - float64x2_t __s2_845 = __p2_845; \ - float64x1_t __ret_845; \ - __ret_845 = vset_lane_f64(vgetq_lane_f64(__s2_845, __p3_845), __s0_845, __p1_845); \ - __ret_845; \ +#define vcopy_laneq_f64(__p0_937, __p1_937, __p2_937, __p3_937) __extension__ ({ \ + float64x1_t __s0_937 = __p0_937; \ + float64x2_t __s2_937 = __p2_937; \ + float64x1_t __ret_937; \ + __ret_937 = vset_lane_f64(vgetq_lane_f64(__s2_937, __p3_937), __s0_937, __p1_937); \ + __ret_937; \ }) #else -#define vcopy_laneq_f64(__p0_846, __p1_846, __p2_846, __p3_846) __extension__ ({ \ - float64x1_t __s0_846 = __p0_846; \ - float64x2_t __s2_846 = __p2_846; \ - float64x2_t __rev2_846; __rev2_846 = __builtin_shufflevector(__s2_846, __s2_846, 1, 0); \ - float64x1_t __ret_846; \ - __ret_846 = vset_lane_f64(__noswap_vgetq_lane_f64(__rev2_846, __p3_846), __s0_846, __p1_846); \ - __ret_846; \ +#define vcopy_laneq_f64(__p0_938, __p1_938, __p2_938, __p3_938) __extension__ ({ \ + float64x1_t __s0_938 = __p0_938; \ + float64x2_t __s2_938 = __p2_938; \ + float64x2_t __rev2_938; __rev2_938 = __builtin_shufflevector(__s2_938, __s2_938, 1, 0); \ + float64x1_t __ret_938; \ + __ret_938 = vset_lane_f64(__noswap_vgetq_lane_f64(__rev2_938, __p3_938), __s0_938, __p1_938); \ + __ret_938; \ }) #endif @@ -67020,38 +68690,38 @@ __ai int32x4_t vmlsl_high_n_s16(int32x4_t __p0, int16x8_t __p1, int16_t __p2) { } #endif -#define vmulx_lane_f64(__p0_847, __p1_847, __p2_847) __extension__ ({ \ - float64x1_t __s0_847 = __p0_847; \ - float64x1_t __s1_847 = __p1_847; \ - float64x1_t __ret_847; \ - float64_t __x_847 = vget_lane_f64(__s0_847, 0); \ - float64_t __y_847 = vget_lane_f64(__s1_847, __p2_847); \ - float64_t __z_847 = vmulxd_f64(__x_847, __y_847); \ - __ret_847 = vset_lane_f64(__z_847, __s0_847, __p2_847); \ - __ret_847; \ +#define vmulx_lane_f64(__p0_939, __p1_939, __p2_939) __extension__ ({ \ + float64x1_t __s0_939 = __p0_939; \ + float64x1_t __s1_939 = __p1_939; \ + float64x1_t __ret_939; \ + float64_t __x_939 = vget_lane_f64(__s0_939, 0); \ + float64_t __y_939 = vget_lane_f64(__s1_939, __p2_939); \ + float64_t __z_939 = vmulxd_f64(__x_939, __y_939); \ + __ret_939 = vset_lane_f64(__z_939, __s0_939, __p2_939); \ + __ret_939; \ }) #ifdef __LITTLE_ENDIAN__ -#define vmulx_laneq_f64(__p0_848, __p1_848, __p2_848) __extension__ ({ \ - float64x1_t __s0_848 = __p0_848; \ - float64x2_t __s1_848 = __p1_848; \ - float64x1_t __ret_848; \ - float64_t 
__x_848 = vget_lane_f64(__s0_848, 0); \ - float64_t __y_848 = vgetq_lane_f64(__s1_848, __p2_848); \ - float64_t __z_848 = vmulxd_f64(__x_848, __y_848); \ - __ret_848 = vset_lane_f64(__z_848, __s0_848, 0); \ - __ret_848; \ +#define vmulx_laneq_f64(__p0_940, __p1_940, __p2_940) __extension__ ({ \ + float64x1_t __s0_940 = __p0_940; \ + float64x2_t __s1_940 = __p1_940; \ + float64x1_t __ret_940; \ + float64_t __x_940 = vget_lane_f64(__s0_940, 0); \ + float64_t __y_940 = vgetq_lane_f64(__s1_940, __p2_940); \ + float64_t __z_940 = vmulxd_f64(__x_940, __y_940); \ + __ret_940 = vset_lane_f64(__z_940, __s0_940, 0); \ + __ret_940; \ }) #else -#define vmulx_laneq_f64(__p0_849, __p1_849, __p2_849) __extension__ ({ \ - float64x1_t __s0_849 = __p0_849; \ - float64x2_t __s1_849 = __p1_849; \ - float64x2_t __rev1_849; __rev1_849 = __builtin_shufflevector(__s1_849, __s1_849, 1, 0); \ - float64x1_t __ret_849; \ - float64_t __x_849 = vget_lane_f64(__s0_849, 0); \ - float64_t __y_849 = __noswap_vgetq_lane_f64(__rev1_849, __p2_849); \ - float64_t __z_849 = vmulxd_f64(__x_849, __y_849); \ - __ret_849 = vset_lane_f64(__z_849, __s0_849, 0); \ - __ret_849; \ +#define vmulx_laneq_f64(__p0_941, __p1_941, __p2_941) __extension__ ({ \ + float64x1_t __s0_941 = __p0_941; \ + float64x2_t __s1_941 = __p1_941; \ + float64x2_t __rev1_941; __rev1_941 = __builtin_shufflevector(__s1_941, __s1_941, 1, 0); \ + float64x1_t __ret_941; \ + float64_t __x_941 = vget_lane_f64(__s0_941, 0); \ + float64_t __y_941 = __noswap_vgetq_lane_f64(__rev1_941, __p2_941); \ + float64_t __z_941 = vmulxd_f64(__x_941, __y_941); \ + __ret_941 = vset_lane_f64(__z_941, __s0_941, 0); \ + __ret_941; \ }) #endif diff --git a/lib/include/arm_sve.h b/lib/include/arm_sve.h index 1035d41811..8a03f9da58 100644 --- a/lib/include/arm_sve.h +++ b/lib/include/arm_sve.h @@ -94,7 +94,7 @@ typedef __clang_svbfloat16x2_t svbfloat16x2_t; typedef __clang_svbfloat16x3_t svbfloat16x3_t; typedef __clang_svbfloat16x4_t svbfloat16x4_t; #endif -typedef enum +enum svpattern { SV_POW2 = 0, SV_VL1 = 1, @@ -113,9 +113,9 @@ typedef enum SV_MUL4 = 29, SV_MUL3 = 30, SV_ALL = 31 -} sv_pattern; +}; -typedef enum +enum svprfop { SV_PLDL1KEEP = 0, SV_PLDL1STRM = 1, @@ -129,7 +129,7 @@ typedef enum SV_PSTL2STRM = 11, SV_PSTL3KEEP = 12, SV_PSTL3STRM = 13 -} sv_prfop; +}; /* Function attributes */ #define __aio static inline __attribute__((__always_inline__, __nodebug__, __overloadable__)) @@ -10013,69 +10013,69 @@ int16_t svorv(svbool_t, svint16_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svpfirst_b))) svbool_t svpfirst(svbool_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u32base))) -void svprfb_gather(svbool_t, svuint32_t, sv_prfop); +void svprfb_gather(svbool_t, svuint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u64base))) -void svprfb_gather(svbool_t, svuint64_t, sv_prfop); +void svprfb_gather(svbool_t, svuint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u32base_offset))) -void svprfb_gather_offset(svbool_t, svuint32_t, int64_t, sv_prfop); +void svprfb_gather_offset(svbool_t, svuint32_t, int64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u64base_offset))) -void svprfb_gather_offset(svbool_t, svuint64_t, int64_t, sv_prfop); +void svprfb_gather_offset(svbool_t, svuint64_t, int64_t, enum svprfop); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_s32offset))) -void svprfb_gather_offset(svbool_t, void const *, svint32_t, sv_prfop); +void svprfb_gather_offset(svbool_t, void const *, svint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u32offset))) -void svprfb_gather_offset(svbool_t, void const *, svuint32_t, sv_prfop); +void svprfb_gather_offset(svbool_t, void const *, svuint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_s64offset))) -void svprfb_gather_offset(svbool_t, void const *, svint64_t, sv_prfop); +void svprfb_gather_offset(svbool_t, void const *, svint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfb_gather_u64offset))) -void svprfb_gather_offset(svbool_t, void const *, svuint64_t, sv_prfop); +void svprfb_gather_offset(svbool_t, void const *, svuint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u32base))) -void svprfd_gather(svbool_t, svuint32_t, sv_prfop); +void svprfd_gather(svbool_t, svuint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u64base))) -void svprfd_gather(svbool_t, svuint64_t, sv_prfop); +void svprfd_gather(svbool_t, svuint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u32base_index))) -void svprfd_gather_index(svbool_t, svuint32_t, int64_t, sv_prfop); +void svprfd_gather_index(svbool_t, svuint32_t, int64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u64base_index))) -void svprfd_gather_index(svbool_t, svuint64_t, int64_t, sv_prfop); +void svprfd_gather_index(svbool_t, svuint64_t, int64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_s32index))) -void svprfd_gather_index(svbool_t, void const *, svint32_t, sv_prfop); +void svprfd_gather_index(svbool_t, void const *, svint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u32index))) -void svprfd_gather_index(svbool_t, void const *, svuint32_t, sv_prfop); +void svprfd_gather_index(svbool_t, void const *, svuint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_s64index))) -void svprfd_gather_index(svbool_t, void const *, svint64_t, sv_prfop); +void svprfd_gather_index(svbool_t, void const *, svint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfd_gather_u64index))) -void svprfd_gather_index(svbool_t, void const *, svuint64_t, sv_prfop); +void svprfd_gather_index(svbool_t, void const *, svuint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u32base))) -void svprfh_gather(svbool_t, svuint32_t, sv_prfop); +void svprfh_gather(svbool_t, svuint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u64base))) -void svprfh_gather(svbool_t, svuint64_t, sv_prfop); +void svprfh_gather(svbool_t, svuint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u32base_index))) -void svprfh_gather_index(svbool_t, svuint32_t, int64_t, sv_prfop); +void svprfh_gather_index(svbool_t, svuint32_t, int64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u64base_index))) -void svprfh_gather_index(svbool_t, svuint64_t, int64_t, sv_prfop); +void 
svprfh_gather_index(svbool_t, svuint64_t, int64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_s32index))) -void svprfh_gather_index(svbool_t, void const *, svint32_t, sv_prfop); +void svprfh_gather_index(svbool_t, void const *, svint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u32index))) -void svprfh_gather_index(svbool_t, void const *, svuint32_t, sv_prfop); +void svprfh_gather_index(svbool_t, void const *, svuint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_s64index))) -void svprfh_gather_index(svbool_t, void const *, svint64_t, sv_prfop); +void svprfh_gather_index(svbool_t, void const *, svint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfh_gather_u64index))) -void svprfh_gather_index(svbool_t, void const *, svuint64_t, sv_prfop); +void svprfh_gather_index(svbool_t, void const *, svuint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u32base))) -void svprfw_gather(svbool_t, svuint32_t, sv_prfop); +void svprfw_gather(svbool_t, svuint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u64base))) -void svprfw_gather(svbool_t, svuint64_t, sv_prfop); +void svprfw_gather(svbool_t, svuint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u32base_index))) -void svprfw_gather_index(svbool_t, svuint32_t, int64_t, sv_prfop); +void svprfw_gather_index(svbool_t, svuint32_t, int64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u64base_index))) -void svprfw_gather_index(svbool_t, svuint64_t, int64_t, sv_prfop); +void svprfw_gather_index(svbool_t, svuint64_t, int64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_s32index))) -void svprfw_gather_index(svbool_t, void const *, svint32_t, sv_prfop); +void svprfw_gather_index(svbool_t, void const *, svint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u32index))) -void svprfw_gather_index(svbool_t, void const *, svuint32_t, sv_prfop); +void svprfw_gather_index(svbool_t, void const *, svuint32_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_s64index))) -void svprfw_gather_index(svbool_t, void const *, svint64_t, sv_prfop); +void svprfw_gather_index(svbool_t, void const *, svint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svprfw_gather_u64index))) -void svprfw_gather_index(svbool_t, void const *, svuint64_t, sv_prfop); +void svprfw_gather_index(svbool_t, void const *, svuint64_t, enum svprfop); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s8))) svint8_t svqadd(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqadd_n_s32))) @@ -10117,13 +10117,13 @@ uint32_t svqdecb(uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_n_u64))) uint64_t svqdecb(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_pat_n_s32))) -int32_t svqdecb_pat(int32_t, sv_pattern, uint64_t); +int32_t svqdecb_pat(int32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_pat_n_s64))) -int64_t svqdecb_pat(int64_t, sv_pattern, uint64_t); +int64_t svqdecb_pat(int64_t, enum svpattern, 
uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_pat_n_u32))) -uint32_t svqdecb_pat(uint32_t, sv_pattern, uint64_t); +uint32_t svqdecb_pat(uint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecb_pat_n_u64))) -uint64_t svqdecb_pat(uint64_t, sv_pattern, uint64_t); +uint64_t svqdecb_pat(uint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_n_s32))) int32_t svqdecd(int32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_n_s64))) @@ -10137,17 +10137,17 @@ svint64_t svqdecd(svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_u64))) svuint64_t svqdecd(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_n_s32))) -int32_t svqdecd_pat(int32_t, sv_pattern, uint64_t); +int32_t svqdecd_pat(int32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_n_s64))) -int64_t svqdecd_pat(int64_t, sv_pattern, uint64_t); +int64_t svqdecd_pat(int64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_n_u32))) -uint32_t svqdecd_pat(uint32_t, sv_pattern, uint64_t); +uint32_t svqdecd_pat(uint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_n_u64))) -uint64_t svqdecd_pat(uint64_t, sv_pattern, uint64_t); +uint64_t svqdecd_pat(uint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_s64))) -svint64_t svqdecd_pat(svint64_t, sv_pattern, uint64_t); +svint64_t svqdecd_pat(svint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecd_pat_u64))) -svuint64_t svqdecd_pat(svuint64_t, sv_pattern, uint64_t); +svuint64_t svqdecd_pat(svuint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_n_s32))) int32_t svqdech(int32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_n_s64))) @@ -10161,17 +10161,17 @@ svint16_t svqdech(svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_u16))) svuint16_t svqdech(svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_n_s32))) -int32_t svqdech_pat(int32_t, sv_pattern, uint64_t); +int32_t svqdech_pat(int32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_n_s64))) -int64_t svqdech_pat(int64_t, sv_pattern, uint64_t); +int64_t svqdech_pat(int64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_n_u32))) -uint32_t svqdech_pat(uint32_t, sv_pattern, uint64_t); +uint32_t svqdech_pat(uint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_n_u64))) -uint64_t svqdech_pat(uint64_t, sv_pattern, uint64_t); +uint64_t svqdech_pat(uint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_s16))) -svint16_t svqdech_pat(svint16_t, sv_pattern, uint64_t); +svint16_t svqdech_pat(svint16_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdech_pat_u16))) -svuint16_t svqdech_pat(svuint16_t, sv_pattern, uint64_t); +svuint16_t svqdech_pat(svuint16_t, enum svpattern, uint64_t); __aio 
__attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s32_b8))) int32_t svqdecp_b8(int32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecp_n_s32_b32))) @@ -10229,17 +10229,17 @@ svint32_t svqdecw(svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_u32))) svuint32_t svqdecw(svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_n_s32))) -int32_t svqdecw_pat(int32_t, sv_pattern, uint64_t); +int32_t svqdecw_pat(int32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_n_s64))) -int64_t svqdecw_pat(int64_t, sv_pattern, uint64_t); +int64_t svqdecw_pat(int64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_n_u32))) -uint32_t svqdecw_pat(uint32_t, sv_pattern, uint64_t); +uint32_t svqdecw_pat(uint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_n_u64))) -uint64_t svqdecw_pat(uint64_t, sv_pattern, uint64_t); +uint64_t svqdecw_pat(uint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_s32))) -svint32_t svqdecw_pat(svint32_t, sv_pattern, uint64_t); +svint32_t svqdecw_pat(svint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqdecw_pat_u32))) -svuint32_t svqdecw_pat(svuint32_t, sv_pattern, uint64_t); +svuint32_t svqdecw_pat(svuint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_n_s32))) int32_t svqincb(int32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_n_s64))) @@ -10249,13 +10249,13 @@ uint32_t svqincb(uint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_n_u64))) uint64_t svqincb(uint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_pat_n_s32))) -int32_t svqincb_pat(int32_t, sv_pattern, uint64_t); +int32_t svqincb_pat(int32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_pat_n_s64))) -int64_t svqincb_pat(int64_t, sv_pattern, uint64_t); +int64_t svqincb_pat(int64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_pat_n_u32))) -uint32_t svqincb_pat(uint32_t, sv_pattern, uint64_t); +uint32_t svqincb_pat(uint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincb_pat_n_u64))) -uint64_t svqincb_pat(uint64_t, sv_pattern, uint64_t); +uint64_t svqincb_pat(uint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_n_s32))) int32_t svqincd(int32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_n_s64))) @@ -10269,17 +10269,17 @@ svint64_t svqincd(svint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_u64))) svuint64_t svqincd(svuint64_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_n_s32))) -int32_t svqincd_pat(int32_t, sv_pattern, uint64_t); +int32_t svqincd_pat(int32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_n_s64))) -int64_t svqincd_pat(int64_t, sv_pattern, uint64_t); +int64_t svqincd_pat(int64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_n_u32))) 
-uint32_t svqincd_pat(uint32_t, sv_pattern, uint64_t); +uint32_t svqincd_pat(uint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_n_u64))) -uint64_t svqincd_pat(uint64_t, sv_pattern, uint64_t); +uint64_t svqincd_pat(uint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_s64))) -svint64_t svqincd_pat(svint64_t, sv_pattern, uint64_t); +svint64_t svqincd_pat(svint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincd_pat_u64))) -svuint64_t svqincd_pat(svuint64_t, sv_pattern, uint64_t); +svuint64_t svqincd_pat(svuint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_n_s32))) int32_t svqinch(int32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_n_s64))) @@ -10293,17 +10293,17 @@ svint16_t svqinch(svint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_u16))) svuint16_t svqinch(svuint16_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_n_s32))) -int32_t svqinch_pat(int32_t, sv_pattern, uint64_t); +int32_t svqinch_pat(int32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_n_s64))) -int64_t svqinch_pat(int64_t, sv_pattern, uint64_t); +int64_t svqinch_pat(int64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_n_u32))) -uint32_t svqinch_pat(uint32_t, sv_pattern, uint64_t); +uint32_t svqinch_pat(uint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_n_u64))) -uint64_t svqinch_pat(uint64_t, sv_pattern, uint64_t); +uint64_t svqinch_pat(uint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_s16))) -svint16_t svqinch_pat(svint16_t, sv_pattern, uint64_t); +svint16_t svqinch_pat(svint16_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqinch_pat_u16))) -svuint16_t svqinch_pat(svuint16_t, sv_pattern, uint64_t); +svuint16_t svqinch_pat(svuint16_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s32_b8))) int32_t svqincp_b8(int32_t, svbool_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincp_n_s32_b32))) @@ -10361,17 +10361,17 @@ svint32_t svqincw(svint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_u32))) svuint32_t svqincw(svuint32_t, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_n_s32))) -int32_t svqincw_pat(int32_t, sv_pattern, uint64_t); +int32_t svqincw_pat(int32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_n_s64))) -int64_t svqincw_pat(int64_t, sv_pattern, uint64_t); +int64_t svqincw_pat(int64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_n_u32))) -uint32_t svqincw_pat(uint32_t, sv_pattern, uint64_t); +uint32_t svqincw_pat(uint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_n_u64))) -uint64_t svqincw_pat(uint64_t, sv_pattern, uint64_t); +uint64_t svqincw_pat(uint64_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_s32))) -svint32_t svqincw_pat(svint32_t, sv_pattern, 
uint64_t); +svint32_t svqincw_pat(svint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqincw_pat_u32))) -svuint32_t svqincw_pat(svuint32_t, sv_pattern, uint64_t); +svuint32_t svqincw_pat(svuint32_t, enum svpattern, uint64_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s8))) svint8_t svqsub(svint8_t, int8_t); __aio __attribute__((__clang_arm_builtin_alias(__builtin_sve_svqsub_n_s32))) diff --git a/lib/include/avx512fintrin.h b/lib/include/avx512fintrin.h index fa22ef3fdd..f226382cbb 100644 --- a/lib/include/avx512fintrin.h +++ b/lib/include/avx512fintrin.h @@ -9297,303 +9297,232 @@ _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A) /* Vector-reduction arithmetic accepts vectors as inputs and produces scalars as * outputs. This class of vector operation forms the basis of many scientific - * computations. In vector-reduction arithmetic, the evaluation off is + * computations. In vector-reduction arithmetic, the evaluation order is * independent of the order of the input elements of V. + * For floating point types, we always assume the elements are reassociable even + * if -fast-math is off. + * Used bisection method. At each step, we partition the vector with previous * step in half, and the operation is performed on its two halves. * This takes log2(n) steps where n is the number of elements in the vector. */ -#define _mm512_mask_reduce_operator(op) \ - __v4du __t1 = (__v4du)_mm512_extracti64x4_epi64(__W, 0); \ - __v4du __t2 = (__v4du)_mm512_extracti64x4_epi64(__W, 1); \ - __m256i __t3 = (__m256i)(__t1 op __t2); \ - __v2du __t4 = (__v2du)_mm256_extracti128_si256(__t3, 0); \ - __v2du __t5 = (__v2du)_mm256_extracti128_si256(__t3, 1); \ - __v2du __t6 = __t4 op __t5; \ - __v2du __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \ - __v2du __t8 = __t6 op __t7; \ - return __t8[0] - static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) { - _mm512_mask_reduce_operator(+); + return __builtin_ia32_reduce_add_q512(__W); } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W) { - _mm512_mask_reduce_operator(*); + return __builtin_ia32_reduce_mul_q512(__W); } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W) { - _mm512_mask_reduce_operator(&); + return __builtin_ia32_reduce_and_q512(__W); } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W) { - _mm512_mask_reduce_operator(|); + return __builtin_ia32_reduce_or_q512(__W); } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) { __W = _mm512_maskz_mov_epi64(__M, __W); - _mm512_mask_reduce_operator(+); + return __builtin_ia32_reduce_add_q512(__W); } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) { __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(1), __M, __W); - _mm512_mask_reduce_operator(*); + return __builtin_ia32_reduce_mul_q512(__W); } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) { __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(~0ULL), __M, __W); - _mm512_mask_reduce_operator(&); + return __builtin_ia32_reduce_and_q512(__W); } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) { __W = _mm512_maskz_mov_epi64(__M, __W); - _mm512_mask_reduce_operator(|); + return __builtin_ia32_reduce_or_q512(__W); } -#undef 
_mm512_mask_reduce_operator - -#define _mm512_mask_reduce_operator(op) \ - __m256d __t1 = _mm512_extractf64x4_pd(__W, 0); \ - __m256d __t2 = _mm512_extractf64x4_pd(__W, 1); \ - __m256d __t3 = __t1 op __t2; \ - __m128d __t4 = _mm256_extractf128_pd(__t3, 0); \ - __m128d __t5 = _mm256_extractf128_pd(__t3, 1); \ - __m128d __t6 = __t4 op __t5; \ - __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \ - __m128d __t8 = __t6 op __t7; \ - return __t8[0] +// -0.0 is used to ignore the start value since it is the neutral value of +// floating point addition. For more information, please refer to +// https://llvm.org/docs/LangRef.html#llvm-vector-reduce-fadd-intrinsic static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) { - _mm512_mask_reduce_operator(+); + return __builtin_ia32_reduce_fadd_pd512(-0.0, __W); } static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) { - _mm512_mask_reduce_operator(*); + return __builtin_ia32_reduce_fmul_pd512(1.0, __W); } static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) { __W = _mm512_maskz_mov_pd(__M, __W); - _mm512_mask_reduce_operator(+); + return __builtin_ia32_reduce_fadd_pd512(-0.0, __W); } static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) { __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W); - _mm512_mask_reduce_operator(*); + return __builtin_ia32_reduce_fmul_pd512(1.0, __W); } -#undef _mm512_mask_reduce_operator - -#define _mm512_mask_reduce_operator(op) \ - __v8su __t1 = (__v8su)_mm512_extracti64x4_epi64(__W, 0); \ - __v8su __t2 = (__v8su)_mm512_extracti64x4_epi64(__W, 1); \ - __m256i __t3 = (__m256i)(__t1 op __t2); \ - __v4su __t4 = (__v4su)_mm256_extracti128_si256(__t3, 0); \ - __v4su __t5 = (__v4su)_mm256_extracti128_si256(__t3, 1); \ - __v4su __t6 = __t4 op __t5; \ - __v4su __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \ - __v4su __t8 = __t6 op __t7; \ - __v4su __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \ - __v4su __t10 = __t8 op __t9; \ - return __t10[0] static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi32(__m512i __W) { - _mm512_mask_reduce_operator(+); + return __builtin_ia32_reduce_add_d512((__v16si)__W); } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi32(__m512i __W) { - _mm512_mask_reduce_operator(*); + return __builtin_ia32_reduce_mul_d512((__v16si)__W); } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi32(__m512i __W) { - _mm512_mask_reduce_operator(&); + return __builtin_ia32_reduce_and_d512((__v16si)__W); } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi32(__m512i __W) { - _mm512_mask_reduce_operator(|); + return __builtin_ia32_reduce_or_d512((__v16si)__W); } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) { __W = _mm512_maskz_mov_epi32(__M, __W); - _mm512_mask_reduce_operator(+); + return __builtin_ia32_reduce_add_d512((__v16si)__W); } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) { __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), __M, __W); - _mm512_mask_reduce_operator(*); + return __builtin_ia32_reduce_mul_d512((__v16si)__W); } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) { __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __W); - _mm512_mask_reduce_operator(&); + return __builtin_ia32_reduce_and_d512((__v16si)__W); } 
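Editorial aside, not part of the patch: the hunk above swaps the open-coded shuffle ladders for single __builtin_ia32_reduce_* calls, while the public intrinsic names and semantics stay the same. A minimal usage sketch of the unchanged API (hypothetical test program; assumes an AVX-512F-capable CPU and a compiler invoked with something like -mavx512f):

#include <immintrin.h>
#include <stdio.h>

int main(void) {
    __m512i vi = _mm512_set1_epi32(3);   /* sixteen int lanes, each 3 */
    __m512d vd = _mm512_set1_pd(0.5);    /* eight double lanes, each 0.5 */

    /* Unmasked reduction: 16 * 3 == 48. */
    printf("add_epi32      = %d\n", _mm512_reduce_add_epi32(vi));

    /* The masked form zeroes the deselected lanes first (maskz_mov above),
       so selecting the low 8 lanes gives 8 * 3 == 24. */
    printf("mask add_epi32 = %d\n", _mm512_mask_reduce_add_epi32(0x00FF, vi));

    /* The fadd builtin starts from -0.0, the neutral element of FP addition
       per the new comment, so this is simply 8 * 0.5 == 4.0. */
    printf("add_pd         = %f\n", _mm512_reduce_add_pd(vd));
    return 0;
}

On hardware without AVX-512F the program would trap with an illegal instruction, so real callers would gate it behind a CPUID check.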
static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) { __W = _mm512_maskz_mov_epi32(__M, __W); - _mm512_mask_reduce_operator(|); + return __builtin_ia32_reduce_or_d512((__v16si)__W); } -#undef _mm512_mask_reduce_operator - -#define _mm512_mask_reduce_operator(op) \ - __m256 __t1 = (__m256)_mm512_extractf64x4_pd((__m512d)__W, 0); \ - __m256 __t2 = (__m256)_mm512_extractf64x4_pd((__m512d)__W, 1); \ - __m256 __t3 = __t1 op __t2; \ - __m128 __t4 = _mm256_extractf128_ps(__t3, 0); \ - __m128 __t5 = _mm256_extractf128_ps(__t3, 1); \ - __m128 __t6 = __t4 op __t5; \ - __m128 __t7 = __builtin_shufflevector(__t6, __t6, 2, 3, 0, 1); \ - __m128 __t8 = __t6 op __t7; \ - __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \ - __m128 __t10 = __t8 op __t9; \ - return __t10[0] static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_add_ps(__m512 __W) { - _mm512_mask_reduce_operator(+); + return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W); } static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_ps(__m512 __W) { - _mm512_mask_reduce_operator(*); + return __builtin_ia32_reduce_fmul_ps512(1.0f, __W); } static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) { __W = _mm512_maskz_mov_ps(__M, __W); - _mm512_mask_reduce_operator(+); + return __builtin_ia32_reduce_fadd_ps512(-0.0f, __W); } static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) { __W = _mm512_mask_mov_ps(_mm512_set1_ps(1.0f), __M, __W); - _mm512_mask_reduce_operator(*); + return __builtin_ia32_reduce_fmul_ps512(1.0f, __W); } -#undef _mm512_mask_reduce_operator - -#define _mm512_mask_reduce_operator(op) \ - __m512i __t1 = (__m512i)__builtin_shufflevector((__v8di)__V, (__v8di)__V, 4, 5, 6, 7, 0, 1, 2, 3); \ - __m512i __t2 = _mm512_##op(__V, __t1); \ - __m512i __t3 = (__m512i)__builtin_shufflevector((__v8di)__t2, (__v8di)__t2, 2, 3, 0, 1, 6, 7, 4, 5); \ - __m512i __t4 = _mm512_##op(__t2, __t3); \ - __m512i __t5 = (__m512i)__builtin_shufflevector((__v8di)__t4, (__v8di)__t4, 1, 0, 3, 2, 5, 4, 7, 6); \ - __v8di __t6 = (__v8di)_mm512_##op(__t4, __t5); \ - return __t6[0] static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi64(__m512i __V) { - _mm512_mask_reduce_operator(max_epi64); + return __builtin_ia32_reduce_smax_q512(__V); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epu64(__m512i __V) { - _mm512_mask_reduce_operator(max_epu64); + return __builtin_ia32_reduce_umax_q512(__V); } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epi64(__m512i __V) { - _mm512_mask_reduce_operator(min_epi64); + return __builtin_ia32_reduce_smin_q512(__V); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epu64(__m512i __V) { - _mm512_mask_reduce_operator(min_epu64); + return __builtin_ia32_reduce_umin_q512(__V); } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) { __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V); - _mm512_mask_reduce_operator(max_epi64); + return __builtin_ia32_reduce_smax_q512(__V); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) { __V = _mm512_maskz_mov_epi64(__M, __V); - _mm512_mask_reduce_operator(max_epu64); + return __builtin_ia32_reduce_umax_q512(__V); } static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi64(__mmask8 
__M, __m512i __V) { __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V); - _mm512_mask_reduce_operator(min_epi64); + return __builtin_ia32_reduce_smin_q512(__V); } static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) { __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(~0ULL), __M, __V); - _mm512_mask_reduce_operator(min_epu64); + return __builtin_ia32_reduce_umin_q512(__V); } -#undef _mm512_mask_reduce_operator - -#define _mm512_mask_reduce_operator(op) \ - __m256i __t1 = _mm512_extracti64x4_epi64(__V, 0); \ - __m256i __t2 = _mm512_extracti64x4_epi64(__V, 1); \ - __m256i __t3 = _mm256_##op(__t1, __t2); \ - __m128i __t4 = _mm256_extracti128_si256(__t3, 0); \ - __m128i __t5 = _mm256_extracti128_si256(__t3, 1); \ - __m128i __t6 = _mm_##op(__t4, __t5); \ - __m128i __t7 = (__m128i)__builtin_shufflevector((__v4si)__t6, (__v4si)__t6, 2, 3, 0, 1); \ - __m128i __t8 = _mm_##op(__t6, __t7); \ - __m128i __t9 = (__m128i)__builtin_shufflevector((__v4si)__t8, (__v4si)__t8, 1, 0, 3, 2); \ - __v4si __t10 = (__v4si)_mm_##op(__t8, __t9); \ - return __t10[0] - static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi32(__m512i __V) { - _mm512_mask_reduce_operator(max_epi32); + return __builtin_ia32_reduce_smax_d512((__v16si)__V); } static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epu32(__m512i __V) { - _mm512_mask_reduce_operator(max_epu32); + return __builtin_ia32_reduce_umax_d512((__v16si)__V); } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epi32(__m512i __V) { - _mm512_mask_reduce_operator(min_epi32); + return __builtin_ia32_reduce_smin_d512((__v16si)__V); } static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epu32(__m512i __V) { - _mm512_mask_reduce_operator(min_epu32); + return __builtin_ia32_reduce_umin_d512((__v16si)__V); } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) { __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V); - _mm512_mask_reduce_operator(max_epi32); + return __builtin_ia32_reduce_smax_d512((__v16si)__V); } static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) { __V = _mm512_maskz_mov_epi32(__M, __V); - _mm512_mask_reduce_operator(max_epu32); + return __builtin_ia32_reduce_umax_d512((__v16si)__V); } static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) { __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V); - _mm512_mask_reduce_operator(min_epi32); + return __builtin_ia32_reduce_smin_d512((__v16si)__V); } static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) { __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __V); - _mm512_mask_reduce_operator(min_epu32); + return __builtin_ia32_reduce_umin_d512((__v16si)__V); } -#undef _mm512_mask_reduce_operator #define _mm512_mask_reduce_operator(op) \ __m256d __t1 = _mm512_extractf64x4_pd(__V, 0); \ diff --git a/lib/include/avx512vlvnniintrin.h b/lib/include/avx512vlvnniintrin.h index b7c8fa08c6..71ac1b4370 100644 --- a/lib/include/avx512vlvnniintrin.h +++ b/lib/include/avx512vlvnniintrin.h @@ -18,13 +18,157 @@ #define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, 
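Editorial aside, not part of the patch: the doc comments added just below describe _mm256_dpbusd_epi32 and friends with \operation pseudocode. As a cross-check of that pseudocode, here is a hypothetical scalar reference for one 32-bit lane of the unsaturated u8-by-s8 case, which a test could compare against the intrinsic lane by lane:

#include <stdint.h>

/* Scalar model of one dword lane of VPDPBUSD, following the \operation
 * pseudocode: four products of a zero-extended unsigned byte and a
 * sign-extended signed byte are formed as signed 16-bit words, then summed
 * into the 32-bit accumulator lane without saturation. The 16-bit
 * intermediates cannot overflow: the product ranges from 255*(-128) = -32640
 * to 255*127 = 32385. */
static int32_t dpbusd_lane(int32_t s, const uint8_t a[4], const int8_t b[4])
{
    int32_t acc = s;
    for (int k = 0; k < 4; ++k) {
        int16_t prod = (int16_t)((uint16_t)a[k] * (int16_t)b[k]);
        acc += prod;   /* sign-extended into the 32-bit accumulator */
    }
    return acc;
}

The helper name dpbusd_lane is illustrative only; the saturating variants would differ just in clamping the final sum to INT32_MIN..INT32_MAX.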
__target__("avx512vl,avx512vnni"), __min_vector_width__(256))) +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with +/// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed +/// 16-bit results. Sum these 4 results with the corresponding 32-bit integer +/// in \a S, and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPBUSD instructions. +/// +/// \operation +/// FOR j := 0 to 7 +/// tmp1.word := Signed(ZeroExtend16(A.byte[4*j]) * SignExtend16(B.byte[4*j])) +/// tmp2.word := Signed(ZeroExtend16(A.byte[4*j+1]) * SignExtend16(B.byte[4*j+1])) +/// tmp3.word := Signed(ZeroExtend16(A.byte[4*j+2]) * SignExtend16(B.byte[4*j+2])) +/// tmp4.word := Signed(ZeroExtend16(A.byte[4*j+3]) * SignExtend16(B.byte[4*j+3])) +/// DST.dword[j] := S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +/// ENDFOR +/// DST[MAX:256] := 0 +/// \endoperation +#define _mm256_dpbusd_epi32(S, A, B) \ + (__m256i)__builtin_ia32_vpdpbusd256((__v8si)(S), (__v8si)(A), (__v8si)(B)) -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v8si)__A, - (__v8si)__B); -} +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with +/// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed +/// 16-bit results. Sum these 4 results with the corresponding 32-bit integer +/// in \a S using signed saturation, and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPBUSDS instructions. +/// +/// \operation +/// FOR j := 0 to 7 +/// tmp1.word := Signed(ZeroExtend16(A.byte[4*j]) * SignExtend16(B.byte[4*j])) +/// tmp2.word := Signed(ZeroExtend16(A.byte[4*j+1]) * SignExtend16(B.byte[4*j+1])) +/// tmp3.word := Signed(ZeroExtend16(A.byte[4*j+2]) * SignExtend16(B.byte[4*j+2])) +/// tmp4.word := Signed(ZeroExtend16(A.byte[4*j+3]) * SignExtend16(B.byte[4*j+3])) +/// DST.dword[j] := Saturate32(S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +/// ENDFOR +/// DST[MAX:256] := 0 +/// \endoperation +#define _mm256_dpbusds_epi32(S, A, B) \ + (__m256i)__builtin_ia32_vpdpbusds256((__v8si)(S), (__v8si)(A), (__v8si)(B)) + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with +/// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit +/// results. Sum these 2 results with the corresponding 32-bit integer in \a S, +/// and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPWSSD instructions. +/// +/// \operation +/// FOR j := 0 to 7 +/// tmp1.dword := SignExtend32(A.word[2*j]) * SignExtend32(B.word[2*j]) +/// tmp2.dword := SignExtend32(A.word[2*j+1]) * SignExtend32(B.word[2*j+1]) +/// DST.dword[j] := S.dword[j] + tmp1 + tmp2 +/// ENDFOR +/// DST[MAX:256] := 0 +/// \endoperation +#define _mm256_dpwssd_epi32(S, A, B) \ + (__m256i)__builtin_ia32_vpdpwssd256((__v8si)(S), (__v8si)(A), (__v8si)(B)) + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with +/// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit +/// results. Sum these 2 results with the corresponding 32-bit integer in \a S +/// using signed saturation, and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPWSSDS instructions. 
+/// +/// \operation +/// FOR j := 0 to 7 +/// tmp1.dword := SignExtend32(A.word[2*j]) * SignExtend32(B.word[2*j]) +/// tmp2.dword := SignExtend32(A.word[2*j+1]) * SignExtend32(B.word[2*j+1]) +/// DST.dword[j] := Saturate32(S.dword[j] + tmp1 + tmp2) +/// ENDFOR +/// DST[MAX:256] := 0 +/// \endoperation +#define _mm256_dpwssds_epi32(S, A, B) \ + (__m256i)__builtin_ia32_vpdpwssds256((__v8si)(S), (__v8si)(A), (__v8si)(B)) + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with +/// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed +/// 16-bit results. Sum these 4 results with the corresponding 32-bit integer +/// in \a S, and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPBUSD instructions. +/// +/// \operation +/// FOR j := 0 to 3 +/// tmp1.word := Signed(ZeroExtend16(A.byte[4*j]) * SignExtend16(B.byte[4*j])) +/// tmp2.word := Signed(ZeroExtend16(A.byte[4*j+1]) * SignExtend16(B.byte[4*j+1])) +/// tmp3.word := Signed(ZeroExtend16(A.byte[4*j+2]) * SignExtend16(B.byte[4*j+2])) +/// tmp4.word := Signed(ZeroExtend16(A.byte[4*j+3]) * SignExtend16(B.byte[4*j+3])) +/// DST.dword[j] := S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +/// ENDFOR +/// DST[MAX:128] := 0 +/// \endoperation +#define _mm_dpbusd_epi32(S, A, B) \ + (__m128i)__builtin_ia32_vpdpbusd128((__v4si)(S), (__v4si)(A), (__v4si)(B)) + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a A with +/// corresponding signed 8-bit integers in \a B, producing 4 intermediate signed +/// 16-bit results. Sum these 4 results with the corresponding 32-bit integer +/// in \a S using signed saturation, and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPBUSDS instructions. +/// +/// \operation +/// FOR j := 0 to 3 +/// tmp1.word := Signed(ZeroExtend16(A.byte[4*j]) * SignExtend16(B.byte[4*j])) +/// tmp2.word := Signed(ZeroExtend16(A.byte[4*j+1]) * SignExtend16(B.byte[4*j+1])) +/// tmp3.word := Signed(ZeroExtend16(A.byte[4*j+2]) * SignExtend16(B.byte[4*j+2])) +/// tmp4.word := Signed(ZeroExtend16(A.byte[4*j+3]) * SignExtend16(B.byte[4*j+3])) +/// DST.dword[j] := Saturate32(S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +/// ENDFOR +/// DST[MAX:128] := 0 +/// \endoperation +#define _mm_dpbusds_epi32(S, A, B) \ + (__m128i)__builtin_ia32_vpdpbusds128((__v4si)(S), (__v4si)(A), (__v4si)(B)) + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with +/// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit +/// results. Sum these 2 results with the corresponding 32-bit integer in \a S, +/// and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPWSSD instructions. +/// +/// \operation +/// FOR j := 0 to 3 +/// tmp1.dword := SignExtend32(A.word[2*j]) * SignExtend32(B.word[2*j]) +/// tmp2.dword := SignExtend32(A.word[2*j+1]) * SignExtend32(B.word[2*j+1]) +/// DST.dword[j] := S.dword[j] + tmp1 + tmp2 +/// ENDFOR +/// DST[MAX:128] := 0 +/// \endoperation +#define _mm_dpwssd_epi32(S, A, B) \ + (__m128i)__builtin_ia32_vpdpwssd128((__v4si)(S), (__v4si)(A), (__v4si)(B)) + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a A with +/// corresponding 16-bit integers in \a B, producing 2 intermediate signed 32-bit +/// results. Sum these 2 results with the corresponding 32-bit integer in \a S +/// using signed saturation, and store the packed 32-bit results in DST. 
+/// +/// This intrinsic corresponds to the VPDPWSSDS instructions. +/// +/// \operation +/// FOR j := 0 to 3 +/// tmp1.dword := SignExtend32(A.word[2*j]) * SignExtend32(B.word[2*j]) +/// tmp2.dword := SignExtend32(A.word[2*j+1]) * SignExtend32(B.word[2*j+1]) +/// DST.dword[j] := Saturate32(S.dword[j] + tmp1 + tmp2) +/// ENDFOR +/// DST[MAX:128] := 0 +/// \endoperation +#define _mm_dpwssds_epi32(S, A, B) \ + (__m128i)__builtin_ia32_vpdpwssds128((__v4si)(S), (__v4si)(A), (__v4si)(B)) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) @@ -42,13 +186,6 @@ _mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v8si)__A, - (__v8si)__B); -} - static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { @@ -65,13 +202,6 @@ _mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v8si)__A, - (__v8si)__B); -} - static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { @@ -88,13 +218,6 @@ _mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) -{ - return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v8si)__A, - (__v8si)__B); -} - static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { @@ -111,13 +234,6 @@ _mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v4si)__A, - (__v4si)__B); -} - static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { @@ -134,13 +250,6 @@ _mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v4si)__A, - (__v4si)__B); -} - static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { @@ -157,13 +266,6 @@ _mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v4si)__A, - (__v4si)__B); -} - static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { @@ -180,13 +282,6 @@ _mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i 
__DEFAULT_FN_ATTRS128 -_mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) -{ - return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v4si)__A, - (__v4si)__B); -} - static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { diff --git a/lib/include/avxintrin.h b/lib/include/avxintrin.h index 84421bf1b9..382b621575 100644 --- a/lib/include/avxintrin.h +++ b/lib/include/avxintrin.h @@ -2245,7 +2245,7 @@ _mm256_cvttps_epi32(__m256 __a) /// Returns the first element of the input vector of [4 x double]. /// -/// \headerfile +/// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. @@ -2261,7 +2261,7 @@ _mm256_cvtsd_f64(__m256d __a) /// Returns the first element of the input vector of [8 x i32]. /// -/// \headerfile +/// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. @@ -2278,7 +2278,7 @@ _mm256_cvtsi256_si32(__m256i __a) /// Returns the first element of the input vector of [8 x float]. /// -/// \headerfile +/// \headerfile /// /// This intrinsic is a utility function and does not correspond to a specific /// instruction. diff --git a/lib/include/avxvnniintrin.h b/lib/include/avxvnniintrin.h new file mode 100644 index 0000000000..ad45cb7962 --- /dev/null +++ b/lib/include/avxvnniintrin.h @@ -0,0 +1,225 @@ +/*===--------------- avxvnniintrin.h - VNNI intrinsics --------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." 
+#endif + +#ifndef __AVXVNNIINTRIN_H +#define __AVXVNNIINTRIN_H + +/* Below intrinsics defined in avx512vlvnniintrin.h can be used for AVXVNNI */ +/// \fn __m256i _mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) +/// \fn __m256i _mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) +/// \fn __m256i _mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) +/// \fn __m256i _mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) +/// \fn __m128i _mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) +/// \fn __m128i _mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) +/// \fn __m128i _mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) +/// \fn __m128i _mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) + +/* Intrinsics with _avx_ prefix are for compatibility with msvc. */ +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avxvnni"), __min_vector_width__(256))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avxvnni"), __min_vector_width__(128))) + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with +/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate signed +/// 16-bit results. Sum these 4 results with the corresponding 32-bit integer +/// in \a __S, and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPBUSD instructions. +/// +/// \operation +/// FOR j := 0 to 7 +/// tmp1.word := Signed(ZeroExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j])) +/// tmp2.word := Signed(ZeroExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1])) +/// tmp3.word := Signed(ZeroExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2])) +/// tmp4.word := Signed(ZeroExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3])) +/// DST.dword[j] := __S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +/// ENDFOR +/// DST[MAX:256] := 0 +/// \endoperation +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_dpbusd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v8si)__A, (__v8si)__B); +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with +/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate signed +/// 16-bit results. Sum these 4 results with the corresponding 32-bit integer +/// in \a __S using signed saturation, and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPBUSDS instructions. +/// +/// \operation +/// FOR j := 0 to 7 +/// tmp1.word := Signed(ZeroExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j])) +/// tmp2.word := Signed(ZeroExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1])) +/// tmp3.word := Signed(ZeroExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2])) +/// tmp4.word := Signed(ZeroExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3])) +/// DST.dword[j] := Saturate32(__S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +/// ENDFOR +/// DST[MAX:256] := 0 +/// \endoperation +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_dpbusds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v8si)__A, (__v8si)__B); +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with +/// corresponding 16-bit integers in \a __B, producing 2 intermediate signed 32-bit +/// results. 
Sum these 2 results with the corresponding 32-bit integer in \a __S, +/// and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPWSSD instructions. +/// +/// \operation +/// FOR j := 0 to 7 +/// tmp1.dword := SignExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) +/// tmp2.dword := SignExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) +/// DST.dword[j] := __S.dword[j] + tmp1 + tmp2 +/// ENDFOR +/// DST[MAX:256] := 0 +/// \endoperation +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_dpwssd_avx_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v8si)__A, (__v8si)__B); +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with +/// corresponding 16-bit integers in \a __B, producing 2 intermediate signed 32-bit +/// results. Sum these 2 results with the corresponding 32-bit integer in \a __S +/// using signed saturation, and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPWSSDS instructions. +/// +/// \operation +/// FOR j := 0 to 7 +/// tmp1.dword := SignExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) +/// tmp2.dword := SignExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) +/// DST.dword[j] := Saturate32(__S.dword[j] + tmp1 + tmp2) +/// ENDFOR +/// DST[MAX:256] := 0 +/// \endoperation +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_dpwssds_avx_epi32(__m256i __S, __m256i __A, __m256i __B) +{ + return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v8si)__A, (__v8si)__B); +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with +/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate signed +/// 16-bit results. Sum these 4 results with the corresponding 32-bit integer +/// in \a __S, and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPBUSD instructions. +/// +/// \operation +/// FOR j := 0 to 3 +/// tmp1.word := Signed(ZeroExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j])) +/// tmp2.word := Signed(ZeroExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1])) +/// tmp3.word := Signed(ZeroExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2])) +/// tmp4.word := Signed(ZeroExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3])) +/// DST.dword[j] := __S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4 +/// ENDFOR +/// DST[MAX:128] := 0 +/// \endoperation +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_dpbusd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v4si)__A, (__v4si)__B); +} + +/// Multiply groups of 4 adjacent pairs of unsigned 8-bit integers in \a __A with +/// corresponding signed 8-bit integers in \a __B, producing 4 intermediate signed +/// 16-bit results. Sum these 4 results with the corresponding 32-bit integer +/// in \a __S using signed saturation, and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPBUSDS instructions. 
+/// +/// \operation +/// FOR j := 0 to 3 +/// tmp1.word := Signed(ZeroExtend16(__A.byte[4*j]) * SignExtend16(__B.byte[4*j])) +/// tmp2.word := Signed(ZeroExtend16(__A.byte[4*j+1]) * SignExtend16(__B.byte[4*j+1])) +/// tmp3.word := Signed(ZeroExtend16(__A.byte[4*j+2]) * SignExtend16(__B.byte[4*j+2])) +/// tmp4.word := Signed(ZeroExtend16(__A.byte[4*j+3]) * SignExtend16(__B.byte[4*j+3])) +/// DST.dword[j] := Saturate32(__S.dword[j] + tmp1 + tmp2 + tmp3 + tmp4) +/// ENDFOR +/// DST[MAX:128] := 0 +/// \endoperation +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_dpbusds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v4si)__A, (__v4si)__B); +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with +/// corresponding 16-bit integers in \a __B, producing 2 intermediate signed 32-bit +/// results. Sum these 2 results with the corresponding 32-bit integer in \a __S, +/// and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPWSSD instructions. +/// +/// \operation +/// FOR j := 0 to 3 +/// tmp1.dword := SignExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) +/// tmp2.dword := SignExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) +/// DST.dword[j] := __S.dword[j] + tmp1 + tmp2 +/// ENDFOR +/// DST[MAX:128] := 0 +/// \endoperation +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_dpwssd_avx_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v4si)__A, (__v4si)__B); +} + +/// Multiply groups of 2 adjacent pairs of signed 16-bit integers in \a __A with +/// corresponding 16-bit integers in \a __B, producing 2 intermediate signed 32-bit +/// results. Sum these 2 results with the corresponding 32-bit integer in \a __S +/// using signed saturation, and store the packed 32-bit results in DST. +/// +/// This intrinsic corresponds to the VPDPWSSDS instructions. 
+/// +/// \operation +/// FOR j := 0 to 3 +/// tmp1.dword := SignExtend32(__A.word[2*j]) * SignExtend32(__B.word[2*j]) +/// tmp2.dword := SignExtend32(__A.word[2*j+1]) * SignExtend32(__B.word[2*j+1]) +/// DST.dword[j] := Saturate32(__S.dword[j] + tmp1 + tmp2) +/// ENDFOR +/// DST[MAX:128] := 0 +/// \endoperation +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_dpwssds_avx_epi32(__m128i __S, __m128i __A, __m128i __B) +{ + return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v4si)__A, (__v4si)__B); +} + +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 + +#endif // __AVXVNNIINTRIN_H diff --git a/lib/include/cpuid.h b/lib/include/cpuid.h index 2a88c042d0..34f0e76807 100644 --- a/lib/include/cpuid.h +++ b/lib/include/cpuid.h @@ -7,6 +7,9 @@ *===-----------------------------------------------------------------------=== */ +#ifndef __CPUID_H +#define __CPUID_H + #if !(__x86_64__ || __i386__) #error this header is for x86 only #endif @@ -186,6 +189,7 @@ /* Features in %edx for leaf 7 sub-leaf 0 */ #define bit_AVX5124VNNIW 0x00000004 #define bit_AVX5124FMAPS 0x00000008 +#define bit_UINTR 0x00000020 #define bit_SERIALIZE 0x00004000 #define bit_TSXLDTRK 0x00010000 #define bit_PCONFIG 0x00040000 @@ -195,7 +199,9 @@ #define bit_AMXINT8 0x02000000 /* Features in %eax for leaf 7 sub-leaf 1 */ +#define bit_AVXVNNI 0x00000008 #define bit_AVX512BF16 0x00000020 +#define bit_HRESET 0x00400000 /* Features in %eax for leaf 13 sub-leaf 1 */ #define bit_XSAVEOPT 0x00000001 @@ -309,3 +315,5 @@ static __inline int __get_cpuid_count (unsigned int __leaf, __cpuid_count(__leaf, __subleaf, *__eax, *__ebx, *__ecx, *__edx); return 1; } + +#endif /* __CPUID_H */ diff --git a/lib/include/cuda_wrappers/algorithm b/lib/include/cuda_wrappers/algorithm index 01af18360d..f14a0b00bb 100644 --- a/lib/include/cuda_wrappers/algorithm +++ b/lib/include/cuda_wrappers/algorithm @@ -1,4 +1,4 @@ -/*===---- complex - CUDA wrapper for ----------------------------=== +/*===---- algorithm - CUDA wrapper for -------------------------=== * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal diff --git a/lib/include/cuda_wrappers/new b/lib/include/cuda_wrappers/new index f49811c5a5..d5fb3b7011 100644 --- a/lib/include/cuda_wrappers/new +++ b/lib/include/cuda_wrappers/new @@ -1,4 +1,4 @@ -/*===---- complex - CUDA wrapper for ------------------------------=== +/*===---- new - CUDA wrapper for -------------------------------------=== * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -26,6 +26,13 @@ #include_next +#if !defined(__device__) +// The header has been included too early from the standard C++ library +// and CUDA-specific macros are not available yet. +// Undo the include guard and try again later. +#undef __CLANG_CUDA_WRAPPERS_NEW +#else + #pragma push_macro("CUDA_NOEXCEPT") #if __cplusplus >= 201103L #define CUDA_NOEXCEPT noexcept @@ -95,4 +102,5 @@ __device__ inline void operator delete[](void *, void *) CUDA_NOEXCEPT {} #pragma pop_macro("CUDA_NOEXCEPT") +#endif // __device__ #endif // include guard diff --git a/lib/include/emmintrin.h b/lib/include/emmintrin.h index 73a777b107..bb759721fa 100644 --- a/lib/include/emmintrin.h +++ b/lib/include/emmintrin.h @@ -4025,7 +4025,7 @@ _mm_storeu_si128(__m128i_u *__p, __m128i __b) /// /// \param __p /// A pointer to a 64-bit memory location. 
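With the avxvnniintrin.h additions and the new cpuid.h feature bits above, callers can probe for AVX-VNNI at runtime and use the VEX-encoded dot-product forms without touching any AVX-512 state. A minimal, hedged usage sketch — assuming the translation unit is built with -mavxvnni and that detection goes through the bit_AVXVNNI definition added to cpuid.h:

    /* Sketch: accumulate signed 16-bit pair products into 32-bit lanes with
     * the VEX-encoded VNNI form declared above.  Runtime detection uses the
     * bit_AVXVNNI definition added to cpuid.h (CPUID leaf 7, sub-leaf 1, EAX). */
    #include <cpuid.h>
    #include <immintrin.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        unsigned int eax, ebx, ecx, edx;
        if (!__get_cpuid_count(7, 1, &eax, &ebx, &ecx, &edx) ||
            !(eax & bit_AVXVNNI)) {
            fprintf(stderr, "AVX-VNNI not available\n");
            return 1;
        }

        int16_t a[16], b[16];
        for (int i = 0; i < 16; ++i) { a[i] = (int16_t)(i + 1); b[i] = 2; }

        __m256i va  = _mm256_loadu_si256((const __m256i *)a);
        __m256i vb  = _mm256_loadu_si256((const __m256i *)b);
        __m256i acc = _mm256_setzero_si256();

        /* Each 32-bit lane j becomes a[2j]*b[2j] + a[2j+1]*b[2j+1]. */
        acc = _mm256_dpwssd_avx_epi32(acc, va, vb);

        int32_t out[8];
        _mm256_storeu_si256((__m256i *)out, acc);
        for (int j = 0; j < 8; ++j)
            printf("lane %d = %d\n", j, (int)out[j]);
        return 0;
    }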
The address of the memory -/// location does not have to be algned. +/// location does not have to be aligned. /// \param __b /// A 128-bit integer vector containing the value to be stored. static __inline__ void __DEFAULT_FN_ATTRS diff --git a/lib/include/gfniintrin.h b/lib/include/gfniintrin.h index 9bff0fcb60..11a321b7c9 100644 --- a/lib/include/gfniintrin.h +++ b/lib/include/gfniintrin.h @@ -14,38 +14,56 @@ #ifndef __GFNIINTRIN_H #define __GFNIINTRIN_H +/* Default attributes for simple form (no masking). */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni"), __min_vector_width__(128))) + +/* Default attributes for YMM unmasked form. */ +#define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"), __min_vector_width__(256))) + +/* Default attributes for ZMM forms. */ +#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"), __min_vector_width__(512))) + +/* Default attributes for VLX forms. */ +#define __DEFAULT_FN_ATTRS_VL128 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS_VL256 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(256))) #define _mm_gf2p8affineinv_epi64_epi8(A, B, I) \ (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \ (__v16qi)(__m128i)(B), \ (char)(I)) -#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \ - (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ - (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I), \ - (__v16qi)(__m128i)(S)) - - -#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \ - (__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(), \ - U, A, B, I) +#define _mm_gf2p8affine_epi64_epi8(A, B, I) \ + (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \ + (__v16qi)(__m128i)(B), \ + (char)(I)) +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_gf2p8mul_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A, + (__v16qi) __B); +} +#ifdef __AVXINTRIN_H #define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) \ (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \ (__v32qi)(__m256i)(B), \ (char)(I)) -#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \ - (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ - (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I), \ - (__v32qi)(__m256i)(S)) - -#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \ - (__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \ - U, A, B, I) +#define _mm256_gf2p8affine_epi64_epi8(A, B, I) \ + (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \ + (__v32qi)(__m256i)(B), \ + (char)(I)) +static __inline__ __m256i __DEFAULT_FN_ATTRS_Y +_mm256_gf2p8mul_epi8(__m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi((__v32qi) __A, + (__v32qi) __B); +} +#endif /* __AVXINTRIN_H */ +#ifdef __AVX512BWINTRIN_H #define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) \ (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A), \ (__v64qi)(__m512i)(B), \ @@ -60,37 +78,6 @@ (__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_si512(), \ U, A, B, I) -#define _mm_gf2p8affine_epi64_epi8(A, B, I) \ - (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \ - (__v16qi)(__m128i)(B), \ - (char)(I)) - 
-#define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \ - (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ - (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \ - (__v16qi)(__m128i)(S)) - - -#define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \ - (__m128i)_mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(), \ - U, A, B, I) - - -#define _mm256_gf2p8affine_epi64_epi8(A, B, I) \ - (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \ - (__v32qi)(__m256i)(B), \ - (char)(I)) - -#define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \ - (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ - (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I), \ - (__v32qi)(__m256i)(S)) - -#define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \ - (__m256i)_mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(), \ - U, A, B, I) - - #define _mm512_gf2p8affine_epi64_epi8(A, B, I) \ (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \ (__v64qi)(__m512i)(B), \ @@ -105,63 +92,6 @@ (__m512i)_mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_si512(), \ U, A, B, I) -/* Default attributes for simple form (no masking). */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni"), __min_vector_width__(128))) - -/* Default attributes for YMM unmasked form. */ -#define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"), __min_vector_width__(256))) - -/* Default attributes for ZMM forms. */ -#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"), __min_vector_width__(512))) - -/* Default attributes for VLX forms. */ -#define __DEFAULT_FN_ATTRS_VL128 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(128))) -#define __DEFAULT_FN_ATTRS_VL256 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(256))) - -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_gf2p8mul_epi8(__m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A, - (__v16qi) __B); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128 -_mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B) -{ - return (__m128i) __builtin_ia32_selectb_128(__U, - (__v16qi) _mm_gf2p8mul_epi8(__A, __B), - (__v16qi) __S); -} - -static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128 -_mm_maskz_gf2p8mul_epi8(__mmask16 __U, __m128i __A, __m128i __B) -{ - return _mm_mask_gf2p8mul_epi8((__m128i)_mm_setzero_si128(), - __U, __A, __B); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS_Y -_mm256_gf2p8mul_epi8(__m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi((__v32qi) __A, - (__v32qi) __B); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256 -_mm256_mask_gf2p8mul_epi8(__m256i __S, __mmask32 __U, __m256i __A, __m256i __B) -{ - return (__m256i) __builtin_ia32_selectb_256(__U, - (__v32qi) _mm256_gf2p8mul_epi8(__A, __B), - (__v32qi) __S); -} - -static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256 -_mm256_maskz_gf2p8mul_epi8(__mmask32 __U, __m256i __A, __m256i __B) -{ - return _mm256_mask_gf2p8mul_epi8((__m256i)_mm256_setzero_si256(), - __U, __A, __B); -} - static __inline__ __m512i __DEFAULT_FN_ATTRS_Z _mm512_gf2p8mul_epi8(__m512i __A, __m512i __B) { @@ -183,6 +113,75 @@ _mm512_maskz_gf2p8mul_epi8(__mmask64 __U, __m512i __A, __m512i __B) return _mm512_mask_gf2p8mul_epi8((__m512i)_mm512_setzero_si512(), __U, __A, __B); } 
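The net effect of this gfniintrin.h reshuffle is that the plain (unmasked) GFNI forms now only require the gfni target feature, while the masked and zero-masked variants move behind the AVX512BW/AVX512VL header guards that follow. A short sketch of the unmasked byte multiply, assuming only -mgfni:

    /* Sketch: byte-wise multiplication in GF(2^8), reduced by the AES
     * polynomial x^8 + x^4 + x^3 + x + 1.  Only -mgfni is required for the
     * unmasked 128-bit form after this change. */
    #include <immintrin.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
        uint8_t a[16], b[16], r[16];
        for (int i = 0; i < 16; ++i) { a[i] = (uint8_t)(i + 1); b[i] = 0x1d; }

        __m128i va = _mm_loadu_si128((const __m128i *)a);
        __m128i vb = _mm_loadu_si128((const __m128i *)b);
        __m128i vr = _mm_gf2p8mul_epi8(va, vb);   /* 16 independent GF(2^8) products */

        _mm_storeu_si128((__m128i *)r, vr);
        for (int i = 0; i < 16; ++i)
            printf("%02x%c", r[i], i == 15 ? '\n' : ' ');
        return 0;
    }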
+#endif /* __AVX512BWINTRIN_H */ + +#ifdef __AVX512VLBWINTRIN_H +#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \ + (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ + (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I), \ + (__v16qi)(__m128i)(S)) + +#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \ + (__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(), \ + U, A, B, I) + +#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) \ + (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ + (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I), \ + (__v32qi)(__m256i)(S)) + +#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) \ + (__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \ + U, A, B, I) + +#define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \ + (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ + (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \ + (__v16qi)(__m128i)(S)) + +#define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \ + (__m128i)_mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(), \ + U, A, B, I) + +#define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) \ + (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ + (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I), \ + (__v32qi)(__m256i)(S)) + +#define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) \ + (__m256i)_mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(), \ + U, A, B, I) + +static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128 +_mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_selectb_128(__U, + (__v16qi) _mm_gf2p8mul_epi8(__A, __B), + (__v16qi) __S); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128 +_mm_maskz_gf2p8mul_epi8(__mmask16 __U, __m128i __A, __m128i __B) +{ + return _mm_mask_gf2p8mul_epi8((__m128i)_mm_setzero_si128(), + __U, __A, __B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256 +_mm256_mask_gf2p8mul_epi8(__m256i __S, __mmask32 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_selectb_256(__U, + (__v32qi) _mm256_gf2p8mul_epi8(__A, __B), + (__v32qi) __S); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256 +_mm256_maskz_gf2p8mul_epi8(__mmask32 __U, __m256i __A, __m256i __B) +{ + return _mm256_mask_gf2p8mul_epi8((__m256i)_mm256_setzero_si256(), + __U, __A, __B); +} +#endif /* __AVX512VLBWINTRIN_H */ #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS_Y diff --git a/lib/include/hresetintrin.h b/lib/include/hresetintrin.h new file mode 100644 index 0000000000..13e31a2e03 --- /dev/null +++ b/lib/include/hresetintrin.h @@ -0,0 +1,49 @@ +/*===---------------- hresetintrin.h - HRESET intrinsics -------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __X86GPRINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __HRESETINTRIN_H +#define __HRESETINTRIN_H + +#if __has_extension(gnu_asm) + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("hreset"))) + +/// Provides a hint to the processor to selectively reset the prediction +/// history of the current logical processor specified by a 32-bit integer +/// value \a __eax. 
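In practice \a __eax is a mask of history-reset capabilities, and passing 0 is an architectural no-op. A hedged sketch of how the new _hreset intrinsic might be called — assuming -mhreset, that the header is reached through immintrin.h (which this diff wires up to include the new GPR-intrinsic umbrella), and that the capability mask was cached from CPUID leaf 0x20 at start-up (that leaf is an assumption of this example, not something the header checks):

    /* Sketch: drop selected branch-prediction history on the current logical
     * processor.  Assumes -mhreset; hreset_caps would be filled in once at
     * start-up, and _hreset(0) is always a no-op. */
    #include <immintrin.h>

    static unsigned int hreset_caps;   /* e.g. cached from CPUID.20H:EBX */

    void drop_prediction_history(void)
    {
        _hreset((int)hreset_caps);
    }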
+/// +/// This intrinsic corresponds to the HRESET instruction. +/// +/// \operation +/// IF __eax == 0 +/// // nop +/// ELSE +/// FOR i := 0 to 31 +/// IF __eax[i] +/// ResetPredictionFeature(i) +/// FI +/// ENDFOR +/// FI +/// \endoperation +static __inline void __DEFAULT_FN_ATTRS +_hreset(int __eax) +{ + __asm__ ("hreset $0" :: "a"(__eax)); +} + +#undef __DEFAULT_FN_ATTRS + +#endif /* __has_extension(gnu_asm) */ + +#endif /* __HRESETINTRIN_H */ diff --git a/lib/include/ia32intrin.h b/lib/include/ia32intrin.h index 79b7f0655c..00138effd5 100644 --- a/lib/include/ia32intrin.h +++ b/lib/include/ia32intrin.h @@ -14,6 +14,18 @@ #ifndef __IA32INTRIN_H #define __IA32INTRIN_H +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) +#define __DEFAULT_FN_ATTRS_SSE42 __attribute__((__always_inline__, __nodebug__, __target__("sse4.2"))) + +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS_CAST __attribute__((__always_inline__)) constexpr +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#else +#define __DEFAULT_FN_ATTRS_CAST __attribute__((__always_inline__)) +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#endif + /** Find the first set bit starting from the lsb. Result is undefined if * input is 0. * @@ -26,7 +38,7 @@ * A 32-bit integer operand. * \returns A 32-bit integer containing the bit number. */ -static __inline__ int __attribute__((__always_inline__, __nodebug__)) +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __bsfd(int __A) { return __builtin_ctz(__A); } @@ -43,7 +55,7 @@ __bsfd(int __A) { * A 32-bit integer operand. * \returns A 32-bit integer containing the bit number. */ -static __inline__ int __attribute__((__always_inline__, __nodebug__)) +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __bsrd(int __A) { return 31 - __builtin_clz(__A); } @@ -59,12 +71,12 @@ __bsrd(int __A) { * A 32-bit integer operand. * \returns A 32-bit integer containing the swapped bytes. */ -static __inline__ int __attribute__((__always_inline__, __nodebug__)) +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __bswapd(int __A) { return __builtin_bswap32(__A); } -static __inline__ int __attribute__((__always_inline__, __nodebug__)) +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _bswap(int __A) { return __builtin_bswap32(__A); } @@ -85,7 +97,7 @@ _bswap(int __A) { * A 64-bit integer operand. * \returns A 32-bit integer containing the bit number. */ -static __inline__ int __attribute__((__always_inline__, __nodebug__)) +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __bsfq(long long __A) { return __builtin_ctzll(__A); } @@ -102,7 +114,7 @@ __bsfq(long long __A) { * A 64-bit integer operand. * \returns A 32-bit integer containing the bit number. */ -static __inline__ int __attribute__((__always_inline__, __nodebug__)) +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __bsrq(long long __A) { return 63 - __builtin_clzll(__A); } @@ -118,7 +130,7 @@ __bsrq(long long __A) { * A 64-bit integer operand. * \returns A 64-bit integer containing the swapped bytes. */ -static __inline__ long long __attribute__((__always_inline__, __nodebug__)) +static __inline__ long long __DEFAULT_FN_ATTRS_CONSTEXPR __bswapq(long long __A) { return __builtin_bswap64(__A); } @@ -138,7 +150,7 @@ __bswapq(long long __A) { * \returns A 32-bit integer containing the number of bits with value 1 in the * source operand. 
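The practical effect of the new attribute macros in ia32intrin.h is that these scalar helpers keep their old C behaviour but become usable in constant expressions from C++11 onward. A quick sketch exercising a few of them from plain C (no extra target flags are needed for these particular helpers):

    /* Sketch: the scalar ia32 helpers shown above, called from C. */
    #include <x86intrin.h>   /* pulls in ia32intrin.h */
    #include <stdio.h>

    int main(void) {
        unsigned int v = 0x00F0u;
        printf("lowest set bit : %d\n", __bsfd((int)v));     /* 4 */
        printf("highest set bit: %d\n", __bsrd((int)v));     /* 7 */
        printf("byte swap      : 0x%08x\n",
               (unsigned int)__bswapd(0x11223344));          /* 0x44332211 */
        printf("popcount       : %d\n", __popcntd(v));       /* 4 */
        return 0;
    }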
*/ -static __inline__ int __attribute__((__always_inline__, __nodebug__)) +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR __popcntd(unsigned int __A) { return __builtin_popcount(__A); @@ -159,7 +171,7 @@ __popcntd(unsigned int __A) * \returns A 64-bit integer containing the number of bits with value 1 in the * source operand. */ -static __inline__ long long __attribute__((__always_inline__, __nodebug__)) +static __inline__ long long __DEFAULT_FN_ATTRS_CONSTEXPR __popcntq(unsigned long long __A) { return __builtin_popcountll(__A); @@ -169,26 +181,26 @@ __popcntq(unsigned long long __A) #endif /* __x86_64__ */ #ifdef __x86_64__ -static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +static __inline__ unsigned long long __DEFAULT_FN_ATTRS __readeflags(void) { return __builtin_ia32_readeflags_u64(); } -static __inline__ void __attribute__((__always_inline__, __nodebug__)) +static __inline__ void __DEFAULT_FN_ATTRS __writeeflags(unsigned long long __f) { __builtin_ia32_writeeflags_u64(__f); } #else /* !__x86_64__ */ -static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +static __inline__ unsigned int __DEFAULT_FN_ATTRS __readeflags(void) { return __builtin_ia32_readeflags_u32(); } -static __inline__ void __attribute__((__always_inline__, __nodebug__)) +static __inline__ void __DEFAULT_FN_ATTRS __writeeflags(unsigned int __f) { __builtin_ia32_writeeflags_u32(__f); @@ -205,11 +217,9 @@ __writeeflags(unsigned int __f) * A 32-bit float value. * \returns a 32-bit unsigned integer containing the converted value. */ -static __inline__ unsigned int __attribute__((__always_inline__)) +static __inline__ unsigned int __DEFAULT_FN_ATTRS_CAST _castf32_u32(float __A) { - unsigned int D; - __builtin_memcpy(&D, &__A, sizeof(__A)); - return D; + return __builtin_bit_cast(unsigned int, __A); } /** Cast a 64-bit float value to a 64-bit unsigned integer value @@ -222,11 +232,9 @@ _castf32_u32(float __A) { * A 64-bit float value. * \returns a 64-bit unsigned integer containing the converted value. */ -static __inline__ unsigned long long __attribute__((__always_inline__)) +static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CAST _castf64_u64(double __A) { - unsigned long long D; - __builtin_memcpy(&D, &__A, sizeof(__A)); - return D; + return __builtin_bit_cast(unsigned long long, __A); } /** Cast a 32-bit unsigned integer value to a 32-bit float value @@ -239,11 +247,9 @@ _castf64_u64(double __A) { * A 32-bit unsigned integer value. * \returns a 32-bit float value containing the converted value. */ -static __inline__ float __attribute__((__always_inline__)) +static __inline__ float __DEFAULT_FN_ATTRS_CAST _castu32_f32(unsigned int __A) { - float D; - __builtin_memcpy(&D, &__A, sizeof(__A)); - return D; + return __builtin_bit_cast(float, __A); } /** Cast a 64-bit unsigned integer value to a 64-bit float value @@ -256,11 +262,9 @@ _castu32_f32(unsigned int __A) { * A 64-bit unsigned integer value. * \returns a 64-bit float value containing the converted value. */ -static __inline__ double __attribute__((__always_inline__)) +static __inline__ double __DEFAULT_FN_ATTRS_CAST _castu64_f64(unsigned long long __A) { - double D; - __builtin_memcpy(&D, &__A, sizeof(__A)); - return D; + return __builtin_bit_cast(double, __A); } /** Adds the unsigned integer operand to the CRC-32C checksum of the @@ -278,7 +282,7 @@ _castu64_f64(unsigned long long __A) { * \returns The result of adding operand \a __C to the CRC-32C checksum of * operand \a __D. 
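Replacing the memcpy idiom with __builtin_bit_cast keeps the same bit-for-bit semantics while allowing constant evaluation in C++. A small sketch combining the bit-cast helpers with one of the CRC-32C accumulators documented next (the CRC call assumes -msse4.2):

    /* Sketch (-msse4.2): fold the raw bits of a float into a CRC-32C value
     * using the ia32intrin.h helpers shown above. */
    #include <x86intrin.h>
    #include <stdio.h>

    int main(void) {
        unsigned int bits = _castf32_u32(1.5f);         /* 0x3fc00000 */
        unsigned int crc  = __crc32d(0xFFFFFFFFu, bits);
        printf("bits = 0x%08x, crc32c = 0x%08x\n", bits, crc);
        printf("round-trip: %f\n", _castu32_f32(bits)); /* 1.500000 */
        return 0;
    }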
*/ -static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2"))) +static __inline__ unsigned int __DEFAULT_FN_ATTRS_SSE42 __crc32b(unsigned int __C, unsigned char __D) { return __builtin_ia32_crc32qi(__C, __D); @@ -299,7 +303,7 @@ __crc32b(unsigned int __C, unsigned char __D) * \returns The result of adding operand \a __C to the CRC-32C checksum of * operand \a __D. */ -static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2"))) +static __inline__ unsigned int __DEFAULT_FN_ATTRS_SSE42 __crc32w(unsigned int __C, unsigned short __D) { return __builtin_ia32_crc32hi(__C, __D); @@ -320,7 +324,7 @@ __crc32w(unsigned int __C, unsigned short __D) * \returns The result of adding operand \a __C to the CRC-32C checksum of * operand \a __D. */ -static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2"))) +static __inline__ unsigned int __DEFAULT_FN_ATTRS_SSE42 __crc32d(unsigned int __C, unsigned int __D) { return __builtin_ia32_crc32si(__C, __D); @@ -342,20 +346,20 @@ __crc32d(unsigned int __C, unsigned int __D) * \returns The result of adding operand \a __C to the CRC-32C checksum of * operand \a __D. */ -static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("sse4.2"))) +static __inline__ unsigned long long __DEFAULT_FN_ATTRS_SSE42 __crc32q(unsigned long long __C, unsigned long long __D) { return __builtin_ia32_crc32di(__C, __D); } #endif /* __x86_64__ */ -static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +static __inline__ unsigned long long __DEFAULT_FN_ATTRS __rdpmc(int __A) { return __builtin_ia32_rdpmc(__A); } /* __rdtscp */ -static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +static __inline__ unsigned long long __DEFAULT_FN_ATTRS __rdtscp(unsigned int *__A) { return __builtin_ia32_rdtscp(__A); } @@ -364,48 +368,48 @@ __rdtscp(unsigned int *__A) { #define _rdpmc(A) __rdpmc(A) -static __inline__ void __attribute__((__always_inline__, __nodebug__)) +static __inline__ void __DEFAULT_FN_ATTRS _wbinvd(void) { __builtin_ia32_wbinvd(); } -static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) +static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR __rolb(unsigned char __X, int __C) { return __builtin_rotateleft8(__X, __C); } -static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) +static __inline__ unsigned char __DEFAULT_FN_ATTRS_CONSTEXPR __rorb(unsigned char __X, int __C) { return __builtin_rotateright8(__X, __C); } -static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) +static __inline__ unsigned short __DEFAULT_FN_ATTRS_CONSTEXPR __rolw(unsigned short __X, int __C) { return __builtin_rotateleft16(__X, __C); } -static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) +static __inline__ unsigned short __DEFAULT_FN_ATTRS_CONSTEXPR __rorw(unsigned short __X, int __C) { return __builtin_rotateright16(__X, __C); } -static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR __rold(unsigned int __X, int __C) { return __builtin_rotateleft32(__X, __C); } -static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +static __inline__ unsigned int __DEFAULT_FN_ATTRS_CONSTEXPR __rord(unsigned int __X, int __C) { return __builtin_rotateright32(__X, __C); } #ifdef __x86_64__ 
-static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR __rolq(unsigned long long __X, int __C) { return __builtin_rotateleft64(__X, __C); } -static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +static __inline__ unsigned long long __DEFAULT_FN_ATTRS_CONSTEXPR __rorq(unsigned long long __X, int __C) { return __builtin_rotateright64(__X, __C); } @@ -429,4 +433,9 @@ __rorq(unsigned long long __X, int __C) { #define _rotwl(a,b) __rolw((a), (b)) #define _rotwr(a,b) __rorw((a), (b)) +#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_CAST +#undef __DEFAULT_FN_ATTRS_SSE42 +#undef __DEFAULT_FN_ATTRS_CONSTEXPR + #endif /* __IA32INTRIN_H */ diff --git a/lib/include/immintrin.h b/lib/include/immintrin.h index e9dff2310f..22f7a520c9 100644 --- a/lib/include/immintrin.h +++ b/lib/include/immintrin.h @@ -10,6 +10,8 @@ #ifndef __IMMINTRIN_H #define __IMMINTRIN_H +#include + #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__MMX__) #include @@ -143,6 +145,11 @@ #include #endif +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__AVXVNNI__) +#include +#endif + #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AVX512DQ__) #include @@ -471,6 +478,11 @@ _storebe_i64(void * __P, long long __D) { #include #endif +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__KL__) || defined(__WIDEKL__) +#include +#endif + #if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ defined(__AMXTILE__) || defined(__AMXINT8__) || defined(__AMXBF16__) #include diff --git a/lib/include/intrin.h b/lib/include/intrin.h index 871b47ca82..a78b96997d 100644 --- a/lib/include/intrin.h +++ b/lib/include/intrin.h @@ -57,16 +57,11 @@ void __addfsbyte(unsigned long, unsigned char); void __addfsdword(unsigned long, unsigned long); void __addfsword(unsigned long, unsigned short); void __code_seg(const char *); -static __inline__ void __cpuid(int[4], int); -static __inline__ void __cpuidex(int[4], int, int); -static __inline__ __int64 __emul(int, int); -static __inline__ unsigned __int64 __emulu(unsigned int, unsigned int); unsigned int __getcallerseflags(void); -static __inline__ void __halt(void); unsigned char __inbyte(unsigned short); void __inbytestring(unsigned short, unsigned char *, unsigned long); @@ -82,13 +77,9 @@ void __inwordstring(unsigned short, unsigned short *, unsigned long); void __lidt(void *); unsigned __int64 __ll_lshift(unsigned __int64, int); __int64 __ll_rshift(__int64, int); -static __inline__ void __movsb(unsigned char *, unsigned char const *, size_t); -static __inline__ void __movsd(unsigned long *, unsigned long const *, size_t); -static __inline__ void __movsw(unsigned short *, unsigned short const *, size_t); -static __inline__ void __nop(void); void __nvreg_restore_fence(void); void __nvreg_save_fence(void); @@ -105,23 +96,16 @@ unsigned long __readcr4(void); unsigned long __readcr8(void); unsigned int __readdr(unsigned int); #ifdef __i386__ -static __inline__ unsigned char __readfsbyte(unsigned long); -static __inline__ unsigned __int64 __readfsqword(unsigned long); -static __inline__ unsigned short __readfsword(unsigned long); #endif -static __inline__ unsigned __int64 __readmsr(unsigned long); unsigned __int64 __readpmc(unsigned long); unsigned long __segmentlimit(unsigned long); void __sidt(void *); -static 
__inline__ void __stosb(unsigned char *, unsigned char, size_t); -static __inline__ void __stosd(unsigned long *, unsigned long, size_t); -static __inline__ void __stosw(unsigned short *, unsigned short, size_t); void __svm_clgi(void); void __svm_invlpga(void *, int); @@ -136,7 +120,6 @@ void __vmx_off(void); void __vmx_vmptrst(unsigned __int64 *); void __wbinvd(void); void __writecr0(unsigned int); -static __inline__ void __writecr3(unsigned __INTPTR_TYPE__); void __writecr4(unsigned int); void __writecr8(unsigned int); @@ -146,11 +129,8 @@ void __writefsdword(unsigned long, unsigned long); void __writefsqword(unsigned long, unsigned __int64); void __writefsword(unsigned long, unsigned short); void __writemsr(unsigned long, unsigned __int64); -static __inline__ void *_AddressOfReturnAddress(void); -static __inline__ unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask); -static __inline__ unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask); unsigned char _bittest(long const *, long); unsigned char _bittestandcomplement(long *, long); @@ -169,12 +149,10 @@ long _InterlockedExchangeAdd_HLEAcquire(long volatile *, long); long _InterlockedExchangeAdd_HLERelease(long volatile *, long); __int64 _InterlockedExchangeAdd64_HLEAcquire(__int64 volatile *, __int64); __int64 _InterlockedExchangeAdd64_HLERelease(__int64 volatile *, __int64); -static __inline__ void -__attribute__((__deprecated__("use other intrinsics or C++11 atomics instead"))) -_ReadBarrier(void); -static __inline__ void -__attribute__((__deprecated__("use other intrinsics or C++11 atomics instead"))) -_ReadWriteBarrier(void); +void __attribute__((__deprecated__( + "use other intrinsics or C++11 atomics instead"))) _ReadBarrier(void); +void __attribute__((__deprecated__( + "use other intrinsics or C++11 atomics instead"))) _ReadWriteBarrier(void); unsigned int _rorx_u32(unsigned int, const unsigned int); int _sarx_i32(int, unsigned int); #if __STDC_HOSTED__ @@ -185,9 +163,8 @@ unsigned int _shrx_u32(unsigned int, unsigned int); void _Store_HLERelease(long volatile *, long); void _Store64_HLERelease(__int64 volatile *, __int64); void _StorePointer_HLERelease(void *volatile *, void *); -static __inline__ void -__attribute__((__deprecated__("use other intrinsics or C++11 atomics instead"))) -_WriteBarrier(void); +void __attribute__((__deprecated__( + "use other intrinsics or C++11 atomics instead"))) _WriteBarrier(void); unsigned __int32 xbegin(void); void _xend(void); @@ -197,19 +174,14 @@ void __addgsbyte(unsigned long, unsigned char); void __addgsdword(unsigned long, unsigned long); void __addgsqword(unsigned long, unsigned __int64); void __addgsword(unsigned long, unsigned short); -static __inline__ void __faststorefence(void); void __incgsbyte(unsigned long); void __incgsdword(unsigned long); void __incgsqword(unsigned long); void __incgsword(unsigned long); -static __inline__ void __movsq(unsigned long long *, unsigned long long const *, size_t); -static __inline__ unsigned char __readgsbyte(unsigned long); -static __inline__ unsigned long __readgsdword(unsigned long); -static __inline__ unsigned __int64 __readgsqword(unsigned long); unsigned short __readgsword(unsigned long); unsigned __int64 __shiftleft128(unsigned __int64 _LowPart, @@ -218,7 +190,6 @@ unsigned __int64 __shiftleft128(unsigned __int64 _LowPart, unsigned __int64 __shiftright128(unsigned __int64 _LowPart, unsigned __int64 _HighPart, unsigned char _Shift); -static __inline__ void __stosq(unsigned __int64 *, unsigned __int64, 
size_t); unsigned char __vmx_on(unsigned __int64 *); unsigned char __vmx_vmclear(unsigned __int64 *); @@ -243,10 +214,6 @@ unsigned char _interlockedbittestandreset64(__int64 volatile *, __int64); unsigned char _interlockedbittestandset64(__int64 volatile *, __int64); long _InterlockedCompareExchange_np(long volatile *_Destination, long _Exchange, long _Comparand); -unsigned char _InterlockedCompareExchange128(__int64 volatile *_Destination, - __int64 _ExchangeHigh, - __int64 _ExchangeLow, - __int64 *_CompareandResult); unsigned char _InterlockedCompareExchange128_np(__int64 volatile *_Destination, __int64 _ExchangeHigh, __int64 _ExchangeLow, @@ -269,13 +236,9 @@ unsigned __int64 _rorx_u64(unsigned __int64, const unsigned int); __int64 _sarx_i64(__int64, unsigned int); unsigned __int64 _shlx_u64(unsigned __int64, unsigned int); unsigned __int64 _shrx_u64(unsigned __int64, unsigned int); -static __inline__ __int64 __mulh(__int64, __int64); -static __inline__ unsigned __int64 __umulh(unsigned __int64, unsigned __int64); -static __inline__ __int64 _mul128(__int64, __int64, __int64*); -static __inline__ unsigned __int64 _umul128(unsigned __int64, unsigned __int64, unsigned __int64*); @@ -284,29 +247,19 @@ unsigned __int64 _umul128(unsigned __int64, #if defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) -static __inline__ unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask); -static __inline__ unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask); #endif #if defined(__i386__) || defined(__x86_64__) || defined(__arm__) || defined(__aarch64__) -static __inline__ __int64 _InterlockedDecrement64(__int64 volatile *_Addend); -static __inline__ __int64 _InterlockedExchange64(__int64 volatile *_Target, __int64 _Value); -static __inline__ __int64 _InterlockedExchangeAdd64(__int64 volatile *_Addend, __int64 _Value); -static __inline__ __int64 _InterlockedExchangeSub64(__int64 volatile *_Subend, __int64 _Value); -static __inline__ __int64 _InterlockedIncrement64(__int64 volatile *_Addend); -static __inline__ __int64 _InterlockedOr64(__int64 volatile *_Value, __int64 _Mask); -static __inline__ __int64 _InterlockedXor64(__int64 volatile *_Value, __int64 _Mask); -static __inline__ __int64 _InterlockedAnd64(__int64 volatile *_Value, __int64 _Mask); #endif @@ -470,45 +423,81 @@ __int64 _InterlockedCompareExchange64_nf(__int64 volatile *_Destination, __int64 _InterlockedCompareExchange64_rel(__int64 volatile *_Destination, __int64 _Exchange, __int64 _Comparand); #endif +#if defined(__x86_64__) || defined(__aarch64__) +unsigned char _InterlockedCompareExchange128(__int64 volatile *_Destination, + __int64 _ExchangeHigh, + __int64 _ExchangeLow, + __int64 *_ComparandResult); +#endif +#if defined(__aarch64__) +unsigned char _InterlockedCompareExchange128_acq(__int64 volatile *_Destination, + __int64 _ExchangeHigh, + __int64 _ExchangeLow, + __int64 *_ComparandResult); +unsigned char _InterlockedCompareExchange128_nf(__int64 volatile *_Destination, + __int64 _ExchangeHigh, + __int64 _ExchangeLow, + __int64 *_ComparandResult); +unsigned char _InterlockedCompareExchange128_rel(__int64 volatile *_Destination, + __int64 _ExchangeHigh, + __int64 _ExchangeLow, + __int64 *_ComparandResult); +#endif /*----------------------------------------------------------------------------*\ |* movs, stos \*----------------------------------------------------------------------------*/ #if defined(__i386__) || defined(__x86_64__) -static __inline__ void __DEFAULT_FN_ATTRS 
-__movsb(unsigned char *__dst, unsigned char const *__src, size_t __n) { +static __inline__ void __DEFAULT_FN_ATTRS __movsb(unsigned char *__dst, + unsigned char const *__src, + size_t __n) { __asm__ __volatile__("rep movsb" : "+D"(__dst), "+S"(__src), "+c"(__n) : : "memory"); } -static __inline__ void __DEFAULT_FN_ATTRS -__movsd(unsigned long *__dst, unsigned long const *__src, size_t __n) { - __asm__ __volatile__("rep movsl" : "+D"(__dst), "+S"(__src), "+c"(__n) - : : "memory"); -} -static __inline__ void __DEFAULT_FN_ATTRS -__movsw(unsigned short *__dst, unsigned short const *__src, size_t __n) { - __asm__ __volatile__("rep movsw" : "+D"(__dst), "+S"(__src), "+c"(__n) - : : "memory"); -} -static __inline__ void __DEFAULT_FN_ATTRS -__stosd(unsigned long *__dst, unsigned long __x, size_t __n) { - __asm__ __volatile__("rep stosl" : "+D"(__dst), "+c"(__n) : "a"(__x) +static __inline__ void __DEFAULT_FN_ATTRS __movsd(unsigned long *__dst, + unsigned long const *__src, + size_t __n) { + __asm__ __volatile__("rep movsl" + : "+D"(__dst), "+S"(__src), "+c"(__n) + : : "memory"); } -static __inline__ void __DEFAULT_FN_ATTRS -__stosw(unsigned short *__dst, unsigned short __x, size_t __n) { - __asm__ __volatile__("rep stosw" : "+D"(__dst), "+c"(__n) : "a"(__x) +static __inline__ void __DEFAULT_FN_ATTRS __movsw(unsigned short *__dst, + unsigned short const *__src, + size_t __n) { + __asm__ __volatile__("rep movsw" + : "+D"(__dst), "+S"(__src), "+c"(__n) + : + : "memory"); +} +static __inline__ void __DEFAULT_FN_ATTRS __stosd(unsigned long *__dst, + unsigned long __x, + size_t __n) { + __asm__ __volatile__("rep stosl" + : "+D"(__dst), "+c"(__n) + : "a"(__x) + : "memory"); +} +static __inline__ void __DEFAULT_FN_ATTRS __stosw(unsigned short *__dst, + unsigned short __x, + size_t __n) { + __asm__ __volatile__("rep stosw" + : "+D"(__dst), "+c"(__n) + : "a"(__x) : "memory"); } #endif #ifdef __x86_64__ -static __inline__ void __DEFAULT_FN_ATTRS -__movsq(unsigned long long *__dst, unsigned long long const *__src, size_t __n) { - __asm__ __volatile__("rep movsq" : "+D"(__dst), "+S"(__src), "+c"(__n) - : : "memory"); +static __inline__ void __DEFAULT_FN_ATTRS __movsq( + unsigned long long *__dst, unsigned long long const *__src, size_t __n) { + __asm__ __volatile__("rep movsq" + : "+D"(__dst), "+S"(__src), "+c"(__n) + : + : "memory"); } -static __inline__ void __DEFAULT_FN_ATTRS -__stosq(unsigned __int64 *__dst, unsigned __int64 __x, size_t __n) { +static __inline__ void __DEFAULT_FN_ATTRS __stosq(unsigned __int64 *__dst, + unsigned __int64 __x, + size_t __n) { __asm__ __volatile__("rep stosq" : "+D"(__dst), "+c"(__n) : "a"(__x) : "memory"); } @@ -518,26 +507,25 @@ __stosq(unsigned __int64 *__dst, unsigned __int64 __x, size_t __n) { |* Misc \*----------------------------------------------------------------------------*/ #if defined(__i386__) || defined(__x86_64__) -static __inline__ void __DEFAULT_FN_ATTRS -__cpuid(int __info[4], int __level) { - __asm__ ("cpuid" : "=a"(__info[0]), "=b" (__info[1]), "=c"(__info[2]), "=d"(__info[3]) - : "a"(__level), "c"(0)); +static __inline__ void __DEFAULT_FN_ATTRS __cpuid(int __info[4], int __level) { + __asm__("cpuid" + : "=a"(__info[0]), "=b"(__info[1]), "=c"(__info[2]), "=d"(__info[3]) + : "a"(__level), "c"(0)); } -static __inline__ void __DEFAULT_FN_ATTRS -__cpuidex(int __info[4], int __level, int __ecx) { - __asm__ ("cpuid" : "=a"(__info[0]), "=b" (__info[1]), "=c"(__info[2]), "=d"(__info[3]) - : "a"(__level), "c"(__ecx)); +static __inline__ void __DEFAULT_FN_ATTRS 
__cpuidex(int __info[4], int __level, + int __ecx) { + __asm__("cpuid" + : "=a"(__info[0]), "=b"(__info[1]), "=c"(__info[2]), "=d"(__info[3]) + : "a"(__level), "c"(__ecx)); } -static __inline__ void __DEFAULT_FN_ATTRS -__halt(void) { - __asm__ volatile ("hlt"); +static __inline__ void __DEFAULT_FN_ATTRS __halt(void) { + __asm__ volatile("hlt"); } #endif #if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__) -static __inline__ void __DEFAULT_FN_ATTRS -__nop(void) { - __asm__ volatile ("nop"); +static __inline__ void __DEFAULT_FN_ATTRS __nop(void) { + __asm__ volatile("nop"); } #endif @@ -574,8 +562,7 @@ __readmsr(unsigned long __register) { } #endif -static __inline__ unsigned __LPTRINT_TYPE__ __DEFAULT_FN_ATTRS -__readcr3(void) { +static __inline__ unsigned __LPTRINT_TYPE__ __DEFAULT_FN_ATTRS __readcr3(void) { unsigned __LPTRINT_TYPE__ __cr3_val; __asm__ __volatile__ ("mov %%cr3, %0" : "=r"(__cr3_val) : : "memory"); return __cr3_val; diff --git a/lib/include/keylockerintrin.h b/lib/include/keylockerintrin.h new file mode 100644 index 0000000000..c15d39c8e3 --- /dev/null +++ b/lib/include/keylockerintrin.h @@ -0,0 +1,506 @@ +/*===----------------- keylockerintrin.h - KL Intrinsics -------------------=== + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef _KEYLOCKERINTRIN_H +#define _KEYLOCKERINTRIN_H + +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__KL__) + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("kl"),\ + __min_vector_width__(128))) + +/// Load internal wrapping key from __intkey, __enkey_lo and __enkey_hi. __ctl +/// will assigned to EAX, whch specifies the KeySource and whether backing up +/// the key is permitted. The 256-bit encryption key is loaded from the two +/// explicit operands (__enkey_lo and __enkey_hi). The 128-bit integrity key is +/// loaded from the implicit operand XMM0 which assigned by __intkey. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the LOADIWKEY instructions. 
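Because LOADIWKEY is restricted to ring 0, only an OS kernel would issue it; the remaining Key Locker intrinsics operate against whatever wrapping key the kernel installed. A hedged sketch of that privileged step, assuming -mkl and using the __ctl layout spelled out in the pseudo-operation that follows (bit 0 = NoBackup, bits 4:1 = KeySource):

    /* Sketch (ring 0 only, -mkl): install a hardware-random internal wrapping
     * key.  KeySource = 1 XORs the supplied key material with full-entropy
     * random data, so software never learns the final IWKey; a real kernel
     * would also confirm ZF = 0 (random data was available) and retry if not. */
    #include <immintrin.h>

    void install_iwkey(void)
    {
        __m128i integrity_key = _mm_setzero_si128();  /* from a DRBG in practice */
        __m128i enkey_lo      = _mm_setzero_si128();
        __m128i enkey_hi      = _mm_setzero_si128();
        unsigned int ctl      = (1u << 1)   /* KeySource = 1 */
                              | 1u;         /* NoBackup */

        _mm_loadiwkey(ctl, integrity_key, enkey_lo, enkey_hi);
    }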
+/// +/// \operation +/// IF CPL > 0 // LOADKWKEY only allowed at ring 0 (supervisor mode) +/// GP (0) +/// FI +/// IF “LOADIWKEY exiting” VM execution control set +/// VMexit +/// FI +/// IF __ctl[4:1] > 1 // Reserved KeySource encoding used +/// GP (0) +/// FI +/// IF __ctl[31:5] != 0 // Reserved bit in __ctl is set +/// GP (0) +/// FI +/// IF __ctl[0] AND (CPUID.19H.ECX[0] == 0) // NoBackup is not supported on this part +/// GP (0) +/// FI +/// IF (__ctl[4:1] == 1) AND (CPUID.19H.ECX[1] == 0) // KeySource of 1 is not supported on this part +/// GP (0) +/// FI +/// IF (__ctl[4:1] == 0) // KeySource of 0. +/// IWKey.Encryption Key[127:0] := __enkey_hi[127:0]: +/// IWKey.Encryption Key[255:128] := __enkey_lo[127:0] +/// IWKey.IntegrityKey[127:0] := __intkey[127:0] +/// IWKey.NoBackup := __ctl[0] +/// IWKey.KeySource := __ctl[4:1] +/// ZF := 0 +/// ELSE // KeySource of 1. See RDSEED definition for details of randomness +/// IF HW_NRND_GEN.ready == 1 // Full-entropy random data from RDSEED was received +/// IWKey.Encryption Key[127:0] := __enkey_hi[127:0] XOR HW_NRND_GEN.data[127:0] +/// IWKey.Encryption Key[255:128] := __enkey_lo[127:0] XOR HW_NRND_GEN.data[255:128] +/// IWKey.Encryption Key[255:0] := __enkey_hi[127:0]:__enkey_lo[127:0] XOR HW_NRND_GEN.data[255:0] +/// IWKey.IntegrityKey[127:0] := __intkey[127:0] XOR HW_NRND_GEN.data[383:256] +/// IWKey.NoBackup := __ctl[0] +/// IWKey.KeySource := __ctl[4:1] +/// ZF := 0 +/// ELSE // Random data was not returned from RDSEED. IWKey was not loaded +/// ZF := 1 +/// FI +/// FI +/// dst := ZF +/// OF := 0 +/// SF := 0 +/// AF := 0 +/// PF := 0 +/// CF := 0 +/// \endoperation +static __inline__ void __DEFAULT_FN_ATTRS +_mm_loadiwkey (unsigned int __ctl, __m128i __intkey, + __m128i __enkey_lo, __m128i __enkey_hi) { + __builtin_ia32_loadiwkey (__intkey, __enkey_lo, __enkey_hi, __ctl); +} + +/// Wrap a 128-bit AES key from __key into a key handle and output in +/// ((__m128i*)__h) to ((__m128i*)__h) + 5 and a 32-bit value as return. +/// The explicit source operand __htype specifies handle restrictions. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the ENCODEKEY128 instructions. +/// +/// \operation +/// InputKey[127:0] := __key[127:0] +/// KeyMetadata[2:0] := __htype[2:0] +/// KeyMetadata[23:3] := 0 // Reserved for future usage +/// KeyMetadata[27:24] := 0 // KeyType is AES-128 (value of 0) +/// KeyMetadata[127:28] := 0 // Reserved for future usage +/// Handle[383:0] := WrapKey128(InputKey[127:0], KeyMetadata[127:0], +/// IWKey.Integrity Key[127:0], IWKey.Encryption Key[255:0]) +/// dst[0] := IWKey.NoBackup +/// dst[4:1] := IWKey.KeySource[3:0] +/// dst[31:5] := 0 +/// MEM[__h+127:__h] := Handle[127:0] // AAD +/// MEM[__h+255:__h+128] := Handle[255:128] // Integrity Tag +/// MEM[__h+383:__h+256] := Handle[383:256] // CipherText +/// MEM[__h+511:__h+384] := 0 // Reserved for future usage +/// MEM[__h+639:__h+512] := 0 // Reserved for future usage +/// MEM[__h+767:__h+640] := 0 // Reserved for future usage +/// OF := 0 +/// SF := 0 +/// ZF := 0 +/// AF := 0 +/// PF := 0 +/// CF := 0 +/// \endoperation +static __inline__ unsigned int __DEFAULT_FN_ATTRS +_mm_encodekey128_u32(unsigned int __htype, __m128i __key, void *__h) { + return __builtin_ia32_encodekey128_u32(__htype, (__v2di)__key, __h); +} + +/// Wrap a 256-bit AES key from __key_hi:__key_lo into a key handle, then +/// output handle in ((__m128i*)__h) to ((__m128i*)__h) + 6 and +/// a 32-bit value as return. 
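Both ENCODEKEY forms follow the same shape; for the 128-bit variant defined just above, a user-space sketch (assuming -mkl, and sizing the output as the six __m128i blocks the header documents) might look like:

    /* Sketch (user space, -mkl): wrap a raw AES-128 key into a key handle and
     * forget the raw key afterwards.  The handle buffer covers the 48-byte
     * handle plus the zeroed tail the header writes. */
    #include <immintrin.h>

    unsigned int wrap_key(const unsigned char raw_key[16], __m128i handle[6])
    {
        __m128i key = _mm_loadu_si128((const __m128i *)raw_key);

        /* __htype = 0: no usage restrictions requested (the restriction-bit
         * encoding is an assumption of this example). */
        return _mm_encodekey128_u32(0, key, handle);
    }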
+/// The explicit source operand __htype specifies handle restrictions. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the ENCODEKEY256 instructions. +/// +/// \operation +/// InputKey[127:0] := __key_lo[127:0] +/// InputKey[255:128] := __key_hi[255:128] +/// KeyMetadata[2:0] := __htype[2:0] +/// KeyMetadata[23:3] := 0 // Reserved for future usage +/// KeyMetadata[27:24] := 1 // KeyType is AES-256 (value of 1) +/// KeyMetadata[127:28] := 0 // Reserved for future usage +/// Handle[511:0] := WrapKey256(InputKey[255:0], KeyMetadata[127:0], +/// IWKey.Integrity Key[127:0], IWKey.Encryption Key[255:0]) +/// dst[0] := IWKey.NoBackup +/// dst[4:1] := IWKey.KeySource[3:0] +/// dst[31:5] := 0 +/// MEM[__h+127:__h] := Handle[127:0] // AAD +/// MEM[__h+255:__h+128] := Handle[255:128] // Tag +/// MEM[__h+383:__h+256] := Handle[383:256] // CipherText[127:0] +/// MEM[__h+511:__h+384] := Handle[511:384] // CipherText[255:128] +/// MEM[__h+639:__h+512] := 0 // Reserved for future usage +/// MEM[__h+767:__h+640] := 0 // Reserved for future usage +/// MEM[__h+895:__h+768] := 0 Integrity// Reserved for future usage +/// OF := 0 +/// SF := 0 +/// ZF := 0 +/// AF := 0 +/// PF := 0 +/// CF := 0 +/// \endoperation +static __inline__ unsigned int __DEFAULT_FN_ATTRS +_mm_encodekey256_u32(unsigned int __htype, __m128i __key_lo, __m128i __key_hi, + void *__h) { + return __builtin_ia32_encodekey256_u32(__htype, (__v2di)__key_lo, + (__v2di)__key_hi, __h); +} + +/// The AESENC128KL performs 10 rounds of AES to encrypt the __idata using +/// the 128-bit key in the handle from the __h. It stores the result in the +/// __odata. And return the affected ZF flag status. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the AESENC128KL instructions. +/// +/// \operation +/// Handle[383:0] := MEM[__h+383:__h] // Load is not guaranteed to be atomic. +/// IllegalHandle := ( HandleReservedBitSet (Handle[383:0]) || +/// (Handle[127:0] AND (CPL > 0)) || +/// Handle[383:256] || +/// HandleKeyType (Handle[383:0]) != HANDLE_KEY_TYPE_AES128 ) +/// IF (IllegalHandle) +/// ZF := 1 +/// ELSE +/// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate384 (Handle[383:0], IWKey) +/// IF (Authentic == 0) +/// ZF := 1 +/// ELSE +/// MEM[__odata+127:__odata] := AES128Encrypt (__idata[127:0], UnwrappedKey) +/// ZF := 0 +/// FI +/// FI +/// dst := ZF +/// OF := 0 +/// SF := 0 +/// AF := 0 +/// PF := 0 +/// CF := 0 +/// \endoperation +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_mm_aesenc128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { + return __builtin_ia32_aesenc128kl_u8((__v2di *)__odata, (__v2di)__idata, __h); +} + +/// The AESENC256KL performs 14 rounds of AES to encrypt the __idata using +/// the 256-bit key in the handle from the __h. It stores the result in the +/// __odata. And return the affected ZF flag status. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the AESENC256KL instructions. +/// +/// \operation +/// Handle[511:0] := MEM[__h+511:__h] // Load is not guaranteed to be atomic. 
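Single blocks are then encrypted through the handle rather than through a raw key schedule; the unsigned char return value is the ZF status from the pseudo-code, so a non-zero result means the handle was rejected rather than that garbage was produced. A sketch for the 128-bit form defined above:

    /* Sketch (-mkl): encrypt one 16-byte block through a previously created
     * handle.  Returns 0 on success, -1 if the handle was rejected (ZF set). */
    #include <immintrin.h>

    int encrypt_block(unsigned char out[16], const unsigned char in[16],
                      const __m128i handle[6])
    {
        __m128i block = _mm_loadu_si128((const __m128i *)in);
        __m128i cipher;

        if (_mm_aesenc128kl_u8(&cipher, block, handle))
            return -1;                       /* illegal or tampered handle */

        _mm_storeu_si128((__m128i *)out, cipher);
        return 0;
    }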
+/// IllegalHandle := ( HandleReservedBitSet (Handle[511:0]) || +/// (Handle[127:0] AND (CPL > 0)) || +/// Handle[255:128] || +/// HandleKeyType (Handle[511:0]) != HANDLE_KEY_TYPE_AES256 ) +/// IF (IllegalHandle) +/// ZF := 1 +/// ELSE +/// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate512 (Handle[511:0], IWKey) +/// IF (Authentic == 0) +/// ZF := 1 +/// ELSE +/// MEM[__odata+127:__odata] := AES256Encrypt (__idata[127:0], UnwrappedKey) +/// ZF := 0 +/// FI +/// FI +/// dst := ZF +/// OF := 0 +/// SF := 0 +/// AF := 0 +/// PF := 0 +/// CF := 0 +/// \endoperation +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_mm_aesenc256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { + return __builtin_ia32_aesenc256kl_u8((__v2di *)__odata, (__v2di)__idata, __h); +} + +/// The AESDEC128KL performs 10 rounds of AES to decrypt the __idata using +/// the 128-bit key in the handle from the __h. It stores the result in the +/// __odata. And return the affected ZF flag status. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the AESDEC128KL instructions. +/// +/// \operation +/// Handle[383:0] := MEM[__h+383:__h] // Load is not guaranteed to be atomic. +/// IllegalHandle := (HandleReservedBitSet (Handle[383:0]) || +/// (Handle[127:0] AND (CPL > 0)) || +/// Handle[383:256] || +/// HandleKeyType (Handle[383:0]) != HANDLE_KEY_TYPE_AES128) +/// IF (IllegalHandle) +/// ZF := 1 +/// ELSE +/// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate384 (Handle[383:0], IWKey) +/// IF (Authentic == 0) +/// ZF := 1 +/// ELSE +/// MEM[__odata+127:__odata] := AES128Decrypt (__idata[127:0], UnwrappedKey) +/// ZF := 0 +/// FI +/// FI +/// dst := ZF +/// OF := 0 +/// SF := 0 +/// AF := 0 +/// PF := 0 +/// CF := 0 +/// \endoperation +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_mm_aesdec128kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { + return __builtin_ia32_aesdec128kl_u8((__v2di *)__odata, (__v2di)__idata, __h); +} + +/// The AESDEC256KL performs 10 rounds of AES to decrypt the __idata using +/// the 256-bit key in the handle from the __h. It stores the result in the +/// __odata. And return the affected ZF flag status. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the AESDEC256KL instructions. +/// +/// \operation +/// Handle[511:0] := MEM[__h+511:__h] +/// IllegalHandle := (HandleReservedBitSet (Handle[511:0]) || +/// (Handle[127:0] AND (CPL > 0)) || +/// Handle[383:256] || +/// HandleKeyType (Handle[511:0]) != HANDLE_KEY_TYPE_AES256) +/// IF (IllegalHandle) +/// ZF := 1 +/// ELSE +/// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate512 (Handle[511:0], IWKey) +/// IF (Authentic == 0) +/// ZF := 1 +/// ELSE +/// MEM[__odata+127:__odata] := AES256Decrypt (__idata[127:0], UnwrappedKey) +/// ZF := 0 +/// FI +/// FI +/// dst := ZF +/// OF := 0 +/// SF := 0 +/// AF := 0 +/// PF := 0 +/// CF := 0 +/// \endoperation +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_mm_aesdec256kl_u8(__m128i* __odata, __m128i __idata, const void *__h) { + return __builtin_ia32_aesdec256kl_u8((__v2di *)__odata, (__v2di)__idata, __h); +} + +#undef __DEFAULT_FN_ATTRS + +#endif /* !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) \ + || defined(__KL__) */ + +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__WIDEKL__) + +/* Define the default attributes for the functions in this file. 
*/ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("kl,widekl"),\ + __min_vector_width__(128))) + +/// Encrypt __idata[0] to __idata[7] using 128-bit AES key indicated by handle +/// at __h and store each resultant block back from __odata to __odata+7. And +/// return the affected ZF flag status. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the AESENCWIDE128KL instructions. +/// +/// \operation +/// Handle := MEM[__h+383:__h] +/// IllegalHandle := ( HandleReservedBitSet (Handle[383:0]) || +/// (Handle[127:0] AND (CPL > 0)) || +/// Handle[255:128] || +/// HandleKeyType (Handle[383:0]) != HANDLE_KEY_TYPE_AES128 ) +/// IF (IllegalHandle) +/// ZF := 1 +/// ELSE +/// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate384 (Handle[383:0], IWKey) +/// IF Authentic == 0 +/// ZF := 1 +/// ELSE +/// FOR i := 0 to 7 +/// __odata[i] := AES128Encrypt (__idata[i], UnwrappedKey) +/// ENDFOR +/// ZF := 0 +/// FI +/// FI +/// dst := ZF +/// OF := 0 +/// SF := 0 +/// AF := 0 +/// PF := 0 +/// CF := 0 +/// \endoperation +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_mm_aesencwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { + return __builtin_ia32_aesencwide128kl_u8((__v2di *)__odata, + (const __v2di *)__idata, __h); +} + +/// Encrypt __idata[0] to __idata[7] using 256-bit AES key indicated by handle +/// at __h and store each resultant block back from __odata to __odata+7. And +/// return the affected ZF flag status. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the AESENCWIDE256KL instructions. +/// +/// \operation +/// Handle[511:0] := MEM[__h+511:__h] +/// IllegalHandle := ( HandleReservedBitSet (Handle[511:0]) || +/// (Handle[127:0] AND (CPL > 0)) || +/// Handle[255:128] || +/// HandleKeyType (Handle[511:0]) != HANDLE_KEY_TYPE_AES512 ) +/// IF (IllegalHandle) +/// ZF := 1 +/// ELSE +/// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate512 (Handle[511:0], IWKey) +/// IF Authentic == 0 +/// ZF := 1 +/// ELSE +/// FOR i := 0 to 7 +/// __odata[i] := AES256Encrypt (__idata[i], UnwrappedKey) +/// ENDFOR +/// ZF := 0 +/// FI +/// FI +/// dst := ZF +/// OF := 0 +/// SF := 0 +/// AF := 0 +/// PF := 0 +/// CF := 0 +/// \endoperation +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_mm_aesencwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { + return __builtin_ia32_aesencwide256kl_u8((__v2di *)__odata, + (const __v2di *)__idata, __h); +} + +/// Decrypt __idata[0] to __idata[7] using 128-bit AES key indicated by handle +/// at __h and store each resultant block back from __odata to __odata+7. And +/// return the affected ZF flag status. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the AESDECWIDE128KL instructions. 
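The WIDEKL forms process eight independent blocks per invocation against a single handle, which is what an ECB- or CTR-style loop would build on; the return value is again the ZF status. A sketch for the 128-bit wide encrypt defined above, assuming -mkl -mwidekl:

    /* Sketch (-mkl -mwidekl): encrypt eight independent blocks against one
     * AES-128 handle.  A non-zero return means the handle failed validation
     * and no output was produced. */
    #include <immintrin.h>

    int encrypt_8_blocks(__m128i out[8], const __m128i in[8],
                         const __m128i handle[6])
    {
        return _mm_aesencwide128kl_u8(out, in, handle) ? -1 : 0;
    }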
+/// +/// \operation +/// Handle[383:0] := MEM[__h+383:__h] +/// IllegalHandle := ( HandleReservedBitSet (Handle[383:0]) || +/// (Handle[127:0] AND (CPL > 0)) || +/// Handle[255:128] || +/// HandleKeyType (Handle) != HANDLE_KEY_TYPE_AES128 ) +/// IF (IllegalHandle) +/// ZF := 1 +/// ELSE +/// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate384 (Handle[383:0], IWKey) +/// IF Authentic == 0 +/// ZF := 1 +/// ELSE +/// FOR i := 0 to 7 +/// __odata[i] := AES128Decrypt (__idata[i], UnwrappedKey) +/// ENDFOR +/// ZF := 0 +/// FI +/// FI +/// dst := ZF +/// OF := 0 +/// SF := 0 +/// AF := 0 +/// PF := 0 +/// CF := 0 +/// \endoperation +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_mm_aesdecwide128kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { + return __builtin_ia32_aesdecwide128kl_u8((__v2di *)__odata, + (const __v2di *)__idata, __h); +} + +/// Decrypt __idata[0] to __idata[7] using 256-bit AES key indicated by handle +/// at __h and store each resultant block back from __odata to __odata+7. And +/// return the affected ZF flag status. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the AESDECWIDE256KL instructions. +/// +/// \operation +/// Handle[511:0] := MEM[__h+511:__h] +/// IllegalHandle = ( HandleReservedBitSet (Handle[511:0]) || +/// (Handle[127:0] AND (CPL > 0)) || +/// Handle[255:128] || +/// HandleKeyType (Handle) != HANDLE_KEY_TYPE_AES512 ) +/// If (IllegalHandle) +/// ZF := 1 +/// ELSE +/// (UnwrappedKey, Authentic) := UnwrapKeyAndAuthenticate512 (Handle[511:0], IWKey) +/// IF Authentic == 0 +/// ZF := 1 +/// ELSE +/// FOR i := 0 to 7 +/// __odata[i] := AES256Decrypt (__idata[i], UnwrappedKey) +/// ENDFOR +/// ZF := 0 +/// FI +/// FI +/// dst := ZF +/// OF := 0 +/// SF := 0 +/// AF := 0 +/// PF := 0 +/// CF := 0 +/// \endoperation +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_mm_aesdecwide256kl_u8(__m128i __odata[8], const __m128i __idata[8], const void* __h) { + return __builtin_ia32_aesdecwide256kl_u8((__v2di *)__odata, + (const __v2di *)__idata, __h); +} + +#undef __DEFAULT_FN_ATTRS + +#endif /* !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) \ + || defined(__WIDEKL__) */ + +#endif /* _KEYLOCKERINTRIN_H */ diff --git a/lib/include/mm_malloc.h b/lib/include/mm_malloc.h index 0ea32517ae..933dbaacad 100644 --- a/lib/include/mm_malloc.h +++ b/lib/include/mm_malloc.h @@ -54,7 +54,13 @@ _mm_malloc(size_t __size, size_t __align) static __inline__ void __attribute__((__always_inline__, __nodebug__)) _mm_free(void *__p) { +#if defined(__MINGW32__) + __mingw_aligned_free(__p); +#elif defined(_WIN32) + _aligned_free(__p); +#else free(__p); +#endif } #endif diff --git a/lib/include/opencl-c-base.h b/lib/include/opencl-c-base.h index 430e07d36f..e8dcd70377 100644 --- a/lib/include/opencl-c-base.h +++ b/lib/include/opencl-c-base.h @@ -9,6 +9,21 @@ #ifndef _OPENCL_BASE_H_ #define _OPENCL_BASE_H_ +// Define extension macros + +#if (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) +// For SPIR all extensions are supported. 
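Stepping back to the mm_malloc.h hunk above: _mm_free now mirrors the allocation path on Windows targets instead of calling plain free on a pointer that did not come from malloc. A sketch of the usual pairing:

    /* Sketch: the usual _mm_malloc/_mm_free pairing.  With the change above,
     * the free side matches the allocation side on Windows targets
     * (_aligned_free on MSVC environments, __mingw_aligned_free on MinGW). */
    #include <stddef.h>
    #include <mm_malloc.h>

    float *alloc_vec(size_t n)
    {
        return (float *)_mm_malloc(n * sizeof(float), 64);  /* 64-byte aligned */
    }

    void free_vec(float *p)
    {
        _mm_free(p);   /* releases memory obtained from _mm_malloc */
    }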
+#if defined(__SPIR__) +#define cl_khr_subgroup_extended_types 1 +#define cl_khr_subgroup_non_uniform_vote 1 +#define cl_khr_subgroup_ballot 1 +#define cl_khr_subgroup_non_uniform_arithmetic 1 +#define cl_khr_subgroup_shuffle 1 +#define cl_khr_subgroup_shuffle_relative 1 +#define cl_khr_subgroup_clustered_reduce 1 +#endif // defined(__SPIR__) +#endif // (defined(__OPENCL_CPP_VERSION__) || __OPENCL_C_VERSION__ >= 200) + // built-in scalar data types: /** @@ -568,4 +583,7 @@ typedef struct { #pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : end #endif // cl_intel_device_side_avc_motion_estimation +// Disable any extensions we may have enabled previously. +#pragma OPENCL EXTENSION all : disable + #endif //_OPENCL_BASE_H_ diff --git a/lib/include/opencl-c.h b/lib/include/opencl-c.h index 66e18bdd47..ab665628c8 100644 --- a/lib/include/opencl-c.h +++ b/lib/include/opencl-c.h @@ -4633,6 +4633,7 @@ float16 __ovld __cnfn convert_float16(float16); // Conversions with double data type parameters or return value. #ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64 : enable char __ovld __cnfn convert_char(double); char __ovld __cnfn convert_char_rte(double); char __ovld __cnfn convert_char_rtn(double); @@ -5455,6 +5456,7 @@ double16 __ovld __cnfn convert_double16_rtz(ushort16); #endif //cl_khr_fp64 #ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable // Convert half types to non-double types. uchar __ovld __cnfn convert_uchar(half); uchar __ovld __cnfn convert_uchar_rte(half); diff --git a/lib/include/openmp_wrappers/cmath b/lib/include/openmp_wrappers/cmath index bd6011eb6f..1aff66af7d 100644 --- a/lib/include/openmp_wrappers/cmath +++ b/lib/include/openmp_wrappers/cmath @@ -24,8 +24,11 @@ // which might live in cstdlib. #include +// We need limits because __clang_cuda_cmath.h below uses `std::numeric_limit`. +#include + #pragma omp begin declare variant match( \ - device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)}) + device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any, allow_templates)}) #define __CUDA__ #define __OPENMP_NVPTX__ diff --git a/lib/include/openmp_wrappers/complex b/lib/include/openmp_wrappers/complex index d8dcd41670..142e526b81 100644 --- a/lib/include/openmp_wrappers/complex +++ b/lib/include/openmp_wrappers/complex @@ -25,3 +25,28 @@ // Grab the host header too. #include_next + + +#ifdef __cplusplus + +// If we are compiling against libc++, the macro _LIBCPP_STD_VER should be set +// after including above. Since the complex header we use is a +// simplified version of the libc++, we don't need it in this case. If we +// compile against libstdc++, or any other standard library, we will overload +// the (hopefully template) functions in the header with the ones we +// got from libc++ which decomposes math functions, like `std::sin`, into +// arithmetic and calls to non-complex functions, all of which we can then +// handle. 
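+// Illustrative note (editorial addition, not part of the upstream header): with
+// this wrapper active and a standard library other than libc++, a target region
+// such as
+//   #pragma omp target
+//   { std::complex<double> z(1.0, 2.0); auto w = std::sin(z); }
+// resolves std::sin to the decomposed overload pulled in from <complex_cmath.h>
+// when compiling for nvptx/nvptx64, instead of the host library's version.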
+#ifndef _LIBCPP_STD_VER + +#pragma omp begin declare variant match( \ + device = {arch(nvptx, nvptx64)}, \ + implementation = {extension(match_any, allow_templates)}) + +#include + +#pragma omp end declare variant + +#endif + +#endif diff --git a/lib/include/openmp_wrappers/complex_cmath.h b/lib/include/openmp_wrappers/complex_cmath.h new file mode 100644 index 0000000000..e3d9aebbbc --- /dev/null +++ b/lib/include/openmp_wrappers/complex_cmath.h @@ -0,0 +1,388 @@ +//===------------------------- __complex_cmath.h --------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// std::complex header copied from the libcxx source and simplified for use in +// OpenMP target offload regions. +// +//===----------------------------------------------------------------------===// + +#ifndef _OPENMP +#error "This file is for OpenMP compilation only." +#endif + +#ifndef __cplusplus +#error "This file is for C++ compilation only." +#endif + +#ifndef _LIBCPP_COMPLEX +#define _LIBCPP_COMPLEX + +#include +#include + +#define __DEVICE__ static constexpr __attribute__((nothrow)) + +namespace std { + +// abs + +template __DEVICE__ _Tp abs(const std::complex<_Tp> &__c) { + return hypot(__c.real(), __c.imag()); +} + +// arg + +template __DEVICE__ _Tp arg(const std::complex<_Tp> &__c) { + return atan2(__c.imag(), __c.real()); +} + +template +typename enable_if::value || is_same<_Tp, double>::value, + double>::type +arg(_Tp __re) { + return atan2(0., __re); +} + +template +typename enable_if::value, float>::type arg(_Tp __re) { + return atan2f(0.F, __re); +} + +// norm + +template __DEVICE__ _Tp norm(const std::complex<_Tp> &__c) { + if (std::isinf(__c.real())) + return abs(__c.real()); + if (std::isinf(__c.imag())) + return abs(__c.imag()); + return __c.real() * __c.real() + __c.imag() * __c.imag(); +} + +// conj + +template std::complex<_Tp> conj(const std::complex<_Tp> &__c) { + return std::complex<_Tp>(__c.real(), -__c.imag()); +} + +// proj + +template std::complex<_Tp> proj(const std::complex<_Tp> &__c) { + std::complex<_Tp> __r = __c; + if (std::isinf(__c.real()) || std::isinf(__c.imag())) + __r = std::complex<_Tp>(INFINITY, copysign(_Tp(0), __c.imag())); + return __r; +} + +// polar + +template +complex<_Tp> polar(const _Tp &__rho, const _Tp &__theta = _Tp()) { + if (std::isnan(__rho) || signbit(__rho)) + return std::complex<_Tp>(_Tp(NAN), _Tp(NAN)); + if (std::isnan(__theta)) { + if (std::isinf(__rho)) + return std::complex<_Tp>(__rho, __theta); + return std::complex<_Tp>(__theta, __theta); + } + if (std::isinf(__theta)) { + if (std::isinf(__rho)) + return std::complex<_Tp>(__rho, _Tp(NAN)); + return std::complex<_Tp>(_Tp(NAN), _Tp(NAN)); + } + _Tp __x = __rho * cos(__theta); + if (std::isnan(__x)) + __x = 0; + _Tp __y = __rho * sin(__theta); + if (std::isnan(__y)) + __y = 0; + return std::complex<_Tp>(__x, __y); +} + +// log + +template std::complex<_Tp> log(const std::complex<_Tp> &__x) { + return std::complex<_Tp>(log(abs(__x)), arg(__x)); +} + +// log10 + +template std::complex<_Tp> log10(const std::complex<_Tp> &__x) { + return log(__x) / log(_Tp(10)); +} + +// sqrt + +template +__DEVICE__ std::complex<_Tp> sqrt(const std::complex<_Tp> &__x) { + if (std::isinf(__x.imag())) + return std::complex<_Tp>(_Tp(INFINITY), __x.imag()); + if 
(std::isinf(__x.real())) { + if (__x.real() > _Tp(0)) + return std::complex<_Tp>(__x.real(), std::isnan(__x.imag()) + ? __x.imag() + : copysign(_Tp(0), __x.imag())); + return std::complex<_Tp>(std::isnan(__x.imag()) ? __x.imag() : _Tp(0), + copysign(__x.real(), __x.imag())); + } + return polar(sqrt(abs(__x)), arg(__x) / _Tp(2)); +} + +// exp + +template +__DEVICE__ std::complex<_Tp> exp(const std::complex<_Tp> &__x) { + _Tp __i = __x.imag(); + if (std::isinf(__x.real())) { + if (__x.real() < _Tp(0)) { + if (!std::isfinite(__i)) + __i = _Tp(1); + } else if (__i == 0 || !std::isfinite(__i)) { + if (std::isinf(__i)) + __i = _Tp(NAN); + return std::complex<_Tp>(__x.real(), __i); + } + } else if (std::isnan(__x.real()) && __x.imag() == 0) + return __x; + _Tp __e = exp(__x.real()); + return std::complex<_Tp>(__e * cos(__i), __e * sin(__i)); +} + +// pow + +template +std::complex<_Tp> pow(const std::complex<_Tp> &__x, + const std::complex<_Tp> &__y) { + return exp(__y * log(__x)); +} + +// __sqr, computes pow(x, 2) + +template std::complex<_Tp> __sqr(const std::complex<_Tp> &__x) { + return std::complex<_Tp>((__x.real() - __x.imag()) * + (__x.real() + __x.imag()), + _Tp(2) * __x.real() * __x.imag()); +} + +// asinh + +template +__DEVICE__ std::complex<_Tp> asinh(const std::complex<_Tp> &__x) { + const _Tp __pi(atan2(+0., -0.)); + if (std::isinf(__x.real())) { + if (std::isnan(__x.imag())) + return __x; + if (std::isinf(__x.imag())) + return std::complex<_Tp>(__x.real(), + copysign(__pi * _Tp(0.25), __x.imag())); + return std::complex<_Tp>(__x.real(), copysign(_Tp(0), __x.imag())); + } + if (std::isnan(__x.real())) { + if (std::isinf(__x.imag())) + return std::complex<_Tp>(__x.imag(), __x.real()); + if (__x.imag() == 0) + return __x; + return std::complex<_Tp>(__x.real(), __x.real()); + } + if (std::isinf(__x.imag())) + return std::complex<_Tp>(copysign(__x.imag(), __x.real()), + copysign(__pi / _Tp(2), __x.imag())); + std::complex<_Tp> __z = log(__x + sqrt(__sqr(__x) + _Tp(1))); + return std::complex<_Tp>(copysign(__z.real(), __x.real()), + copysign(__z.imag(), __x.imag())); +} + +// acosh + +template +__DEVICE__ std::complex<_Tp> acosh(const std::complex<_Tp> &__x) { + const _Tp __pi(atan2(+0., -0.)); + if (std::isinf(__x.real())) { + if (std::isnan(__x.imag())) + return std::complex<_Tp>(abs(__x.real()), __x.imag()); + if (std::isinf(__x.imag())) { + if (__x.real() > 0) + return std::complex<_Tp>(__x.real(), + copysign(__pi * _Tp(0.25), __x.imag())); + else + return std::complex<_Tp>(-__x.real(), + copysign(__pi * _Tp(0.75), __x.imag())); + } + if (__x.real() < 0) + return std::complex<_Tp>(-__x.real(), copysign(__pi, __x.imag())); + return std::complex<_Tp>(__x.real(), copysign(_Tp(0), __x.imag())); + } + if (std::isnan(__x.real())) { + if (std::isinf(__x.imag())) + return std::complex<_Tp>(abs(__x.imag()), __x.real()); + return std::complex<_Tp>(__x.real(), __x.real()); + } + if (std::isinf(__x.imag())) + return std::complex<_Tp>(abs(__x.imag()), + copysign(__pi / _Tp(2), __x.imag())); + std::complex<_Tp> __z = log(__x + sqrt(__sqr(__x) - _Tp(1))); + return std::complex<_Tp>(copysign(__z.real(), _Tp(0)), + copysign(__z.imag(), __x.imag())); +} + +// atanh + +template +__DEVICE__ std::complex<_Tp> atanh(const std::complex<_Tp> &__x) { + const _Tp __pi(atan2(+0., -0.)); + if (std::isinf(__x.imag())) { + return std::complex<_Tp>(copysign(_Tp(0), __x.real()), + copysign(__pi / _Tp(2), __x.imag())); + } + if (std::isnan(__x.imag())) { + if (std::isinf(__x.real()) || __x.real() == 0) + return 
std::complex<_Tp>(copysign(_Tp(0), __x.real()), __x.imag()); + return std::complex<_Tp>(__x.imag(), __x.imag()); + } + if (std::isnan(__x.real())) { + return std::complex<_Tp>(__x.real(), __x.real()); + } + if (std::isinf(__x.real())) { + return std::complex<_Tp>(copysign(_Tp(0), __x.real()), + copysign(__pi / _Tp(2), __x.imag())); + } + if (abs(__x.real()) == _Tp(1) && __x.imag() == _Tp(0)) { + return std::complex<_Tp>(copysign(_Tp(INFINITY), __x.real()), + copysign(_Tp(0), __x.imag())); + } + std::complex<_Tp> __z = log((_Tp(1) + __x) / (_Tp(1) - __x)) / _Tp(2); + return std::complex<_Tp>(copysign(__z.real(), __x.real()), + copysign(__z.imag(), __x.imag())); +} + +// sinh + +template +__DEVICE__ std::complex<_Tp> sinh(const std::complex<_Tp> &__x) { + if (std::isinf(__x.real()) && !std::isfinite(__x.imag())) + return std::complex<_Tp>(__x.real(), _Tp(NAN)); + if (__x.real() == 0 && !std::isfinite(__x.imag())) + return std::complex<_Tp>(__x.real(), _Tp(NAN)); + if (__x.imag() == 0 && !std::isfinite(__x.real())) + return __x; + return std::complex<_Tp>(sinh(__x.real()) * cos(__x.imag()), + cosh(__x.real()) * sin(__x.imag())); +} + +// cosh + +template +__DEVICE__ std::complex<_Tp> cosh(const std::complex<_Tp> &__x) { + if (std::isinf(__x.real()) && !std::isfinite(__x.imag())) + return std::complex<_Tp>(abs(__x.real()), _Tp(NAN)); + if (__x.real() == 0 && !std::isfinite(__x.imag())) + return std::complex<_Tp>(_Tp(NAN), __x.real()); + if (__x.real() == 0 && __x.imag() == 0) + return std::complex<_Tp>(_Tp(1), __x.imag()); + if (__x.imag() == 0 && !std::isfinite(__x.real())) + return std::complex<_Tp>(abs(__x.real()), __x.imag()); + return std::complex<_Tp>(cosh(__x.real()) * cos(__x.imag()), + sinh(__x.real()) * sin(__x.imag())); +} + +// tanh + +template +__DEVICE__ std::complex<_Tp> tanh(const std::complex<_Tp> &__x) { + if (std::isinf(__x.real())) { + if (!std::isfinite(__x.imag())) + return std::complex<_Tp>(_Tp(1), _Tp(0)); + return std::complex<_Tp>(_Tp(1), + copysign(_Tp(0), sin(_Tp(2) * __x.imag()))); + } + if (std::isnan(__x.real()) && __x.imag() == 0) + return __x; + _Tp __2r(_Tp(2) * __x.real()); + _Tp __2i(_Tp(2) * __x.imag()); + _Tp __d(cosh(__2r) + cos(__2i)); + _Tp __2rsh(sinh(__2r)); + if (std::isinf(__2rsh) && std::isinf(__d)) + return std::complex<_Tp>(__2rsh > _Tp(0) ? _Tp(1) : _Tp(-1), + __2i > _Tp(0) ? _Tp(0) : _Tp(-0.)); + return std::complex<_Tp>(__2rsh / __d, sin(__2i) / __d); +} + +// asin + +template +__DEVICE__ std::complex<_Tp> asin(const std::complex<_Tp> &__x) { + std::complex<_Tp> __z = asinh(complex<_Tp>(-__x.imag(), __x.real())); + return std::complex<_Tp>(__z.imag(), -__z.real()); +} + +// acos + +template +__DEVICE__ std::complex<_Tp> acos(const std::complex<_Tp> &__x) { + const _Tp __pi(atan2(+0., -0.)); + if (std::isinf(__x.real())) { + if (std::isnan(__x.imag())) + return std::complex<_Tp>(__x.imag(), __x.real()); + if (std::isinf(__x.imag())) { + if (__x.real() < _Tp(0)) + return std::complex<_Tp>(_Tp(0.75) * __pi, -__x.imag()); + return std::complex<_Tp>(_Tp(0.25) * __pi, -__x.imag()); + } + if (__x.real() < _Tp(0)) + return std::complex<_Tp>(__pi, + signbit(__x.imag()) ? -__x.real() : __x.real()); + return std::complex<_Tp>(_Tp(0), + signbit(__x.imag()) ? 
__x.real() : -__x.real()); + } + if (std::isnan(__x.real())) { + if (std::isinf(__x.imag())) + return std::complex<_Tp>(__x.real(), -__x.imag()); + return std::complex<_Tp>(__x.real(), __x.real()); + } + if (std::isinf(__x.imag())) + return std::complex<_Tp>(__pi / _Tp(2), -__x.imag()); + if (__x.real() == 0 && (__x.imag() == 0 || isnan(__x.imag()))) + return std::complex<_Tp>(__pi / _Tp(2), -__x.imag()); + std::complex<_Tp> __z = log(__x + sqrt(__sqr(__x) - _Tp(1))); + if (signbit(__x.imag())) + return std::complex<_Tp>(abs(__z.imag()), abs(__z.real())); + return std::complex<_Tp>(abs(__z.imag()), -abs(__z.real())); +} + +// atan + +template +__DEVICE__ std::complex<_Tp> atan(const std::complex<_Tp> &__x) { + std::complex<_Tp> __z = atanh(complex<_Tp>(-__x.imag(), __x.real())); + return std::complex<_Tp>(__z.imag(), -__z.real()); +} + +// sin + +template +__DEVICE__ std::complex<_Tp> sin(const std::complex<_Tp> &__x) { + std::complex<_Tp> __z = sinh(complex<_Tp>(-__x.imag(), __x.real())); + return std::complex<_Tp>(__z.imag(), -__z.real()); +} + +// cos + +template std::complex<_Tp> cos(const std::complex<_Tp> &__x) { + return cosh(complex<_Tp>(-__x.imag(), __x.real())); +} + +// tan + +template +__DEVICE__ std::complex<_Tp> tan(const std::complex<_Tp> &__x) { + std::complex<_Tp> __z = tanh(complex<_Tp>(-__x.imag(), __x.real())); + return std::complex<_Tp>(__z.imag(), -__z.real()); +} + +} // namespace std + +#endif diff --git a/lib/include/popcntintrin.h b/lib/include/popcntintrin.h index 3129010147..0aa94aecda 100644 --- a/lib/include/popcntintrin.h +++ b/lib/include/popcntintrin.h @@ -13,6 +13,12 @@ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("popcnt"))) +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr +#else +#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS +#endif + /// Counts the number of bits in the source operand having a value of 1. /// /// \headerfile @@ -23,7 +29,7 @@ /// An unsigned 32-bit integer operand. /// \returns A 32-bit integer containing the number of bits with value 1 in the /// source operand. -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_popcnt_u32(unsigned int __A) { return __builtin_popcount(__A); @@ -40,7 +46,7 @@ _mm_popcnt_u32(unsigned int __A) /// An unsigned 64-bit integer operand. /// \returns A 64-bit integer containing the number of bits with value 1 in the /// source operand. 
-static __inline__ long long __DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS_CONSTEXPR _mm_popcnt_u64(unsigned long long __A) { return __builtin_popcountll(__A); @@ -48,5 +54,6 @@ _mm_popcnt_u64(unsigned long long __A) #endif /* __x86_64__ */ #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif /* __POPCNTINTRIN_H */ diff --git a/lib/include/ppc_wrappers/smmintrin.h b/lib/include/ppc_wrappers/smmintrin.h index 56ef6ba76b..64f0c76199 100644 --- a/lib/include/ppc_wrappers/smmintrin.h +++ b/lib/include/ppc_wrappers/smmintrin.h @@ -78,6 +78,30 @@ extern __inline __m128i return (__m128i)vec_sel((__v16qu)__A, (__v16qu)__B, __lmask); } +extern __inline __m128i + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_insert_epi8(__m128i const __A, int const __D, int const __N) { + __v16qi result = (__v16qi)__A; + result[__N & 0xf] = __D; + return (__m128i)result; +} + +extern __inline __m128i + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_insert_epi32(__m128i const __A, int const __D, int const __N) { + __v4si result = (__v4si)__A; + result[__N & 3] = __D; + return (__m128i)result; +} + +extern __inline __m128i + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_insert_epi64(__m128i const __A, long long const __D, int const __N) { + __v2di result = (__v2di)__A; + result[__N & 1] = __D; + return (__m128i)result; +} + #else #include_next #endif /* defined(__linux__) && defined(__ppc64__) */ diff --git a/lib/include/uintrintrin.h b/lib/include/uintrintrin.h new file mode 100644 index 0000000000..78aa8779c3 --- /dev/null +++ b/lib/include/uintrintrin.h @@ -0,0 +1,150 @@ +/*===------------------ uintrintrin.h - UINTR intrinsics -------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __X86GPRINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __UINTRINTRIN_H +#define __UINTRINTRIN_H + +/* Define the default attributes for the functions in this file */ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("uintr"))) + +#ifdef __x86_64__ + +/// Clears the user interrupt flag (UIF). Its effect takes place immediately: a +/// user interrupt cannot be delivered on the instruction boundary following +/// CLUI. Can be executed only if CR4.UINT = 1, the logical processor is in +/// 64-bit mode, and software is not executing inside an enclave; otherwise, +/// each causes an invalid-opcode exception. Causes a transactional abort if +/// executed inside a transactional region; the abort loads EAX as it would +/// had it been due to an execution of CLI. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the CLUI instruction. +/// +/// \operation +/// UIF := 0 +/// \endoperation +static __inline__ void __DEFAULT_FN_ATTRS +_clui (void) +{ + __builtin_ia32_clui(); +} + +/// Sets the user interrupt flag (UIF). Its effect takes place immediately; a +/// user interrupt may be delivered on the instruction boundary following +/// STUI. Can be executed only if CR4.UINT = 1, the logical processor is in +/// 64-bit mode, and software is not executing inside an enclave; otherwise, +/// each causes an invalid-opcode exception. 
Causes a transactional abort if +/// executed inside a transactional region; the abort loads EAX as it would +/// had it been due to an execution of STI. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the STUI instruction. +/// +/// \operation +/// UIF := 1 +/// \endoperation +static __inline__ void __DEFAULT_FN_ATTRS +_stui (void) +{ + __builtin_ia32_stui(); +} + +/// Get the current value of the user interrupt flag (UIF). Can be executed +/// regardless of CPL and inside a transactional region. Can be executed only +/// if CR4.UINT = 1, the logical processor is in 64-bit mode, and software is +/// not executing inside an enclave; otherwise, it causes an invalid-opcode +/// exception. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the TESTUI instruction. +/// +/// \returns The current value of the user interrupt flag (UIF). +/// +/// \operation +/// CF := UIF +/// ZF := 0 +/// AF := 0 +/// OF := 0 +/// PF := 0 +/// SF := 0 +/// dst := CF +/// \endoperation +static __inline__ unsigned char __DEFAULT_FN_ATTRS +_testui (void) +{ + return __builtin_ia32_testui(); +} + +/// Send interprocessor user interrupt. Can be executed only if +/// CR4.UINT = IA32_UINT_TT[0] = 1, the logical processor is in 64-bit mode, +/// and software is not executing inside an enclave; otherwise, it causes an +/// invalid-opcode exception. May be executed at any privilege level, all of +/// its memory accesses are performed with supervisor privilege. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the SENDUIPI instruction +/// +/// \param __a +/// Index of user-interrupt target table entry in user-interrupt target +/// table. +/// +/// \operation +/// IF __a > UITTSZ +/// GP (0) +/// FI +/// tempUITTE := MEM[UITTADDR + (a<<4)] +/// // tempUITTE must be valid, and can't have any reserved bit set +/// IF (tempUITTE.V == 0 OR tempUITTE[7:1] != 0) +/// GP (0) +/// FI +/// tempUPID := MEM[tempUITTE.UPIDADDR] // under lock +/// // tempUPID can't have any reserved bit set +/// IF (tempUPID[15:2] != 0 OR tempUPID[31:24] != 0) +/// GP (0) // release lock +/// FI +/// tempUPID.PIR[tempUITTE.UV] := 1; +/// IF (tempUPID.SN == 0 AND tempUPID.ON == 0) +/// tempUPID.ON := 1 +/// sendNotify := 1 +/// ELSE +/// sendNotify := 0 +/// FI +/// MEM[tempUITTE.UPIDADDR] := tempUPID // release lock +/// IF sendNotify == 1 +/// IF IA32_APIC_BASE[10] == 1 // local APIC is in x2APIC mode +/// // send ordinary IPI with vector tempUPID.NV to 32-bit physical APIC +/// // ID tempUPID.NDST +/// SendOrdinaryIPI(tempUPID.NV, tempUPID.NDST) +/// ELSE +/// // send ordinary IPI with vector tempUPID.NV to 8-bit physical APIC +/// // ID tempUPID.NDST[15:8] +/// SendOrdinaryIPI(tempUPID.NV, tempUPID.NDST[15:8]) +/// FI +/// FI +/// \endoperation +static __inline__ void __DEFAULT_FN_ATTRS +_senduipi (unsigned long long __a) +{ + __builtin_ia32_senduipi(__a); +} + +#endif /* __x86_64__ */ + +#undef __DEFAULT_FN_ATTRS + +#endif /* __UINTRINTRIN_H */ diff --git a/lib/include/wasm_simd128.h b/lib/include/wasm_simd128.h index b78123834b..ac88516ac9 100644 --- a/lib/include/wasm_simd128.h +++ b/lib/include/wasm_simd128.h @@ -18,8 +18,7 @@ typedef int32_t v128_t __attribute__((__vector_size__(16), __aligned__(16))); // Internal types determined by clang builtin definitions typedef int32_t __v128_u __attribute__((__vector_size__(16), __aligned__(1))); -typedef char __i8x16 __attribute__((__vector_size__(16), __aligned__(16))); -typedef signed char __s8x16 +typedef signed char __i8x16 __attribute__((__vector_size__(16), 
__aligned__(16))); typedef unsigned char __u8x16 __attribute__((__vector_size__(16), __aligned__(16))); @@ -35,6 +34,13 @@ typedef unsigned long long __u64x2 typedef float __f32x4 __attribute__((__vector_size__(16), __aligned__(16))); typedef double __f64x2 __attribute__((__vector_size__(16), __aligned__(16))); +typedef signed char __i8x8 __attribute__((__vector_size__(8), __aligned__(8))); +typedef unsigned char __u8x8 + __attribute__((__vector_size__(8), __aligned__(8))); +typedef short __i16x4 __attribute__((__vector_size__(8), __aligned__(8))); +typedef unsigned short __u16x4 + __attribute__((__vector_size__(8), __aligned__(8))); + #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("simd128"), \ __min_vector_width__(128))) @@ -273,7 +279,7 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_splat(int8_t __a) { (__builtin_wasm_extract_lane_s_i8x16((__i8x16)(__a), __i)) #define wasm_u8x16_extract_lane(__a, __i) \ - (__builtin_wasm_extract_lane_u_i8x16((__i8x16)(__a), __i)) + (__builtin_wasm_extract_lane_u_i8x16((__u8x16)(__a), __i)) #define wasm_i8x16_replace_lane(__a, __i, __b) \ ((v128_t)__builtin_wasm_replace_lane_i8x16((__i8x16)(__a), __i, __b)) @@ -286,7 +292,7 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_splat(int16_t __a) { (__builtin_wasm_extract_lane_s_i16x8((__i16x8)(__a), __i)) #define wasm_u16x8_extract_lane(__a, __i) \ - (__builtin_wasm_extract_lane_u_i16x8((__i16x8)(__a), __i)) + (__builtin_wasm_extract_lane_u_i16x8((__u16x8)(__a), __i)) #define wasm_i16x8_replace_lane(__a, __i, __b) \ ((v128_t)__builtin_wasm_replace_lane_i16x8((__i16x8)(__a), __i, __b)) @@ -333,17 +339,17 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_f64x2_splat(double __a) { static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_eq(v128_t __a, v128_t __b) { - return (v128_t)((__s8x16)__a == (__s8x16)__b); + return (v128_t)((__i8x16)__a == (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_ne(v128_t __a, v128_t __b) { - return (v128_t)((__s8x16)__a != (__s8x16)__b); + return (v128_t)((__i8x16)__a != (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_lt(v128_t __a, v128_t __b) { - return (v128_t)((__s8x16)__a < (__s8x16)__b); + return (v128_t)((__i8x16)__a < (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_lt(v128_t __a, @@ -353,7 +359,7 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_lt(v128_t __a, static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_gt(v128_t __a, v128_t __b) { - return (v128_t)((__s8x16)__a > (__s8x16)__b); + return (v128_t)((__i8x16)__a > (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_gt(v128_t __a, @@ -363,7 +369,7 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_gt(v128_t __a, static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_le(v128_t __a, v128_t __b) { - return (v128_t)((__s8x16)__a <= (__s8x16)__b); + return (v128_t)((__i8x16)__a <= (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_le(v128_t __a, @@ -373,7 +379,7 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_le(v128_t __a, static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_ge(v128_t __a, v128_t __b) { - return (v128_t)((__s8x16)__a >= (__s8x16)__b); + return (v128_t)((__i8x16)__a >= (__i8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_ge(v128_t __a, @@ -595,7 +601,7 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_shl(v128_t __a, static __inline__ v128_t __DEFAULT_FN_ATTRS 
wasm_i8x16_shr(v128_t __a, int32_t __b) { - return (v128_t)((__s8x16)__a >> __b); + return (v128_t)((__i8x16)__a >> __b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_shr(v128_t __a, @@ -616,8 +622,8 @@ wasm_i8x16_add_saturate(v128_t __a, v128_t __b) { static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_add_saturate(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_add_saturate_u_i8x16((__i8x16)__a, - (__i8x16)__b); + return (v128_t)__builtin_wasm_add_saturate_u_i8x16((__u8x16)__a, + (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_sub(v128_t __a, @@ -633,8 +639,8 @@ wasm_i8x16_sub_saturate(v128_t __a, v128_t __b) { static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_sub_saturate(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_sub_saturate_u_i8x16((__i8x16)__a, - (__i8x16)__b); + return (v128_t)__builtin_wasm_sub_saturate_u_i8x16((__u8x16)__a, + (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_min(v128_t __a, @@ -644,7 +650,7 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_min(v128_t __a, static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_min(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_min_u_i8x16((__i8x16)__a, (__i8x16)__b); + return (v128_t)__builtin_wasm_min_u_i8x16((__u8x16)__a, (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_max(v128_t __a, @@ -654,12 +660,12 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i8x16_max(v128_t __a, static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_max(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_max_u_i8x16((__i8x16)__a, (__i8x16)__b); + return (v128_t)__builtin_wasm_max_u_i8x16((__u8x16)__a, (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_avgr(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_avgr_u_i8x16((__i8x16)__a, (__i8x16)__b); + return (v128_t)__builtin_wasm_avgr_u_i8x16((__u8x16)__a, (__u8x16)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_abs(v128_t __a) { @@ -706,8 +712,8 @@ wasm_i16x8_add_saturate(v128_t __a, v128_t __b) { static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_add_saturate(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_add_saturate_u_i16x8((__i16x8)__a, - (__i16x8)__b); + return (v128_t)__builtin_wasm_add_saturate_u_i16x8((__u16x8)__a, + (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_sub(v128_t __a, @@ -723,8 +729,8 @@ wasm_i16x8_sub_saturate(v128_t __a, v128_t __b) { static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_sub_saturate(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_sub_saturate_u_i16x8((__i16x8)__a, - (__i16x8)__b); + return (v128_t)__builtin_wasm_sub_saturate_u_i16x8((__u16x8)__a, + (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_mul(v128_t __a, @@ -739,7 +745,7 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_min(v128_t __a, static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_min(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_min_u_i16x8((__i16x8)__a, (__i16x8)__b); + return (v128_t)__builtin_wasm_min_u_i16x8((__u16x8)__a, (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_max(v128_t __a, @@ -749,12 +755,12 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_max(v128_t __a, static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_max(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_max_u_i16x8((__i16x8)__a, (__i16x8)__b); + return (v128_t)__builtin_wasm_max_u_i16x8((__u16x8)__a, 
(__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_avgr(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_avgr_u_i16x8((__i16x8)__a, (__i16x8)__b); + return (v128_t)__builtin_wasm_avgr_u_i16x8((__u16x8)__a, (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_abs(v128_t __a) { @@ -810,7 +816,7 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_min(v128_t __a, static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_min(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_min_u_i32x4((__i32x4)__a, (__i32x4)__b); + return (v128_t)__builtin_wasm_min_u_i32x4((__u32x4)__a, (__u32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_max(v128_t __a, @@ -820,7 +826,7 @@ static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_max(v128_t __a, static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u32x4_max(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_max_u_i32x4((__i32x4)__a, (__i32x4)__b); + return (v128_t)__builtin_wasm_max_u_i32x4((__u32x4)__a, (__u32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i64x2_neg(v128_t __a) { @@ -1071,8 +1077,8 @@ wasm_i8x16_narrow_i16x8(v128_t __a, v128_t __b) { static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u8x16_narrow_i16x8(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_narrow_u_i8x16_i16x8((__i16x8)__a, - (__i16x8)__b); + return (v128_t)__builtin_wasm_narrow_u_i8x16_i16x8((__u16x8)__a, + (__u16x8)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS @@ -1083,48 +1089,76 @@ wasm_i16x8_narrow_i32x4(v128_t __a, v128_t __b) { static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_u16x8_narrow_i32x4(v128_t __a, v128_t __b) { - return (v128_t)__builtin_wasm_narrow_u_i16x8_i32x4((__i32x4)__a, - (__i32x4)__b); + return (v128_t)__builtin_wasm_narrow_u_i16x8_i32x4((__u32x4)__a, + (__u32x4)__b); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_widen_low_i8x16(v128_t __a) { - return (v128_t)__builtin_wasm_widen_low_s_i16x8_i8x16((__i8x16)__a); + return (v128_t) __builtin_convertvector( + (__i8x8){((__i8x16)__a)[0], ((__i8x16)__a)[1], ((__i8x16)__a)[2], + ((__i8x16)__a)[3], ((__i8x16)__a)[4], ((__i8x16)__a)[5], + ((__i8x16)__a)[6], ((__i8x16)__a)[7]}, + __i16x8); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_widen_high_i8x16(v128_t __a) { - return (v128_t)__builtin_wasm_widen_high_s_i16x8_i8x16((__i8x16)__a); + return (v128_t) __builtin_convertvector( + (__i8x8){((__i8x16)__a)[8], ((__i8x16)__a)[9], ((__i8x16)__a)[10], + ((__i8x16)__a)[11], ((__i8x16)__a)[12], ((__i8x16)__a)[13], + ((__i8x16)__a)[14], ((__i8x16)__a)[15]}, + __i16x8); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_widen_low_u8x16(v128_t __a) { - return (v128_t)__builtin_wasm_widen_low_u_i16x8_i8x16((__i8x16)__a); + return (v128_t) __builtin_convertvector( + (__u8x8){((__u8x16)__a)[0], ((__u8x16)__a)[1], ((__u8x16)__a)[2], + ((__u8x16)__a)[3], ((__u8x16)__a)[4], ((__u8x16)__a)[5], + ((__u8x16)__a)[6], ((__u8x16)__a)[7]}, + __u16x8); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i16x8_widen_high_u8x16(v128_t __a) { - return (v128_t)__builtin_wasm_widen_high_u_i16x8_i8x16((__i8x16)__a); + return (v128_t) __builtin_convertvector( + (__u8x8){((__u8x16)__a)[8], ((__u8x16)__a)[9], ((__u8x16)__a)[10], + ((__u8x16)__a)[11], ((__u8x16)__a)[12], ((__u8x16)__a)[13], + ((__u8x16)__a)[14], ((__u8x16)__a)[15]}, + __u16x8); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_widen_low_i16x8(v128_t __a) { - return (v128_t)__builtin_wasm_widen_low_s_i32x4_i16x8((__i16x8)__a); + return (v128_t) 
__builtin_convertvector( + (__i16x4){((__i16x8)__a)[0], ((__i16x8)__a)[1], ((__i16x8)__a)[2], + ((__i16x8)__a)[3]}, + __i32x4); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_widen_high_i16x8(v128_t __a) { - return (v128_t)__builtin_wasm_widen_high_s_i32x4_i16x8((__i16x8)__a); + return (v128_t) __builtin_convertvector( + (__i16x4){((__i16x8)__a)[4], ((__i16x8)__a)[5], ((__i16x8)__a)[6], + ((__i16x8)__a)[7]}, + __i32x4); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_widen_low_u16x8(v128_t __a) { - return (v128_t)__builtin_wasm_widen_low_u_i32x4_i16x8((__i16x8)__a); + return (v128_t) __builtin_convertvector( + (__u16x4){((__u16x8)__a)[0], ((__u16x8)__a)[1], ((__u16x8)__a)[2], + ((__u16x8)__a)[3]}, + __u32x4); } static __inline__ v128_t __DEFAULT_FN_ATTRS wasm_i32x4_widen_high_u16x8(v128_t __a) { - return (v128_t)__builtin_wasm_widen_high_u_i32x4_i16x8((__i16x8)__a); + return (v128_t) __builtin_convertvector( + (__u16x4){((__u16x8)__a)[4], ((__u16x8)__a)[5], ((__u16x8)__a)[6], + ((__u16x8)__a)[7]}, + __u32x4); } // Undefine helper macros diff --git a/lib/include/x86gprintrin.h b/lib/include/x86gprintrin.h new file mode 100644 index 0000000000..1fc6cab4b2 --- /dev/null +++ b/lib/include/x86gprintrin.h @@ -0,0 +1,23 @@ +/*===--------------- x86gprintrin.h - X86 GPR intrinsics ------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __X86GPRINTRIN_H +#define __X86GPRINTRIN_H + +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__HRESET__) +#include +#endif + +#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \ + defined(__UINTR__) +#include +#endif + +#endif /* __X86GPRINTRIN_H */ diff --git a/lib/libcxx/include/__availability b/lib/libcxx/include/__availability new file mode 100644 index 0000000000..db2267c8eb --- /dev/null +++ b/lib/libcxx/include/__availability @@ -0,0 +1,206 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___AVAILABILITY +#define _LIBCPP___AVAILABILITY + +#include <__config> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +// Libc++ is shipped by various vendors. In particular, it is used as a system +// library on macOS, iOS and other Apple platforms. In order for users to be +// able to compile a binary that is intended to be deployed to an older version +// of a platform, Clang provides availability attributes [1]. These attributes +// can be placed on declarations and are used to describe the life cycle of a +// symbol in the library. +// +// The main goal is to ensure a compile-time error if a symbol that hasn't been +// introduced in a previously released library is used in a program that targets +// that previously released library. Normally, this would be a load-time error +// when one tries to launch the program against the older library. 
+// +// For example, the filesystem library was introduced in the dylib in macOS 10.15. +// If a user compiles on a macOS 10.15 host but targets macOS 10.13 with their +// program, the compiler would normally not complain (because the required +// declarations are in the headers), but the dynamic loader would fail to find +// the symbols when actually trying to launch the program on macOS 10.13. To +// turn this into a compile-time issue instead, declarations are annotated with +// when they were introduced, and the compiler can produce a diagnostic if the +// program references something that isn't available on the deployment target. +// +// This mechanism is general in nature, and any vendor can add their markup to +// the library (see below). Whenever a new feature is added that requires support +// in the shared library, a macro should be added below to mark this feature +// as unavailable. When vendors decide to ship the feature as part of their +// shared library, they can update the markup appropriately. +// +// Note that this mechanism is disabled by default in the "upstream" libc++. +// Availability annotations are only meaningful when shipping libc++ inside +// a platform (i.e. as a system library), and so vendors that want them should +// turn those annotations on at CMake configuration time. +// +// [1]: https://clang.llvm.org/docs/AttributeReference.html#availability + + +// For backwards compatibility, allow users to define _LIBCPP_DISABLE_AVAILABILITY +// for a while. +#if defined(_LIBCPP_DISABLE_AVAILABILITY) +# if !defined(_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) +# define _LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS +# endif +#endif + +// Availability markup is disabled when building the library, or when the compiler +// doesn't support the proper attributes. +#if defined(_LIBCPP_BUILDING_LIBRARY) || \ + defined(_LIBCXXABI_BUILDING_LIBRARY) || \ + !__has_feature(attribute_availability_with_strict) || \ + !__has_feature(attribute_availability_in_templates) || \ + !__has_extension(pragma_clang_attribute_external_declaration) +# if !defined(_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) +# define _LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS +# endif +#endif + +#if defined(_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS) + + // This controls the availability of std::shared_mutex and std::shared_timed_mutex, + // which were added to the dylib later. +# define _LIBCPP_AVAILABILITY_SHARED_MUTEX + + // These macros control the availability of std::bad_optional_access and + // other exception types. These were put in the shared library to prevent + // code bloat from every user program defining the vtable for these exception + // types. +# define _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS +# define _LIBCPP_AVAILABILITY_BAD_VARIANT_ACCESS +# define _LIBCPP_AVAILABILITY_BAD_ANY_CAST + + // This controls the availability of std::uncaught_exceptions(). +# define _LIBCPP_AVAILABILITY_UNCAUGHT_EXCEPTIONS + + // This controls the availability of the sized version of ::operator delete, + // which was added to the dylib later. +# define _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE + + // This controls the availability of the std::future_error exception. +# define _LIBCPP_AVAILABILITY_FUTURE_ERROR + + // This controls the availability of std::type_info's vtable. + // I can't imagine how using std::type_info can work at all if + // this isn't supported. +# define _LIBCPP_AVAILABILITY_TYPEINFO_VTABLE + + // This controls the availability of std::locale::category members + // (e.g. 
std::locale::collate), which are defined in the dylib. +# define _LIBCPP_AVAILABILITY_LOCALE_CATEGORY + + // This controls the availability of atomic operations on std::shared_ptr + // (e.g. `std::atomic_store(std::shared_ptr)`), which require a shared + // lock table located in the dylib. +# define _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR + + // These macros control the availability of all parts of that + // depend on something in the dylib. +# define _LIBCPP_AVAILABILITY_FILESYSTEM +# define _LIBCPP_AVAILABILITY_FILESYSTEM_PUSH +# define _LIBCPP_AVAILABILITY_FILESYSTEM_POP + + // This controls the availability of std::to_chars. +# define _LIBCPP_AVAILABILITY_TO_CHARS + + // This controls the availability of the C++20 synchronization library, + // which requires shared library support for various operations + // (see libcxx/src/atomic.cpp). +# define _LIBCPP_AVAILABILITY_SYNC + +#elif defined(__APPLE__) + +# define _LIBCPP_AVAILABILITY_SHARED_MUTEX \ + __attribute__((availability(macosx,strict,introduced=10.12))) \ + __attribute__((availability(ios,strict,introduced=10.0))) \ + __attribute__((availability(tvos,strict,introduced=10.0))) \ + __attribute__((availability(watchos,strict,introduced=3.0))) +# define _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS \ + __attribute__((availability(macosx,strict,introduced=10.13))) \ + __attribute__((availability(ios,strict,introduced=11.0))) \ + __attribute__((availability(tvos,strict,introduced=11.0))) \ + __attribute__((availability(watchos,strict,introduced=4.0))) +# define _LIBCPP_AVAILABILITY_BAD_VARIANT_ACCESS \ + _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS +# define _LIBCPP_AVAILABILITY_BAD_ANY_CAST \ + _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS +# define _LIBCPP_AVAILABILITY_UNCAUGHT_EXCEPTIONS \ + __attribute__((availability(macosx,strict,introduced=10.12))) \ + __attribute__((availability(ios,strict,introduced=10.0))) \ + __attribute__((availability(tvos,strict,introduced=10.0))) \ + __attribute__((availability(watchos,strict,introduced=3.0))) +# define _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE \ + __attribute__((availability(macosx,strict,introduced=10.12))) \ + __attribute__((availability(ios,strict,introduced=10.0))) \ + __attribute__((availability(tvos,strict,introduced=10.0))) \ + __attribute__((availability(watchos,strict,introduced=3.0))) +# define _LIBCPP_AVAILABILITY_FUTURE_ERROR \ + __attribute__((availability(ios,strict,introduced=6.0))) +# define _LIBCPP_AVAILABILITY_TYPEINFO_VTABLE \ + __attribute__((availability(macosx,strict,introduced=10.9))) \ + __attribute__((availability(ios,strict,introduced=7.0))) +# define _LIBCPP_AVAILABILITY_LOCALE_CATEGORY \ + __attribute__((availability(macosx,strict,introduced=10.9))) \ + __attribute__((availability(ios,strict,introduced=7.0))) +# define _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR \ + __attribute__((availability(macosx,strict,introduced=10.9))) \ + __attribute__((availability(ios,strict,introduced=7.0))) +# define _LIBCPP_AVAILABILITY_FILESYSTEM \ + __attribute__((availability(macosx,strict,introduced=10.15))) \ + __attribute__((availability(ios,strict,introduced=13.0))) \ + __attribute__((availability(tvos,strict,introduced=13.0))) \ + __attribute__((availability(watchos,strict,introduced=6.0))) +# define _LIBCPP_AVAILABILITY_FILESYSTEM_PUSH \ + _Pragma("clang attribute push(__attribute__((availability(macosx,strict,introduced=10.15))), apply_to=any(function,record))") \ + _Pragma("clang attribute push(__attribute__((availability(ios,strict,introduced=13.0))), apply_to=any(function,record))") \ + 
_Pragma("clang attribute push(__attribute__((availability(tvos,strict,introduced=13.0))), apply_to=any(function,record))") \ + _Pragma("clang attribute push(__attribute__((availability(watchos,strict,introduced=6.0))), apply_to=any(function,record))") +# define _LIBCPP_AVAILABILITY_FILESYSTEM_POP \ + _Pragma("clang attribute pop") \ + _Pragma("clang attribute pop") \ + _Pragma("clang attribute pop") \ + _Pragma("clang attribute pop") +# define _LIBCPP_AVAILABILITY_TO_CHARS \ + _LIBCPP_AVAILABILITY_FILESYSTEM +# define _LIBCPP_AVAILABILITY_SYNC \ + __attribute__((unavailable)) + +#else + +// ...New vendors can add availability markup here... + +# error "It looks like you're trying to enable vendor availability markup, but you haven't defined the corresponding macros yet!" + +#endif + +// Define availability attributes that depend on _LIBCPP_NO_EXCEPTIONS. +// Those are defined in terms of the availability attributes above, and +// should not be vendor-specific. +#if defined(_LIBCPP_NO_EXCEPTIONS) +# define _LIBCPP_AVAILABILITY_FUTURE +# define _LIBCPP_AVAILABILITY_THROW_BAD_ANY_CAST +# define _LIBCPP_AVAILABILITY_THROW_BAD_OPTIONAL_ACCESS +# define _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS +#else +# define _LIBCPP_AVAILABILITY_FUTURE _LIBCPP_AVAILABILITY_FUTURE_ERROR +# define _LIBCPP_AVAILABILITY_THROW_BAD_ANY_CAST _LIBCPP_AVAILABILITY_BAD_ANY_CAST +# define _LIBCPP_AVAILABILITY_THROW_BAD_OPTIONAL_ACCESS _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS +# define _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS _LIBCPP_AVAILABILITY_BAD_VARIANT_ACCESS +#endif + +#endif // _LIBCPP___AVAILABILITY diff --git a/lib/libcxx/include/__bit_reference b/lib/libcxx/include/__bit_reference index 4a2b82064b..9cfb4b84e6 100644 --- a/lib/libcxx/include/__bit_reference +++ b/lib/libcxx/include/__bit_reference @@ -11,7 +11,7 @@ #define _LIBCPP___BIT_REFERENCE #include <__config> -#include +#include <__bits> #include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -239,8 +239,8 @@ __bit_iterator<_Cp, _IsConst> find(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value_) { if (static_cast(__value_)) - return __find_bool_true(__first, static_cast(__last - __first)); - return __find_bool_false(__first, static_cast(__last - __first)); + return _VSTD::__find_bool_true(__first, static_cast(__last - __first)); + return _VSTD::__find_bool_false(__first, static_cast(__last - __first)); } // count @@ -313,8 +313,8 @@ typename __bit_iterator<_Cp, _IsConst>::difference_type count(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, const _Tp& __value_) { if (static_cast(__value_)) - return __count_bool_true(__first, static_cast(__last - __first)); - return __count_bool_false(__first, static_cast(__last - __first)); + return _VSTD::__count_bool_true(__first, static_cast(__last - __first)); + return _VSTD::__count_bool_false(__first, static_cast(__last - __first)); } // fill_n @@ -387,9 +387,9 @@ fill_n(__bit_iterator<_Cp, false> __first, typename _Cp::size_type __n, bool __v if (__n > 0) { if (__value_) - __fill_n_true(__first, __n); + _VSTD::__fill_n_true(__first, __n); else - __fill_n_false(__first, __n); + _VSTD::__fill_n_false(__first, __n); } } @@ -538,8 +538,8 @@ __bit_iterator<_Cp, false> copy(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { if (__first.__ctz_ == __result.__ctz_) - return __copy_aligned(__first, __last, __result); - return __copy_unaligned(__first, __last, __result); + 
return _VSTD::__copy_aligned(__first, __last, __result); + return _VSTD::__copy_unaligned(__first, __last, __result); } // copy_backward @@ -685,8 +685,8 @@ __bit_iterator<_Cp, false> copy_backward(__bit_iterator<_Cp, _IsConst> __first, __bit_iterator<_Cp, _IsConst> __last, __bit_iterator<_Cp, false> __result) { if (__last.__ctz_ == __result.__ctz_) - return __copy_backward_aligned(__first, __last, __result); - return __copy_backward_unaligned(__first, __last, __result); + return _VSTD::__copy_backward_aligned(__first, __last, __result); + return _VSTD::__copy_backward_unaligned(__first, __last, __result); } // move @@ -868,8 +868,8 @@ swap_ranges(__bit_iterator<__C1, false> __first1, __bit_iterator<__C1, false> __ __bit_iterator<__C2, false> __first2) { if (__first1.__ctz_ == __first2.__ctz_) - return __swap_ranges_aligned(__first1, __last1, __first2); - return __swap_ranges_unaligned(__first1, __last1, __first2); + return _VSTD::__swap_ranges_aligned(__first1, __last1, __first2); + return _VSTD::__swap_ranges_unaligned(__first1, __last1, __first2); } // rotate @@ -1083,8 +1083,8 @@ bool equal(__bit_iterator<_Cp, _IC1> __first1, __bit_iterator<_Cp, _IC1> __last1, __bit_iterator<_Cp, _IC2> __first2) { if (__first1.__ctz_ == __first2.__ctz_) - return __equal_aligned(__first1, __last1, __first2); - return __equal_unaligned(__first1, __last1, __first2); + return _VSTD::__equal_aligned(__first1, __last1, __first2); + return _VSTD::__equal_unaligned(__first1, __last1, __first2); } template + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + + +_LIBCPP_BEGIN_NAMESPACE_STD + +#ifndef _LIBCPP_COMPILER_MSVC + +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR +int __libcpp_ctz(unsigned __x) _NOEXCEPT { return __builtin_ctz(__x); } + +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR +int __libcpp_ctz(unsigned long __x) _NOEXCEPT { return __builtin_ctzl(__x); } + +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR +int __libcpp_ctz(unsigned long long __x) _NOEXCEPT { return __builtin_ctzll(__x); } + + +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR +int __libcpp_clz(unsigned __x) _NOEXCEPT { return __builtin_clz(__x); } + +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR +int __libcpp_clz(unsigned long __x) _NOEXCEPT { return __builtin_clzl(__x); } + +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR +int __libcpp_clz(unsigned long long __x) _NOEXCEPT { return __builtin_clzll(__x); } + + +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR +int __libcpp_popcount(unsigned __x) _NOEXCEPT { return __builtin_popcount(__x); } + +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR +int __libcpp_popcount(unsigned long __x) _NOEXCEPT { return __builtin_popcountl(__x); } + +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR +int __libcpp_popcount(unsigned long long __x) _NOEXCEPT { return __builtin_popcountll(__x); } + +#else // _LIBCPP_COMPILER_MSVC + +// Precondition: __x != 0 +inline _LIBCPP_INLINE_VISIBILITY +int __libcpp_ctz(unsigned __x) { + static_assert(sizeof(unsigned) == sizeof(unsigned long), ""); + static_assert(sizeof(unsigned long) == 4, ""); + unsigned long __where; + if (_BitScanForward(&__where, __x)) + return static_cast(__where); + return 32; +} + +inline _LIBCPP_INLINE_VISIBILITY +int __libcpp_ctz(unsigned long __x) { + static_assert(sizeof(unsigned long) == sizeof(unsigned), ""); + return __ctz(static_cast(__x)); +} + +inline _LIBCPP_INLINE_VISIBILITY +int __libcpp_ctz(unsigned long long __x) { 
+ unsigned long __where; +#if defined(_LIBCPP_HAS_BITSCAN64) + (defined(_M_AMD64) || defined(__x86_64__)) + if (_BitScanForward64(&__where, __x)) + return static_cast(__where); +#else + // Win32 doesn't have _BitScanForward64 so emulate it with two 32 bit calls. + if (_BitScanForward(&__where, static_cast(__x))) + return static_cast(__where); + if (_BitScanForward(&__where, static_cast(__x >> 32))) + return static_cast(__where + 32); +#endif + return 64; +} + +// Precondition: __x != 0 +inline _LIBCPP_INLINE_VISIBILITY +int __libcpp_clz(unsigned __x) { + static_assert(sizeof(unsigned) == sizeof(unsigned long), ""); + static_assert(sizeof(unsigned long) == 4, ""); + unsigned long __where; + if (_BitScanReverse(&__where, __x)) + return static_cast(31 - __where); + return 32; // Undefined Behavior. +} + +inline _LIBCPP_INLINE_VISIBILITY +int __libcpp_clz(unsigned long __x) { + static_assert(sizeof(unsigned) == sizeof(unsigned long), ""); + return __libcpp_clz(static_cast(__x)); +} + +inline _LIBCPP_INLINE_VISIBILITY +int __libcpp_clz(unsigned long long __x) { + unsigned long __where; +#if defined(_LIBCPP_HAS_BITSCAN64) + if (_BitScanReverse64(&__where, __x)) + return static_cast(63 - __where); +#else + // Win32 doesn't have _BitScanReverse64 so emulate it with two 32 bit calls. + if (_BitScanReverse(&__where, static_cast(__x >> 32))) + return static_cast(63 - (__where + 32)); + if (_BitScanReverse(&__where, static_cast(__x))) + return static_cast(63 - __where); +#endif + return 64; // Undefined Behavior. +} + +inline _LIBCPP_INLINE_VISIBILITY int __libcpp_popcount(unsigned __x) { + static_assert(sizeof(unsigned) == 4, ""); + return __popcnt(__x); +} + +inline _LIBCPP_INLINE_VISIBILITY int __libcpp_popcount(unsigned long __x) { + static_assert(sizeof(unsigned long) == 4, ""); + return __popcnt(__x); +} + +inline _LIBCPP_INLINE_VISIBILITY int __libcpp_popcount(unsigned long long __x) { + static_assert(sizeof(unsigned long long) == 8, ""); + return __popcnt64(__x); +} + +#endif // _LIBCPP_COMPILER_MSVC + +_LIBCPP_END_NAMESPACE_STD + +_LIBCPP_POP_MACROS + +#endif // _LIBCPP__BITS diff --git a/lib/libcxx/include/__config b/lib/libcxx/include/__config index 575147cead..a3838c89e8 100644 --- a/lib/libcxx/include/__config +++ b/lib/libcxx/include/__config @@ -32,13 +32,13 @@ # define _GNUC_VER_NEW 0 #endif -#define _LIBCPP_VERSION 11000 +#define _LIBCPP_VERSION 12000 #ifndef _LIBCPP_ABI_VERSION # define _LIBCPP_ABI_VERSION 1 #endif -#ifndef __STDC_HOSTED__ +#if __STDC_HOSTED__ == 0 # define _LIBCPP_FREESTANDING #endif @@ -49,8 +49,10 @@ # define _LIBCPP_STD_VER 14 # elif __cplusplus <= 201703L # define _LIBCPP_STD_VER 17 +# elif __cplusplus <= 202002L +# define _LIBCPP_STD_VER 20 # else -# define _LIBCPP_STD_VER 18 // current year, or date of c++2a ratification +# define _LIBCPP_STD_VER 21 // current year, or date of c++2b ratification # endif #endif // _LIBCPP_STD_VER @@ -63,7 +65,7 @@ #elif defined(__wasm__) # define _LIBCPP_OBJECT_FORMAT_WASM 1 #else -# error Unknown object file format + // ... add new file formats here ... #endif #if defined(_LIBCPP_ABI_UNSTABLE) || _LIBCPP_ABI_VERSION >= 2 @@ -105,6 +107,10 @@ // Re-worked external template instantiations for std::string with a focus on // performance and fast-path inlining. # define _LIBCPP_ABI_STRING_OPTIMIZED_EXTERNAL_INSTANTIATION +// Enable clang::trivial_abi on std::unique_ptr. 
+# define _LIBCPP_ABI_ENABLE_UNIQUE_PTR_TRIVIAL_ABI +// Enable clang::trivial_abi on std::shared_ptr and std::weak_ptr +# define _LIBCPP_ABI_ENABLE_SHARED_PTR_TRIVIAL_ABI #elif _LIBCPP_ABI_VERSION == 1 # if !defined(_LIBCPP_OBJECT_FORMAT_COFF) // Enable compiling copies of now inline methods into the dylib to support @@ -121,9 +127,11 @@ # endif #endif -#ifdef _LIBCPP_TRIVIAL_PAIR_COPY_CTOR -#error "_LIBCPP_TRIVIAL_PAIR_COPY_CTOR" is no longer supported. \ - use _LIBCPP_DEPRECATED_ABI_DISABLE_PAIR_TRIVIAL_COPY_CTOR instead +#if defined(_LIBCPP_BUILDING_LIBRARY) || defined(_LIBCPP_ABI_UNSTABLE) || _LIBCPP_ABI_VERSION >= 2 +// Enable additional explicit instantiations of iostreams components. This +// reduces the number of weak definitions generated in programs that use +// iostreams by providing a single strong definition in the shared library. +# define _LIBCPP_ABI_ENABLE_ADDITIONAL_IOSTREAM_EXPLICIT_INSTANTIATIONS_1 #endif #define _LIBCPP_CONCAT1(_LIBCPP_X,_LIBCPP_Y) _LIBCPP_X##_LIBCPP_Y @@ -256,14 +264,14 @@ # endif // __LONG_LONG_SUPPORTED #endif // __FreeBSD__ -#ifdef __NetBSD__ +#if defined(__NetBSD__) || defined(__OpenBSD__) # include # if _BYTE_ORDER == _LITTLE_ENDIAN # define _LIBCPP_LITTLE_ENDIAN # else // _BYTE_ORDER == _LITTLE_ENDIAN # define _LIBCPP_BIG_ENDIAN # endif // _BYTE_ORDER == _LITTLE_ENDIAN -#endif // __NetBSD__ +#endif // defined(__NetBSD__) || defined(__OpenBSD__) #if defined(_WIN32) # define _LIBCPP_WIN32API @@ -304,7 +312,7 @@ # endif #endif // __sun__ -#if defined(__CloudABI__) +#if defined(__OpenBSD__) || defined(__CloudABI__) // Certain architectures provide arc4random(). Prefer using // arc4random() over /dev/{u,}random to make it possible to obtain // random data even when using sandboxing mechanisms such as chroots, @@ -344,13 +352,11 @@ # if defined(__FreeBSD__) # define _LIBCPP_HAS_ALIGNED_ALLOC # define _LIBCPP_HAS_QUICK_EXIT -# define _LIBCPP_HAS_C11_FEATURES # if __FreeBSD_version >= 1300064 || \ (__FreeBSD_version >= 1201504 && __FreeBSD_version < 1300000) # define _LIBCPP_HAS_TIMESPEC_GET # endif # elif defined(__BIONIC__) -# define _LIBCPP_HAS_C11_FEATURES # if __ANDROID_API__ >= 21 # define _LIBCPP_HAS_QUICK_EXIT # endif @@ -364,7 +370,9 @@ # define _LIBCPP_HAS_ALIGNED_ALLOC # define _LIBCPP_HAS_QUICK_EXIT # define _LIBCPP_HAS_TIMESPEC_GET -# define _LIBCPP_HAS_C11_FEATURES +# elif defined(__OpenBSD__) +# define _LIBCPP_HAS_ALIGNED_ALLOC +# define _LIBCPP_HAS_TIMESPEC_GET # elif defined(__linux__) # if !defined(_LIBCPP_HAS_MUSL_LIBC) # if _LIBCPP_GLIBC_PREREQ(2, 15) || defined(__BIONIC__) @@ -372,16 +380,24 @@ # endif # if _LIBCPP_GLIBC_PREREQ(2, 17) # define _LIBCPP_HAS_ALIGNED_ALLOC -# define _LIBCPP_HAS_C11_FEATURES # define _LIBCPP_HAS_TIMESPEC_GET # endif # else // defined(_LIBCPP_HAS_MUSL_LIBC) # define _LIBCPP_HAS_ALIGNED_ALLOC # define _LIBCPP_HAS_QUICK_EXIT # define _LIBCPP_HAS_TIMESPEC_GET -# define _LIBCPP_HAS_C11_FEATURES # endif -# endif // __linux__ +# elif defined(__APPLE__) + // timespec_get and aligned_alloc were introduced in macOS 10.15 and + // aligned releases +# if (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 101500 || \ + __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ >= 130000 || \ + __ENVIRONMENT_TV_OS_VERSION_MIN_REQUIRED__ >= 130000 || \ + __ENVIRONMENT_WATCH_OS_VERSION_MIN_REQUIRED__ >= 60000) +# define _LIBCPP_HAS_ALIGNED_ALLOC +# define _LIBCPP_HAS_TIMESPEC_GET +# endif +# endif // __APPLE__ #endif #ifndef _LIBCPP_CXX03_LANG @@ -389,9 +405,7 @@ #elif defined(_LIBCPP_COMPILER_CLANG) # define _LIBCPP_ALIGNOF(_Tp) 
_Alignof(_Tp) #else -// This definition is potentially buggy, but it's only taken with GCC in C++03, -// which we barely support anyway. See llvm.org/PR39713 -# define _LIBCPP_ALIGNOF(_Tp) __alignof(_Tp) +# error "We don't know a correct way to implement alignof(T) in C++03 outside of Clang" #endif #define _LIBCPP_PREFERRED_ALIGNOF(_Tp) __alignof(_Tp) @@ -433,10 +447,6 @@ typedef __char32_t char32_t; # define _LIBCPP_NORETURN __attribute__ ((noreturn)) #endif -#if !(__has_feature(cxx_lambdas)) -#define _LIBCPP_HAS_NO_LAMBDAS -#endif - #if !(__has_feature(cxx_nullptr)) # if (__has_extension(cxx_nullptr) || __has_keyword(__nullptr)) && defined(_LIBCPP_ABI_ALWAYS_USE_CXX11_NULLPTR) # define nullptr __nullptr @@ -445,18 +455,6 @@ typedef __char32_t char32_t; # endif #endif -#if !(__has_feature(cxx_rvalue_references)) -#define _LIBCPP_HAS_NO_RVALUE_REFERENCES -#endif - -#if !(__has_feature(cxx_auto_type)) -#define _LIBCPP_HAS_NO_AUTO_TYPE -#endif - -#if !(__has_feature(cxx_variadic_templates)) -#define _LIBCPP_HAS_NO_VARIADICS -#endif - // Objective-C++ features (opt-in) #if __has_feature(objc_arc) #define _LIBCPP_HAS_OBJC_ARC @@ -720,7 +718,7 @@ typedef __char32_t char32_t; #endif #ifndef _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS -# if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) && __has_attribute(__type_visibility__) +# if !defined(_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS) # define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS __attribute__ ((__visibility__("default"))) # else # define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS @@ -754,16 +752,6 @@ typedef __char32_t char32_t; # endif #endif -#ifndef _LIBCPP_TYPEINFO_COMPARISON_IMPLEMENTATION -# ifdef _LIBCPP_OBJECT_FORMAT_COFF // Windows binaries can't merge typeinfos. -# define _LIBCPP_TYPEINFO_COMPARISON_IMPLEMENTATION 2 -# else - // TODO: This isn't strictly correct on ELF platforms due to llvm.org/PR37398 - // And we should consider defaulting to OFF. -# define _LIBCPP_TYPEINFO_COMPARISON_IMPLEMENTATION 1 -# endif -#endif - #ifndef _LIBCPP_HIDE_FROM_ABI # if _LIBCPP_HIDE_FROM_ABI_PER_TU # define _LIBCPP_HIDE_FROM_ABI _LIBCPP_HIDDEN _LIBCPP_INTERNAL_LINKAGE @@ -838,6 +826,12 @@ typedef unsigned int char32_t; # define _LIBCPP_CONSTEXPR constexpr #endif +#ifndef __cpp_consteval +# define _LIBCPP_CONSTEVAL _LIBCPP_CONSTEXPR +#else +# define _LIBCPP_CONSTEVAL consteval +#endif + #ifdef _LIBCPP_CXX03_LANG # define _LIBCPP_DEFAULT {} #else @@ -863,10 +857,6 @@ typedef unsigned int char32_t; # define _LIBCPP_EXPLICIT #endif -#if !__has_builtin(__builtin_operator_new) || !__has_builtin(__builtin_operator_delete) -#define _LIBCPP_HAS_NO_BUILTIN_OPERATOR_NEW_DELETE -#endif - #ifdef _LIBCPP_HAS_NO_STRONG_ENUMS # define _LIBCPP_DECLARE_STRONG_ENUM(x) struct _LIBCPP_TYPE_VIS x { enum __lx # define _LIBCPP_DECLARE_STRONG_ENUM_EPILOG(x) \ @@ -880,34 +870,43 @@ typedef unsigned int char32_t; # define _LIBCPP_DECLARE_STRONG_ENUM_EPILOG(x) #endif // _LIBCPP_HAS_NO_STRONG_ENUMS -#ifdef _LIBCPP_DEBUG -# if _LIBCPP_DEBUG == 0 -# define _LIBCPP_DEBUG_LEVEL 1 -# elif _LIBCPP_DEBUG == 1 -# define _LIBCPP_DEBUG_LEVEL 2 -# else -# error Supported values for _LIBCPP_DEBUG are 0 and 1 -# endif -# if !defined(_LIBCPP_BUILDING_LIBRARY) -# define _LIBCPP_EXTERN_TEMPLATE(...) -# endif +// _LIBCPP_DEBUG potential values: +// - undefined: No assertions. This is the default. +// - 0: Basic assertions +// - 1: Basic assertions + iterator validity checks. 
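The comment above documents the public _LIBCPP_DEBUG knob, and the hunk that follows maps it onto an internal _LIBCPP_DEBUG_LEVEL of 0, 1, or 2. A small usage sketch (not part of the patch; the exact diagnostic text depends on the libc++ build):

// Compile with: clang++ -std=c++17 -stdlib=libc++ -D_LIBCPP_DEBUG=0 demo.cpp
#include <vector>

int main() {
    std::vector<int> v{1, 2, 3};
    // With basic assertions enabled (_LIBCPP_DEBUG=0 -> _LIBCPP_DEBUG_LEVEL 1),
    // the out-of-bounds access below trips a _LIBCPP_ASSERT and aborts with a
    // message instead of being silent undefined behavior.
    return v[3];
}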
+#if !defined(_LIBCPP_DEBUG) +# define _LIBCPP_DEBUG_LEVEL 0 +#elif _LIBCPP_DEBUG == 0 +# define _LIBCPP_DEBUG_LEVEL 1 +#elif _LIBCPP_DEBUG == 1 +# define _LIBCPP_DEBUG_LEVEL 2 +#else +# error Supported values for _LIBCPP_DEBUG are 0 and 1 #endif -#ifndef _LIBCPP_DEBUG_LEVEL -# define _LIBCPP_DEBUG_LEVEL 0 +// _LIBCPP_DEBUG_LEVEL is always defined to one of [0, 1, 2] at this point +#if _LIBCPP_DEBUG_LEVEL >= 1 && !defined(_LIBCPP_DISABLE_EXTERN_TEMPLATE) +# define _LIBCPP_EXTERN_TEMPLATE(...) #endif #ifdef _LIBCPP_DISABLE_EXTERN_TEMPLATE -#define _LIBCPP_EXTERN_TEMPLATE(...) -#define _LIBCPP_EXTERN_TEMPLATE2(...) +# define _LIBCPP_EXTERN_TEMPLATE(...) +# define _LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(...) #endif #ifndef _LIBCPP_EXTERN_TEMPLATE #define _LIBCPP_EXTERN_TEMPLATE(...) extern template __VA_ARGS__; #endif -#ifndef _LIBCPP_EXTERN_TEMPLATE2 -#define _LIBCPP_EXTERN_TEMPLATE2(...) extern template __VA_ARGS__; +// When the Debug mode is enabled, we disable extern declarations because we +// don't want to use the functions compiled in the library, which might not +// have had the debug mode enabled when built. However, some extern declarations +// need to be used, because code correctness depends on it (several instances +// in the ). Those special declarations are declared with +// _LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE, which is enabled even +// when the debug mode is enabled. +#ifndef _LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE +# define _LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(...) extern template __VA_ARGS__; #endif #ifndef _LIBCPP_EXTERN_TEMPLATE_DEFINE @@ -938,6 +937,8 @@ typedef unsigned int char32_t; // We're deferring to Microsoft's STL to provide aligned new et al. We don't // have it unless the language feature test macro is defined. # define _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION +#elif defined(__MVS__) +# define _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION #endif #if defined(__APPLE__) @@ -999,6 +1000,18 @@ typedef unsigned int char32_t; # define _LIBCPP_DEPRECATED_IN_CXX17 #endif +#if _LIBCPP_STD_VER > 17 +# define _LIBCPP_DEPRECATED_IN_CXX20 _LIBCPP_DEPRECATED +#else +# define _LIBCPP_DEPRECATED_IN_CXX20 +#endif + +#if !defined(_LIBCPP_NO_HAS_CHAR8_T) +# define _LIBCPP_DEPRECATED_WITH_CHAR8_T _LIBCPP_DEPRECATED +#else +# define _LIBCPP_DEPRECATED_WITH_CHAR8_T +#endif + // Macros to enter and leave a state where deprecation warnings are suppressed. #if !defined(_LIBCPP_SUPPRESS_DEPRECATED_PUSH) && \ (defined(_LIBCPP_COMPILER_CLANG) || defined(_LIBCPP_COMPILER_GCC)) @@ -1037,14 +1050,6 @@ typedef unsigned int char32_t; # define _LIBCPP_CONSTEXPR_AFTER_CXX17 #endif -#if _LIBCPP_STD_VER > 17 && \ - !defined(_LIBCPP_HAS_NO_CXX14_CONSTEXPR) && \ - !defined(_LIBCPP_HAS_NO_BUILTIN_IS_CONSTANT_EVALUATED) -# define _LIBCPP_CONSTEXPR_AFTER_CXX17_WITH_IS_CONSTANT_EVALUATED constexpr -#else -# define _LIBCPP_CONSTEXPR_AFTER_CXX17_WITH_IS_CONSTANT_EVALUATED -#endif - // The _LIBCPP_NODISCARD_ATTRIBUTE should only be used to define other // NODISCARD macros to the correct attribute. 
#if __has_cpp_attribute(nodiscard) || defined(_LIBCPP_COMPILER_MSVC) @@ -1079,12 +1084,6 @@ typedef unsigned int char32_t; # define _LIBCPP_INLINE_VAR #endif -#ifdef _LIBCPP_HAS_NO_RVALUE_REFERENCES -# define _LIBCPP_EXPLICIT_MOVE(x) _VSTD::move(x) -#else -# define _LIBCPP_EXPLICIT_MOVE(x) (x) -#endif - #ifndef _LIBCPP_CONSTEXPR_IF_NODEBUG #if defined(_LIBCPP_DEBUG) || defined(_LIBCPP_HAS_NO_CXX14_CONSTEXPR) #define _LIBCPP_CONSTEXPR_IF_NODEBUG @@ -1100,7 +1099,7 @@ typedef unsigned int char32_t; #endif #ifndef _LIBCPP_HAS_NO_ASAN -_LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( +extern "C" _LIBCPP_FUNC_VIS void __sanitizer_annotate_contiguous_container( const void *, const void *, const void *, const void *); #endif @@ -1125,11 +1124,14 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( # if defined(__FreeBSD__) || \ defined(__wasi__) || \ defined(__NetBSD__) || \ + defined(__OpenBSD__) || \ + defined(__NuttX__) || \ defined(__linux__) || \ defined(__GNU__) || \ defined(__APPLE__) || \ defined(__CloudABI__) || \ defined(__sun__) || \ + defined(__MVS__) || \ (defined(__MINGW32__) && __has_include()) # define _LIBCPP_HAS_THREAD_API_PTHREAD # elif defined(__Fuchsia__) @@ -1167,10 +1169,6 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( _LIBCPP_HAS_NO_THREADS is defined. #endif -#if defined(__STDCPP_THREADS__) && defined(_LIBCPP_HAS_NO_THREADS) -#error _LIBCPP_HAS_NO_THREADS cannot be set when __STDCPP_THREADS__ is set. -#endif - #if !defined(_LIBCPP_HAS_NO_THREADS) && !defined(__STDCPP_THREADS__) #define __STDCPP_THREADS__ 1 #endif @@ -1222,13 +1220,15 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( // Some systems do not provide gets() in their C library, for security reasons. #ifndef _LIBCPP_C_HAS_NO_GETS # if defined(_LIBCPP_MSVCRT) || \ - (defined(__FreeBSD_version) && __FreeBSD_version >= 1300043) + (defined(__FreeBSD_version) && __FreeBSD_version >= 1300043) || \ + defined(__OpenBSD__) # define _LIBCPP_C_HAS_NO_GETS # endif #endif -#if defined(__BIONIC__) || defined(__CloudABI__) || \ - defined(__Fuchsia__) || defined(__wasi__) || defined(_LIBCPP_HAS_MUSL_LIBC) +#if defined(__BIONIC__) || defined(__CloudABI__) || defined(__NuttX__) || \ + defined(__Fuchsia__) || defined(__wasi__) || defined(_LIBCPP_HAS_MUSL_LIBC) || \ + defined(__MVS__) || defined(__OpenBSD__) #define _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE #endif @@ -1337,6 +1337,12 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( #endif #endif // !defined(_LIBCPP_NODEBUG_TYPE) +#if __has_attribute(__preferred_name__) +#define _LIBCPP_PREFERRED_NAME(x) __attribute__((__preferred_name__(x))) +#else +#define _LIBCPP_PREFERRED_NAME(x) +#endif + #if defined(_LIBCPP_ABI_MICROSOFT) && \ (defined(_LIBCPP_COMPILER_MSVC) || __has_declspec_attribute(empty_bases)) # define _LIBCPP_DECLSPEC_EMPTY_BASES __declspec(empty_bases) @@ -1367,120 +1373,6 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( #define _LIBCPP_HAS_NO_SPACESHIP_OPERATOR #endif -// Decide whether to use availability macros. 
-#if !defined(_LIBCPP_BUILDING_LIBRARY) && \ - !defined(_LIBCXXABI_BUILDING_LIBRARY) && \ - !defined(_LIBCPP_DISABLE_AVAILABILITY) && \ - __has_feature(attribute_availability_with_strict) && \ - __has_feature(attribute_availability_in_templates) && \ - __has_extension(pragma_clang_attribute_external_declaration) -# ifdef __APPLE__ -# define _LIBCPP_USE_AVAILABILITY_APPLE -# endif -#endif - -// Define availability macros. -#if defined(_LIBCPP_USE_AVAILABILITY_APPLE) -# define _LIBCPP_AVAILABILITY_SHARED_MUTEX \ - __attribute__((availability(macosx,strict,introduced=10.12))) \ - __attribute__((availability(ios,strict,introduced=10.0))) \ - __attribute__((availability(tvos,strict,introduced=10.0))) \ - __attribute__((availability(watchos,strict,introduced=3.0))) -# define _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS \ - __attribute__((availability(macosx,strict,introduced=10.13))) \ - __attribute__((availability(ios,strict,introduced=11.0))) \ - __attribute__((availability(tvos,strict,introduced=11.0))) \ - __attribute__((availability(watchos,strict,introduced=4.0))) -# define _LIBCPP_AVAILABILITY_BAD_VARIANT_ACCESS \ - _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS -# define _LIBCPP_AVAILABILITY_BAD_ANY_CAST \ - _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS -# define _LIBCPP_AVAILABILITY_UNCAUGHT_EXCEPTIONS \ - __attribute__((availability(macosx,strict,introduced=10.12))) \ - __attribute__((availability(ios,strict,introduced=10.0))) \ - __attribute__((availability(tvos,strict,introduced=10.0))) \ - __attribute__((availability(watchos,strict,introduced=3.0))) -# define _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE \ - __attribute__((availability(macosx,strict,introduced=10.12))) \ - __attribute__((availability(ios,strict,introduced=10.0))) \ - __attribute__((availability(tvos,strict,introduced=10.0))) \ - __attribute__((availability(watchos,strict,introduced=3.0))) -# define _LIBCPP_AVAILABILITY_FUTURE_ERROR \ - __attribute__((availability(ios,strict,introduced=6.0))) -# define _LIBCPP_AVAILABILITY_TYPEINFO_VTABLE \ - __attribute__((availability(macosx,strict,introduced=10.9))) \ - __attribute__((availability(ios,strict,introduced=7.0))) -# define _LIBCPP_AVAILABILITY_LOCALE_CATEGORY \ - __attribute__((availability(macosx,strict,introduced=10.9))) \ - __attribute__((availability(ios,strict,introduced=7.0))) -# define _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR \ - __attribute__((availability(macosx,strict,introduced=10.9))) \ - __attribute__((availability(ios,strict,introduced=7.0))) -# define _LIBCPP_AVAILABILITY_FILESYSTEM \ - __attribute__((availability(macosx,strict,introduced=10.15))) \ - __attribute__((availability(ios,strict,introduced=13.0))) \ - __attribute__((availability(tvos,strict,introduced=13.0))) \ - __attribute__((availability(watchos,strict,introduced=6.0))) -# define _LIBCPP_AVAILABILITY_FILESYSTEM_PUSH \ - _Pragma("clang attribute push(__attribute__((availability(macosx,strict,introduced=10.15))), apply_to=any(function,record))") \ - _Pragma("clang attribute push(__attribute__((availability(ios,strict,introduced=13.0))), apply_to=any(function,record))") \ - _Pragma("clang attribute push(__attribute__((availability(tvos,strict,introduced=13.0))), apply_to=any(function,record))") \ - _Pragma("clang attribute push(__attribute__((availability(watchos,strict,introduced=6.0))), apply_to=any(function,record))") -# define _LIBCPP_AVAILABILITY_FILESYSTEM_POP \ - _Pragma("clang attribute pop") \ - _Pragma("clang attribute pop") \ - _Pragma("clang attribute pop") \ - _Pragma("clang attribute pop") -# define 
_LIBCPP_AVAILABILITY_TO_CHARS \ - _LIBCPP_AVAILABILITY_FILESYSTEM -# define _LIBCPP_AVAILABILITY_SYNC \ - __attribute__((unavailable)) -#else -# define _LIBCPP_AVAILABILITY_SHARED_MUTEX -# define _LIBCPP_AVAILABILITY_BAD_VARIANT_ACCESS -# define _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS -# define _LIBCPP_AVAILABILITY_BAD_ANY_CAST -# define _LIBCPP_AVAILABILITY_UNCAUGHT_EXCEPTIONS -# define _LIBCPP_AVAILABILITY_SIZED_NEW_DELETE -# define _LIBCPP_AVAILABILITY_FUTURE_ERROR -# define _LIBCPP_AVAILABILITY_TYPEINFO_VTABLE -# define _LIBCPP_AVAILABILITY_LOCALE_CATEGORY -# define _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR -# define _LIBCPP_AVAILABILITY_FILESYSTEM -# define _LIBCPP_AVAILABILITY_FILESYSTEM_PUSH -# define _LIBCPP_AVAILABILITY_FILESYSTEM_POP -# define _LIBCPP_AVAILABILITY_TO_CHARS -# define _LIBCPP_AVAILABILITY_SYNC -#endif - -// Define availability that depends on _LIBCPP_NO_EXCEPTIONS. -#ifdef _LIBCPP_NO_EXCEPTIONS -# define _LIBCPP_AVAILABILITY_FUTURE -# define _LIBCPP_AVAILABILITY_THROW_BAD_ANY_CAST -# define _LIBCPP_AVAILABILITY_THROW_BAD_OPTIONAL_ACCESS -# define _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS -#else -# define _LIBCPP_AVAILABILITY_FUTURE _LIBCPP_AVAILABILITY_FUTURE_ERROR -# define _LIBCPP_AVAILABILITY_THROW_BAD_ANY_CAST _LIBCPP_AVAILABILITY_BAD_ANY_CAST -# define _LIBCPP_AVAILABILITY_THROW_BAD_OPTIONAL_ACCESS _LIBCPP_AVAILABILITY_BAD_OPTIONAL_ACCESS -# define _LIBCPP_AVAILABILITY_THROW_BAD_VARIANT_ACCESS _LIBCPP_AVAILABILITY_BAD_VARIANT_ACCESS -#endif - -// The stream API was dropped and re-added in the dylib shipped on macOS -// and iOS. We can only assume the dylib to provide these definitions for -// macosx >= 10.9 and ios >= 7.0. Otherwise, the definitions are available -// from the headers, but not from the dylib. Explicit instantiation -// declarations for streams exist conditionally to this; if we provide -// an explicit instantiation declaration and we try to deploy to a dylib -// that does not provide those symbols, we'll get a load-time error. 
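The block removed above (together with the stream/dylib note that follows) is the vendor availability machinery; in this version it is relocated out of <__config> — other hunks in this patch add #include <__availability> and a _LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS knob. A minimal sketch of what such an annotation does, using a hypothetical symbol (my_feature) rather than anything from libc++:

// Clang-only sketch: a declaration tagged with a strict availability
// attribute cannot be used when the deployment target predates the symbol.
#if defined(__APPLE__) && defined(__clang__)
#  define DEMO_AVAILABLE_MACOS_10_13 \
     __attribute__((availability(macosx, strict, introduced = 10.13)))
#else
#  define DEMO_AVAILABLE_MACOS_10_13
#endif

DEMO_AVAILABLE_MACOS_10_13 inline void my_feature() {} // hypothetical symbol

int main() {
    my_feature(); // error when building with -mmacosx-version-min=10.12,
                  // accepted with a 10.13 or newer deployment target
    return 0;
}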
-#if !defined(_LIBCPP_BUILDING_LIBRARY) && \ - ((defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && \ - __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ < 1090) || \ - (defined(__ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__) && \ - __ENVIRONMENT_IPHONE_OS_VERSION_MIN_REQUIRED__ < 70000)) -# define _LIBCPP_DO_NOT_ASSUME_STREAMS_EXPLICIT_INSTANTIATION_IN_DYLIB -#endif - #if defined(_LIBCPP_COMPILER_IBM) #define _LIBCPP_HAS_NO_PRAGMA_PUSH_POP_MACRO #endif @@ -1547,6 +1439,12 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( #define _LIBCPP_HAS_NO_FGETPOS_FSETPOS #endif +#if __has_attribute(init_priority) +# define _LIBCPP_INIT_PRIORITY_MAX __attribute__((init_priority(101))) +#else +# define _LIBCPP_INIT_PRIORITY_MAX +#endif + #endif // __cplusplus #endif // _LIBCPP_CONFIG diff --git a/lib/libcxx/include/__config_site.in b/lib/libcxx/include/__config_site.in index a6984b2eef..ec4d410bb9 100644 --- a/lib/libcxx/include/__config_site.in +++ b/lib/libcxx/include/__config_site.in @@ -26,12 +26,14 @@ #cmakedefine _LIBCPP_HAS_THREAD_API_WIN32 #cmakedefine _LIBCPP_HAS_THREAD_LIBRARY_EXTERNAL #cmakedefine _LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS +#cmakedefine _LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS #cmakedefine _LIBCPP_NO_VCRUNTIME -#ifndef _LIBCPP_TYPEINFO_COMPARISON_IMPLEMENTATION #cmakedefine _LIBCPP_TYPEINFO_COMPARISON_IMPLEMENTATION @_LIBCPP_TYPEINFO_COMPARISON_IMPLEMENTATION@ -#endif #cmakedefine _LIBCPP_ABI_NAMESPACE @_LIBCPP_ABI_NAMESPACE@ +#cmakedefine _LIBCPP_HAS_NO_FILESYSTEM_LIBRARY #cmakedefine _LIBCPP_HAS_PARALLEL_ALGORITHMS +#cmakedefine _LIBCPP_HAS_NO_RANDOM_DEVICE +#cmakedefine _LIBCPP_HAS_NO_LOCALIZATION @_LIBCPP_ABI_DEFINES@ diff --git a/lib/libcxx/include/__debug b/lib/libcxx/include/__debug index 11367413fc..7b5bfb3f83 100644 --- a/lib/libcxx/include/__debug +++ b/lib/libcxx/include/__debug @@ -27,26 +27,21 @@ # include #endif -#if _LIBCPP_DEBUG_LEVEL >= 1 && !defined(_LIBCPP_ASSERT) -# define _LIBCPP_ASSERT(x, m) ((x) ? (void)0 : \ - _VSTD::__libcpp_debug_function(_VSTD::__libcpp_debug_info(__FILE__, __LINE__, #x, m))) -#endif - -#if _LIBCPP_DEBUG_LEVEL >= 2 -#ifndef _LIBCPP_DEBUG_ASSERT -#define _LIBCPP_DEBUG_ASSERT(x, m) _LIBCPP_ASSERT(x, m) -#endif -#define _LIBCPP_DEBUG_MODE(...) __VA_ARGS__ -#endif - -#ifndef _LIBCPP_ASSERT -# define _LIBCPP_ASSERT(x, m) ((void)0) -#endif -#ifndef _LIBCPP_DEBUG_ASSERT +#if _LIBCPP_DEBUG_LEVEL == 0 # define _LIBCPP_DEBUG_ASSERT(x, m) ((void)0) +# define _LIBCPP_ASSERT_IMPL(x, m) ((void)0) +#elif _LIBCPP_DEBUG_LEVEL == 1 +# define _LIBCPP_DEBUG_ASSERT(x, m) ((void)0) +# define _LIBCPP_ASSERT_IMPL(x, m) ((x) ? (void)0 : _VSTD::__libcpp_debug_function(_VSTD::__libcpp_debug_info(__FILE__, __LINE__, #x, m))) +#elif _LIBCPP_DEBUG_LEVEL == 2 +# define _LIBCPP_DEBUG_ASSERT(x, m) _LIBCPP_ASSERT(x, m) +# define _LIBCPP_ASSERT_IMPL(x, m) ((x) ? (void)0 : _VSTD::__libcpp_debug_function(_VSTD::__libcpp_debug_info(__FILE__, __LINE__, #x, m))) +#else +# error _LIBCPP_DEBUG_LEVEL must be one of 0, 1, 2 #endif -#ifndef _LIBCPP_DEBUG_MODE -#define _LIBCPP_DEBUG_MODE(...) 
((void)0) + +#if !defined(_LIBCPP_ASSERT) +# define _LIBCPP_ASSERT(x, m) _LIBCPP_ASSERT_IMPL(x, m) #endif _LIBCPP_BEGIN_NAMESPACE_STD @@ -59,7 +54,7 @@ struct _LIBCPP_TEMPLATE_VIS __libcpp_debug_info { __libcpp_debug_info(const char* __f, int __l, const char* __p, const char* __m) : __file_(__f), __line_(__l), __pred_(__p), __msg_(__m) {} - _LIBCPP_FUNC_VIS std::string what() const; + _LIBCPP_FUNC_VIS string what() const; const char* __file_; int __line_; @@ -83,7 +78,7 @@ void __libcpp_abort_debug_function(__libcpp_debug_info const&); _LIBCPP_FUNC_VIS bool __libcpp_set_debug_function(__libcpp_debug_function_type __func); -#if _LIBCPP_DEBUG_LEVEL >= 2 || defined(_LIBCPP_BUILDING_LIBRARY) +#if _LIBCPP_DEBUG_LEVEL == 2 || defined(_LIBCPP_BUILDING_LIBRARY) struct _LIBCPP_TYPE_VIS __c_node; @@ -226,7 +221,7 @@ public: template _LIBCPP_INLINE_VISIBILITY static __c_node* __create_C_node(void *__mem, void *__c, __c_node *__next) { - return ::new(__mem) _C_node<_Cont>(__c, __next); + return ::new (__mem) _C_node<_Cont>(__c, __next); } template @@ -271,7 +266,7 @@ _LIBCPP_FUNC_VIS __libcpp_db* __get_db(); _LIBCPP_FUNC_VIS const __libcpp_db* __get_const_db(); -#endif // _LIBCPP_DEBUG_LEVEL >= 2 || defined(_LIBCPP_BUILDING_LIBRARY) +#endif // _LIBCPP_DEBUG_LEVEL == 2 || defined(_LIBCPP_BUILDING_LIBRARY) _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__functional_03 b/lib/libcxx/include/__functional_03 index bf86428dea..9616480611 100644 --- a/lib/libcxx/include/__functional_03 +++ b/lib/libcxx/include/__functional_03 @@ -126,7 +126,7 @@ __func<_Fp, _Alloc, _Rp()>::__clone() const _Ap __a(__f_.second()); typedef __allocator_destructor<_Ap> _Dp; unique_ptr<__func, _Dp> __hold(__a.allocate(1), _Dp(__a, 1)); - ::new (__hold.get()) __func(__f_.first(), _Alloc(__a)); + ::new ((void*)__hold.get()) __func(__f_.first(), _Alloc(__a)); return __hold.release(); } @@ -134,7 +134,7 @@ template void __func<_Fp, _Alloc, _Rp()>::__clone(__base<_Rp()>* __p) const { - ::new (__p) __func(__f_.first(), __f_.second()); + ::new ((void*)__p) __func(__f_.first(), __f_.second()); } template @@ -212,7 +212,7 @@ __func<_Fp, _Alloc, _Rp(_A0)>::__clone() const _Ap __a(__f_.second()); typedef __allocator_destructor<_Ap> _Dp; unique_ptr<__func, _Dp> __hold(__a.allocate(1), _Dp(__a, 1)); - ::new (__hold.get()) __func(__f_.first(), _Alloc(__a)); + ::new ((void*)__hold.get()) __func(__f_.first(), _Alloc(__a)); return __hold.release(); } @@ -220,7 +220,7 @@ template void __func<_Fp, _Alloc, _Rp(_A0)>::__clone(__base<_Rp(_A0)>* __p) const { - ::new (__p) __func(__f_.first(), __f_.second()); + ::new ((void*)__p) __func(__f_.first(), __f_.second()); } template @@ -298,7 +298,7 @@ __func<_Fp, _Alloc, _Rp(_A0, _A1)>::__clone() const _Ap __a(__f_.second()); typedef __allocator_destructor<_Ap> _Dp; unique_ptr<__func, _Dp> __hold(__a.allocate(1), _Dp(__a, 1)); - ::new (__hold.get()) __func(__f_.first(), _Alloc(__a)); + ::new ((void*)__hold.get()) __func(__f_.first(), _Alloc(__a)); return __hold.release(); } @@ -306,7 +306,7 @@ template void __func<_Fp, _Alloc, _Rp(_A0, _A1)>::__clone(__base<_Rp(_A0, _A1)>* __p) const { - ::new (__p) __func(__f_.first(), __f_.second()); + ::new ((void*)__p) __func(__f_.first(), __f_.second()); } template @@ -384,7 +384,7 @@ __func<_Fp, _Alloc, _Rp(_A0, _A1, _A2)>::__clone() const _Ap __a(__f_.second()); typedef __allocator_destructor<_Ap> _Dp; unique_ptr<__func, _Dp> __hold(__a.allocate(1), _Dp(__a, 1)); - ::new (__hold.get()) __func(__f_.first(), _Alloc(__a)); + ::new ((void*)__hold.get()) 
__func(__f_.first(), _Alloc(__a)); return __hold.release(); } @@ -392,7 +392,7 @@ template void __func<_Fp, _Alloc, _Rp(_A0, _A1, _A2)>::__clone(__base<_Rp(_A0, _A1, _A2)>* __p) const { - ::new (__p) __func(__f_.first(), __f_.second()); + ::new ((void*)__p) __func(__f_.first(), __f_.second()); } template @@ -554,7 +554,7 @@ function<_Rp()>::function(_Fp __f, if (sizeof(_FF) <= sizeof(__buf_)) { __f_ = (__base*)&__buf_; - ::new (__f_) _FF(__f); + ::new ((void*)__f_) _FF(__f); } else { @@ -562,7 +562,7 @@ function<_Rp()>::function(_Fp __f, _Ap __a; typedef __allocator_destructor<_Ap> _Dp; unique_ptr<__base, _Dp> __hold(__a.allocate(1), _Dp(__a, 1)); - ::new (__hold.get()) _FF(__f, allocator<_Fp>(__a)); + ::new ((void*)__hold.get()) _FF(__f, allocator<_Fp>(__a)); __f_ = __hold.release(); } } @@ -581,7 +581,7 @@ function<_Rp()>::function(allocator_arg_t, const _Alloc& __a0, _Fp __f, if (sizeof(_FF) <= sizeof(__buf_)) { __f_ = (__base*)&__buf_; - ::new (__f_) _FF(__f, __a0); + ::new ((void*)__f_) _FF(__f, __a0); } else { @@ -589,7 +589,7 @@ function<_Rp()>::function(allocator_arg_t, const _Alloc& __a0, _Fp __f, _Ap __a(__a0); typedef __allocator_destructor<_Ap> _Dp; unique_ptr<__base, _Dp> __hold(__a.allocate(1), _Dp(__a, 1)); - ::new (__hold.get()) _FF(__f, _Alloc(__a)); + ::new ((void*)__hold.get()) _FF(__f, _Alloc(__a)); __f_ = __hold.release(); } } @@ -834,7 +834,7 @@ function<_Rp(_A0)>::function(_Fp __f, if (sizeof(_FF) <= sizeof(__buf_)) { __f_ = (__base*)&__buf_; - ::new (__f_) _FF(__f); + ::new ((void*)__f_) _FF(__f); } else { @@ -842,7 +842,7 @@ function<_Rp(_A0)>::function(_Fp __f, _Ap __a; typedef __allocator_destructor<_Ap> _Dp; unique_ptr<__base, _Dp> __hold(__a.allocate(1), _Dp(__a, 1)); - ::new (__hold.get()) _FF(__f, allocator<_Fp>(__a)); + ::new ((void*)__hold.get()) _FF(__f, allocator<_Fp>(__a)); __f_ = __hold.release(); } } @@ -861,7 +861,7 @@ function<_Rp(_A0)>::function(allocator_arg_t, const _Alloc& __a0, _Fp __f, if (sizeof(_FF) <= sizeof(__buf_)) { __f_ = (__base*)&__buf_; - ::new (__f_) _FF(__f, __a0); + ::new ((void*)__f_) _FF(__f, __a0); } else { @@ -869,7 +869,7 @@ function<_Rp(_A0)>::function(allocator_arg_t, const _Alloc& __a0, _Fp __f, _Ap __a(__a0); typedef __allocator_destructor<_Ap> _Dp; unique_ptr<__base, _Dp> __hold(__a.allocate(1), _Dp(__a, 1)); - ::new (__hold.get()) _FF(__f, _Alloc(__a)); + ::new ((void*)__hold.get()) _FF(__f, _Alloc(__a)); __f_ = __hold.release(); } } @@ -1114,7 +1114,7 @@ function<_Rp(_A0, _A1)>::function(_Fp __f, if (sizeof(_FF) <= sizeof(__buf_)) { __f_ = (__base*)&__buf_; - ::new (__f_) _FF(__f); + ::new ((void*)__f_) _FF(__f); } else { @@ -1122,7 +1122,7 @@ function<_Rp(_A0, _A1)>::function(_Fp __f, _Ap __a; typedef __allocator_destructor<_Ap> _Dp; unique_ptr<__base, _Dp> __hold(__a.allocate(1), _Dp(__a, 1)); - ::new (__hold.get()) _FF(__f, allocator<_Fp>(__a)); + ::new ((void*)__hold.get()) _FF(__f, allocator<_Fp>(__a)); __f_ = __hold.release(); } } @@ -1141,7 +1141,7 @@ function<_Rp(_A0, _A1)>::function(allocator_arg_t, const _Alloc& __a0, _Fp __f, if (sizeof(_FF) <= sizeof(__buf_)) { __f_ = (__base*)&__buf_; - ::new (__f_) _FF(__f, __a0); + ::new ((void*)__f_) _FF(__f, __a0); } else { @@ -1149,7 +1149,7 @@ function<_Rp(_A0, _A1)>::function(allocator_arg_t, const _Alloc& __a0, _Fp __f, _Ap __a(__a0); typedef __allocator_destructor<_Ap> _Dp; unique_ptr<__base, _Dp> __hold(__a.allocate(1), _Dp(__a, 1)); - ::new (__hold.get()) _FF(__f, _Alloc(__a)); + ::new ((void*)__hold.get()) _FF(__f, _Alloc(__a)); __f_ = __hold.release(); } } @@ 
-1394,7 +1394,7 @@ function<_Rp(_A0, _A1, _A2)>::function(_Fp __f, if (sizeof(_FF) <= sizeof(__buf_)) { __f_ = (__base*)&__buf_; - ::new (__f_) _FF(__f); + ::new ((void*)__f_) _FF(__f); } else { @@ -1402,7 +1402,7 @@ function<_Rp(_A0, _A1, _A2)>::function(_Fp __f, _Ap __a; typedef __allocator_destructor<_Ap> _Dp; unique_ptr<__base, _Dp> __hold(__a.allocate(1), _Dp(__a, 1)); - ::new (__hold.get()) _FF(__f, allocator<_Fp>(__a)); + ::new ((void*)__hold.get()) _FF(__f, allocator<_Fp>(__a)); __f_ = __hold.release(); } } @@ -1421,7 +1421,7 @@ function<_Rp(_A0, _A1, _A2)>::function(allocator_arg_t, const _Alloc& __a0, _Fp if (sizeof(_FF) <= sizeof(__buf_)) { __f_ = (__base*)&__buf_; - ::new (__f_) _FF(__f, __a0); + ::new ((void*)__f_) _FF(__f, __a0); } else { @@ -1429,7 +1429,7 @@ function<_Rp(_A0, _A1, _A2)>::function(allocator_arg_t, const _Alloc& __a0, _Fp _Ap __a(__a0); typedef __allocator_destructor<_Ap> _Dp; unique_ptr<__base, _Dp> __hold(__a.allocate(1), _Dp(__a, 1)); - ::new (__hold.get()) _FF(__f, _Alloc(__a)); + ::new ((void*)__hold.get()) _FF(__f, _Alloc(__a)); __f_ = __hold.release(); } } diff --git a/lib/libcxx/include/__functional_base b/lib/libcxx/include/__functional_base index f591bf5a9d..1c02e960d5 100644 --- a/lib/libcxx/include/__functional_base +++ b/lib/libcxx/include/__functional_base @@ -298,7 +298,7 @@ struct __weak_result_type<_Rp (_Cp::*)(_A1, _A2, _A3...) const volatile> template struct __invoke_return { - typedef decltype(__invoke(_VSTD::declval<_Tp>(), _VSTD::declval<_Args>()...)) type; + typedef decltype(_VSTD::__invoke(declval<_Tp>(), declval<_Args>()...)) type; }; #else // defined(_LIBCPP_CXX03_LANG) @@ -308,64 +308,64 @@ struct __invoke_return #endif // !defined(_LIBCPP_CXX03_LANG) -template +template ::value> struct __invoke_void_return_wrapper { #ifndef _LIBCPP_CXX03_LANG template static _Ret __call(_Args&&... __args) { - return __invoke(_VSTD::forward<_Args>(__args)...); + return _VSTD::__invoke(_VSTD::forward<_Args>(__args)...); } #else template static _Ret __call(_Fn __f) { - return __invoke(__f); + return _VSTD::__invoke(__f); } template static _Ret __call(_Fn __f, _A0& __a0) { - return __invoke(__f, __a0); + return _VSTD::__invoke(__f, __a0); } template static _Ret __call(_Fn __f, _A0& __a0, _A1& __a1) { - return __invoke(__f, __a0, __a1); + return _VSTD::__invoke(__f, __a0, __a1); } template static _Ret __call(_Fn __f, _A0& __a0, _A1& __a1, _A2& __a2){ - return __invoke(__f, __a0, __a1, __a2); + return _VSTD::__invoke(__f, __a0, __a1, __a2); } #endif }; -template <> -struct __invoke_void_return_wrapper +template +struct __invoke_void_return_wrapper<_Ret, true> { #ifndef _LIBCPP_CXX03_LANG template static void __call(_Args&&... 
__args) { - __invoke(_VSTD::forward<_Args>(__args)...); + _VSTD::__invoke(_VSTD::forward<_Args>(__args)...); } #else template static void __call(_Fn __f) { - __invoke(__f); + _VSTD::__invoke(__f); } template static void __call(_Fn __f, _A0& __a0) { - __invoke(__f, __a0); + _VSTD::__invoke(__f, __a0); } template static void __call(_Fn __f, _A0& __a0, _A1& __a1) { - __invoke(__f, __a0, __a1); + _VSTD::__invoke(__f, __a0, __a1); } template static void __call(_Fn __f, _A0& __a0, _A1& __a1, _A2& __a2) { - __invoke(__f, __a0, __a1, __a2); + _VSTD::__invoke(__f, __a0, __a1, __a2); } #endif }; @@ -382,135 +382,138 @@ private: public: // construct/copy/destroy - _LIBCPP_INLINE_VISIBILITY reference_wrapper(type& __f) _NOEXCEPT + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 + reference_wrapper(type& __f) _NOEXCEPT : __f_(_VSTD::addressof(__f)) {} #ifndef _LIBCPP_CXX03_LANG private: reference_wrapper(type&&); public: // = delete; // do not bind to temps #endif // access - _LIBCPP_INLINE_VISIBILITY operator type& () const _NOEXCEPT {return *__f_;} - _LIBCPP_INLINE_VISIBILITY type& get() const _NOEXCEPT {return *__f_;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 + operator type&() const _NOEXCEPT {return *__f_;} + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 + type& get() const _NOEXCEPT {return *__f_;} #ifndef _LIBCPP_CXX03_LANG // invoke template - _LIBCPP_INLINE_VISIBILITY + _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 typename __invoke_of::type operator() (_ArgTypes&&... __args) const { - return __invoke(get(), _VSTD::forward<_ArgTypes>(__args)...); + return _VSTD::__invoke(get(), _VSTD::forward<_ArgTypes>(__args)...); } #else _LIBCPP_INLINE_VISIBILITY typename __invoke_return::type operator() () const { - return __invoke(get()); + return _VSTD::__invoke(get()); } template _LIBCPP_INLINE_VISIBILITY typename __invoke_return0::type operator() (_A0& __a0) const { - return __invoke(get(), __a0); + return _VSTD::__invoke(get(), __a0); } template _LIBCPP_INLINE_VISIBILITY typename __invoke_return0::type operator() (_A0 const& __a0) const { - return __invoke(get(), __a0); + return _VSTD::__invoke(get(), __a0); } template _LIBCPP_INLINE_VISIBILITY typename __invoke_return1::type operator() (_A0& __a0, _A1& __a1) const { - return __invoke(get(), __a0, __a1); + return _VSTD::__invoke(get(), __a0, __a1); } template _LIBCPP_INLINE_VISIBILITY typename __invoke_return1::type operator() (_A0 const& __a0, _A1& __a1) const { - return __invoke(get(), __a0, __a1); + return _VSTD::__invoke(get(), __a0, __a1); } template _LIBCPP_INLINE_VISIBILITY typename __invoke_return1::type operator() (_A0& __a0, _A1 const& __a1) const { - return __invoke(get(), __a0, __a1); + return _VSTD::__invoke(get(), __a0, __a1); } template _LIBCPP_INLINE_VISIBILITY typename __invoke_return1::type operator() (_A0 const& __a0, _A1 const& __a1) const { - return __invoke(get(), __a0, __a1); + return _VSTD::__invoke(get(), __a0, __a1); } template _LIBCPP_INLINE_VISIBILITY typename __invoke_return2::type operator() (_A0& __a0, _A1& __a1, _A2& __a2) const { - return __invoke(get(), __a0, __a1, __a2); + return _VSTD::__invoke(get(), __a0, __a1, __a2); } template _LIBCPP_INLINE_VISIBILITY typename __invoke_return2::type operator() (_A0 const& __a0, _A1& __a1, _A2& __a2) const { - return __invoke(get(), __a0, __a1, __a2); + return _VSTD::__invoke(get(), __a0, __a1, __a2); } template _LIBCPP_INLINE_VISIBILITY typename __invoke_return2::type operator() (_A0& __a0, _A1 const& __a1, _A2& __a2) const { - return 
__invoke(get(), __a0, __a1, __a2); + return _VSTD::__invoke(get(), __a0, __a1, __a2); } template _LIBCPP_INLINE_VISIBILITY typename __invoke_return2::type operator() (_A0& __a0, _A1& __a1, _A2 const& __a2) const { - return __invoke(get(), __a0, __a1, __a2); + return _VSTD::__invoke(get(), __a0, __a1, __a2); } template _LIBCPP_INLINE_VISIBILITY typename __invoke_return2::type operator() (_A0 const& __a0, _A1 const& __a1, _A2& __a2) const { - return __invoke(get(), __a0, __a1, __a2); + return _VSTD::__invoke(get(), __a0, __a1, __a2); } template _LIBCPP_INLINE_VISIBILITY typename __invoke_return2::type operator() (_A0 const& __a0, _A1& __a1, _A2 const& __a2) const { - return __invoke(get(), __a0, __a1, __a2); + return _VSTD::__invoke(get(), __a0, __a1, __a2); } template _LIBCPP_INLINE_VISIBILITY typename __invoke_return2::type operator() (_A0& __a0, _A1 const& __a1, _A2 const& __a2) const { - return __invoke(get(), __a0, __a1, __a2); + return _VSTD::__invoke(get(), __a0, __a1, __a2); } template _LIBCPP_INLINE_VISIBILITY typename __invoke_return2::type operator() (_A0 const& __a0, _A1 const& __a1, _A2 const& __a2) const { - return __invoke(get(), __a0, __a1, __a2); + return _VSTD::__invoke(get(), __a0, __a1, __a2); } #endif // _LIBCPP_CXX03_LANG }; template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 reference_wrapper<_Tp> ref(_Tp& __t) _NOEXCEPT { @@ -518,7 +521,7 @@ ref(_Tp& __t) _NOEXCEPT } template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 reference_wrapper<_Tp> ref(reference_wrapper<_Tp> __t) _NOEXCEPT { @@ -526,7 +529,7 @@ ref(reference_wrapper<_Tp> __t) _NOEXCEPT } template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 reference_wrapper cref(const _Tp& __t) _NOEXCEPT { @@ -534,7 +537,7 @@ cref(const _Tp& __t) _NOEXCEPT } template -inline _LIBCPP_INLINE_VISIBILITY +inline _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR_AFTER_CXX17 reference_wrapper cref(reference_wrapper<_Tp> __t) _NOEXCEPT { diff --git a/lib/libcxx/include/__functional_base_03 b/lib/libcxx/include/__functional_base_03 index e6dac90c84..9b08bd26a8 100644 --- a/lib/libcxx/include/__functional_base_03 +++ b/lib/libcxx/include/__functional_base_03 @@ -40,7 +40,7 @@ struct __enable_invoke_imp<_Ret, _T1, false, true> { template struct __enable_invoke_imp<_Ret, _T1, false, false> { typedef typename add_lvalue_reference< - typename __apply_cv()), _Ret>::type + typename __apply_cv()), _Ret>::type >::type _Bullet4; typedef _Bullet4 type; }; @@ -142,7 +142,7 @@ __invoke(_Fn __f, _T1& __t1) { template inline _LIBCPP_INLINE_VISIBILITY -decltype(_VSTD::declval<_Fp&>()()) +decltype(declval<_Fp&>()()) __invoke(_Fp& __f) { return __f(); @@ -150,7 +150,7 @@ __invoke(_Fp& __f) template inline _LIBCPP_INLINE_VISIBILITY -decltype(_VSTD::declval<_Fp&>()(_VSTD::declval<_A0&>())) +decltype(declval<_Fp&>()(declval<_A0&>())) __invoke(_Fp& __f, _A0& __a0) { return __f(__a0); @@ -158,7 +158,7 @@ __invoke(_Fp& __f, _A0& __a0) template inline _LIBCPP_INLINE_VISIBILITY -decltype(_VSTD::declval<_Fp&>()(_VSTD::declval<_A0&>(), _VSTD::declval<_A1&>())) +decltype(declval<_Fp&>()(declval<_A0&>(), declval<_A1&>())) __invoke(_Fp& __f, _A0& __a0, _A1& __a1) { return __f(__a0, __a1); @@ -166,7 +166,7 @@ __invoke(_Fp& __f, _A0& __a0, _A1& __a1) template inline _LIBCPP_INLINE_VISIBILITY -decltype(_VSTD::declval<_Fp&>()(_VSTD::declval<_A0&>(), _VSTD::declval<_A1&>(), _VSTD::declval<_A2&>())) 
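A recurring change in these hunks is spelling internal calls as _VSTD::__invoke(...) (with _VSTD being libc++'s spelling of the std namespace) instead of calling them unqualified; the qualification keeps argument-dependent lookup from pulling in overloads that live in the namespaces of user-supplied argument types. A simplified illustration of that hazard with hypothetical names (lib::do_log, user::Widget), not libc++ internals:

#include <iostream>

namespace lib {
    template <class T>
    void do_log(const T&) { std::cout << "lib::do_log\n"; }

    template <class T>
    void process_unqualified(const T& value) {
        do_log(value);      // unqualified: ADL also searches T's namespace
    }

    template <class T>
    void process_qualified(const T& value) {
        lib::do_log(value); // qualified: always resolves to lib::do_log
    }
}

namespace user {
    struct Widget {};
    void do_log(Widget) { std::cout << "user::do_log\n"; } // found via ADL
}

int main() {
    user::Widget w;
    lib::process_unqualified(w); // prints "user::do_log"
    lib::process_qualified(w);   // prints "lib::do_log"
}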
+decltype(declval<_Fp&>()(declval<_A0&>(), declval<_A1&>(), declval<_A2&>())) __invoke(_Fp& __f, _A0& __a0, _A1& __a1, _A2& __a2) { return __f(__a0, __a1, __a2); @@ -181,13 +181,13 @@ struct __invoke_return template struct __invoke_return<_Fp, false> { - typedef decltype(__invoke(_VSTD::declval<_Fp&>())) type; + typedef decltype(_VSTD::__invoke(declval<_Fp&>())) type; }; template struct __invoke_return0 { - typedef decltype(__invoke(_VSTD::declval<_Tp&>(), _VSTD::declval<_A0&>())) type; + typedef decltype(_VSTD::__invoke(declval<_Tp&>(), declval<_A0&>())) type; }; template @@ -199,8 +199,8 @@ struct __invoke_return0<_Rp _Tp::*, _A0> template struct __invoke_return1 { - typedef decltype(__invoke(_VSTD::declval<_Tp&>(), _VSTD::declval<_A0&>(), - _VSTD::declval<_A1&>())) type; + typedef decltype(_VSTD::__invoke(declval<_Tp&>(), declval<_A0&>(), + declval<_A1&>())) type; }; template @@ -211,9 +211,9 @@ struct __invoke_return1<_Rp _Class::*, _A0, _A1> { template struct __invoke_return2 { - typedef decltype(__invoke(_VSTD::declval<_Tp&>(), _VSTD::declval<_A0&>(), - _VSTD::declval<_A1&>(), - _VSTD::declval<_A2&>())) type; + typedef decltype(_VSTD::__invoke(declval<_Tp&>(), declval<_A0&>(), + declval<_A1&>(), + declval<_A2&>())) type; }; template diff --git a/lib/libcxx/include/__hash_table b/lib/libcxx/include/__hash_table index 13ff096897..521ebbf2c4 100644 --- a/lib/libcxx/include/__hash_table +++ b/lib/libcxx/include/__hash_table @@ -34,19 +34,17 @@ _LIBCPP_BEGIN_NAMESPACE_STD template struct __hash_value_type; -#ifndef _LIBCPP_CXX03_LANG template struct __is_hash_value_type_imp : false_type {}; template -struct __is_hash_value_type_imp<__hash_value_type<_Key, _Value>> : true_type {}; +struct __is_hash_value_type_imp<__hash_value_type<_Key, _Value> > : true_type {}; template struct __is_hash_value_type : false_type {}; template struct __is_hash_value_type<_One> : __is_hash_value_type_imp::type> {}; -#endif _LIBCPP_FUNC_VIS size_t __next_prime(size_t __n); @@ -122,7 +120,7 @@ inline _LIBCPP_INLINE_VISIBILITY size_t __next_hash_pow2(size_t __n) { - return __n < 2 ? __n : (size_t(1) << (std::numeric_limits::digits - __libcpp_clz(__n-1))); + return __n < 2 ? 
__n : (size_t(1) << (numeric_limits::digits - __libcpp_clz(__n-1))); } @@ -155,12 +153,10 @@ struct __hash_key_value_types { static __container_value_type* __get_ptr(__node_value_type& __n) { return _VSTD::addressof(__n); } -#ifndef _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY static __container_value_type&& __move(__node_value_type& __v) { return _VSTD::move(__v); } -#endif }; template @@ -197,13 +193,10 @@ struct __hash_key_value_types<__hash_value_type<_Key, _Tp> > { static __container_value_type* __get_ptr(__node_value_type& __n) { return _VSTD::addressof(__n.__get_value()); } -#ifndef _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY static pair __move(__node_value_type& __v) { return __v.__move(); } -#endif - }; template , @@ -295,10 +288,12 @@ public: typedef typename _NodeTypes::__node_value_type_pointer pointer; _LIBCPP_INLINE_VISIBILITY __hash_iterator() _NOEXCEPT : __node_(nullptr) { - _LIBCPP_DEBUG_MODE(__get_db()->__insert_i(this)); +#if _LIBCPP_DEBUG_LEVEL == 2 + __get_db()->__insert_i(this); +#endif } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY __hash_iterator(const __hash_iterator& __i) : __node_(__i.__node_) @@ -322,7 +317,7 @@ public: } return *this; } -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif // _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY reference operator*() const { @@ -364,7 +359,7 @@ public: {return !(__x == __y);} private: -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY __hash_iterator(__next_pointer __node, const void* __c) _NOEXCEPT : __node_(__node) @@ -405,17 +400,21 @@ public: _LIBCPP_INLINE_VISIBILITY __hash_const_iterator() _NOEXCEPT : __node_(nullptr) { - _LIBCPP_DEBUG_MODE(__get_db()->__insert_i(this)); +#if _LIBCPP_DEBUG_LEVEL == 2 + __get_db()->__insert_i(this); +#endif } _LIBCPP_INLINE_VISIBILITY __hash_const_iterator(const __non_const_iterator& __x) _NOEXCEPT : __node_(__x.__node_) { - _LIBCPP_DEBUG_MODE(__get_db()->__iterator_copy(this, &__x)); +#if _LIBCPP_DEBUG_LEVEL == 2 + __get_db()->__iterator_copy(this, &__x); +#endif } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY __hash_const_iterator(const __hash_const_iterator& __i) : __node_(__i.__node_) @@ -439,7 +438,7 @@ public: } return *this; } -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif // _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY reference operator*() const { @@ -480,7 +479,7 @@ public: {return !(__x == __y);} private: -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY __hash_const_iterator(__next_pointer __node, const void* __c) _NOEXCEPT : __node_(__node) @@ -518,10 +517,12 @@ public: typedef typename _NodeTypes::__node_value_type_pointer pointer; _LIBCPP_INLINE_VISIBILITY __hash_local_iterator() _NOEXCEPT : __node_(nullptr) { - _LIBCPP_DEBUG_MODE(__get_db()->__insert_i(this)); +#if _LIBCPP_DEBUG_LEVEL == 2 + __get_db()->__insert_i(this); +#endif } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY __hash_local_iterator(const __hash_local_iterator& __i) : __node_(__i.__node_), @@ -549,7 +550,7 @@ public: } return *this; } -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif // _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY reference operator*() const { @@ -593,7 +594,7 @@ public: {return !(__x == __y);} private: -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY __hash_local_iterator(__next_pointer __node, size_t __bucket, size_t __bucket_count, const void* __c) 
_NOEXCEPT @@ -650,7 +651,9 @@ public: _LIBCPP_INLINE_VISIBILITY __hash_const_local_iterator() _NOEXCEPT : __node_(nullptr) { - _LIBCPP_DEBUG_MODE(__get_db()->__insert_i(this)); +#if _LIBCPP_DEBUG_LEVEL == 2 + __get_db()->__insert_i(this); +#endif } _LIBCPP_INLINE_VISIBILITY @@ -659,10 +662,12 @@ public: __bucket_(__x.__bucket_), __bucket_count_(__x.__bucket_count_) { - _LIBCPP_DEBUG_MODE(__get_db()->__iterator_copy(this, &__x)); +#if _LIBCPP_DEBUG_LEVEL == 2 + __get_db()->__iterator_copy(this, &__x); +#endif } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY __hash_const_local_iterator(const __hash_const_local_iterator& __i) : __node_(__i.__node_), @@ -690,7 +695,7 @@ public: } return *this; } -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif // _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY reference operator*() const { @@ -734,7 +739,7 @@ public: {return !(__x == __y);} private: -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_INLINE_VISIBILITY __hash_const_local_iterator(__next_pointer __node, size_t __bucket, size_t __bucket_count, const void* __c) _NOEXCEPT @@ -783,7 +788,6 @@ public: _NOEXCEPT_(is_nothrow_copy_constructible::value) : __data_(__size, __a) {} -#ifndef _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY __bucket_list_deallocator(__bucket_list_deallocator&& __x) _NOEXCEPT_(is_nothrow_move_constructible::value) @@ -791,7 +795,6 @@ public: { __x.size() = 0; } -#endif _LIBCPP_INLINE_VISIBILITY size_type& size() _NOEXCEPT {return __data_.first();} @@ -1007,7 +1010,6 @@ public: explicit __hash_table(const allocator_type& __a); __hash_table(const __hash_table& __u); __hash_table(const __hash_table& __u, const allocator_type& __a); -#ifndef _LIBCPP_CXX03_LANG __hash_table(__hash_table&& __u) _NOEXCEPT_( is_nothrow_move_constructible<__bucket_list>::value && @@ -1016,11 +1018,9 @@ public: is_nothrow_move_constructible::value && is_nothrow_move_constructible::value); __hash_table(__hash_table&& __u, const allocator_type& __a); -#endif // _LIBCPP_CXX03_LANG ~__hash_table(); __hash_table& operator=(const __hash_table& __u); -#ifndef _LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY __hash_table& operator=(__hash_table&& __u) _NOEXCEPT_( @@ -1028,7 +1028,6 @@ public: is_nothrow_move_assignable<__node_allocator>::value && is_nothrow_move_assignable::value && is_nothrow_move_assignable::value); -#endif template void __assign_unique(_InputIterator __first, _InputIterator __last); template @@ -1037,7 +1036,7 @@ public: _LIBCPP_INLINE_VISIBILITY size_type max_size() const _NOEXCEPT { - return std::min( + return _VSTD::min( __node_traits::max_size(__node_alloc()), numeric_limits::max() ); @@ -1066,7 +1065,6 @@ public: iterator __node_insert_multi(const_iterator __p, __node_pointer __nd); -#ifndef _LIBCPP_CXX03_LANG template _LIBCPP_INLINE_VISIBILITY pair __emplace_unique_key_args(_Key const& __k, _Args&&... 
__args); @@ -1151,15 +1149,6 @@ public: return __emplace_hint_multi(__p, _VSTD::forward<_Pp>(__x)); } -#else // !defined(_LIBCPP_CXX03_LANG) - template - _LIBCPP_INLINE_VISIBILITY - pair __emplace_unique_key_args(_Key const&, _Args& __args); - - iterator __insert_multi(const __container_value_type& __x); - iterator __insert_multi(const_iterator __p, const __container_value_type& __x); -#endif - _LIBCPP_INLINE_VISIBILITY pair __insert_unique(const __container_value_type& __x) { return __emplace_unique_key_args(_NodeTypes::__get_key(__x), __x); @@ -1295,7 +1284,7 @@ public: { _LIBCPP_ASSERT(__n < bucket_count(), "unordered container::begin(n) called with n >= bucket_count()"); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return local_iterator(__bucket_list_[__n], __n, bucket_count(), this); #else return local_iterator(__bucket_list_[__n], __n, bucket_count()); @@ -1308,7 +1297,7 @@ public: { _LIBCPP_ASSERT(__n < bucket_count(), "unordered container::end(n) called with n >= bucket_count()"); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return local_iterator(nullptr, __n, bucket_count(), this); #else return local_iterator(nullptr, __n, bucket_count()); @@ -1321,7 +1310,7 @@ public: { _LIBCPP_ASSERT(__n < bucket_count(), "unordered container::cbegin(n) called with n >= bucket_count()"); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return const_local_iterator(__bucket_list_[__n], __n, bucket_count(), this); #else return const_local_iterator(__bucket_list_[__n], __n, bucket_count()); @@ -1334,35 +1323,30 @@ public: { _LIBCPP_ASSERT(__n < bucket_count(), "unordered container::cend(n) called with n >= bucket_count()"); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return const_local_iterator(nullptr, __n, bucket_count(), this); #else return const_local_iterator(nullptr, __n, bucket_count()); #endif } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 bool __dereferenceable(const const_iterator* __i) const; bool __decrementable(const const_iterator* __i) const; bool __addable(const const_iterator* __i, ptrdiff_t __n) const; bool __subscriptable(const const_iterator* __i, ptrdiff_t __n) const; -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif // _LIBCPP_DEBUG_LEVEL == 2 private: void __rehash(size_type __n); -#ifndef _LIBCPP_CXX03_LANG template __node_holder __construct_node(_Args&& ...__args); template __node_holder __construct_node_hash(size_t __hash, _First&& __f, _Rest&&... 
__rest); -#else // _LIBCPP_CXX03_LANG - __node_holder __construct_node(const __container_value_type& __v); - __node_holder __construct_node_hash(size_t __hash, const __container_value_type& __v); -#endif _LIBCPP_INLINE_VISIBILITY @@ -1373,7 +1357,6 @@ private: _LIBCPP_INLINE_VISIBILITY void __copy_assign_alloc(const __hash_table&, false_type) {} -#ifndef _LIBCPP_CXX03_LANG void __move_assign(__hash_table& __u, false_type); void __move_assign(__hash_table& __u, true_type) _NOEXCEPT_( @@ -1400,7 +1383,6 @@ private: } _LIBCPP_INLINE_VISIBILITY void __move_assign_alloc(__hash_table&, false_type) _NOEXCEPT {} -#endif // _LIBCPP_CXX03_LANG void __deallocate_node(__next_pointer __np) _NOEXCEPT; __next_pointer __detach() _NOEXCEPT; @@ -1477,8 +1459,6 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__hash_table(const __hash_table& __u, { } -#ifndef _LIBCPP_CXX03_LANG - template __hash_table<_Tp, _Hash, _Equal, _Alloc>::__hash_table(__hash_table&& __u) _NOEXCEPT_( @@ -1526,8 +1506,6 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__hash_table(__hash_table&& __u, } } -#endif // _LIBCPP_CXX03_LANG - template __hash_table<_Tp, _Hash, _Equal, _Alloc>::~__hash_table() { @@ -1539,7 +1517,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::~__hash_table() #endif __deallocate_node(__p1_.first().__next_); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__erase_c(this); #endif } @@ -1583,7 +1561,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__deallocate_node(__next_pointer __np) while (__np != nullptr) { __next_pointer __next = __np->__next_; -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __c_node* __c = __get_db()->__find_c_and_lock(this); for (__i_node** __p = __c->end_; __p != __c->beg_; ) { @@ -1593,7 +1571,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__deallocate_node(__next_pointer __np) { (*__p)->__c_ = nullptr; if (--__c->end_ != __p) - memmove(__p, __p+1, (__c->end_ - __p)*sizeof(__i_node*)); + _VSTD::memmove(__p, __p+1, (__c->end_ - __p)*sizeof(__i_node*)); } } __get_db()->unlock(); @@ -1618,8 +1596,6 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__detach() _NOEXCEPT return __cache; } -#ifndef _LIBCPP_CXX03_LANG - template void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__move_assign( @@ -1646,7 +1622,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__move_assign( __u.__p1_.first().__next_ = nullptr; __u.size() = 0; } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->swap(this, &__u); #endif } @@ -1714,8 +1690,6 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::operator=(__hash_table&& __u) return *this; } -#endif // _LIBCPP_CXX03_LANG - template template void @@ -1800,7 +1774,7 @@ inline typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::begin() _NOEXCEPT { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return iterator(__p1_.first().__next_, this); #else return iterator(__p1_.first().__next_); @@ -1812,7 +1786,7 @@ inline typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::end() _NOEXCEPT { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return iterator(nullptr, this); #else return iterator(nullptr); @@ -1824,7 +1798,7 @@ inline typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::const_iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::begin() const _NOEXCEPT { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return const_iterator(__p1_.first().__next_, this); #else return const_iterator(__p1_.first().__next_); @@ -1836,7 
+1810,7 @@ inline typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::const_iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::end() const _NOEXCEPT { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return const_iterator(nullptr, this); #else return const_iterator(nullptr); @@ -1945,7 +1919,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_unique(__node_pointer __ __existing_node = __nd->__ptr(); __inserted = true; } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return pair(iterator(__existing_node, this), __inserted); #else return pair(iterator(__existing_node), __inserted); @@ -1955,7 +1929,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_unique(__node_pointer __ // Prepare the container for an insertion of the value __cp_val with the hash // __cp_hash. This does a lookup into the container to see if __cp_value is // already present, and performs a rehash if necessary. Returns a pointer to the -// last occurance of __cp_val in the map. +// last occurrence of __cp_val in the map. // // Note that this function does forward exceptions if key_eq() throws, and never // mutates __value or actually inserts into the map. @@ -2043,7 +2017,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi(__node_pointer __c __next_pointer __pn = __node_insert_multi_prepare(__cp->__hash(), __cp->__value_); __node_insert_multi_perform(__cp, __pn); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return iterator(__cp->__ptr(), this); #else return iterator(__cp->__ptr()); @@ -2055,7 +2029,7 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi( const_iterator __p, __node_pointer __cp) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, "unordered container::emplace_hint(const_iterator, args...) called with an iterator not" " referring to this unordered container"); @@ -2078,7 +2052,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi( __cp->__next_ = __np; __pp->__next_ = static_cast<__next_pointer>(__cp); ++size(); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return iterator(static_cast<__next_pointer>(__cp), this); #else return iterator(static_cast<__next_pointer>(__cp)); @@ -2089,17 +2063,10 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_insert_multi( -#ifndef _LIBCPP_CXX03_LANG template template pair::iterator, bool> __hash_table<_Tp, _Hash, _Equal, _Alloc>::__emplace_unique_key_args(_Key const& __k, _Args&&... 
__args) -#else -template -template -pair::iterator, bool> -__hash_table<_Tp, _Hash, _Equal, _Alloc>::__emplace_unique_key_args(_Key const& __k, _Args& __args) -#endif { size_t __hash = hash_function()(__k); @@ -2123,11 +2090,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__emplace_unique_key_args(_Key const& } } { -#ifndef _LIBCPP_CXX03_LANG __node_holder __h = __construct_node_hash(__hash, _VSTD::forward<_Args>(__args)...); -#else - __node_holder __h = __construct_node_hash(__hash, __args); -#endif if (size()+1 > __bc * max_load_factor() || __bc == 0) { rehash(_VSTD::max(2 * __bc + !__is_hash_power2(__bc), @@ -2159,15 +2122,13 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__emplace_unique_key_args(_Key const& __inserted = true; } __done: -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return pair(iterator(__nd, this), __inserted); #else return pair(iterator(__nd), __inserted); #endif } -#ifndef _LIBCPP_CXX03_LANG - template template pair::iterator, bool> @@ -2197,7 +2158,7 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::__emplace_hint_multi( const_iterator __p, _Args&&... __args) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, "unordered container::emplace_hint(const_iterator, args...) called with an iterator not" " referring to this unordered container"); @@ -2208,36 +2169,6 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__emplace_hint_multi( return __r; } -#else // _LIBCPP_CXX03_LANG - -template -typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator -__hash_table<_Tp, _Hash, _Equal, _Alloc>::__insert_multi(const __container_value_type& __x) -{ - __node_holder __h = __construct_node(__x); - iterator __r = __node_insert_multi(__h.get()); - __h.release(); - return __r; -} - -template -typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator -__hash_table<_Tp, _Hash, _Equal, _Alloc>::__insert_multi(const_iterator __p, - const __container_value_type& __x) -{ -#if _LIBCPP_DEBUG_LEVEL >= 2 - _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, - "unordered container::insert(const_iterator, lvalue) called with an iterator not" - " referring to this unordered container"); -#endif - __node_holder __h = __construct_node(__x); - iterator __r = __node_insert_multi(__p, __h.get()); - __h.release(); - return __r; -} - -#endif // _LIBCPP_CXX03_LANG - #if _LIBCPP_STD_VER > 14 template template @@ -2399,9 +2330,9 @@ template void __hash_table<_Tp, _Hash, _Equal, _Alloc>::__rehash(size_type __nbc) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->__invalidate_all(this); -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif __pointer_allocator& __npa = __bucket_list_.get_deleter().__alloc(); __bucket_list_.reset(__nbc > 0 ? 
__pointer_alloc_traits::allocate(__npa, __nbc) : nullptr); @@ -2470,7 +2401,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::find(const _Key& __k) { if ((__nd->__hash() == __hash) && key_eq()(__nd->__upcast()->__value_, __k)) -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return iterator(__nd, this); #else return iterator(__nd); @@ -2501,7 +2432,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::find(const _Key& __k) const { if ((__nd->__hash() == __hash) && key_eq()(__nd->__upcast()->__value_, __k)) -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return const_iterator(__nd, this); #else return const_iterator(__nd); @@ -2513,8 +2444,6 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::find(const _Key& __k) const return end(); } -#ifndef _LIBCPP_CXX03_LANG - template template typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_holder @@ -2550,43 +2479,12 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__construct_node_hash( return __h; } -#else // _LIBCPP_CXX03_LANG - -template -typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_holder -__hash_table<_Tp, _Hash, _Equal, _Alloc>::__construct_node(const __container_value_type& __v) -{ - __node_allocator& __na = __node_alloc(); - __node_holder __h(__node_traits::allocate(__na, 1), _Dp(__na)); - __node_traits::construct(__na, _NodeTypes::__get_ptr(__h->__value_), __v); - __h.get_deleter().__value_constructed = true; - __h->__hash_ = hash_function()(__h->__value_); - __h->__next_ = nullptr; - return _LIBCPP_EXPLICIT_MOVE(__h); // explicitly moved for C++03 -} - -template -typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_holder -__hash_table<_Tp, _Hash, _Equal, _Alloc>::__construct_node_hash(size_t __hash, - const __container_value_type& __v) -{ - __node_allocator& __na = __node_alloc(); - __node_holder __h(__node_traits::allocate(__na, 1), _Dp(__na)); - __node_traits::construct(__na, _NodeTypes::__get_ptr(__h->__value_), __v); - __h.get_deleter().__value_constructed = true; - __h->__hash_ = __hash; - __h->__next_ = nullptr; - return _LIBCPP_EXPLICIT_MOVE(__h); // explicitly moved for C++03 -} - -#endif // _LIBCPP_CXX03_LANG - template typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __p) { __next_pointer __np = __p.__node_; -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__p) == this, "unordered container erase(iterator) called with an iterator not" " referring to this container"); @@ -2606,7 +2504,7 @@ typename __hash_table<_Tp, _Hash, _Equal, _Alloc>::iterator __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __first, const_iterator __last) { -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_ASSERT(__get_const_db()->__find_c_from_i(&__first) == this, "unodered container::erase(iterator, iterator) called with an iterator not" " referring to this unodered container"); @@ -2620,7 +2518,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::erase(const_iterator __first, erase(__p); } __next_pointer __np = __last.__node_; -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 return iterator (__np, this); #else return iterator (__np); @@ -2691,7 +2589,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::remove(const_iterator __p) _NOEXCEPT __pn->__next_ = __cn->__next_; __cn->__next_ = nullptr; --size(); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __c_node* __c = __get_db()->__find_c_and_lock(this); for (__i_node** __dp = __c->end_; __dp != 
__c->beg_; ) { @@ -2701,7 +2599,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::remove(const_iterator __p) _NOEXCEPT { (*__dp)->__c_ = nullptr; if (--__c->end_ != __dp) - memmove(__dp, __dp+1, (__c->end_ - __dp)*sizeof(__i_node*)); + _VSTD::memmove(__dp, __dp+1, (__c->end_ - __dp)*sizeof(__i_node*)); } } __get_db()->unlock(); @@ -2830,9 +2728,9 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::swap(__hash_table& __u) __u.__bucket_list_.reset(__npp); } _VSTD::swap(__bucket_list_.get_deleter().size(), __u.__bucket_list_.get_deleter().size()); - __swap_allocator(__bucket_list_.get_deleter().__alloc(), + _VSTD::__swap_allocator(__bucket_list_.get_deleter().__alloc(), __u.__bucket_list_.get_deleter().__alloc()); - __swap_allocator(__node_alloc(), __u.__node_alloc()); + _VSTD::__swap_allocator(__node_alloc(), __u.__node_alloc()); _VSTD::swap(__p1_.first().__next_, __u.__p1_.first().__next_); __p2_.swap(__u.__p2_); __p3_.swap(__u.__p3_); @@ -2842,7 +2740,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::swap(__hash_table& __u) if (__u.size() > 0) __u.__bucket_list_[__constrain_hash(__u.__p1_.first().__next_->__hash(), __u.bucket_count())] = __u.__p1_.first().__ptr(); -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 __get_db()->swap(this, &__u); #endif } @@ -2876,7 +2774,7 @@ swap(__hash_table<_Tp, _Hash, _Equal, _Alloc>& __x, __x.swap(__y); } -#if _LIBCPP_DEBUG_LEVEL >= 2 +#if _LIBCPP_DEBUG_LEVEL == 2 template bool @@ -2906,7 +2804,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__subscriptable(const const_iterator*, return false; } -#endif // _LIBCPP_DEBUG_LEVEL >= 2 +#endif // _LIBCPP_DEBUG_LEVEL == 2 _LIBCPP_END_NAMESPACE_STD diff --git a/lib/libcxx/include/__libcpp_version b/lib/libcxx/include/__libcpp_version index 82b3803a20..e334181b40 100644 --- a/lib/libcxx/include/__libcpp_version +++ b/lib/libcxx/include/__libcpp_version @@ -1 +1 @@ -11000 +12000 diff --git a/lib/libcxx/include/__locale b/lib/libcxx/include/__locale index 6d10fa4d3d..77e5faab26 100644 --- a/lib/libcxx/include/__locale +++ b/lib/libcxx/include/__locale @@ -11,6 +11,7 @@ #define _LIBCPP___LOCALE #include <__config> +#include <__availability> #include #include #include @@ -20,26 +21,30 @@ #include #if defined(_LIBCPP_MSVCRT_LIKE) # include -# include -#elif defined(_AIX) -# include +# include <__support/win32/locale_win32.h> +#elif defined(__NuttX__) +# include <__support/nuttx/xlocale.h> +#elif defined(_AIX) || defined(__MVS__) +# include <__support/ibm/xlocale.h> #elif defined(__ANDROID__) -# include +# include <__support/android/locale_bionic.h> #elif defined(__sun__) # include -# include +# include <__support/solaris/xlocale.h> #elif defined(_NEWLIB_VERSION) -# include +# include <__support/newlib/xlocale.h> +#elif defined(__OpenBSD__) +# include <__support/openbsd/xlocale.h> #elif (defined(__APPLE__) || defined(__FreeBSD__) \ || defined(__EMSCRIPTEN__) || defined(__IBMCPP__)) # include #elif defined(__Fuchsia__) -# include +# include <__support/fuchsia/xlocale.h> #elif defined(__wasi__) // WASI libc uses musl's locales support. -# include +# include <__support/musl/xlocale.h> #elif defined(_LIBCPP_HAS_MUSL_LIBC) -# include +# include <__support/musl/xlocale.h> #endif #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -76,7 +81,7 @@ struct __libcpp_locale_guard { // locale name, otherwise it will be a semicolon-separated string listing // each category. In the second case, we know at least one category won't // be what we want, so we only have to check the first case. 
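
Two mechanical themes run through the __hash_table hunks above and the __locale hunks that follow. First, the debug-iterator bookkeeping is now compiled only when _LIBCPP_DEBUG_LEVEL is exactly 2 (== 2 instead of >= 2), so a level-1 build keeps assertions without the iterator-tracking database. Second, internal calls such as memmove and __swap_allocator here, and strcmp just below, gain a _VSTD:: qualifier, which keeps calls whose arguments are user-supplied types (allocators in particular) out of reach of argument-dependent lookup and makes the spelling uniform for the plain C functions. A minimal sketch of the ADL hazard that qualification avoids; all names here are hypothetical, not libc++'s:

    namespace user { struct Buf {}; void destroy(Buf*); }

    namespace mylib {
      template <class T> void destroy(T*) {}

      template <class T>
      void reset(T* p) {
        // destroy(p);      // with T = user::Buf, ADL would select user::destroy,
        //                  // a non-template exact match, over the library helper
        mylib::destroy(p);  // qualified call, analogous to _VSTD:: in the hunks above
      }
    }
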
- if (strcmp(__l.__get_locale(), __lc) != 0) { + if (_VSTD::strcmp(__l.__get_locale(), __lc) != 0) { __locale_all = _strdup(__lc); if (__locale_all == nullptr) __throw_bad_alloc(); @@ -105,7 +110,6 @@ struct __libcpp_locale_guard { }; #endif - class _LIBCPP_TYPE_VIS locale; template @@ -335,8 +339,8 @@ collate<_CharT>::do_hash(const char_type* __lo, const char_type* __hi) const return static_cast(__h); } -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS collate) -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS collate) +_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS collate) +_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS collate) // template class collate_byname; @@ -396,7 +400,26 @@ locale::operator()(const basic_string<_CharT, _Traits, _Allocator>& __x, class _LIBCPP_TYPE_VIS ctype_base { public: -#if defined(__GLIBC__) +#if defined(_LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE) + typedef unsigned long mask; + static const mask space = 1<<0; + static const mask print = 1<<1; + static const mask cntrl = 1<<2; + static const mask upper = 1<<3; + static const mask lower = 1<<4; + static const mask alpha = 1<<5; + static const mask digit = 1<<6; + static const mask punct = 1<<7; + static const mask xdigit = 1<<8; + static const mask blank = 1<<9; +#if defined(__BIONIC__) + // Historically this was a part of regex_traits rather than ctype_base. The + // historical value of the constant is preserved for ABI compatibility. + static const mask __regex_word = 0x8000; +#else + static const mask __regex_word = 1<<10; +#endif // defined(__BIONIC__) +#elif defined(__GLIBC__) typedef unsigned short mask; static const mask space = _ISspace; static const mask print = _ISprint; @@ -485,24 +508,7 @@ public: # define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_ALPHA # define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_XDIGIT #else - typedef unsigned long mask; - static const mask space = 1<<0; - static const mask print = 1<<1; - static const mask cntrl = 1<<2; - static const mask upper = 1<<3; - static const mask lower = 1<<4; - static const mask alpha = 1<<5; - static const mask digit = 1<<6; - static const mask punct = 1<<7; - static const mask xdigit = 1<<8; - static const mask blank = 1<<9; -#if defined(__BIONIC__) - // Historically this was a part of regex_traits rather than ctype_base. The - // historical value of the constant is preserved for ABI compatibility. - static const mask __regex_word = 0x8000; -#else - static const mask __regex_word = 1<<10; -#endif // defined(__BIONIC__) +# error unknown rune table for this platform -- do you mean to define _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE? 
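
In the ctype_base hunk just above, the generic one-bit-per-category rune table becomes explicit: it is emitted only when _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE is defined, and the former silent fallback branch is replaced by a hard #error on platforms that define neither that macro nor one of the known C libraries. A condensed sketch of the mask layout (bit values copied from the hunk; the enclosing class and the platform-specific branches are omitted), together with the composite categories whose in-tree definitions continue right below:

    typedef unsigned long mask;
    const mask space = 1 << 0;
    const mask cntrl = 1 << 2;
    const mask upper = 1 << 3;
    const mask lower = 1 << 4;
    const mask alpha = 1 << 5;
    const mask digit = 1 << 6;
    const mask punct = 1 << 7;

    // Composite categories are plain bitwise ORs of the primitive bits.
    const mask alnum = alpha | digit;
    const mask graph = alnum | punct;

    inline bool is_alnum(mask m) { return (m & alnum) != 0; }
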
#endif static const mask alnum = alpha | digit; static const mask graph = alnum | punct; @@ -623,7 +629,7 @@ class _LIBCPP_TYPE_VIS ctype public: typedef char char_type; - explicit ctype(const mask* __tab = 0, bool __del = false, size_t __refs = 0); + explicit ctype(const mask* __tab = nullptr, bool __del = false, size_t __refs = 0); _LIBCPP_INLINE_VISIBILITY bool is(mask __m, char_type __c) const @@ -1069,10 +1075,10 @@ protected: virtual int do_max_length() const _NOEXCEPT; }; -// template <> class codecvt +// template <> class codecvt // deprecated in C++20 template <> -class _LIBCPP_TYPE_VIS codecvt +class _LIBCPP_DEPRECATED_IN_CXX20 _LIBCPP_TYPE_VIS codecvt : public locale::facet, public codecvt_base { @@ -1155,10 +1161,100 @@ protected: virtual int do_max_length() const _NOEXCEPT; }; -// template <> class codecvt +#ifndef _LIBCPP_NO_HAS_CHAR8_T + +// template <> class codecvt // C++20 template <> -class _LIBCPP_TYPE_VIS codecvt +class _LIBCPP_TYPE_VIS codecvt + : public locale::facet, + public codecvt_base +{ +public: + typedef char16_t intern_type; + typedef char8_t extern_type; + typedef mbstate_t state_type; + + _LIBCPP_INLINE_VISIBILITY + explicit codecvt(size_t __refs = 0) + : locale::facet(__refs) {} + + _LIBCPP_INLINE_VISIBILITY + result out(state_type& __st, + const intern_type* __frm, const intern_type* __frm_end, const intern_type*& __frm_nxt, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const + { + return do_out(__st, __frm, __frm_end, __frm_nxt, __to, __to_end, __to_nxt); + } + + _LIBCPP_INLINE_VISIBILITY + result unshift(state_type& __st, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const + { + return do_unshift(__st, __to, __to_end, __to_nxt); + } + + _LIBCPP_INLINE_VISIBILITY + result in(state_type& __st, + const extern_type* __frm, const extern_type* __frm_end, const extern_type*& __frm_nxt, + intern_type* __to, intern_type* __to_end, intern_type*& __to_nxt) const + { + return do_in(__st, __frm, __frm_end, __frm_nxt, __to, __to_end, __to_nxt); + } + + _LIBCPP_INLINE_VISIBILITY + int encoding() const _NOEXCEPT + { + return do_encoding(); + } + + _LIBCPP_INLINE_VISIBILITY + bool always_noconv() const _NOEXCEPT + { + return do_always_noconv(); + } + + _LIBCPP_INLINE_VISIBILITY + int length(state_type& __st, const extern_type* __frm, const extern_type* __end, size_t __mx) const + { + return do_length(__st, __frm, __end, __mx); + } + + _LIBCPP_INLINE_VISIBILITY + int max_length() const _NOEXCEPT + { + return do_max_length(); + } + + static locale::id id; + +protected: + _LIBCPP_INLINE_VISIBILITY + explicit codecvt(const char*, size_t __refs = 0) + : locale::facet(__refs) {} + + ~codecvt(); + + virtual result do_out(state_type& __st, + const intern_type* __frm, const intern_type* __frm_end, const intern_type*& __frm_nxt, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual result do_in(state_type& __st, + const extern_type* __frm, const extern_type* __frm_end, const extern_type*& __frm_nxt, + intern_type* __to, intern_type* __to_end, intern_type*& __to_nxt) const; + virtual result do_unshift(state_type& __st, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual int do_encoding() const _NOEXCEPT; + virtual bool do_always_noconv() const _NOEXCEPT; + virtual int do_length(state_type&, const extern_type* __frm, const extern_type* __end, size_t __mx) const; + virtual int do_max_length() const _NOEXCEPT; +}; + +#endif + +// template <> class codecvt // deprecated in C++20 
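
The block above introduces the C++20 facet codecvt<char16_t, char8_t, mbstate_t> under #ifndef _LIBCPP_NO_HAS_CHAR8_T and tags the older char-based specialization _LIBCPP_DEPRECATED_IN_CXX20; the matching char32_t facets follow below. A usage sketch, assuming a C++20 compiler with char8_t and a standard library that ships this facet:

    #include <cwchar>
    #include <locale>

    int main() {
      const char16_t src[] = u"zig";
      char8_t dst[16];
      const char16_t* from_next = nullptr;
      char8_t* to_next = nullptr;
      std::mbstate_t st{};

      const auto& cvt = std::use_facet<
          std::codecvt<char16_t, char8_t, std::mbstate_t>>(std::locale::classic());
      // Convert three UTF-16 code units into UTF-8 code units in dst.
      auto r = cvt.out(st, src, src + 3, from_next, dst, dst + 16, to_next);
      return r == std::codecvt_base::ok ? 0 : 1;
    }
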
+ +template <> +class _LIBCPP_DEPRECATED_IN_CXX20 _LIBCPP_TYPE_VIS codecvt : public locale::facet, public codecvt_base { @@ -1241,6 +1337,96 @@ protected: virtual int do_max_length() const _NOEXCEPT; }; +#ifndef _LIBCPP_NO_HAS_CHAR8_T + +// template <> class codecvt // C++20 + +template <> +class _LIBCPP_TYPE_VIS codecvt + : public locale::facet, + public codecvt_base +{ +public: + typedef char32_t intern_type; + typedef char8_t extern_type; + typedef mbstate_t state_type; + + _LIBCPP_INLINE_VISIBILITY + explicit codecvt(size_t __refs = 0) + : locale::facet(__refs) {} + + _LIBCPP_INLINE_VISIBILITY + result out(state_type& __st, + const intern_type* __frm, const intern_type* __frm_end, const intern_type*& __frm_nxt, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const + { + return do_out(__st, __frm, __frm_end, __frm_nxt, __to, __to_end, __to_nxt); + } + + _LIBCPP_INLINE_VISIBILITY + result unshift(state_type& __st, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const + { + return do_unshift(__st, __to, __to_end, __to_nxt); + } + + _LIBCPP_INLINE_VISIBILITY + result in(state_type& __st, + const extern_type* __frm, const extern_type* __frm_end, const extern_type*& __frm_nxt, + intern_type* __to, intern_type* __to_end, intern_type*& __to_nxt) const + { + return do_in(__st, __frm, __frm_end, __frm_nxt, __to, __to_end, __to_nxt); + } + + _LIBCPP_INLINE_VISIBILITY + int encoding() const _NOEXCEPT + { + return do_encoding(); + } + + _LIBCPP_INLINE_VISIBILITY + bool always_noconv() const _NOEXCEPT + { + return do_always_noconv(); + } + + _LIBCPP_INLINE_VISIBILITY + int length(state_type& __st, const extern_type* __frm, const extern_type* __end, size_t __mx) const + { + return do_length(__st, __frm, __end, __mx); + } + + _LIBCPP_INLINE_VISIBILITY + int max_length() const _NOEXCEPT + { + return do_max_length(); + } + + static locale::id id; + +protected: + _LIBCPP_INLINE_VISIBILITY + explicit codecvt(const char*, size_t __refs = 0) + : locale::facet(__refs) {} + + ~codecvt(); + + virtual result do_out(state_type& __st, + const intern_type* __frm, const intern_type* __frm_end, const intern_type*& __frm_nxt, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual result do_in(state_type& __st, + const extern_type* __frm, const extern_type* __frm_end, const extern_type*& __frm_nxt, + intern_type* __to, intern_type* __to_end, intern_type*& __to_nxt) const; + virtual result do_unshift(state_type& __st, + extern_type* __to, extern_type* __to_end, extern_type*& __to_nxt) const; + virtual int do_encoding() const _NOEXCEPT; + virtual bool do_always_noconv() const _NOEXCEPT; + virtual int do_length(state_type&, const extern_type* __frm, const extern_type* __end, size_t __mx) const; + virtual int do_max_length() const _NOEXCEPT; +}; + +#endif + // template class codecvt_byname template @@ -1258,15 +1444,21 @@ protected: ~codecvt_byname(); }; +_LIBCPP_SUPPRESS_DEPRECATED_PUSH template codecvt_byname<_InternT, _ExternT, _StateT>::~codecvt_byname() { } +_LIBCPP_SUPPRESS_DEPRECATED_POP -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname) -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname) -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname) -_LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname) +_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname) 
+_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname) +_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_DEPRECATED_IN_CXX20 _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname) // deprecated in C++20 +_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_DEPRECATED_IN_CXX20 _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname) // deprecated in C++20 +#ifndef _LIBCPP_NO_HAS_CHAR8_T +_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname) // C++20 +_LIBCPP_EXTERN_TEMPLATE_EVEN_IN_DEBUG_MODE(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname) // C++20 +#endif template struct __narrow_to_utf8 @@ -1290,12 +1482,14 @@ struct __narrow_to_utf8<8> } }; +_LIBCPP_SUPPRESS_DEPRECATED_PUSH template <> struct _LIBCPP_TEMPLATE_VIS __narrow_to_utf8<16> : public codecvt { _LIBCPP_INLINE_VISIBILITY __narrow_to_utf8() : codecvt(1) {} +_LIBCPP_SUPPRESS_DEPRECATED_POP _LIBCPP_EXPORTED_FROM_ABI ~__narrow_to_utf8(); @@ -1324,12 +1518,14 @@ struct _LIBCPP_TEMPLATE_VIS __narrow_to_utf8<16> } }; +_LIBCPP_SUPPRESS_DEPRECATED_PUSH template <> struct _LIBCPP_TEMPLATE_VIS __narrow_to_utf8<32> : public codecvt { _LIBCPP_INLINE_VISIBILITY __narrow_to_utf8() : codecvt(1) {} +_LIBCPP_SUPPRESS_DEPRECATED_POP _LIBCPP_EXPORTED_FROM_ABI ~__narrow_to_utf8(); @@ -1380,12 +1576,14 @@ struct __widen_from_utf8<8> } }; +_LIBCPP_SUPPRESS_DEPRECATED_PUSH template <> struct _LIBCPP_TEMPLATE_VIS __widen_from_utf8<16> : public codecvt { _LIBCPP_INLINE_VISIBILITY __widen_from_utf8() : codecvt(1) {} +_LIBCPP_SUPPRESS_DEPRECATED_POP _LIBCPP_EXPORTED_FROM_ABI ~__widen_from_utf8(); @@ -1407,19 +1605,21 @@ struct _LIBCPP_TEMPLATE_VIS __widen_from_utf8<16> if (__r == codecvt_base::error || __nn == __nb) __throw_runtime_error("locale not supported"); for (const char16_t* __p = __buf; __p < __bn; ++__p, ++__s) - *__s = (wchar_t)*__p; + *__s = *__p; __nb = __nn; } return __s; } }; +_LIBCPP_SUPPRESS_DEPRECATED_PUSH template <> struct _LIBCPP_TEMPLATE_VIS __widen_from_utf8<32> : public codecvt { _LIBCPP_INLINE_VISIBILITY __widen_from_utf8() : codecvt(1) {} +_LIBCPP_SUPPRESS_DEPRECATED_POP _LIBCPP_EXPORTED_FROM_ABI ~__widen_from_utf8(); @@ -1441,7 +1641,7 @@ struct _LIBCPP_TEMPLATE_VIS __widen_from_utf8<32> if (__r == codecvt_base::error || __nn == __nb) __throw_runtime_error("locale not supported"); for (const char32_t* __p = __buf; __p < __bn; ++__p, ++__s) - *__s = (wchar_t)*__p; + *__s = *__p; __nb = __nn; } return __s; diff --git a/lib/libcxx/include/__memory/allocator_traits.h b/lib/libcxx/include/__memory/allocator_traits.h new file mode 100644 index 0000000000..9443f61b71 --- /dev/null +++ b/lib/libcxx/include/__memory/allocator_traits.h @@ -0,0 +1,401 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___MEMORY_ALLOCATOR_TRAITS_H +#define _LIBCPP___MEMORY_ALLOCATOR_TRAITS_H + +#include <__config> +#include <__memory/base.h> +#include <__memory/pointer_traits.h> +#include + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +#pragma GCC system_header +#endif + +_LIBCPP_PUSH_MACROS +#include <__undef_macros> + +_LIBCPP_BEGIN_NAMESPACE_STD + +#define _LIBCPP_ALLOCATOR_TRAITS_HAS_XXX(NAME, PROPERTY) \ + template struct NAME : false_type { }; \ + template struct NAME<_Tp, typename __void_t::type> : true_type { } + +// __pointer +_LIBCPP_ALLOCATOR_TRAITS_HAS_XXX(__has_pointer, pointer); +template ::type, + bool = __has_pointer<_RawAlloc>::value> +struct __pointer { + using type _LIBCPP_NODEBUG_TYPE = typename _RawAlloc::pointer; +}; +template +struct __pointer<_Tp, _Alloc, _RawAlloc, false> { + using type _LIBCPP_NODEBUG_TYPE = _Tp*; +}; + +// __const_pointer +_LIBCPP_ALLOCATOR_TRAITS_HAS_XXX(__has_const_pointer, const_pointer); +template ::value> +struct __const_pointer { + using type _LIBCPP_NODEBUG_TYPE = typename _Alloc::const_pointer; +}; +template +struct __const_pointer<_Tp, _Ptr, _Alloc, false> { +#ifdef _LIBCPP_CXX03_LANG + using type = typename pointer_traits<_Ptr>::template rebind::other; +#else + using type _LIBCPP_NODEBUG_TYPE = typename pointer_traits<_Ptr>::template rebind; +#endif +}; + +// __void_pointer +_LIBCPP_ALLOCATOR_TRAITS_HAS_XXX(__has_void_pointer, void_pointer); +template ::value> +struct __void_pointer { + using type _LIBCPP_NODEBUG_TYPE = typename _Alloc::void_pointer; +}; +template +struct __void_pointer<_Ptr, _Alloc, false> { +#ifdef _LIBCPP_CXX03_LANG + using type _LIBCPP_NODEBUG_TYPE = typename pointer_traits<_Ptr>::template rebind::other; +#else + using type _LIBCPP_NODEBUG_TYPE = typename pointer_traits<_Ptr>::template rebind; +#endif +}; + +// __const_void_pointer +_LIBCPP_ALLOCATOR_TRAITS_HAS_XXX(__has_const_void_pointer, const_void_pointer); +template ::value> +struct __const_void_pointer { + using type _LIBCPP_NODEBUG_TYPE = typename _Alloc::const_void_pointer; +}; +template +struct __const_void_pointer<_Ptr, _Alloc, false> { +#ifdef _LIBCPP_CXX03_LANG + using type _LIBCPP_NODEBUG_TYPE = typename pointer_traits<_Ptr>::template rebind::other; +#else + using type _LIBCPP_NODEBUG_TYPE = typename pointer_traits<_Ptr>::template rebind; +#endif +}; + +// __size_type +_LIBCPP_ALLOCATOR_TRAITS_HAS_XXX(__has_size_type, size_type); +template ::value> +struct __size_type : make_unsigned<_DiffType> { }; +template +struct __size_type<_Alloc, _DiffType, true> { + using type _LIBCPP_NODEBUG_TYPE = typename _Alloc::size_type; +}; + +// __alloc_traits_difference_type +_LIBCPP_ALLOCATOR_TRAITS_HAS_XXX(__has_alloc_traits_difference_type, difference_type); +template ::value> +struct __alloc_traits_difference_type { + using type _LIBCPP_NODEBUG_TYPE = typename pointer_traits<_Ptr>::difference_type; +}; +template +struct __alloc_traits_difference_type<_Alloc, _Ptr, true> { + using type _LIBCPP_NODEBUG_TYPE = typename _Alloc::difference_type; +}; + +// __propagate_on_container_copy_assignment +_LIBCPP_ALLOCATOR_TRAITS_HAS_XXX(__has_propagate_on_container_copy_assignment, propagate_on_container_copy_assignment); +template ::value> +struct __propagate_on_container_copy_assignment : false_type { }; +template +struct __propagate_on_container_copy_assignment<_Alloc, true> { + using type _LIBCPP_NODEBUG_TYPE = typename 
_Alloc::propagate_on_container_copy_assignment; +}; + +// __propagate_on_container_move_assignment +_LIBCPP_ALLOCATOR_TRAITS_HAS_XXX(__has_propagate_on_container_move_assignment, propagate_on_container_move_assignment); +template ::value> +struct __propagate_on_container_move_assignment : false_type { }; +template +struct __propagate_on_container_move_assignment<_Alloc, true> { + using type _LIBCPP_NODEBUG_TYPE = typename _Alloc::propagate_on_container_move_assignment; +}; + +// __propagate_on_container_swap +_LIBCPP_ALLOCATOR_TRAITS_HAS_XXX(__has_propagate_on_container_swap, propagate_on_container_swap); +template ::value> +struct __propagate_on_container_swap : false_type { }; +template +struct __propagate_on_container_swap<_Alloc, true> { + using type _LIBCPP_NODEBUG_TYPE = typename _Alloc::propagate_on_container_swap; +}; + +// __is_always_equal +_LIBCPP_ALLOCATOR_TRAITS_HAS_XXX(__has_is_always_equal, is_always_equal); +template ::value> +struct __is_always_equal : is_empty<_Alloc> { }; +template +struct __is_always_equal<_Alloc, true> { + using type _LIBCPP_NODEBUG_TYPE = typename _Alloc::is_always_equal; +}; + +// __allocator_traits_rebind +_LIBCPP_SUPPRESS_DEPRECATED_PUSH +template +struct __has_rebind_other : false_type { }; +template +struct __has_rebind_other<_Tp, _Up, typename __void_t< + typename _Tp::template rebind<_Up>::other +>::type> : true_type { }; + +template ::value> +struct __allocator_traits_rebind { + using type _LIBCPP_NODEBUG_TYPE = typename _Tp::template rebind<_Up>::other; +}; +template
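
The new __memory/allocator_traits.h above centralizes the member-type detection that allocator_traits relies on: the _LIBCPP_ALLOCATOR_TRAITS_HAS_XXX macro generates a void_t-based trait per member, and a primary template plus a partial specialization then select either the allocator's own nested type or the computed fallback. A compressed, generic restatement of that pattern (stand-alone names, not libc++'s internals):

    #include <memory>
    #include <type_traits>

    // Detect whether A names a member type `pointer`.
    template <class, class = void>
    struct has_pointer : std::false_type {};
    template <class A>
    struct has_pointer<A, std::void_t<typename A::pointer>> : std::true_type {};

    // Use A::pointer when it exists, otherwise fall back to T*.
    template <class T, class A, bool = has_pointer<A>::value>
    struct pointer_of { using type = typename A::pointer; };
    template <class T, class A>
    struct pointer_of<T, A, false> { using type = T*; };

    static_assert(std::is_same_v<pointer_of<int, std::allocator<int>>::type, int*>);

The real header repeats this primary/specialization pairing for const_pointer, void_pointer, size_type, difference_type, and the propagation traits visible above.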