From 8f92a49dfde5e8690525da028844cb7c1188d397 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Tue, 16 Jul 2019 22:23:48 -0400 Subject: [PATCH 01/31] update to llvm9 trunk --- README.md | 10 +++++----- cmake/Findclang.cmake | 16 ++++++++-------- cmake/Findlld.cmake | 14 +++++++------- cmake/Findllvm.cmake | 18 +++++++++--------- src/target.cpp | 7 +++++++ src/target.hpp | 1 + src/zig_llvm.cpp | 12 +++++++++++- src/zig_llvm.h | 15 +++++++++++---- 8 files changed, 59 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index 91178f87de..2e94e68763 100644 --- a/README.md +++ b/README.md @@ -26,14 +26,14 @@ Note that you can * cmake >= 2.8.5 * gcc >= 5.0.0 or clang >= 3.6.0 - * LLVM, Clang, LLD development libraries == 8.x, compiled with the same gcc or clang version above + * LLVM, Clang, LLD development libraries == 9.x, compiled with the same gcc or clang version above - Use the system package manager, or [build from source](https://github.com/ziglang/zig/wiki/How-to-build-LLVM,-libclang,-and-liblld-from-source#posix). ##### Windows * cmake >= 2.8.5 * Microsoft Visual Studio 2017 (version 15.8) - * LLVM, Clang, LLD development libraries == 8.x, compiled with the same MSVC version above + * LLVM, Clang, LLD development libraries == 9.x, compiled with the same MSVC version above - Use the [pre-built binaries](https://github.com/ziglang/zig/wiki/How-to-build-LLVM,-libclang,-and-liblld-from-source#pre-built-binaries) or [build from source](https://github.com/ziglang/zig/wiki/How-to-build-LLVM,-libclang,-and-liblld-from-source#windows). #### Instructions @@ -50,11 +50,11 @@ make install ##### MacOS ``` -brew install cmake llvm@8 -brew outdated llvm@8 || brew upgrade llvm@8 +brew install cmake llvm@9 +brew outdated llvm@9 || brew upgrade llvm@9 mkdir build cd build -cmake .. -DCMAKE_PREFIX_PATH=/usr/local/Cellar/llvm/8.0.0_1 +cmake .. 
-DCMAKE_PREFIX_PATH=/usr/local/Cellar/llvm/9.0.0 make install ``` diff --git a/cmake/Findclang.cmake b/cmake/Findclang.cmake index 5555485d89..704eba9814 100644 --- a/cmake/Findclang.cmake +++ b/cmake/Findclang.cmake @@ -38,10 +38,10 @@ if(MSVC) else() find_path(CLANG_INCLUDE_DIRS NAMES clang/Frontend/ASTUnit.h PATHS - /usr/lib/llvm/8/include - /usr/lib/llvm-8/include - /usr/lib/llvm-8.0/include - /usr/local/llvm80/include + /usr/lib/llvm/9/include + /usr/lib/llvm-9/include + /usr/lib/llvm-9.0/include + /usr/local/llvm90/include /mingw64/include) macro(FIND_AND_ADD_CLANG_LIB _libname_) @@ -49,10 +49,10 @@ else() find_library(CLANG_${_prettylibname_}_LIB NAMES ${_libname_} PATHS ${CLANG_LIBDIRS} - /usr/lib/llvm/8/lib - /usr/lib/llvm-8/lib - /usr/lib/llvm-8.0/lib - /usr/local/llvm80/lib + /usr/lib/llvm/9/lib + /usr/lib/llvm-9/lib + /usr/lib/llvm-9.0/lib + /usr/local/llvm90/lib /mingw64/lib /c/msys64/mingw64/lib c:\\msys64\\mingw64\\lib) diff --git a/cmake/Findlld.cmake b/cmake/Findlld.cmake index 1895612eb9..878a750159 100644 --- a/cmake/Findlld.cmake +++ b/cmake/Findlld.cmake @@ -8,14 +8,14 @@ find_path(LLD_INCLUDE_DIRS NAMES lld/Common/Driver.h PATHS - /usr/lib/llvm-8.0/include - /usr/local/llvm80/include + /usr/lib/llvm-9.0/include + /usr/local/llvm90/include /mingw64/include) -find_library(LLD_LIBRARY NAMES lld-8.0 lld80 lld +find_library(LLD_LIBRARY NAMES lld-9.0 lld90 lld PATHS - /usr/lib/llvm-8.0/lib - /usr/local/llvm80/lib + /usr/lib/llvm-9.0/lib + /usr/local/llvm90/lib ) if(EXISTS ${LLD_LIBRARY}) set(LLD_LIBRARIES ${LLD_LIBRARY}) @@ -24,8 +24,8 @@ else() string(TOUPPER ${_libname_} _prettylibname_) find_library(LLD_${_prettylibname_}_LIB NAMES ${_libname_} PATHS - /usr/lib/llvm-8.0/lib - /usr/local/llvm80/lib + /usr/lib/llvm-9.0/lib + /usr/local/llvm90/lib /mingw64/lib /c/msys64/mingw64/lib c:/msys64/mingw64/lib) diff --git a/cmake/Findllvm.cmake b/cmake/Findllvm.cmake index 3eb6bd9003..8a75f59a7c 100644 --- a/cmake/Findllvm.cmake +++ b/cmake/Findllvm.cmake @@ -8,12 +8,12 @@ # LLVM_LIBDIRS find_program(LLVM_CONFIG_EXE - NAMES llvm-config-8 llvm-config-8.0 llvm-config80 llvm-config + NAMES llvm-config-9 llvm-config-9.0 llvm-config90 llvm-config PATHS "/mingw64/bin" "/c/msys64/mingw64/bin" "c:/msys64/mingw64/bin" - "C:/Libraries/llvm-8.0.0/bin") + "C:/Libraries/llvm-9.0.0/bin") if ("${LLVM_CONFIG_EXE}" STREQUAL "LLVM_CONFIG_EXE-NOTFOUND") message(FATAL_ERROR "unable to find llvm-config") @@ -28,14 +28,14 @@ execute_process( OUTPUT_VARIABLE LLVM_CONFIG_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) -if("${LLVM_CONFIG_VERSION}" VERSION_LESS 8) - message(FATAL_ERROR "expected LLVM 8.x but found ${LLVM_CONFIG_VERSION}") +if("${LLVM_CONFIG_VERSION}" VERSION_LESS 9) + message(FATAL_ERROR "expected LLVM 9.x but found ${LLVM_CONFIG_VERSION}") endif() -if("${LLVM_CONFIG_VERSION}" VERSION_EQUAL 9) - message(FATAL_ERROR "expected LLVM 8.x but found ${LLVM_CONFIG_VERSION}") +if("${LLVM_CONFIG_VERSION}" VERSION_EQUAL 10) + message(FATAL_ERROR "expected LLVM 9.x but found ${LLVM_CONFIG_VERSION}") endif() -if("${LLVM_CONFIG_VERSION}" VERSION_GREATER 9) - message(FATAL_ERROR "expected LLVM 8.x but found ${LLVM_CONFIG_VERSION}") +if("${LLVM_CONFIG_VERSION}" VERSION_GREATER 10) + message(FATAL_ERROR "expected LLVM 9.x but found ${LLVM_CONFIG_VERSION}") endif() execute_process( @@ -112,7 +112,7 @@ execute_process( set(LLVM_LIBRARIES ${LLVM_LIBRARIES} ${LLVM_SYSTEM_LIBS}) if(NOT LLVM_LIBRARIES) - find_library(LLVM_LIBRARIES NAMES LLVM LLVM-8 LLVM-8.0) + find_library(LLVM_LIBRARIES NAMES LLVM LLVM-9 LLVM-9.0) 
endif() link_directories("${CMAKE_PREFIX_PATH}/lib") diff --git a/src/target.cpp b/src/target.cpp index 804c421618..1c5a34e54f 100644 --- a/src/target.cpp +++ b/src/target.cpp @@ -33,6 +33,7 @@ static const ZigLLVM_SubArchType subarch_list_arm32[] = { ZigLLVM_ARMSubArch_v8r, ZigLLVM_ARMSubArch_v8m_baseline, ZigLLVM_ARMSubArch_v8m_mainline, + ZigLLVM_ARMSubArch_v8_1m_mainline, ZigLLVM_ARMSubArch_v7, ZigLLVM_ARMSubArch_v7em, ZigLLVM_ARMSubArch_v7m, @@ -76,6 +77,7 @@ static const ZigLLVM_ArchType arch_list[] = { ZigLLVM_armeb, // ARM (big endian): armeb ZigLLVM_aarch64, // AArch64 (little endian): aarch64 ZigLLVM_aarch64_be, // AArch64 (big endian): aarch64_be + ZigLLVM_aarch64_32, // AArch64 (little endian) ILP32: aarch64_32 ZigLLVM_arc, // ARC: Synopsys ARC ZigLLVM_avr, // AVR: Atmel AVR microcontroller ZigLLVM_bpfel, // eBPF or extended BPF or 64-bit BPF (little endian) @@ -176,6 +178,7 @@ static const Os os_list[] = { OsHermitCore, OsHurd, OsWASI, + OsEmscripten, OsZen, OsUefi, }; @@ -193,15 +196,19 @@ static const ZigLLVM_EnvironmentType abi_list[] = { ZigLLVM_CODE16, ZigLLVM_EABI, ZigLLVM_EABIHF, + ZigLLVM_ELFv1, + ZigLLVM_ELFv2, ZigLLVM_Android, ZigLLVM_Musl, ZigLLVM_MuslEABI, ZigLLVM_MuslEABIHF, + ZigLLVM_MSVC, ZigLLVM_Itanium, ZigLLVM_Cygnus, ZigLLVM_CoreCLR, ZigLLVM_Simulator, + ZigLLVM_MacABI, }; static const ZigLLVM_ObjectFormatType oformat_list[] = { diff --git a/src/target.hpp b/src/target.hpp index fcda9955b9..0e7dc6686d 100644 --- a/src/target.hpp +++ b/src/target.hpp @@ -48,6 +48,7 @@ enum Os { OsHermitCore, OsHurd, OsWASI, + OsEmscripten, OsZen, OsUefi, }; diff --git a/src/zig_llvm.cpp b/src/zig_llvm.cpp index c51c9e1a50..c033d7af96 100644 --- a/src/zig_llvm.cpp +++ b/src/zig_llvm.cpp @@ -176,7 +176,6 @@ bool ZigLLVMTargetMachineEmitToFile(LLVMTargetMachineRef targ_machine_ref, LLVMM PMBuilder->SizeLevel = is_small ? 
2 : 0; PMBuilder->DisableTailCalls = is_debug; - PMBuilder->DisableUnitAtATime = is_debug; PMBuilder->DisableUnrollLoops = is_debug; PMBuilder->SLPVectorize = !is_debug; PMBuilder->LoopVectorize = !is_debug; @@ -805,6 +804,8 @@ const char *ZigLLVMGetSubArchTypeName(ZigLLVM_SubArchType sub_arch) { return "v8m_baseline"; case ZigLLVM_ARMSubArch_v8m_mainline: return "v8m_mainline"; + case ZigLLVM_ARMSubArch_v8_1m_mainline: + return "v8_1m_mainline"; case ZigLLVM_ARMSubArch_v7: return "v7"; case ZigLLVM_ARMSubArch_v7em: @@ -964,6 +965,7 @@ bool ZigLLDLink(ZigLLVM_ObjectFormatType oformat, const char **args, size_t arg_ switch (oformat) { case ZigLLVM_UnknownObjectFormat: + case ZigLLVM_XCOFF: assert(false); // unreachable case ZigLLVM_COFF: @@ -1043,6 +1045,7 @@ static_assert((Triple::SubArchType)ZigLLVM_ARMSubArch_v8 == Triple::ARMSubArch_v static_assert((Triple::SubArchType)ZigLLVM_ARMSubArch_v8r == Triple::ARMSubArch_v8r, ""); static_assert((Triple::SubArchType)ZigLLVM_ARMSubArch_v8m_baseline == Triple::ARMSubArch_v8m_baseline, ""); static_assert((Triple::SubArchType)ZigLLVM_ARMSubArch_v8m_mainline == Triple::ARMSubArch_v8m_mainline, ""); +static_assert((Triple::SubArchType)ZigLLVM_ARMSubArch_v8_1m_mainline == Triple::ARMSubArch_v8_1m_mainline, ""); static_assert((Triple::SubArchType)ZigLLVM_ARMSubArch_v7 == Triple::ARMSubArch_v7, ""); static_assert((Triple::SubArchType)ZigLLVM_ARMSubArch_v7em == Triple::ARMSubArch_v7em, ""); static_assert((Triple::SubArchType)ZigLLVM_ARMSubArch_v7m == Triple::ARMSubArch_v7m, ""); @@ -1115,6 +1118,10 @@ static_assert((Triple::OSType)ZigLLVM_WatchOS == Triple::WatchOS, ""); static_assert((Triple::OSType)ZigLLVM_Mesa3D == Triple::Mesa3D, ""); static_assert((Triple::OSType)ZigLLVM_Contiki == Triple::Contiki, ""); static_assert((Triple::OSType)ZigLLVM_AMDPAL == Triple::AMDPAL, ""); +static_assert((Triple::OSType)ZigLLVM_HermitCore == Triple::HermitCore, ""); +static_assert((Triple::OSType)ZigLLVM_Hurd == Triple::Hurd, ""); +static_assert((Triple::OSType)ZigLLVM_WASI == Triple::WASI, ""); +static_assert((Triple::OSType)ZigLLVM_Emscripten == Triple::Emscripten, ""); static_assert((Triple::OSType)ZigLLVM_LastOSType == Triple::LastOSType, ""); static_assert((Triple::EnvironmentType)ZigLLVM_UnknownEnvironment == Triple::UnknownEnvironment, ""); @@ -1127,6 +1134,8 @@ static_assert((Triple::EnvironmentType)ZigLLVM_GNUX32 == Triple::GNUX32, ""); static_assert((Triple::EnvironmentType)ZigLLVM_CODE16 == Triple::CODE16, ""); static_assert((Triple::EnvironmentType)ZigLLVM_EABI == Triple::EABI, ""); static_assert((Triple::EnvironmentType)ZigLLVM_EABIHF == Triple::EABIHF, ""); +static_assert((Triple::EnvironmentType)ZigLLVM_ELFv1 == Triple::ELFv1, ""); +static_assert((Triple::EnvironmentType)ZigLLVM_ELFv2 == Triple::ELFv2, ""); static_assert((Triple::EnvironmentType)ZigLLVM_Android == Triple::Android, ""); static_assert((Triple::EnvironmentType)ZigLLVM_Musl == Triple::Musl, ""); static_assert((Triple::EnvironmentType)ZigLLVM_MuslEABI == Triple::MuslEABI, ""); @@ -1143,3 +1152,4 @@ static_assert((Triple::ObjectFormatType)ZigLLVM_COFF == Triple::COFF, ""); static_assert((Triple::ObjectFormatType)ZigLLVM_ELF == Triple::ELF, ""); static_assert((Triple::ObjectFormatType)ZigLLVM_MachO == Triple::MachO, ""); static_assert((Triple::ObjectFormatType)ZigLLVM_Wasm == Triple::Wasm, ""); +static_assert((Triple::ObjectFormatType)ZigLLVM_XCOFF == Triple::XCOFF, ""); diff --git a/src/zig_llvm.h b/src/zig_llvm.h index 8b7b0775f7..210f245efe 100644 --- a/src/zig_llvm.h +++ b/src/zig_llvm.h @@ 
-227,6 +227,7 @@ enum ZigLLVM_ArchType { ZigLLVM_armeb, // ARM (big endian): armeb ZigLLVM_aarch64, // AArch64 (little endian): aarch64 ZigLLVM_aarch64_be, // AArch64 (big endian): aarch64_be + ZigLLVM_aarch64_32, // AArch64 (little endian) ILP32: aarch64_32 ZigLLVM_arc, // ARC: Synopsys ARC ZigLLVM_avr, // AVR: Atmel AVR microcontroller ZigLLVM_bpfel, // eBPF or extended BPF or 64-bit BPF (little endian) @@ -289,6 +290,7 @@ enum ZigLLVM_SubArchType { ZigLLVM_ARMSubArch_v8r, ZigLLVM_ARMSubArch_v8m_baseline, ZigLLVM_ARMSubArch_v8m_mainline, + ZigLLVM_ARMSubArch_v8_1m_mainline, ZigLLVM_ARMSubArch_v7, ZigLLVM_ARMSubArch_v7em, ZigLLVM_ARMSubArch_v7m, @@ -370,11 +372,12 @@ enum ZigLLVM_OSType { ZigLLVM_HermitCore, // HermitCore Unikernel/Multikernel ZigLLVM_Hurd, // GNU/Hurd ZigLLVM_WASI, // Experimental WebAssembly OS + ZigLLVM_Emscripten, - ZigLLVM_LastOSType = ZigLLVM_WASI + ZigLLVM_LastOSType = ZigLLVM_Emscripten }; -// Synchronize with target.cpp::environ_list +// Synchronize with target.cpp::abi_list enum ZigLLVM_EnvironmentType { ZigLLVM_UnknownEnvironment, @@ -387,6 +390,8 @@ enum ZigLLVM_EnvironmentType { ZigLLVM_CODE16, ZigLLVM_EABI, ZigLLVM_EABIHF, + ZigLLVM_ELFv1, + ZigLLVM_ELFv2, ZigLLVM_Android, ZigLLVM_Musl, ZigLLVM_MuslEABI, @@ -396,9 +401,10 @@ enum ZigLLVM_EnvironmentType { ZigLLVM_Itanium, ZigLLVM_Cygnus, ZigLLVM_CoreCLR, - ZigLLVM_Simulator, + ZigLLVM_Simulator, // Simulator variants of other systems, e.g., Apple's iOS + ZigLLVM_MacABI, // Mac Catalyst variant of Apple's iOS deployment target. - ZigLLVM_LastEnvironmentType = ZigLLVM_Simulator + ZigLLVM_LastEnvironmentType = ZigLLVM_MacABI }; enum ZigLLVM_ObjectFormatType { @@ -408,6 +414,7 @@ enum ZigLLVM_ObjectFormatType { ZigLLVM_ELF, ZigLLVM_MachO, ZigLLVM_Wasm, + ZigLLVM_XCOFF, }; #define ZigLLVM_DIFlags_Zero 0U From e816d592e8773c03ca92645d978669ec6de5ca0c Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 17 Jul 2019 17:45:24 -0400 Subject: [PATCH 02/31] update zig codebase to llvm 9 --- cmake/Findllvm.cmake | 1 + src-self-hosted/clang.zig | 426 +++++++++++++++++++------------------- src/link.cpp | 1 + src/target.cpp | 66 +++++- src/target.hpp | 1 + src/translate_c.cpp | 34 ++- src/zig_clang.cpp | 42 ++-- src/zig_clang.h | 194 +++++++++-------- 8 files changed, 422 insertions(+), 343 deletions(-) diff --git a/cmake/Findllvm.cmake b/cmake/Findllvm.cmake index 8a75f59a7c..96324a739f 100644 --- a/cmake/Findllvm.cmake +++ b/cmake/Findllvm.cmake @@ -59,6 +59,7 @@ NEED_TARGET("Mips") NEED_TARGET("MSP430") NEED_TARGET("NVPTX") NEED_TARGET("PowerPC") +NEED_TARGET("RISCV") NEED_TARGET("Sparc") NEED_TARGET("SystemZ") NEED_TARGET("WebAssembly") diff --git a/src-self-hosted/clang.zig b/src-self-hosted/clang.zig index f2433ae60c..5266f27371 100644 --- a/src-self-hosted/clang.zig +++ b/src-self-hosted/clang.zig @@ -148,6 +148,7 @@ pub const ZigClangTypeClass = extern enum { UnresolvedUsing, Paren, Typedef, + MacroQualified, Adjusted, Decayed, TypeOfExpr, @@ -176,210 +177,213 @@ pub const ZigClangTypeClass = extern enum { Atomic, }; -pub const ZigClangStmtClass = extern enum { - NoStmtClass = 0, - GCCAsmStmtClass = 1, - MSAsmStmtClass = 2, - AttributedStmtClass = 3, - BreakStmtClass = 4, - CXXCatchStmtClass = 5, - CXXForRangeStmtClass = 6, - CXXTryStmtClass = 7, - CapturedStmtClass = 8, - CompoundStmtClass = 9, - ContinueStmtClass = 10, - CoreturnStmtClass = 11, - CoroutineBodyStmtClass = 12, - DeclStmtClass = 13, - DoStmtClass = 14, - BinaryConditionalOperatorClass = 15, - ConditionalOperatorClass = 16, - AddrLabelExprClass = 
17, - ArrayInitIndexExprClass = 18, - ArrayInitLoopExprClass = 19, - ArraySubscriptExprClass = 20, - ArrayTypeTraitExprClass = 21, - AsTypeExprClass = 22, - AtomicExprClass = 23, - BinaryOperatorClass = 24, - CompoundAssignOperatorClass = 25, - BlockExprClass = 26, - CXXBindTemporaryExprClass = 27, - CXXBoolLiteralExprClass = 28, - CXXConstructExprClass = 29, - CXXTemporaryObjectExprClass = 30, - CXXDefaultArgExprClass = 31, - CXXDefaultInitExprClass = 32, - CXXDeleteExprClass = 33, - CXXDependentScopeMemberExprClass = 34, - CXXFoldExprClass = 35, - CXXInheritedCtorInitExprClass = 36, - CXXNewExprClass = 37, - CXXNoexceptExprClass = 38, - CXXNullPtrLiteralExprClass = 39, - CXXPseudoDestructorExprClass = 40, - CXXScalarValueInitExprClass = 41, - CXXStdInitializerListExprClass = 42, - CXXThisExprClass = 43, - CXXThrowExprClass = 44, - CXXTypeidExprClass = 45, - CXXUnresolvedConstructExprClass = 46, - CXXUuidofExprClass = 47, - CallExprClass = 48, - CUDAKernelCallExprClass = 49, - CXXMemberCallExprClass = 50, - CXXOperatorCallExprClass = 51, - UserDefinedLiteralClass = 52, - CStyleCastExprClass = 53, - CXXFunctionalCastExprClass = 54, - CXXConstCastExprClass = 55, - CXXDynamicCastExprClass = 56, - CXXReinterpretCastExprClass = 57, - CXXStaticCastExprClass = 58, - ObjCBridgedCastExprClass = 59, - ImplicitCastExprClass = 60, - CharacterLiteralClass = 61, - ChooseExprClass = 62, - CompoundLiteralExprClass = 63, - ConvertVectorExprClass = 64, - CoawaitExprClass = 65, - CoyieldExprClass = 66, - DeclRefExprClass = 67, - DependentCoawaitExprClass = 68, - DependentScopeDeclRefExprClass = 69, - DesignatedInitExprClass = 70, - DesignatedInitUpdateExprClass = 71, - ExpressionTraitExprClass = 72, - ExtVectorElementExprClass = 73, - FixedPointLiteralClass = 74, - FloatingLiteralClass = 75, - ConstantExprClass = 76, - ExprWithCleanupsClass = 77, - FunctionParmPackExprClass = 78, - GNUNullExprClass = 79, - GenericSelectionExprClass = 80, - ImaginaryLiteralClass = 81, - ImplicitValueInitExprClass = 82, - InitListExprClass = 83, - IntegerLiteralClass = 84, - LambdaExprClass = 85, - MSPropertyRefExprClass = 86, - MSPropertySubscriptExprClass = 87, - MaterializeTemporaryExprClass = 88, - MemberExprClass = 89, - NoInitExprClass = 90, - OMPArraySectionExprClass = 91, - ObjCArrayLiteralClass = 92, - ObjCAvailabilityCheckExprClass = 93, - ObjCBoolLiteralExprClass = 94, - ObjCBoxedExprClass = 95, - ObjCDictionaryLiteralClass = 96, - ObjCEncodeExprClass = 97, - ObjCIndirectCopyRestoreExprClass = 98, - ObjCIsaExprClass = 99, - ObjCIvarRefExprClass = 100, - ObjCMessageExprClass = 101, - ObjCPropertyRefExprClass = 102, - ObjCProtocolExprClass = 103, - ObjCSelectorExprClass = 104, - ObjCStringLiteralClass = 105, - ObjCSubscriptRefExprClass = 106, - OffsetOfExprClass = 107, - OpaqueValueExprClass = 108, - UnresolvedLookupExprClass = 109, - UnresolvedMemberExprClass = 110, - PackExpansionExprClass = 111, - ParenExprClass = 112, - ParenListExprClass = 113, - PredefinedExprClass = 114, - PseudoObjectExprClass = 115, - ShuffleVectorExprClass = 116, - SizeOfPackExprClass = 117, - StmtExprClass = 118, - StringLiteralClass = 119, - SubstNonTypeTemplateParmExprClass = 120, - SubstNonTypeTemplateParmPackExprClass = 121, - TypeTraitExprClass = 122, - TypoExprClass = 123, - UnaryExprOrTypeTraitExprClass = 124, - UnaryOperatorClass = 125, - VAArgExprClass = 126, - ForStmtClass = 127, - GotoStmtClass = 128, - IfStmtClass = 129, - IndirectGotoStmtClass = 130, - LabelStmtClass = 131, - MSDependentExistsStmtClass = 132, - NullStmtClass = 
133, - OMPAtomicDirectiveClass = 134, - OMPBarrierDirectiveClass = 135, - OMPCancelDirectiveClass = 136, - OMPCancellationPointDirectiveClass = 137, - OMPCriticalDirectiveClass = 138, - OMPFlushDirectiveClass = 139, - OMPDistributeDirectiveClass = 140, - OMPDistributeParallelForDirectiveClass = 141, - OMPDistributeParallelForSimdDirectiveClass = 142, - OMPDistributeSimdDirectiveClass = 143, - OMPForDirectiveClass = 144, - OMPForSimdDirectiveClass = 145, - OMPParallelForDirectiveClass = 146, - OMPParallelForSimdDirectiveClass = 147, - OMPSimdDirectiveClass = 148, - OMPTargetParallelForSimdDirectiveClass = 149, - OMPTargetSimdDirectiveClass = 150, - OMPTargetTeamsDistributeDirectiveClass = 151, - OMPTargetTeamsDistributeParallelForDirectiveClass = 152, - OMPTargetTeamsDistributeParallelForSimdDirectiveClass = 153, - OMPTargetTeamsDistributeSimdDirectiveClass = 154, - OMPTaskLoopDirectiveClass = 155, - OMPTaskLoopSimdDirectiveClass = 156, - OMPTeamsDistributeDirectiveClass = 157, - OMPTeamsDistributeParallelForDirectiveClass = 158, - OMPTeamsDistributeParallelForSimdDirectiveClass = 159, - OMPTeamsDistributeSimdDirectiveClass = 160, - OMPMasterDirectiveClass = 161, - OMPOrderedDirectiveClass = 162, - OMPParallelDirectiveClass = 163, - OMPParallelSectionsDirectiveClass = 164, - OMPSectionDirectiveClass = 165, - OMPSectionsDirectiveClass = 166, - OMPSingleDirectiveClass = 167, - OMPTargetDataDirectiveClass = 168, - OMPTargetDirectiveClass = 169, - OMPTargetEnterDataDirectiveClass = 170, - OMPTargetExitDataDirectiveClass = 171, - OMPTargetParallelDirectiveClass = 172, - OMPTargetParallelForDirectiveClass = 173, - OMPTargetTeamsDirectiveClass = 174, - OMPTargetUpdateDirectiveClass = 175, - OMPTaskDirectiveClass = 176, - OMPTaskgroupDirectiveClass = 177, - OMPTaskwaitDirectiveClass = 178, - OMPTaskyieldDirectiveClass = 179, - OMPTeamsDirectiveClass = 180, - ObjCAtCatchStmtClass = 181, - ObjCAtFinallyStmtClass = 182, - ObjCAtSynchronizedStmtClass = 183, - ObjCAtThrowStmtClass = 184, - ObjCAtTryStmtClass = 185, - ObjCAutoreleasePoolStmtClass = 186, - ObjCForCollectionStmtClass = 187, - ReturnStmtClass = 188, - SEHExceptStmtClass = 189, - SEHFinallyStmtClass = 190, - SEHLeaveStmtClass = 191, - SEHTryStmtClass = 192, - CaseStmtClass = 193, - DefaultStmtClass = 194, - SwitchStmtClass = 195, - WhileStmtClass = 196, +const ZigClangStmtClass = extern enum { + NoStmtClass, + GCCAsmStmtClass, + MSAsmStmtClass, + BreakStmtClass, + CXXCatchStmtClass, + CXXForRangeStmtClass, + CXXTryStmtClass, + CapturedStmtClass, + CompoundStmtClass, + ContinueStmtClass, + CoreturnStmtClass, + CoroutineBodyStmtClass, + DeclStmtClass, + DoStmtClass, + ForStmtClass, + GotoStmtClass, + IfStmtClass, + IndirectGotoStmtClass, + MSDependentExistsStmtClass, + NullStmtClass, + OMPAtomicDirectiveClass, + OMPBarrierDirectiveClass, + OMPCancelDirectiveClass, + OMPCancellationPointDirectiveClass, + OMPCriticalDirectiveClass, + OMPFlushDirectiveClass, + OMPDistributeDirectiveClass, + OMPDistributeParallelForDirectiveClass, + OMPDistributeParallelForSimdDirectiveClass, + OMPDistributeSimdDirectiveClass, + OMPForDirectiveClass, + OMPForSimdDirectiveClass, + OMPParallelForDirectiveClass, + OMPParallelForSimdDirectiveClass, + OMPSimdDirectiveClass, + OMPTargetParallelForSimdDirectiveClass, + OMPTargetSimdDirectiveClass, + OMPTargetTeamsDistributeDirectiveClass, + OMPTargetTeamsDistributeParallelForDirectiveClass, + OMPTargetTeamsDistributeParallelForSimdDirectiveClass, + OMPTargetTeamsDistributeSimdDirectiveClass, + OMPTaskLoopDirectiveClass, 
+ OMPTaskLoopSimdDirectiveClass, + OMPTeamsDistributeDirectiveClass, + OMPTeamsDistributeParallelForDirectiveClass, + OMPTeamsDistributeParallelForSimdDirectiveClass, + OMPTeamsDistributeSimdDirectiveClass, + OMPMasterDirectiveClass, + OMPOrderedDirectiveClass, + OMPParallelDirectiveClass, + OMPParallelSectionsDirectiveClass, + OMPSectionDirectiveClass, + OMPSectionsDirectiveClass, + OMPSingleDirectiveClass, + OMPTargetDataDirectiveClass, + OMPTargetDirectiveClass, + OMPTargetEnterDataDirectiveClass, + OMPTargetExitDataDirectiveClass, + OMPTargetParallelDirectiveClass, + OMPTargetParallelForDirectiveClass, + OMPTargetTeamsDirectiveClass, + OMPTargetUpdateDirectiveClass, + OMPTaskDirectiveClass, + OMPTaskgroupDirectiveClass, + OMPTaskwaitDirectiveClass, + OMPTaskyieldDirectiveClass, + OMPTeamsDirectiveClass, + ObjCAtCatchStmtClass, + ObjCAtFinallyStmtClass, + ObjCAtSynchronizedStmtClass, + ObjCAtThrowStmtClass, + ObjCAtTryStmtClass, + ObjCAutoreleasePoolStmtClass, + ObjCForCollectionStmtClass, + ReturnStmtClass, + SEHExceptStmtClass, + SEHFinallyStmtClass, + SEHLeaveStmtClass, + SEHTryStmtClass, + CaseStmtClass, + DefaultStmtClass, + SwitchStmtClass, + AttributedStmtClass, + BinaryConditionalOperatorClass, + ConditionalOperatorClass, + AddrLabelExprClass, + ArrayInitIndexExprClass, + ArrayInitLoopExprClass, + ArraySubscriptExprClass, + ArrayTypeTraitExprClass, + AsTypeExprClass, + AtomicExprClass, + BinaryOperatorClass, + CompoundAssignOperatorClass, + BlockExprClass, + CXXBindTemporaryExprClass, + CXXBoolLiteralExprClass, + CXXConstructExprClass, + CXXTemporaryObjectExprClass, + CXXDefaultArgExprClass, + CXXDefaultInitExprClass, + CXXDeleteExprClass, + CXXDependentScopeMemberExprClass, + CXXFoldExprClass, + CXXInheritedCtorInitExprClass, + CXXNewExprClass, + CXXNoexceptExprClass, + CXXNullPtrLiteralExprClass, + CXXPseudoDestructorExprClass, + CXXScalarValueInitExprClass, + CXXStdInitializerListExprClass, + CXXThisExprClass, + CXXThrowExprClass, + CXXTypeidExprClass, + CXXUnresolvedConstructExprClass, + CXXUuidofExprClass, + CallExprClass, + CUDAKernelCallExprClass, + CXXMemberCallExprClass, + CXXOperatorCallExprClass, + UserDefinedLiteralClass, + BuiltinBitCastExprClass, + CStyleCastExprClass, + CXXFunctionalCastExprClass, + CXXConstCastExprClass, + CXXDynamicCastExprClass, + CXXReinterpretCastExprClass, + CXXStaticCastExprClass, + ObjCBridgedCastExprClass, + ImplicitCastExprClass, + CharacterLiteralClass, + ChooseExprClass, + CompoundLiteralExprClass, + ConvertVectorExprClass, + CoawaitExprClass, + CoyieldExprClass, + DeclRefExprClass, + DependentCoawaitExprClass, + DependentScopeDeclRefExprClass, + DesignatedInitExprClass, + DesignatedInitUpdateExprClass, + ExpressionTraitExprClass, + ExtVectorElementExprClass, + FixedPointLiteralClass, + FloatingLiteralClass, + ConstantExprClass, + ExprWithCleanupsClass, + FunctionParmPackExprClass, + GNUNullExprClass, + GenericSelectionExprClass, + ImaginaryLiteralClass, + ImplicitValueInitExprClass, + InitListExprClass, + IntegerLiteralClass, + LambdaExprClass, + MSPropertyRefExprClass, + MSPropertySubscriptExprClass, + MaterializeTemporaryExprClass, + MemberExprClass, + NoInitExprClass, + OMPArraySectionExprClass, + ObjCArrayLiteralClass, + ObjCAvailabilityCheckExprClass, + ObjCBoolLiteralExprClass, + ObjCBoxedExprClass, + ObjCDictionaryLiteralClass, + ObjCEncodeExprClass, + ObjCIndirectCopyRestoreExprClass, + ObjCIsaExprClass, + ObjCIvarRefExprClass, + ObjCMessageExprClass, + ObjCPropertyRefExprClass, + ObjCProtocolExprClass, + 
ObjCSelectorExprClass, + ObjCStringLiteralClass, + ObjCSubscriptRefExprClass, + OffsetOfExprClass, + OpaqueValueExprClass, + UnresolvedLookupExprClass, + UnresolvedMemberExprClass, + PackExpansionExprClass, + ParenExprClass, + ParenListExprClass, + PredefinedExprClass, + PseudoObjectExprClass, + ShuffleVectorExprClass, + SizeOfPackExprClass, + SourceLocExprClass, + StmtExprClass, + StringLiteralClass, + SubstNonTypeTemplateParmExprClass, + SubstNonTypeTemplateParmPackExprClass, + TypeTraitExprClass, + TypoExprClass, + UnaryExprOrTypeTraitExprClass, + UnaryOperatorClass, + VAArgExprClass, + LabelStmtClass, + WhileStmtClass, }; pub const ZigClangCK = extern enum { Dependent, BitCast, LValueBitCast, + LValueToRValueBitCast, LValueToRValue, NoOp, BaseToDerived, @@ -406,6 +410,8 @@ pub const ZigClangCK = extern enum { IntegralToBoolean, IntegralToFloating, FixedPointCast, + FixedPointToIntegral, + IntegralToFixedPoint, FixedPointToBoolean, FloatingToIntegral, FloatingToBoolean, @@ -439,9 +445,11 @@ pub const ZigClangCK = extern enum { }; pub const ZigClangAPValueKind = extern enum { - Uninitialized, + None, + Indeterminate, Int, Float, + FixedPoint, ComplexInt, ComplexFloat, LValue, @@ -478,6 +486,7 @@ pub const ZigClangDeclKind = extern enum { ObjCMethod, ObjCProperty, BuiltinTemplate, + Concept, ClassTemplate, FunctionTemplate, TypeAliasTemplate, @@ -519,8 +528,10 @@ pub const ZigClangDeclKind = extern enum { VarTemplatePartialSpecialization, EnumConstant, IndirectField, + OMPDeclareMapper, OMPDeclareReduction, UnresolvedUsingValue, + OMPAllocate, OMPRequires, OMPThreadPrivate, ObjCPropertyImpl, @@ -939,25 +950,10 @@ pub const struct_ZigClangExprEvalResult = extern struct { }; pub const struct_ZigClangAPValue = extern struct { - Kind: ZigClangAPValue_ValueKind, + Kind: ZigClangAPValueKind, Data: if (builtin.os == .windows and builtin.abi == .msvc) [52]u8 else [68]u8, }; -pub const ZigClangAPValue_ValueKind = extern enum { - ZigClangAPValue_ValueKind_Uninitialized, - ZigClangAPValue_ValueKind_Int, - ZigClangAPValue_ValueKind_Float, - ZigClangAPValue_ValueKind_ComplexInt, - ZigClangAPValue_ValueKind_ComplexFloat, - ZigClangAPValue_ValueKind_LValue, - ZigClangAPValue_ValueKind_Vector, - ZigClangAPValue_ValueKind_Array, - ZigClangAPValue_ValueKind_Struct, - ZigClangAPValue_ValueKind_Union, - ZigClangAPValue_ValueKind_MemberPointer, - ZigClangAPValue_ValueKind_AddrLabelDiff, -}; - pub extern fn ZigClangIntegerLiteral_EvaluateAsInt(*const ZigClangIntegerLiteral, *ZigClangExprEvalResult, *const ZigClangASTContext) bool; pub extern fn ZigClangIntegerLiteral_getBeginLoc(*const ZigClangIntegerLiteral) ZigClangSourceLocation; diff --git a/src/link.cpp b/src/link.cpp index 44b3abaa90..88267f6cac 100644 --- a/src/link.cpp +++ b/src/link.cpp @@ -2520,6 +2520,7 @@ static void construct_linker_job_macho(LinkJob *lj) { static void construct_linker_job(LinkJob *lj) { switch (target_object_format(lj->codegen->zig_target)) { case ZigLLVM_UnknownObjectFormat: + case ZigLLVM_XCOFF: zig_unreachable(); case ZigLLVM_COFF: diff --git a/src/target.cpp b/src/target.cpp index 1c5a34e54f..fd5deeea29 100644 --- a/src/target.cpp +++ b/src/target.cpp @@ -235,6 +235,7 @@ const char *target_oformat_name(ZigLLVM_ObjectFormatType oformat) { case ZigLLVM_ELF: return "elf"; case ZigLLVM_MachO: return "macho"; case ZigLLVM_Wasm: return "wasm"; + case ZigLLVM_XCOFF: return "xcoff"; } zig_unreachable(); } @@ -337,6 +338,8 @@ ZigLLVM_OSType get_llvm_os_type(Os os_type) { return ZigLLVM_Hurd; case OsWASI: return ZigLLVM_WASI; + case 
OsEmscripten: + return ZigLLVM_Emscripten; } zig_unreachable(); } @@ -412,6 +415,8 @@ static Os get_zig_os_type(ZigLLVM_OSType os_type) { return OsHurd; case ZigLLVM_WASI: return OsWASI; + case ZigLLVM_Emscripten: + return OsEmscripten; } zig_unreachable(); } @@ -457,6 +462,7 @@ const char *target_os_name(Os os_type) { case OsHermitCore: case OsHurd: case OsWASI: + case OsEmscripten: return ZigLLVMGetOSTypeName(get_llvm_os_type(os_type)); } zig_unreachable(); @@ -569,6 +575,7 @@ SubArchList target_subarch_list(ZigLLVM_ArchType arch) { case ZigLLVM_aarch64: case ZigLLVM_aarch64_be: + case ZigLLVM_aarch64_32: return SubArchListArm64; case ZigLLVM_kalimba: @@ -843,6 +850,7 @@ uint32_t target_arch_pointer_bit_width(ZigLLVM_ArchType arch) { case ZigLLVM_shave: case ZigLLVM_wasm32: case ZigLLVM_renderscript32: + case ZigLLVM_aarch64_32: return 32; case ZigLLVM_aarch64: @@ -916,6 +924,7 @@ uint32_t target_c_type_size_in_bits(const ZigTarget *target, CIntType id) { case OsNetBSD: case OsOpenBSD: case OsWASI: + case OsEmscripten: switch (id) { case CIntTypeShort: case CIntTypeUShort: @@ -1091,7 +1100,40 @@ static bool is_64_bit(ZigLLVM_ArchType arch) { return target_arch_pointer_bit_width(arch) == 64; } +bool target_is_android(const ZigTarget *target) { + return target->abi == ZigLLVM_Android; +} + const char *target_dynamic_linker(const ZigTarget *target) { + if (target_is_android(target)) { + return is_64_bit(target->arch) ? "/system/bin/linker64" : "/system/bin/linker"; + } + + if (target_is_musl(target)) { + Buf buf = BUF_INIT; + buf_init_from_str(&buf, "/lib/ld-musl-"); + bool is_arm = false; + switch (target->arch) { + case ZigLLVM_arm: + case ZigLLVM_thumb: + buf_append_str(&buf, "arm"); + is_arm = true; + break; + case ZigLLVM_armeb: + case ZigLLVM_thumbeb: + buf_append_str(&buf, "armeb"); + is_arm = true; + break; + default: + buf_append_str(&buf, target_arch_name(target->arch)); + } + if (is_arm && get_float_abi(target) == FloatAbiHard) { + buf_append_str(&buf, "hf"); + } + buf_append_str(&buf, ".so.1"); + return buf_ptr(&buf); + } + switch (target->os) { case OsFreeBSD: return "/libexec/ld-elf.so.1"; @@ -1099,14 +1141,6 @@ const char *target_dynamic_linker(const ZigTarget *target) { return "/libexec/ld.elf_so"; case OsLinux: { const ZigLLVM_EnvironmentType abi = target->abi; - if (abi == ZigLLVM_Android) { - if (is_64_bit(target->arch)) { - return "/system/bin/linker64"; - } else { - return "/system/bin/linker"; - } - } - switch (target->arch) { case ZigLLVM_UnknownArch: zig_unreachable(); @@ -1121,6 +1155,9 @@ const char *target_dynamic_linker(const ZigTarget *target) { case ZigLLVM_aarch64_be: return "/lib/ld-linux-aarch64_be.so.1"; + case ZigLLVM_aarch64_32: + return "/lib/ld-linux-aarch64_32.so.1"; + case ZigLLVM_arm: case ZigLLVM_thumb: if (get_float_abi(target) == FloatAbiHard) { @@ -1213,6 +1250,7 @@ const char *target_dynamic_linker(const ZigTarget *target) { case OsMacOSX: case OsUefi: case OsWindows: + case OsEmscripten: return nullptr; case OsAnanas: @@ -1280,11 +1318,12 @@ const char *arch_stack_pointer_register_name(ZigLLVM_ArchType arch) { case ZigLLVM_x86_64: return "rsp"; case ZigLLVM_aarch64: + case ZigLLVM_aarch64_be: + case ZigLLVM_aarch64_32: return "sp"; case ZigLLVM_arm: case ZigLLVM_thumb: - case ZigLLVM_aarch64_be: case ZigLLVM_amdgcn: case ZigLLVM_amdil: case ZigLLVM_amdil64: @@ -1338,10 +1377,11 @@ bool target_is_arm(const ZigTarget *target) { case ZigLLVM_UnknownArch: zig_unreachable(); case ZigLLVM_aarch64: - case ZigLLVM_arm: - case ZigLLVM_thumb: case 
ZigLLVM_aarch64_be: + case ZigLLVM_aarch64_32: + case ZigLLVM_arm: case ZigLLVM_armeb: + case ZigLLVM_thumb: case ZigLLVM_thumbeb: return true; @@ -1490,6 +1530,7 @@ ZigLLVM_EnvironmentType target_default_abi(ZigLLVM_ArchType arch, Os os) { return ZigLLVM_MSVC; case OsLinux: case OsWASI: + case OsEmscripten: return ZigLLVM_Musl; } zig_unreachable(); @@ -1607,12 +1648,15 @@ const char *target_libc_generic_name(const ZigTarget *target) { case ZigLLVM_CODE16: case ZigLLVM_EABI: case ZigLLVM_EABIHF: + case ZigLLVM_ELFv1: + case ZigLLVM_ELFv2: case ZigLLVM_Android: case ZigLLVM_MSVC: case ZigLLVM_Itanium: case ZigLLVM_Cygnus: case ZigLLVM_CoreCLR: case ZigLLVM_Simulator: + case ZigLLVM_MacABI: zig_unreachable(); } zig_unreachable(); diff --git a/src/target.hpp b/src/target.hpp index 0e7dc6686d..85768347a5 100644 --- a/src/target.hpp +++ b/src/target.hpp @@ -186,6 +186,7 @@ bool target_abi_is_musl(ZigLLVM_EnvironmentType abi); bool target_is_glibc(const ZigTarget *target); bool target_is_musl(const ZigTarget *target); bool target_is_wasm(const ZigTarget *target); +bool target_is_android(const ZigTarget *target); bool target_is_single_threaded(const ZigTarget *target); bool target_supports_stack_probing(const ZigTarget *target); bool target_has_debug_info(const ZigTarget *target); diff --git a/src/translate_c.cpp b/src/translate_c.cpp index 69c70958e5..706f4706b0 100644 --- a/src/translate_c.cpp +++ b/src/translate_c.cpp @@ -1301,6 +1301,7 @@ static AstNode *trans_type(Context *c, const ZigClangType *ty, ZigClangSourceLoc case ZigClangType_DeducedTemplateSpecialization: case ZigClangType_DependentAddressSpace: case ZigClangType_DependentVector: + case ZigClangType_MacroQualified: emit_warning(c, source_loc, "unsupported type: '%s'", ZigClangType_getTypeClassName(ty)); return nullptr; } @@ -2188,6 +2189,15 @@ static AstNode *trans_implicit_cast_expr(Context *c, ResultUsed result_used, Tra case ZigClangCK_IntToOCLSampler: emit_warning(c, bitcast(stmt->getBeginLoc()), "TODO handle C CK_IntToOCLSampler"); return nullptr; + case ZigClangCK_LValueToRValueBitCast: + emit_warning(c, bitcast(stmt->getBeginLoc()), "TODO handle C CK_LValueToRValueBitCast"); + return nullptr; + case ZigClangCK_FixedPointToIntegral: + emit_warning(c, bitcast(stmt->getBeginLoc()), "TODO handle C CK_FixedPointToIntegral"); + return nullptr; + case ZigClangCK_IntegralToFixedPoint: + emit_warning(c, bitcast(stmt->getBeginLoc()), "TODO handle C CK_IntegralToFixedPointral"); + return nullptr; } zig_unreachable(); } @@ -2671,6 +2681,15 @@ static int trans_local_declaration(Context *c, TransScope *scope, const clang::D case clang::Decl::TranslationUnit: emit_warning(c, bitcast(stmt->getBeginLoc()), "TODO handle C TranslationUnit"); return ErrorUnexpected; + case clang::Decl::Concept: + emit_warning(c, bitcast(stmt->getBeginLoc()), "TODO handle C Concept"); + return ErrorUnexpected; + case clang::Decl::OMPDeclareMapper: + emit_warning(c, bitcast(stmt->getBeginLoc()), "TODO handle C OMPDeclareMapper"); + return ErrorUnexpected; + case clang::Decl::OMPAllocate: + emit_warning(c, bitcast(stmt->getBeginLoc()), "TODO handle C OMPAllocate"); + return ErrorUnexpected; } zig_unreachable(); } @@ -2946,6 +2965,7 @@ static AstNode *trans_bool_expr(Context *c, ResultUsed result_used, TransScope * case ZigClangType_DeducedTemplateSpecialization: case ZigClangType_DependentAddressSpace: case ZigClangType_DependentVector: + case ZigClangType_MacroQualified: return res; } zig_unreachable(); @@ -4031,6 +4051,12 @@ static int trans_stmt_extra(Context 
*c, TransScope *scope, const ZigClangStmt *s case ZigClangStmt_SEHTryStmtClass: emit_warning(c, ZigClangStmt_getBeginLoc(stmt), "TODO handle C SEHTryStmtClass"); return ErrorUnexpected; + case ZigClangStmt_BuiltinBitCastExprClass: + emit_warning(c, ZigClangStmt_getBeginLoc(stmt), "TODO handle C BuiltinBitCastExprClass"); + return ErrorUnexpected; + case ZigClangStmt_SourceLocExprClass: + emit_warning(c, ZigClangStmt_getBeginLoc(stmt), "TODO handle C SourceLocExprClass"); + return ErrorUnexpected; } zig_unreachable(); } @@ -4448,7 +4474,7 @@ static AstNode *trans_ap_value(Context *c, const ZigClangAPValue *ap_value, ZigC switch (ZigClangAPValue_getKind(ap_value)) { case ZigClangAPValueInt: return trans_create_node_apint(c, ZigClangAPValue_getInt(ap_value)); - case ZigClangAPValueUninitialized: + case ZigClangAPValueNone: return trans_create_node(c, NodeTypeUndefinedLiteral); case ZigClangAPValueArray: { emit_warning(c, source_loc, "TODO add a test case for this code"); @@ -4539,6 +4565,12 @@ static AstNode *trans_ap_value(Context *c, const ZigClangAPValue *ap_value, ZigC case ZigClangAPValueAddrLabelDiff: emit_warning(c, source_loc, "unsupported initializer value kind: AddrLabelDiff"); return nullptr; + case ZigClangAPValueIndeterminate: + emit_warning(c, source_loc, "unsupported initializer value kind: Indeterminate"); + return nullptr; + case ZigClangAPValueFixedPoint: + emit_warning(c, source_loc, "unsupported initializer value kind: FixedPoint"); + return nullptr; } zig_unreachable(); } diff --git a/src/zig_clang.cpp b/src/zig_clang.cpp index 7c87f31125..3ace615365 100644 --- a/src/zig_clang.cpp +++ b/src/zig_clang.cpp @@ -182,6 +182,7 @@ void ZigClang_detect_enum_CK(clang::CastKind x) { case clang::CK_IntegralToFloating: case clang::CK_IntegralToPointer: case clang::CK_LValueBitCast: + case clang::CK_LValueToRValueBitCast: case clang::CK_LValueToRValue: case clang::CK_MemberPointerToBoolean: case clang::CK_NoOp: @@ -199,6 +200,8 @@ void ZigClang_detect_enum_CK(clang::CastKind x) { case clang::CK_VectorSplat: case clang::CK_ZeroToOCLOpaqueType: case clang::CK_FixedPointCast: + case clang::CK_FixedPointToIntegral: + case clang::CK_IntegralToFixedPoint: case clang::CK_FixedPointToBoolean: break; } @@ -207,6 +210,7 @@ void ZigClang_detect_enum_CK(clang::CastKind x) { static_assert((clang::CastKind)ZigClangCK_Dependent == clang::CK_Dependent, ""); static_assert((clang::CastKind)ZigClangCK_BitCast == clang::CK_BitCast, ""); static_assert((clang::CastKind)ZigClangCK_LValueBitCast == clang::CK_LValueBitCast, ""); +static_assert((clang::CastKind)ZigClangCK_LValueToRValueBitCast == clang::CK_LValueToRValueBitCast, ""); static_assert((clang::CastKind)ZigClangCK_LValueToRValue == clang::CK_LValueToRValue, ""); static_assert((clang::CastKind)ZigClangCK_NoOp == clang::CK_NoOp, ""); static_assert((clang::CastKind)ZigClangCK_BaseToDerived == clang::CK_BaseToDerived, ""); @@ -233,6 +237,8 @@ static_assert((clang::CastKind)ZigClangCK_IntegralCast == clang::CK_IntegralCast static_assert((clang::CastKind)ZigClangCK_IntegralToBoolean == clang::CK_IntegralToBoolean, ""); static_assert((clang::CastKind)ZigClangCK_IntegralToFloating == clang::CK_IntegralToFloating, ""); static_assert((clang::CastKind)ZigClangCK_FixedPointCast == clang::CK_FixedPointCast, ""); +static_assert((clang::CastKind)ZigClangCK_FixedPointToIntegral == clang::CK_FixedPointToIntegral, ""); +static_assert((clang::CastKind)ZigClangCK_IntegralToFixedPoint == clang::CK_IntegralToFixedPoint, ""); 
static_assert((clang::CastKind)ZigClangCK_FixedPointToBoolean == clang::CK_FixedPointToBoolean, ""); static_assert((clang::CastKind)ZigClangCK_FloatingToIntegral == clang::CK_FloatingToIntegral, ""); static_assert((clang::CastKind)ZigClangCK_FloatingToBoolean == clang::CK_FloatingToBoolean, ""); @@ -288,6 +294,7 @@ void ZigClang_detect_enum_TypeClass(clang::Type::TypeClass ty) { case clang::Type::UnresolvedUsing: case clang::Type::Paren: case clang::Type::Typedef: + case clang::Type::MacroQualified: case clang::Type::Adjusted: case clang::Type::Decayed: case clang::Type::TypeOfExpr: @@ -339,6 +346,7 @@ static_assert((clang::Type::TypeClass)ZigClangType_FunctionNoProto == clang::Typ static_assert((clang::Type::TypeClass)ZigClangType_UnresolvedUsing == clang::Type::UnresolvedUsing, ""); static_assert((clang::Type::TypeClass)ZigClangType_Paren == clang::Type::Paren, ""); static_assert((clang::Type::TypeClass)ZigClangType_Typedef == clang::Type::Typedef, ""); +static_assert((clang::Type::TypeClass)ZigClangType_MacroQualified == clang::Type::MacroQualified, ""); static_assert((clang::Type::TypeClass)ZigClangType_Adjusted == clang::Type::Adjusted, ""); static_assert((clang::Type::TypeClass)ZigClangType_Decayed == clang::Type::Decayed, ""); static_assert((clang::Type::TypeClass)ZigClangType_TypeOfExpr == clang::Type::TypeOfExpr, ""); @@ -423,6 +431,7 @@ void ZigClang_detect_enum_StmtClass(clang::Stmt::StmtClass x) { case clang::Stmt::ConditionalOperatorClass: case clang::Stmt::BinaryConditionalOperatorClass: case clang::Stmt::ImplicitCastExprClass: + case clang::Stmt::BuiltinBitCastExprClass: case clang::Stmt::CStyleCastExprClass: case clang::Stmt::CompoundLiteralExprClass: case clang::Stmt::ExtVectorElementExprClass: @@ -479,6 +488,7 @@ void ZigClang_detect_enum_StmtClass(clang::Stmt::StmtClass x) { case clang::Stmt::CXXNoexceptExprClass: case clang::Stmt::PackExpansionExprClass: case clang::Stmt::SizeOfPackExprClass: + case clang::Stmt::SourceLocExprClass: case clang::Stmt::SubstNonTypeTemplateParmExprClass: case clang::Stmt::SubstNonTypeTemplateParmPackExprClass: case clang::Stmt::FunctionParmPackExprClass: @@ -624,6 +634,7 @@ static_assert((clang::Stmt::StmtClass)ZigClangStmt_CompoundAssignOperatorClass = static_assert((clang::Stmt::StmtClass)ZigClangStmt_ConditionalOperatorClass == clang::Stmt::ConditionalOperatorClass, ""); static_assert((clang::Stmt::StmtClass)ZigClangStmt_BinaryConditionalOperatorClass == clang::Stmt::BinaryConditionalOperatorClass, ""); static_assert((clang::Stmt::StmtClass)ZigClangStmt_ImplicitCastExprClass == clang::Stmt::ImplicitCastExprClass, ""); +static_assert((clang::Stmt::StmtClass)ZigClangStmt_BuiltinBitCastExprClass == clang::Stmt::BuiltinBitCastExprClass, ""); static_assert((clang::Stmt::StmtClass)ZigClangStmt_CStyleCastExprClass == clang::Stmt::CStyleCastExprClass, ""); static_assert((clang::Stmt::StmtClass)ZigClangStmt_CompoundLiteralExprClass == clang::Stmt::CompoundLiteralExprClass, ""); static_assert((clang::Stmt::StmtClass)ZigClangStmt_ExtVectorElementExprClass == clang::Stmt::ExtVectorElementExprClass, ""); @@ -680,6 +691,7 @@ static_assert((clang::Stmt::StmtClass)ZigClangStmt_UnresolvedMemberExprClass == static_assert((clang::Stmt::StmtClass)ZigClangStmt_CXXNoexceptExprClass == clang::Stmt::CXXNoexceptExprClass, ""); static_assert((clang::Stmt::StmtClass)ZigClangStmt_PackExpansionExprClass == clang::Stmt::PackExpansionExprClass, ""); static_assert((clang::Stmt::StmtClass)ZigClangStmt_SizeOfPackExprClass == clang::Stmt::SizeOfPackExprClass, ""); 
+static_assert((clang::Stmt::StmtClass)ZigClangStmt_SourceLocExprClass == clang::Stmt::SourceLocExprClass, ""); static_assert((clang::Stmt::StmtClass)ZigClangStmt_SubstNonTypeTemplateParmExprClass == clang::Stmt::SubstNonTypeTemplateParmExprClass, ""); static_assert((clang::Stmt::StmtClass)ZigClangStmt_SubstNonTypeTemplateParmPackExprClass == clang::Stmt::SubstNonTypeTemplateParmPackExprClass, ""); static_assert((clang::Stmt::StmtClass)ZigClangStmt_FunctionParmPackExprClass == clang::Stmt::FunctionParmPackExprClass, ""); @@ -770,9 +782,11 @@ static_assert((clang::Stmt::StmtClass)ZigClangStmt_OMPTargetTeamsDistributeSimdD void ZigClang_detect_enum_APValueKind(clang::APValue::ValueKind x) { switch (x) { - case clang::APValue::Uninitialized: + case clang::APValue::None: + case clang::APValue::Indeterminate: case clang::APValue::Int: case clang::APValue::Float: + case clang::APValue::FixedPoint: case clang::APValue::ComplexInt: case clang::APValue::ComplexFloat: case clang::APValue::LValue: @@ -786,9 +800,11 @@ void ZigClang_detect_enum_APValueKind(clang::APValue::ValueKind x) { } } -static_assert((clang::APValue::ValueKind)ZigClangAPValueUninitialized == clang::APValue::Uninitialized, ""); +static_assert((clang::APValue::ValueKind)ZigClangAPValueNone == clang::APValue::None, ""); +static_assert((clang::APValue::ValueKind)ZigClangAPValueIndeterminate == clang::APValue::Indeterminate, ""); static_assert((clang::APValue::ValueKind)ZigClangAPValueInt == clang::APValue::Int, ""); static_assert((clang::APValue::ValueKind)ZigClangAPValueFloat == clang::APValue::Float, ""); +static_assert((clang::APValue::ValueKind)ZigClangAPValueFixedPoint == clang::APValue::FixedPoint, ""); static_assert((clang::APValue::ValueKind)ZigClangAPValueComplexInt == clang::APValue::ComplexInt, ""); static_assert((clang::APValue::ValueKind)ZigClangAPValueComplexFloat == clang::APValue::ComplexFloat, ""); static_assert((clang::APValue::ValueKind)ZigClangAPValueLValue == clang::APValue::LValue, ""); @@ -826,6 +842,7 @@ void ZigClang_detect_enum_DeclKind(clang::Decl::Kind x) { case clang::Decl::ObjCMethod: case clang::Decl::ObjCProperty: case clang::Decl::BuiltinTemplate: + case clang::Decl::Concept: case clang::Decl::ClassTemplate: case clang::Decl::FunctionTemplate: case clang::Decl::TypeAliasTemplate: @@ -867,8 +884,10 @@ void ZigClang_detect_enum_DeclKind(clang::Decl::Kind x) { case clang::Decl::VarTemplatePartialSpecialization: case clang::Decl::EnumConstant: case clang::Decl::IndirectField: + case clang::Decl::OMPDeclareMapper: case clang::Decl::OMPDeclareReduction: case clang::Decl::UnresolvedUsingValue: + case clang::Decl::OMPAllocate: case clang::Decl::OMPRequires: case clang::Decl::OMPThreadPrivate: case clang::Decl::ObjCPropertyImpl: @@ -904,6 +923,7 @@ static_assert((clang::Decl::Kind)ZigClangDeclObjCProtocol == clang::Decl::ObjCPr static_assert((clang::Decl::Kind)ZigClangDeclObjCMethod == clang::Decl::ObjCMethod, ""); static_assert((clang::Decl::Kind)ZigClangDeclObjCProperty == clang::Decl::ObjCProperty, ""); static_assert((clang::Decl::Kind)ZigClangDeclBuiltinTemplate == clang::Decl::BuiltinTemplate, ""); +static_assert((clang::Decl::Kind)ZigClangDeclConcept == clang::Decl::Concept, ""); static_assert((clang::Decl::Kind)ZigClangDeclClassTemplate == clang::Decl::ClassTemplate, ""); static_assert((clang::Decl::Kind)ZigClangDeclFunctionTemplate == clang::Decl::FunctionTemplate, ""); static_assert((clang::Decl::Kind)ZigClangDeclTypeAliasTemplate == clang::Decl::TypeAliasTemplate, ""); @@ -919,6 +939,7 @@ 
static_assert((clang::Decl::Kind)ZigClangDeclObjCTypeParam == clang::Decl::ObjCT static_assert((clang::Decl::Kind)ZigClangDeclTypeAlias == clang::Decl::TypeAlias, ""); static_assert((clang::Decl::Kind)ZigClangDeclTypedef == clang::Decl::Typedef, ""); static_assert((clang::Decl::Kind)ZigClangDeclUnresolvedUsingTypename == clang::Decl::UnresolvedUsingTypename, ""); +static_assert((clang::Decl::Kind)ZigClangDeclOMPAllocate == clang::Decl::OMPAllocate, ""); static_assert((clang::Decl::Kind)ZigClangDeclUsing == clang::Decl::Using, ""); static_assert((clang::Decl::Kind)ZigClangDeclUsingDirective == clang::Decl::UsingDirective, ""); static_assert((clang::Decl::Kind)ZigClangDeclUsingPack == clang::Decl::UsingPack, ""); @@ -945,6 +966,7 @@ static_assert((clang::Decl::Kind)ZigClangDeclVarTemplateSpecialization == clang: static_assert((clang::Decl::Kind)ZigClangDeclVarTemplatePartialSpecialization == clang::Decl::VarTemplatePartialSpecialization, ""); static_assert((clang::Decl::Kind)ZigClangDeclEnumConstant == clang::Decl::EnumConstant, ""); static_assert((clang::Decl::Kind)ZigClangDeclIndirectField == clang::Decl::IndirectField, ""); +static_assert((clang::Decl::Kind)ZigClangDeclOMPDeclareMapper == clang::Decl::OMPDeclareMapper, ""); static_assert((clang::Decl::Kind)ZigClangDeclOMPDeclareReduction == clang::Decl::OMPDeclareReduction, ""); static_assert((clang::Decl::Kind)ZigClangDeclUnresolvedUsingValue == clang::Decl::UnresolvedUsingValue, ""); static_assert((clang::Decl::Kind)ZigClangDeclOMPRequires == clang::Decl::OMPRequires, ""); @@ -1288,19 +1310,6 @@ static_assert((clang::StringLiteral::StringKind)ZigClangStringLiteral_StringKind static_assert((clang::StringLiteral::StringKind)ZigClangStringLiteral_StringKind_UTF16 == clang::StringLiteral::UTF16, ""); static_assert((clang::StringLiteral::StringKind)ZigClangStringLiteral_StringKind_UTF32 == clang::StringLiteral::UTF32, ""); -static_assert((clang::APValue::ValueKind)ZigClangAPValue_ValueKind_Uninitialized == clang::APValue::ValueKind::Uninitialized, ""); -static_assert((clang::APValue::ValueKind)ZigClangAPValue_ValueKind_Int == clang::APValue::ValueKind::Int, ""); -static_assert((clang::APValue::ValueKind)ZigClangAPValue_ValueKind_Float == clang::APValue::ValueKind::Float, ""); -static_assert((clang::APValue::ValueKind)ZigClangAPValue_ValueKind_ComplexInt == clang::APValue::ValueKind::ComplexInt, ""); -static_assert((clang::APValue::ValueKind)ZigClangAPValue_ValueKind_ComplexFloat == clang::APValue::ValueKind::ComplexFloat, ""); -static_assert((clang::APValue::ValueKind)ZigClangAPValue_ValueKind_LValue == clang::APValue::ValueKind::LValue, ""); -static_assert((clang::APValue::ValueKind)ZigClangAPValue_ValueKind_Vector == clang::APValue::ValueKind::Vector, ""); -static_assert((clang::APValue::ValueKind)ZigClangAPValue_ValueKind_Array == clang::APValue::ValueKind::Array, ""); -static_assert((clang::APValue::ValueKind)ZigClangAPValue_ValueKind_Struct == clang::APValue::ValueKind::Struct, ""); -static_assert((clang::APValue::ValueKind)ZigClangAPValue_ValueKind_Union == clang::APValue::ValueKind::Union, ""); -static_assert((clang::APValue::ValueKind)ZigClangAPValue_ValueKind_MemberPointer == clang::APValue::ValueKind::MemberPointer, ""); -static_assert((clang::APValue::ValueKind)ZigClangAPValue_ValueKind_AddrLabelDiff == clang::APValue::ValueKind::AddrLabelDiff, ""); - static_assert(sizeof(ZigClangAPValue) == sizeof(clang::APValue), ""); static_assert(sizeof(ZigClangSourceLocation) == sizeof(clang::SourceLocation), ""); @@ -1744,7 +1753,6 @@ 
ZigClangASTUnit *ZigClangLoadFromCommandLine(const char **args_begin, const char std::shared_ptr pch_container_ops = std::make_shared(); bool only_local_decls = true; - bool capture_diagnostics = true; bool user_files_are_volatile = true; bool allow_pch_with_compiler_errors = false; bool single_file_parse = false; @@ -1753,7 +1761,7 @@ ZigClangASTUnit *ZigClangLoadFromCommandLine(const char **args_begin, const char clang::ASTUnit *ast_unit = clang::ASTUnit::LoadFromCommandLine( args_begin, args_end, pch_container_ops, diags, resources_path, - only_local_decls, capture_diagnostics, clang::None, true, 0, clang::TU_Complete, + only_local_decls, clang::CaptureDiagsKind::All, clang::None, true, 0, clang::TU_Complete, false, false, allow_pch_with_compiler_errors, clang::SkipFunctionBodiesScope::None, single_file_parse, user_files_are_volatile, for_serialization, clang::None, err_unit, nullptr); diff --git a/src/zig_clang.h b/src/zig_clang.h index 9d03b592fd..3891b0fac6 100644 --- a/src/zig_clang.h +++ b/src/zig_clang.h @@ -30,23 +30,25 @@ struct ZigClangAPValueLValueBase { unsigned Version; }; -enum ZigClangAPValue_ValueKind { - ZigClangAPValue_ValueKind_Uninitialized, - ZigClangAPValue_ValueKind_Int, - ZigClangAPValue_ValueKind_Float, - ZigClangAPValue_ValueKind_ComplexInt, - ZigClangAPValue_ValueKind_ComplexFloat, - ZigClangAPValue_ValueKind_LValue, - ZigClangAPValue_ValueKind_Vector, - ZigClangAPValue_ValueKind_Array, - ZigClangAPValue_ValueKind_Struct, - ZigClangAPValue_ValueKind_Union, - ZigClangAPValue_ValueKind_MemberPointer, - ZigClangAPValue_ValueKind_AddrLabelDiff +enum ZigClangAPValueKind { + ZigClangAPValueNone, + ZigClangAPValueIndeterminate, + ZigClangAPValueInt, + ZigClangAPValueFloat, + ZigClangAPValueFixedPoint, + ZigClangAPValueComplexInt, + ZigClangAPValueComplexFloat, + ZigClangAPValueLValue, + ZigClangAPValueVector, + ZigClangAPValueArray, + ZigClangAPValueStruct, + ZigClangAPValueUnion, + ZigClangAPValueMemberPointer, + ZigClangAPValueAddrLabelDiff, }; struct ZigClangAPValue { - enum ZigClangAPValue_ValueKind Kind; + enum ZigClangAPValueKind Kind; // experimentally-derived size of clang::APValue::DataType #if defined(WIN32) && defined(_MSC_VER) char Data[52]; @@ -214,6 +216,7 @@ enum ZigClangTypeClass { ZigClangType_UnresolvedUsing, ZigClangType_Paren, ZigClangType_Typedef, + ZigClangType_MacroQualified, ZigClangType_Adjusted, ZigClangType_Decayed, ZigClangType_TypeOfExpr, @@ -243,10 +246,9 @@ enum ZigClangTypeClass { }; enum ZigClangStmtClass { - ZigClangStmt_NoStmtClass = 0, + ZigClangStmt_NoStmtClass, ZigClangStmt_GCCAsmStmtClass, ZigClangStmt_MSAsmStmtClass, - ZigClangStmt_AttributedStmtClass, ZigClangStmt_BreakStmtClass, ZigClangStmt_CXXCatchStmtClass, ZigClangStmt_CXXForRangeStmtClass, @@ -258,6 +260,75 @@ enum ZigClangStmtClass { ZigClangStmt_CoroutineBodyStmtClass, ZigClangStmt_DeclStmtClass, ZigClangStmt_DoStmtClass, + ZigClangStmt_ForStmtClass, + ZigClangStmt_GotoStmtClass, + ZigClangStmt_IfStmtClass, + ZigClangStmt_IndirectGotoStmtClass, + ZigClangStmt_MSDependentExistsStmtClass, + ZigClangStmt_NullStmtClass, + ZigClangStmt_OMPAtomicDirectiveClass, + ZigClangStmt_OMPBarrierDirectiveClass, + ZigClangStmt_OMPCancelDirectiveClass, + ZigClangStmt_OMPCancellationPointDirectiveClass, + ZigClangStmt_OMPCriticalDirectiveClass, + ZigClangStmt_OMPFlushDirectiveClass, + ZigClangStmt_OMPDistributeDirectiveClass, + ZigClangStmt_OMPDistributeParallelForDirectiveClass, + ZigClangStmt_OMPDistributeParallelForSimdDirectiveClass, + ZigClangStmt_OMPDistributeSimdDirectiveClass, + 
ZigClangStmt_OMPForDirectiveClass, + ZigClangStmt_OMPForSimdDirectiveClass, + ZigClangStmt_OMPParallelForDirectiveClass, + ZigClangStmt_OMPParallelForSimdDirectiveClass, + ZigClangStmt_OMPSimdDirectiveClass, + ZigClangStmt_OMPTargetParallelForSimdDirectiveClass, + ZigClangStmt_OMPTargetSimdDirectiveClass, + ZigClangStmt_OMPTargetTeamsDistributeDirectiveClass, + ZigClangStmt_OMPTargetTeamsDistributeParallelForDirectiveClass, + ZigClangStmt_OMPTargetTeamsDistributeParallelForSimdDirectiveClass, + ZigClangStmt_OMPTargetTeamsDistributeSimdDirectiveClass, + ZigClangStmt_OMPTaskLoopDirectiveClass, + ZigClangStmt_OMPTaskLoopSimdDirectiveClass, + ZigClangStmt_OMPTeamsDistributeDirectiveClass, + ZigClangStmt_OMPTeamsDistributeParallelForDirectiveClass, + ZigClangStmt_OMPTeamsDistributeParallelForSimdDirectiveClass, + ZigClangStmt_OMPTeamsDistributeSimdDirectiveClass, + ZigClangStmt_OMPMasterDirectiveClass, + ZigClangStmt_OMPOrderedDirectiveClass, + ZigClangStmt_OMPParallelDirectiveClass, + ZigClangStmt_OMPParallelSectionsDirectiveClass, + ZigClangStmt_OMPSectionDirectiveClass, + ZigClangStmt_OMPSectionsDirectiveClass, + ZigClangStmt_OMPSingleDirectiveClass, + ZigClangStmt_OMPTargetDataDirectiveClass, + ZigClangStmt_OMPTargetDirectiveClass, + ZigClangStmt_OMPTargetEnterDataDirectiveClass, + ZigClangStmt_OMPTargetExitDataDirectiveClass, + ZigClangStmt_OMPTargetParallelDirectiveClass, + ZigClangStmt_OMPTargetParallelForDirectiveClass, + ZigClangStmt_OMPTargetTeamsDirectiveClass, + ZigClangStmt_OMPTargetUpdateDirectiveClass, + ZigClangStmt_OMPTaskDirectiveClass, + ZigClangStmt_OMPTaskgroupDirectiveClass, + ZigClangStmt_OMPTaskwaitDirectiveClass, + ZigClangStmt_OMPTaskyieldDirectiveClass, + ZigClangStmt_OMPTeamsDirectiveClass, + ZigClangStmt_ObjCAtCatchStmtClass, + ZigClangStmt_ObjCAtFinallyStmtClass, + ZigClangStmt_ObjCAtSynchronizedStmtClass, + ZigClangStmt_ObjCAtThrowStmtClass, + ZigClangStmt_ObjCAtTryStmtClass, + ZigClangStmt_ObjCAutoreleasePoolStmtClass, + ZigClangStmt_ObjCForCollectionStmtClass, + ZigClangStmt_ReturnStmtClass, + ZigClangStmt_SEHExceptStmtClass, + ZigClangStmt_SEHFinallyStmtClass, + ZigClangStmt_SEHLeaveStmtClass, + ZigClangStmt_SEHTryStmtClass, + ZigClangStmt_CaseStmtClass, + ZigClangStmt_DefaultStmtClass, + ZigClangStmt_SwitchStmtClass, + ZigClangStmt_AttributedStmtClass, ZigClangStmt_BinaryConditionalOperatorClass, ZigClangStmt_ConditionalOperatorClass, ZigClangStmt_AddrLabelExprClass, @@ -296,6 +367,7 @@ enum ZigClangStmtClass { ZigClangStmt_CXXMemberCallExprClass, ZigClangStmt_CXXOperatorCallExprClass, ZigClangStmt_UserDefinedLiteralClass, + ZigClangStmt_BuiltinBitCastExprClass, ZigClangStmt_CStyleCastExprClass, ZigClangStmt_CXXFunctionalCastExprClass, ZigClangStmt_CXXConstCastExprClass, @@ -361,6 +433,7 @@ enum ZigClangStmtClass { ZigClangStmt_PseudoObjectExprClass, ZigClangStmt_ShuffleVectorExprClass, ZigClangStmt_SizeOfPackExprClass, + ZigClangStmt_SourceLocExprClass, ZigClangStmt_StmtExprClass, ZigClangStmt_StringLiteralClass, ZigClangStmt_SubstNonTypeTemplateParmExprClass, @@ -370,75 +443,7 @@ enum ZigClangStmtClass { ZigClangStmt_UnaryExprOrTypeTraitExprClass, ZigClangStmt_UnaryOperatorClass, ZigClangStmt_VAArgExprClass, - ZigClangStmt_ForStmtClass, - ZigClangStmt_GotoStmtClass, - ZigClangStmt_IfStmtClass, - ZigClangStmt_IndirectGotoStmtClass, ZigClangStmt_LabelStmtClass, - ZigClangStmt_MSDependentExistsStmtClass, - ZigClangStmt_NullStmtClass, - ZigClangStmt_OMPAtomicDirectiveClass, - ZigClangStmt_OMPBarrierDirectiveClass, - ZigClangStmt_OMPCancelDirectiveClass, - 
ZigClangStmt_OMPCancellationPointDirectiveClass, - ZigClangStmt_OMPCriticalDirectiveClass, - ZigClangStmt_OMPFlushDirectiveClass, - ZigClangStmt_OMPDistributeDirectiveClass, - ZigClangStmt_OMPDistributeParallelForDirectiveClass, - ZigClangStmt_OMPDistributeParallelForSimdDirectiveClass, - ZigClangStmt_OMPDistributeSimdDirectiveClass, - ZigClangStmt_OMPForDirectiveClass, - ZigClangStmt_OMPForSimdDirectiveClass, - ZigClangStmt_OMPParallelForDirectiveClass, - ZigClangStmt_OMPParallelForSimdDirectiveClass, - ZigClangStmt_OMPSimdDirectiveClass, - ZigClangStmt_OMPTargetParallelForSimdDirectiveClass, - ZigClangStmt_OMPTargetSimdDirectiveClass, - ZigClangStmt_OMPTargetTeamsDistributeDirectiveClass, - ZigClangStmt_OMPTargetTeamsDistributeParallelForDirectiveClass, - ZigClangStmt_OMPTargetTeamsDistributeParallelForSimdDirectiveClass, - ZigClangStmt_OMPTargetTeamsDistributeSimdDirectiveClass, - ZigClangStmt_OMPTaskLoopDirectiveClass, - ZigClangStmt_OMPTaskLoopSimdDirectiveClass, - ZigClangStmt_OMPTeamsDistributeDirectiveClass, - ZigClangStmt_OMPTeamsDistributeParallelForDirectiveClass, - ZigClangStmt_OMPTeamsDistributeParallelForSimdDirectiveClass, - ZigClangStmt_OMPTeamsDistributeSimdDirectiveClass, - ZigClangStmt_OMPMasterDirectiveClass, - ZigClangStmt_OMPOrderedDirectiveClass, - ZigClangStmt_OMPParallelDirectiveClass, - ZigClangStmt_OMPParallelSectionsDirectiveClass, - ZigClangStmt_OMPSectionDirectiveClass, - ZigClangStmt_OMPSectionsDirectiveClass, - ZigClangStmt_OMPSingleDirectiveClass, - ZigClangStmt_OMPTargetDataDirectiveClass, - ZigClangStmt_OMPTargetDirectiveClass, - ZigClangStmt_OMPTargetEnterDataDirectiveClass, - ZigClangStmt_OMPTargetExitDataDirectiveClass, - ZigClangStmt_OMPTargetParallelDirectiveClass, - ZigClangStmt_OMPTargetParallelForDirectiveClass, - ZigClangStmt_OMPTargetTeamsDirectiveClass, - ZigClangStmt_OMPTargetUpdateDirectiveClass, - ZigClangStmt_OMPTaskDirectiveClass, - ZigClangStmt_OMPTaskgroupDirectiveClass, - ZigClangStmt_OMPTaskwaitDirectiveClass, - ZigClangStmt_OMPTaskyieldDirectiveClass, - ZigClangStmt_OMPTeamsDirectiveClass, - ZigClangStmt_ObjCAtCatchStmtClass, - ZigClangStmt_ObjCAtFinallyStmtClass, - ZigClangStmt_ObjCAtSynchronizedStmtClass, - ZigClangStmt_ObjCAtThrowStmtClass, - ZigClangStmt_ObjCAtTryStmtClass, - ZigClangStmt_ObjCAutoreleasePoolStmtClass, - ZigClangStmt_ObjCForCollectionStmtClass, - ZigClangStmt_ReturnStmtClass, - ZigClangStmt_SEHExceptStmtClass, - ZigClangStmt_SEHFinallyStmtClass, - ZigClangStmt_SEHLeaveStmtClass, - ZigClangStmt_SEHTryStmtClass, - ZigClangStmt_CaseStmtClass, - ZigClangStmt_DefaultStmtClass, - ZigClangStmt_SwitchStmtClass, ZigClangStmt_WhileStmtClass, }; @@ -446,6 +451,7 @@ enum ZigClangCK { ZigClangCK_Dependent, ZigClangCK_BitCast, ZigClangCK_LValueBitCast, + ZigClangCK_LValueToRValueBitCast, ZigClangCK_LValueToRValue, ZigClangCK_NoOp, ZigClangCK_BaseToDerived, @@ -472,6 +478,8 @@ enum ZigClangCK { ZigClangCK_IntegralToBoolean, ZigClangCK_IntegralToFloating, ZigClangCK_FixedPointCast, + ZigClangCK_FixedPointToIntegral, + ZigClangCK_IntegralToFixedPoint, ZigClangCK_FixedPointToBoolean, ZigClangCK_FloatingToIntegral, ZigClangCK_FloatingToBoolean, @@ -504,21 +512,6 @@ enum ZigClangCK { ZigClangCK_IntToOCLSampler, }; -enum ZigClangAPValueKind { - ZigClangAPValueUninitialized, - ZigClangAPValueInt, - ZigClangAPValueFloat, - ZigClangAPValueComplexInt, - ZigClangAPValueComplexFloat, - ZigClangAPValueLValue, - ZigClangAPValueVector, - ZigClangAPValueArray, - ZigClangAPValueStruct, - ZigClangAPValueUnion, - ZigClangAPValueMemberPointer, - 
ZigClangAPValueAddrLabelDiff, -}; - enum ZigClangDeclKind { ZigClangDeclAccessSpec, ZigClangDeclBlock, @@ -544,6 +537,7 @@ enum ZigClangDeclKind { ZigClangDeclObjCMethod, ZigClangDeclObjCProperty, ZigClangDeclBuiltinTemplate, + ZigClangDeclConcept, ZigClangDeclClassTemplate, ZigClangDeclFunctionTemplate, ZigClangDeclTypeAliasTemplate, @@ -585,8 +579,10 @@ enum ZigClangDeclKind { ZigClangDeclVarTemplatePartialSpecialization, ZigClangDeclEnumConstant, ZigClangDeclIndirectField, + ZigClangDeclOMPDeclareMapper, ZigClangDeclOMPDeclareReduction, ZigClangDeclUnresolvedUsingValue, + ZigClangDeclOMPAllocate, ZigClangDeclOMPRequires, ZigClangDeclOMPThreadPrivate, ZigClangDeclObjCPropertyImpl, From aaa4bf75d386058d6aca8c4db7244c51c27ce386 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Wed, 17 Jul 2019 18:11:03 -0400 Subject: [PATCH 03/31] msvc libc: provide _fltused symbol --- std/special/c.zig | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/std/special/c.zig b/std/special/c.zig index 7300b8bf74..150127196b 100644 --- a/std/special/c.zig +++ b/std/special/c.zig @@ -12,6 +12,10 @@ const is_wasm = switch (builtin.arch) { .wasm32, .wasm64 => true, else => false, }; +const is_msvc = switch (builtin.abi) { + .msvc => true, + else => false, +}; const is_freestanding = switch (builtin.os) { .freestanding => true, else => false, @@ -25,9 +29,13 @@ comptime { @export("strncmp", strncmp, .Strong); @export("strerror", strerror, .Strong); @export("strlen", strlen, .Strong); + } else if (is_msvc) { + @export("_fltused", _fltused, .Strong); } } +extern var _fltused: c_int = 1; + extern fn main(argc: c_int, argv: [*][*]u8) c_int; extern fn wasm_start() void { _ = main(0, undefined); From 04ce5376a8ba030249765779d5dcda0770445979 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 18 Jul 2019 11:43:39 -0400 Subject: [PATCH 04/31] carry some upstream patches to musl to fix riscv inline asm Upstream commits: * 8eb49e0485fc547eead9e47200bbee6d81f391c1 * 2dcbeabd917e404a0dde0195388da401b849b9a4 * f0eb2e77b2132a88e2f00d8e06ffa7638c40b4bc These will be in the next version of musl, so no harm carrying them here. 
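
The gist of the carried fixes, shown in the diff below: the RISC-V LR/SC asm blocks gain an explicit __volatile__ qualifier and a "memory" clobber, and the target pointer is passed in a general register rather than through the "+A" address constraint. As a rough illustration only (this is not the musl code itself; it assumes a riscv64 target with the A extension, and the function name cas_sketch is made up), a compare-and-swap written in that style looks like:

    /* Hypothetical sketch of the inline-asm style the carried musl
     * commits switch to. __volatile__ keeps the compiler from dropping
     * or reordering the asm, and the "memory" clobber stops it from
     * caching *p across the LR/SC loop. riscv64 with the A extension only. */
    static inline int cas_sketch(volatile int *p, int expect, int desired)
    {
        int old, tmp;
        __asm__ __volatile__ (
            "1: lr.w.aqrl %0, (%2)\n"     /* old = *p (load-reserved)   */
            "   bne  %0, %3, 2f\n"        /* give up if it changed      */
            "   sc.w.aqrl %1, %4, (%2)\n" /* try to store desired       */
            "   bnez %1, 1b\n"            /* reservation lost: retry    */
            "2:"
            : "=&r"(old), "=&r"(tmp)
            : "r"(p), "r"(expect), "r"(desired)
            : "memory");
        return old;                       /* value seen before the swap */
    }
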
--- lib/libc/musl/arch/riscv64/atomic_arch.h | 20 ++++++++++++-------- lib/libc/musl/arch/riscv64/syscall_arch.h | 2 +- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/lib/libc/musl/arch/riscv64/atomic_arch.h b/lib/libc/musl/arch/riscv64/atomic_arch.h index 98f12fc7cb..c976534284 100644 --- a/lib/libc/musl/arch/riscv64/atomic_arch.h +++ b/lib/libc/musl/arch/riscv64/atomic_arch.h @@ -8,13 +8,15 @@ static inline void a_barrier() static inline int a_cas(volatile int *p, int t, int s) { int old, tmp; - __asm__("\n1: lr.w.aqrl %0, %2\n" + __asm__ __volatile__ ( + "\n1: lr.w.aqrl %0, (%2)\n" " bne %0, %3, 1f\n" - " sc.w.aqrl %1, %4, %2\n" + " sc.w.aqrl %1, %4, (%2)\n" " bnez %1, 1b\n" "1:" - : "=&r"(old), "+r"(tmp), "+A"(*p) - : "r"(t), "r"(s)); + : "=&r"(old), "=r"(tmp) + : "r"(p), "r"(t), "r"(s) + : "memory"); return old; } @@ -23,12 +25,14 @@ static inline void *a_cas_p(volatile void *p, void *t, void *s) { void *old; int tmp; - __asm__("\n1: lr.d.aqrl %0, %2\n" + __asm__ __volatile__ ( + "\n1: lr.d.aqrl %0, (%2)\n" " bne %0, %3, 1f\n" - " sc.d.aqrl %1, %4, %2\n" + " sc.d.aqrl %1, %4, (%2)\n" " bnez %1, 1b\n" "1:" - : "=&r"(old), "+r"(tmp), "+A"(*(long *)p) - : "r"(t), "r"(s)); + : "=&r"(old), "=r"(tmp) + : "r"(p), "r"(t), "r"(s) + : "memory"); return old; } diff --git a/lib/libc/musl/arch/riscv64/syscall_arch.h b/lib/libc/musl/arch/riscv64/syscall_arch.h index 1aaeb63138..3e0804efe3 100644 --- a/lib/libc/musl/arch/riscv64/syscall_arch.h +++ b/lib/libc/musl/arch/riscv64/syscall_arch.h @@ -3,7 +3,7 @@ #define __asm_syscall(...) \ __asm__ __volatile__ ("ecall\n\t" \ - : "+r"(a0) : __VA_ARGS__ : "memory"); \ + : "=r"(a0) : __VA_ARGS__ : "memory"); \ return a0; \ static inline long __syscall0(long n) From d4ca337e6b8d0c268ebce5c4b5fbe9015f3d8ab6 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 18 Jul 2019 11:45:37 -0400 Subject: [PATCH 05/31] improvements to riscv support --- src/codegen.cpp | 18 +++++++++++++++++- src/link.cpp | 6 ++++-- src/target.cpp | 19 ++++++++++++++++++- src/target.hpp | 2 ++ 4 files changed, 41 insertions(+), 4 deletions(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index 188c5ccc8d..7136429615 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -8390,13 +8390,13 @@ void add_cc_args(CodeGen *g, ZigList &args, const char *out_dep_pa } } + // According to Rich Felker libc headers are supposed to go before C language headers. for (size_t i = 0; i < g->libc_include_dir_len; i += 1) { Buf *include_dir = g->libc_include_dir_list[i]; args.append("-isystem"); args.append(buf_ptr(include_dir)); } - // According to Rich Felker libc headers are supposed to go before C language headers. args.append("-isystem"); args.append(buf_ptr(g->zig_c_headers_dir)); @@ -8405,6 +8405,21 @@ void add_cc_args(CodeGen *g, ZigList &args, const char *out_dep_pa } else { args.append("-target"); args.append(buf_ptr(&g->llvm_triple_str)); + + if (target_is_musl(g->zig_target) && target_is_riscv(g->zig_target)) { + // Musl depends on atomic instructions, which are disabled by default in Clang/LLVM's + // cross compilation CPU info for RISCV. 
+ switch (g->zig_target->arch) { + case ZigLLVM_riscv32: + args.append("-march=rv32ia"); + break; + case ZigLLVM_riscv64: + args.append("-march=rv64ia"); + break; + default: + zig_unreachable(); + } + } } if (g->zig_target->os == OsFreestanding) { args.append("-ffreestanding"); @@ -8474,6 +8489,7 @@ void add_cc_args(CodeGen *g, ZigList &args, const char *out_dep_pa for (size_t arg_i = 0; arg_i < g->clang_argv_len; arg_i += 1) { args.append(g->clang_argv[arg_i]); } + } void codegen_translate_c(CodeGen *g, Buf *full_path, FILE *out_file, bool use_userland_implementation) { diff --git a/src/link.cpp b/src/link.cpp index 88267f6cac..a7cb9ad26f 100644 --- a/src/link.cpp +++ b/src/link.cpp @@ -1658,7 +1658,9 @@ static void construct_linker_job_elf(LinkJob *lj) { crt1o = "Scrt1.o"; } lj->args.append(get_libc_crt_file(g, crt1o)); - lj->args.append(get_libc_crt_file(g, "crti.o")); + if (target_libc_needs_crti_crtn(g->zig_target)) { + lj->args.append(get_libc_crt_file(g, "crti.o")); + } } for (size_t i = 0; i < g->rpath_list.length; i += 1) { @@ -1791,7 +1793,7 @@ static void construct_linker_job_elf(LinkJob *lj) { } // crt end - if (lj->link_in_crt) { + if (lj->link_in_crt && target_libc_needs_crti_crtn(g->zig_target)) { lj->args.append(get_libc_crt_file(g, "crtn.o")); } diff --git a/src/target.cpp b/src/target.cpp index fd5deeea29..05eb3c662f 100644 --- a/src/target.cpp +++ b/src/target.cpp @@ -1739,8 +1739,25 @@ const char *target_arch_musl_name(ZigLLVM_ArchType arch) { } bool target_supports_libunwind(const ZigTarget *target) { - if (target->arch == ZigLLVM_arm || target->arch == ZigLLVM_armeb) { + switch (target->arch) { + case ZigLLVM_arm: + case ZigLLVM_armeb: + case ZigLLVM_riscv32: + case ZigLLVM_riscv64: + return false; + default: + return true; + } + return true; +} + +bool target_libc_needs_crti_crtn(const ZigTarget *target) { + if (target->arch == ZigLLVM_riscv32 || target->arch == ZigLLVM_riscv64) { return false; } return true; } + +bool target_is_riscv(const ZigTarget *target) { + return target->arch == ZigLLVM_riscv32 || target->arch == ZigLLVM_riscv64; +} diff --git a/src/target.hpp b/src/target.hpp index 85768347a5..a8c0c890eb 100644 --- a/src/target.hpp +++ b/src/target.hpp @@ -186,6 +186,7 @@ bool target_abi_is_musl(ZigLLVM_EnvironmentType abi); bool target_is_glibc(const ZigTarget *target); bool target_is_musl(const ZigTarget *target); bool target_is_wasm(const ZigTarget *target); +bool target_is_riscv(const ZigTarget *target); bool target_is_android(const ZigTarget *target); bool target_is_single_threaded(const ZigTarget *target); bool target_supports_stack_probing(const ZigTarget *target); @@ -197,5 +198,6 @@ uint32_t target_arch_pointer_bit_width(ZigLLVM_ArchType arch); size_t target_libc_count(void); void target_libc_enum(size_t index, ZigTarget *out_target); +bool target_libc_needs_crti_crtn(const ZigTarget *target); #endif From afbf99c84671992acb4129a6a7bc70d604c4d00d Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 18 Jul 2019 12:28:24 -0400 Subject: [PATCH 06/31] riscv musl: only add the +a feature --- src/codegen.cpp | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index 7136429615..2c5f2c96e5 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -8409,16 +8409,11 @@ void add_cc_args(CodeGen *g, ZigList &args, const char *out_dep_pa if (target_is_musl(g->zig_target) && target_is_riscv(g->zig_target)) { // Musl depends on atomic instructions, which are disabled by default in Clang/LLVM's // cross 
compilation CPU info for RISCV. - switch (g->zig_target->arch) { - case ZigLLVM_riscv32: - args.append("-march=rv32ia"); - break; - case ZigLLVM_riscv64: - args.append("-march=rv64ia"); - break; - default: - zig_unreachable(); - } + // TODO: https://github.com/ziglang/zig/issues/2883 + args.append("-Xclang"); + args.append("-target-feature"); + args.append("-Xclang"); + args.append("+a"); } } if (g->zig_target->os == OsFreestanding) { From 07f0de6a8a9a152cfbd76e458d99d992b59e9e06 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 18 Jul 2019 13:54:22 -0400 Subject: [PATCH 07/31] compiler-rt: add __muldi3 --- build.zig | 9 +++-- std/special/compiler_rt.zig | 3 ++ std/special/compiler_rt/divti3.zig | 1 - std/special/compiler_rt/muldi3.zig | 54 +++++++++++++++++++++++++ std/special/compiler_rt/muldi3_test.zig | 51 +++++++++++++++++++++++ test/tests.zig | 3 +- 6 files changed, 116 insertions(+), 5 deletions(-) create mode 100644 std/special/compiler_rt/muldi3.zig create mode 100644 std/special/compiler_rt/muldi3_test.zig diff --git a/build.zig b/build.zig index 07041d026f..011f742d82 100644 --- a/build.zig +++ b/build.zig @@ -122,16 +122,19 @@ pub fn build(b: *Builder) !void { } const modes = chosen_modes[0..chosen_mode_index]; + const multi_and_single = [_]bool{ false, true }; + const just_multi = [_]bool{false}; + // run stage1 `zig fmt` on this build.zig file just to make sure it works test_step.dependOn(&fmt_build_zig.step); const fmt_step = b.step("test-fmt", "Run zig fmt against build.zig to make sure it works"); fmt_step.dependOn(&fmt_build_zig.step); - test_step.dependOn(tests.addPkgTests(b, test_filter, "test/stage1/behavior.zig", "behavior", "Run the behavior tests", modes, skip_non_native)); + test_step.dependOn(tests.addPkgTests(b, test_filter, "test/stage1/behavior.zig", "behavior", "Run the behavior tests", modes, multi_and_single, skip_non_native)); - test_step.dependOn(tests.addPkgTests(b, test_filter, "std/std.zig", "std", "Run the standard library tests", modes, skip_non_native)); + test_step.dependOn(tests.addPkgTests(b, test_filter, "std/std.zig", "std", "Run the standard library tests", modes, multi_and_single, skip_non_native)); - test_step.dependOn(tests.addPkgTests(b, test_filter, "std/special/compiler_rt.zig", "compiler-rt", "Run the compiler_rt tests", modes, skip_non_native)); + test_step.dependOn(tests.addPkgTests(b, test_filter, "std/special/compiler_rt.zig", "compiler-rt", "Run the compiler_rt tests", modes, just_multi, skip_non_native)); test_step.dependOn(tests.addCompareOutputTests(b, test_filter, modes)); test_step.dependOn(tests.addStandaloneTests(b, test_filter, modes)); diff --git a/std/special/compiler_rt.zig b/std/special/compiler_rt.zig index 5064e9db29..a5bbefa1db 100644 --- a/std/special/compiler_rt.zig +++ b/std/special/compiler_rt.zig @@ -127,6 +127,7 @@ comptime { @export("__udivmoddi4", @import("compiler_rt/udivmoddi4.zig").__udivmoddi4, linkage); @export("__popcountdi2", @import("compiler_rt/popcountdi2.zig").__popcountdi2, linkage); + @export("__muldi3", @import("compiler_rt/muldi3.zig").__muldi3, linkage); @export("__divmoddi4", __divmoddi4, linkage); @export("__divsi3", __divsi3, linkage); @export("__divdi3", __divdi3, linkage); @@ -147,6 +148,8 @@ comptime { @export("__aeabi_unwind_cpp_pr1", __aeabi_unwind_cpp_pr1, linkage); @export("__aeabi_unwind_cpp_pr2", __aeabi_unwind_cpp_pr2, linkage); + @export("__aeabi_lmul", @import("compiler_rt/muldi3.zig").__muldi3, linkage); + @export("__aeabi_ldivmod", __aeabi_ldivmod, linkage); 
@export("__aeabi_uldivmod", __aeabi_uldivmod, linkage); diff --git a/std/special/compiler_rt/divti3.zig b/std/special/compiler_rt/divti3.zig index d5b2778a34..477ce2cb98 100644 --- a/std/special/compiler_rt/divti3.zig +++ b/std/special/compiler_rt/divti3.zig @@ -1,6 +1,5 @@ const udivmod = @import("udivmod.zig").udivmod; const builtin = @import("builtin"); -const compiler_rt = @import("../compiler_rt.zig"); pub extern fn __divti3(a: i128, b: i128) i128 { @setRuntimeSafety(builtin.is_test); diff --git a/std/special/compiler_rt/muldi3.zig b/std/special/compiler_rt/muldi3.zig new file mode 100644 index 0000000000..7a955120f5 --- /dev/null +++ b/std/special/compiler_rt/muldi3.zig @@ -0,0 +1,54 @@ +const builtin = @import("builtin"); + +// Ported from +// https://github.com/llvm/llvm-project/blob/552c2c09d354a3ad9c1c9647e0a3bb5099c31088/compiler-rt/lib/builtins/muldi3.c + +const dwords = extern union { + all: i64, + s: switch (builtin.endian) { + .Little => extern struct { + low: u32, + high: u32, + }, + .Big => extern struct { + high: u32, + low: u32, + }, + }, +}; + +fn __muldsi3(a: u32, b: u32) i64 { + @setRuntimeSafety(builtin.is_test); + + const bits_in_word_2 = @sizeOf(i32) * 8 / 2; + const lower_mask = (~u32(0)) >> bits_in_word_2; + + var r: dwords = undefined; + r.s.low = (a & lower_mask) *% (b & lower_mask); + var t: u32 = r.s.low >> bits_in_word_2; + r.s.low &= lower_mask; + t += (a >> bits_in_word_2) *% (b & lower_mask); + r.s.low +%= (t & lower_mask) << bits_in_word_2; + r.s.high = t >> bits_in_word_2; + t = r.s.low >> bits_in_word_2; + r.s.low &= lower_mask; + t +%= (b >> bits_in_word_2) *% (a & lower_mask); + r.s.low +%= (t & lower_mask) << bits_in_word_2; + r.s.high +%= t >> bits_in_word_2; + r.s.high +%= (a >> bits_in_word_2) *% (b >> bits_in_word_2); + return r.all; +} + +pub extern fn __muldi3(a: i64, b: i64) i64 { + @setRuntimeSafety(builtin.is_test); + + const x = dwords{ .all = a }; + const y = dwords{ .all = b }; + var r = dwords{ .all = __muldsi3(x.s.low, y.s.low) }; + r.s.high +%= x.s.high *% y.s.low +% x.s.low *% y.s.high; + return r.all; +} + +test "import muldi3" { + _ = @import("muldi3_test.zig"); +} diff --git a/std/special/compiler_rt/muldi3_test.zig b/std/special/compiler_rt/muldi3_test.zig new file mode 100644 index 0000000000..db4daf1e1e --- /dev/null +++ b/std/special/compiler_rt/muldi3_test.zig @@ -0,0 +1,51 @@ +const __muldi3 = @import("muldi3.zig").__muldi3; +const testing = @import("std").testing; + +fn test__muldi3(a: i64, b: i64, expected: i64) void { + const x = __muldi3(a, b); + testing.expect(x == expected); +} + +test "muldi3" { + test__muldi3(0, 0, 0); + test__muldi3(0, 1, 0); + test__muldi3(1, 0, 0); + test__muldi3(0, 10, 0); + test__muldi3(10, 0, 0); + test__muldi3(0, 81985529216486895, 0); + test__muldi3(81985529216486895, 0, 0); + + test__muldi3(0, -1, 0); + test__muldi3(-1, 0, 0); + test__muldi3(0, -10, 0); + test__muldi3(-10, 0, 0); + test__muldi3(0, -81985529216486895, 0); + test__muldi3(-81985529216486895, 0, 0); + + test__muldi3(1, 1, 1); + test__muldi3(1, 10, 10); + test__muldi3(10, 1, 10); + test__muldi3(1, 81985529216486895, 81985529216486895); + test__muldi3(81985529216486895, 1, 81985529216486895); + + test__muldi3(1, -1, -1); + test__muldi3(1, -10, -10); + test__muldi3(-10, 1, -10); + test__muldi3(1, -81985529216486895, -81985529216486895); + test__muldi3(-81985529216486895, 1, -81985529216486895); + + test__muldi3(3037000499, 3037000499, 9223372030926249001); + test__muldi3(-3037000499, 3037000499, -9223372030926249001); + 
test__muldi3(3037000499, -3037000499, -9223372030926249001); + test__muldi3(-3037000499, -3037000499, 9223372030926249001); + + test__muldi3(4398046511103, 2097152, 9223372036852678656); + test__muldi3(-4398046511103, 2097152, -9223372036852678656); + test__muldi3(4398046511103, -2097152, -9223372036852678656); + test__muldi3(-4398046511103, -2097152, 9223372036852678656); + + test__muldi3(2097152, 4398046511103, 9223372036852678656); + test__muldi3(-2097152, 4398046511103, -9223372036852678656); + test__muldi3(2097152, -4398046511103, -9223372036852678656); + test__muldi3(-2097152, -4398046511103, 9223372036852678656); +} diff --git a/test/tests.zig b/test/tests.zig index d94c012ac1..c8aea33591 100644 --- a/test/tests.zig +++ b/test/tests.zig @@ -170,6 +170,7 @@ pub fn addPkgTests( name: []const u8, desc: []const u8, modes: []const Mode, + single_threaded_list: []const bool, skip_non_native: bool, ) *build.Step { const step = b.step(b.fmt("test-{}", name), desc); @@ -179,7 +180,7 @@ pub fn addPkgTests( continue; for (modes) |mode| { for ([_]bool{ false, true }) |link_libc| { - for ([_]bool{ false, true }) |single_threaded| { + for (single_threaded_list) |single_threaded| { if (link_libc and !is_native) { // don't assume we have a cross-compiling libc set up continue; From a3854d042e0d05a87ae9f0b71d12474c8d0e57ec Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 18 Jul 2019 15:03:21 -0400 Subject: [PATCH 08/31] basic riscv support llvm is giving me `error: couldn't allocate output register for constraint '{a0}'` which is a bug that needs to be fixed upstream. --- src/target.cpp | 4 +- std/os/bits/linux.zig | 1 + std/os/bits/linux/riscv64.zig | 279 ++++++++++++++++++++++++++++++++++ std/os/linux.zig | 1 + std/os/linux/arm64.zig | 7 + std/os/linux/riscv64.zig | 78 ++++++++++ std/os/linux/tls.zig | 9 +- std/os/linux/x86_64.zig | 14 +- std/special/start.zig | 5 + 9 files changed, 388 insertions(+), 10 deletions(-) create mode 100644 std/os/bits/linux/riscv64.zig create mode 100644 std/os/linux/riscv64.zig diff --git a/src/target.cpp b/src/target.cpp index 05eb3c662f..5a418aa547 100644 --- a/src/target.cpp +++ b/src/target.cpp @@ -1320,6 +1320,8 @@ const char *arch_stack_pointer_register_name(ZigLLVM_ArchType arch) { case ZigLLVM_aarch64: case ZigLLVM_aarch64_be: case ZigLLVM_aarch64_32: + case ZigLLVM_riscv32: + case ZigLLVM_riscv64: return "sp"; case ZigLLVM_arm: @@ -1350,8 +1352,6 @@ const char *arch_stack_pointer_register_name(ZigLLVM_ArchType arch) { case ZigLLVM_r600: case ZigLLVM_renderscript32: case ZigLLVM_renderscript64: - case ZigLLVM_riscv32: - case ZigLLVM_riscv64: case ZigLLVM_shave: case ZigLLVM_sparc: case ZigLLVM_sparcel: diff --git a/std/os/bits/linux.zig b/std/os/bits/linux.zig index 8430a05bf0..d7c23ffadb 100644 --- a/std/os/bits/linux.zig +++ b/std/os/bits/linux.zig @@ -7,6 +7,7 @@ pub usingnamespace @import("linux/errno.zig"); pub usingnamespace switch (builtin.arch) { .x86_64 => @import("linux/x86_64.zig"), .aarch64 => @import("linux/arm64.zig"), + .riscv64 => @import("linux/riscv64.zig"), else => struct {}, }; diff --git a/std/os/bits/linux/riscv64.zig b/std/os/bits/linux/riscv64.zig new file mode 100644 index 0000000000..cb3124792d --- /dev/null +++ b/std/os/bits/linux/riscv64.zig @@ -0,0 +1,279 @@ +// riscv64-specific declarations that are intended to be imported into the POSIX namespace. 
+ +pub const SYS_io_setup = 0; +pub const SYS_io_destroy = 1; +pub const SYS_io_submit = 2; +pub const SYS_io_cancel = 3; +pub const SYS_io_getevents = 4; +pub const SYS_setxattr = 5; +pub const SYS_lsetxattr = 6; +pub const SYS_fsetxattr = 7; +pub const SYS_getxattr = 8; +pub const SYS_lgetxattr = 9; +pub const SYS_fgetxattr = 10; +pub const SYS_listxattr = 11; +pub const SYS_llistxattr = 12; +pub const SYS_flistxattr = 13; +pub const SYS_removexattr = 14; +pub const SYS_lremovexattr = 15; +pub const SYS_fremovexattr = 16; +pub const SYS_getcwd = 17; +pub const SYS_lookup_dcookie = 18; +pub const SYS_eventfd2 = 19; +pub const SYS_epoll_create1 = 20; +pub const SYS_epoll_ctl = 21; +pub const SYS_epoll_pwait = 22; +pub const SYS_dup = 23; +pub const SYS_dup3 = 24; +pub const SYS_fcntl = 25; +pub const SYS_inotify_init1 = 26; +pub const SYS_inotify_add_watch = 27; +pub const SYS_inotify_rm_watch = 28; +pub const SYS_ioctl = 29; +pub const SYS_ioprio_set = 30; +pub const SYS_ioprio_get = 31; +pub const SYS_flock = 32; +pub const SYS_mknodat = 33; +pub const SYS_mkdirat = 34; +pub const SYS_unlinkat = 35; +pub const SYS_symlinkat = 36; +pub const SYS_linkat = 37; +pub const SYS_umount2 = 39; +pub const SYS_mount = 40; +pub const SYS_pivot_root = 41; +pub const SYS_nfsservctl = 42; +pub const SYS_statfs = 43; +pub const SYS_fstatfs = 44; +pub const SYS_truncate = 45; +pub const SYS_ftruncate = 46; +pub const SYS_fallocate = 47; +pub const SYS_faccessat = 48; +pub const SYS_chdir = 49; +pub const SYS_fchdir = 50; +pub const SYS_chroot = 51; +pub const SYS_fchmod = 52; +pub const SYS_fchmodat = 53; +pub const SYS_fchownat = 54; +pub const SYS_fchown = 55; +pub const SYS_openat = 56; +pub const SYS_close = 57; +pub const SYS_vhangup = 58; +pub const SYS_pipe2 = 59; +pub const SYS_quotactl = 60; +pub const SYS_getdents64 = 61; +pub const SYS_lseek = 62; +pub const SYS_read = 63; +pub const SYS_write = 64; +pub const SYS_readv = 65; +pub const SYS_writev = 66; +pub const SYS_pread64 = 67; +pub const SYS_pwrite64 = 68; +pub const SYS_preadv = 69; +pub const SYS_pwritev = 70; +pub const SYS_sendfile = 71; +pub const SYS_pselect6 = 72; +pub const SYS_ppoll = 73; +pub const SYS_signalfd4 = 74; +pub const SYS_vmsplice = 75; +pub const SYS_splice = 76; +pub const SYS_tee = 77; +pub const SYS_readlinkat = 78; +pub const SYS_fstatat = 79; +pub const SYS_fstat = 80; +pub const SYS_sync = 81; +pub const SYS_fsync = 82; +pub const SYS_fdatasync = 83; +pub const SYS_sync_file_range = 84; +pub const SYS_timerfd_create = 85; +pub const SYS_timerfd_settime = 86; +pub const SYS_timerfd_gettime = 87; +pub const SYS_utimensat = 88; +pub const SYS_acct = 89; +pub const SYS_capget = 90; +pub const SYS_capset = 91; +pub const SYS_personality = 92; +pub const SYS_exit = 93; +pub const SYS_exit_group = 94; +pub const SYS_waitid = 95; +pub const SYS_set_tid_address = 96; +pub const SYS_unshare = 97; +pub const SYS_futex = 98; +pub const SYS_set_robust_list = 99; +pub const SYS_get_robust_list = 100; +pub const SYS_nanosleep = 101; +pub const SYS_getitimer = 102; +pub const SYS_setitimer = 103; +pub const SYS_kexec_load = 104; +pub const SYS_init_module = 105; +pub const SYS_delete_module = 106; +pub const SYS_timer_create = 107; +pub const SYS_timer_gettime = 108; +pub const SYS_timer_getoverrun = 109; +pub const SYS_timer_settime = 110; +pub const SYS_timer_delete = 111; +pub const SYS_clock_settime = 112; +pub const SYS_clock_gettime = 113; +pub const SYS_clock_getres = 114; +pub const SYS_clock_nanosleep = 115; +pub 
const SYS_syslog = 116; +pub const SYS_ptrace = 117; +pub const SYS_sched_setparam = 118; +pub const SYS_sched_setscheduler = 119; +pub const SYS_sched_getscheduler = 120; +pub const SYS_sched_getparam = 121; +pub const SYS_sched_setaffinity = 122; +pub const SYS_sched_getaffinity = 123; +pub const SYS_sched_yield = 124; +pub const SYS_sched_get_priority_max = 125; +pub const SYS_sched_get_priority_min = 126; +pub const SYS_sched_rr_get_interval = 127; +pub const SYS_restart_syscall = 128; +pub const SYS_kill = 129; +pub const SYS_tkill = 130; +pub const SYS_tgkill = 131; +pub const SYS_sigaltstack = 132; +pub const SYS_rt_sigsuspend = 133; +pub const SYS_rt_sigaction = 134; +pub const SYS_rt_sigprocmask = 135; +pub const SYS_rt_sigpending = 136; +pub const SYS_rt_sigtimedwait = 137; +pub const SYS_rt_sigqueueinfo = 138; +pub const SYS_rt_sigreturn = 139; +pub const SYS_setpriority = 140; +pub const SYS_getpriority = 141; +pub const SYS_reboot = 142; +pub const SYS_setregid = 143; +pub const SYS_setgid = 144; +pub const SYS_setreuid = 145; +pub const SYS_setuid = 146; +pub const SYS_setresuid = 147; +pub const SYS_getresuid = 148; +pub const SYS_setresgid = 149; +pub const SYS_getresgid = 150; +pub const SYS_setfsuid = 151; +pub const SYS_setfsgid = 152; +pub const SYS_times = 153; +pub const SYS_setpgid = 154; +pub const SYS_getpgid = 155; +pub const SYS_getsid = 156; +pub const SYS_setsid = 157; +pub const SYS_getgroups = 158; +pub const SYS_setgroups = 159; +pub const SYS_uname = 160; +pub const SYS_sethostname = 161; +pub const SYS_setdomainname = 162; +pub const SYS_getrlimit = 163; +pub const SYS_setrlimit = 164; +pub const SYS_getrusage = 165; +pub const SYS_umask = 166; +pub const SYS_prctl = 167; +pub const SYS_getcpu = 168; +pub const SYS_gettimeofday = 169; +pub const SYS_settimeofday = 170; +pub const SYS_adjtimex = 171; +pub const SYS_getpid = 172; +pub const SYS_getppid = 173; +pub const SYS_getuid = 174; +pub const SYS_geteuid = 175; +pub const SYS_getgid = 176; +pub const SYS_getegid = 177; +pub const SYS_gettid = 178; +pub const SYS_sysinfo = 179; +pub const SYS_mq_open = 180; +pub const SYS_mq_unlink = 181; +pub const SYS_mq_timedsend = 182; +pub const SYS_mq_timedreceive = 183; +pub const SYS_mq_notify = 184; +pub const SYS_mq_getsetattr = 185; +pub const SYS_msgget = 186; +pub const SYS_msgctl = 187; +pub const SYS_msgrcv = 188; +pub const SYS_msgsnd = 189; +pub const SYS_semget = 190; +pub const SYS_semctl = 191; +pub const SYS_semtimedop = 192; +pub const SYS_semop = 193; +pub const SYS_shmget = 194; +pub const SYS_shmctl = 195; +pub const SYS_shmat = 196; +pub const SYS_shmdt = 197; +pub const SYS_socket = 198; +pub const SYS_socketpair = 199; +pub const SYS_bind = 200; +pub const SYS_listen = 201; +pub const SYS_accept = 202; +pub const SYS_connect = 203; +pub const SYS_getsockname = 204; +pub const SYS_getpeername = 205; +pub const SYS_sendto = 206; +pub const SYS_recvfrom = 207; +pub const SYS_setsockopt = 208; +pub const SYS_getsockopt = 209; +pub const SYS_shutdown = 210; +pub const SYS_sendmsg = 211; +pub const SYS_recvmsg = 212; +pub const SYS_readahead = 213; +pub const SYS_brk = 214; +pub const SYS_munmap = 215; +pub const SYS_mremap = 216; +pub const SYS_add_key = 217; +pub const SYS_request_key = 218; +pub const SYS_keyctl = 219; +pub const SYS_clone = 220; +pub const SYS_execve = 221; +pub const SYS_mmap = 222; +pub const SYS_fadvise64 = 223; +pub const SYS_swapon = 224; +pub const SYS_swapoff = 225; +pub const SYS_mprotect = 226; +pub const SYS_msync = 
227; +pub const SYS_mlock = 228; +pub const SYS_munlock = 229; +pub const SYS_mlockall = 230; +pub const SYS_munlockall = 231; +pub const SYS_mincore = 232; +pub const SYS_madvise = 233; +pub const SYS_remap_file_pages = 234; +pub const SYS_mbind = 235; +pub const SYS_get_mempolicy = 236; +pub const SYS_set_mempolicy = 237; +pub const SYS_migrate_pages = 238; +pub const SYS_move_pages = 239; +pub const SYS_rt_tgsigqueueinfo = 240; +pub const SYS_perf_event_open = 241; +pub const SYS_accept4 = 242; +pub const SYS_recvmmsg = 243; +pub const SYS_arch_specific_syscall = 244; +pub const SYS_wait4 = 260; +pub const SYS_prlimit64 = 261; +pub const SYS_fanotify_init = 262; +pub const SYS_fanotify_mark = 263; +pub const SYS_name_to_handle_at = 264; +pub const SYS_open_by_handle_at = 265; +pub const SYS_clock_adjtime = 266; +pub const SYS_syncfs = 267; +pub const SYS_setns = 268; +pub const SYS_sendmmsg = 269; +pub const SYS_process_vm_readv = 270; +pub const SYS_process_vm_writev = 271; +pub const SYS_kcmp = 272; +pub const SYS_finit_module = 273; +pub const SYS_sched_setattr = 274; +pub const SYS_sched_getattr = 275; +pub const SYS_renameat2 = 276; +pub const SYS_seccomp = 277; +pub const SYS_getrandom = 278; +pub const SYS_memfd_create = 279; +pub const SYS_bpf = 280; +pub const SYS_execveat = 281; +pub const SYS_userfaultfd = 282; +pub const SYS_membarrier = 283; +pub const SYS_mlock2 = 284; +pub const SYS_copy_file_range = 285; +pub const SYS_preadv2 = 286; +pub const SYS_pwritev2 = 287; +pub const SYS_pkey_mprotect = 288; +pub const SYS_pkey_alloc = 289; +pub const SYS_pkey_free = 290; +pub const SYS_sysriscv = 244; +pub const SYS_riscv_flush_icache = SYS_sysriscv + 15; diff --git a/std/os/linux.zig b/std/os/linux.zig index 053f30d265..e217430e27 100644 --- a/std/os/linux.zig +++ b/std/os/linux.zig @@ -17,6 +17,7 @@ pub const is_the_target = builtin.os == .linux; pub usingnamespace switch (builtin.arch) { .x86_64 => @import("linux/x86_64.zig"), .aarch64 => @import("linux/arm64.zig"), + .riscv64 => @import("linux/riscv64.zig"), else => struct {}, }; pub usingnamespace @import("bits.zig"); diff --git a/std/os/linux/arm64.zig b/std/os/linux/arm64.zig index 94cf8818de..f50374a503 100644 --- a/std/os/linux/arm64.zig +++ b/std/os/linux/arm64.zig @@ -2,6 +2,7 @@ pub fn syscall0(number: usize) usize { return asm volatile ("svc #0" : [ret] "={x0}" (-> usize) : [number] "{x8}" (number) + : "memory" ); } @@ -10,6 +11,7 @@ pub fn syscall1(number: usize, arg1: usize) usize { : [ret] "={x0}" (-> usize) : [number] "{x8}" (number), [arg1] "{x0}" (arg1) + : "memory" ); } @@ -19,6 +21,7 @@ pub fn syscall2(number: usize, arg1: usize, arg2: usize) usize { : [number] "{x8}" (number), [arg1] "{x0}" (arg1), [arg2] "{x1}" (arg2) + : "memory" ); } @@ -29,6 +32,7 @@ pub fn syscall3(number: usize, arg1: usize, arg2: usize, arg3: usize) usize { [arg1] "{x0}" (arg1), [arg2] "{x1}" (arg2), [arg3] "{x2}" (arg3) + : "memory" ); } @@ -40,6 +44,7 @@ pub fn syscall4(number: usize, arg1: usize, arg2: usize, arg3: usize, arg4: usiz [arg2] "{x1}" (arg2), [arg3] "{x2}" (arg3), [arg4] "{x3}" (arg4) + : "memory" ); } @@ -52,6 +57,7 @@ pub fn syscall5(number: usize, arg1: usize, arg2: usize, arg3: usize, arg4: usiz [arg3] "{x2}" (arg3), [arg4] "{x3}" (arg4), [arg5] "{x4}" (arg5) + : "memory" ); } @@ -73,6 +79,7 @@ pub fn syscall6( [arg4] "{x3}" (arg4), [arg5] "{x4}" (arg5), [arg6] "{x5}" (arg6) + : "memory" ); } diff --git a/std/os/linux/riscv64.zig b/std/os/linux/riscv64.zig new file mode 100644 index 0000000000..b9264aa3db --- 
/dev/null +++ b/std/os/linux/riscv64.zig @@ -0,0 +1,78 @@ +pub fn syscall0(number: usize) usize { + return asm volatile ("ecall" + : [ret] "={a0}" (-> usize) + : [number] "{a7}" (number) + : "memory" + ); +} + +pub fn syscall1(number: usize, arg1: usize) usize { + return asm volatile ("ecall" + : [ret] "={a0}" (-> usize) + : [number] "{a7}" (number), + [arg1] "{a0}" (arg1) + ); +} + +pub fn syscall2(number: usize, arg1: usize, arg2: usize) usize { + return asm volatile ("ecall" + : [ret] "={a0}" (-> usize) + : [number] "{a7}" (number), + [arg1] "{a0}" (arg1), + [arg2] "{a1}" (arg2) + ); +} + +pub fn syscall3(number: usize, arg1: usize, arg2: usize, arg3: usize) usize { + return asm volatile ("ecall" + : [ret] "={a0}" (-> usize) + : [number] "{a7}" (number), + [arg1] "{a0}" (arg1), + [arg2] "{a1}" (arg2), + [arg3] "{a2}" (arg3) + ); +} + +pub fn syscall4(number: usize, arg1: usize, arg2: usize, arg3: usize, arg4: usize) usize { + return asm volatile ("ecall" + : [ret] "={a0}" (-> usize) + : [number] "{a7}" (number), + [arg1] "{a0}" (arg1), + [arg2] "{a1}" (arg2), + [arg3] "{a2}" (arg3), + [arg4] "{a3}" (arg4) + ); +} + +pub fn syscall5(number: usize, arg1: usize, arg2: usize, arg3: usize, arg4: usize, arg5: usize) usize { + return asm volatile ("ecall" + : [ret] "={a0}" (-> usize) + : [number] "{a7}" (number), + [arg1] "{a0}" (arg1), + [arg2] "{a1}" (arg2), + [arg3] "{a2}" (arg3), + [arg4] "{a3}" (arg4), + [arg5] "{a4}" (arg5) + ); +} + +pub fn syscall6( + number: usize, + arg1: usize, + arg2: usize, + arg3: usize, + arg4: usize, + arg5: usize, + arg6: usize, +) usize { + return asm volatile ("ecall" + : [ret] "={a0}" (-> usize) + : [number] "{a7}" (number), + [arg1] "{a0}" (arg1), + [arg2] "{a1}" (arg2), + [arg3] "{a2}" (arg3), + [arg4] "{a3}" (arg4), + [arg5] "{a4}" (arg5), + [arg6] "{a5}" (arg6) + ); +} diff --git a/std/os/linux/tls.zig b/std/os/linux/tls.zig index a10d68663b..4996065121 100644 --- a/std/os/linux/tls.zig +++ b/std/os/linux/tls.zig @@ -47,7 +47,7 @@ const TLSVariant = enum { }; const tls_variant = switch (builtin.arch) { - .arm, .armeb, .aarch64, .aarch64_be => TLSVariant.VariantI, + .arm, .armeb, .aarch64, .aarch64_be, .riscv32, .riscv64 => TLSVariant.VariantI, .x86_64, .i386 => TLSVariant.VariantII, else => @compileError("undefined tls_variant for this architecture"), }; @@ -118,6 +118,13 @@ pub fn setThreadPointer(addr: usize) void { : [addr] "r" (addr) ); }, + .riscv64 => { + asm volatile ( + \\ mv tp, %[addr] + : + : [addr] "r" (addr) + ); + }, else => @compileError("Unsupported architecture"), } } diff --git a/std/os/linux/x86_64.zig b/std/os/linux/x86_64.zig index 82eed0256b..0f3a36636a 100644 --- a/std/os/linux/x86_64.zig +++ b/std/os/linux/x86_64.zig @@ -4,7 +4,7 @@ pub fn syscall0(number: usize) usize { return asm volatile ("syscall" : [ret] "={rax}" (-> usize) : [number] "{rax}" (number) - : "rcx", "r11" + : "rcx", "r11", "memory" ); } @@ -13,7 +13,7 @@ pub fn syscall1(number: usize, arg1: usize) usize { : [ret] "={rax}" (-> usize) : [number] "{rax}" (number), [arg1] "{rdi}" (arg1) - : "rcx", "r11" + : "rcx", "r11", "memory" ); } @@ -23,7 +23,7 @@ pub fn syscall2(number: usize, arg1: usize, arg2: usize) usize { : [number] "{rax}" (number), [arg1] "{rdi}" (arg1), [arg2] "{rsi}" (arg2) - : "rcx", "r11" + : "rcx", "r11", "memory" ); } @@ -34,7 +34,7 @@ pub fn syscall3(number: usize, arg1: usize, arg2: usize, arg3: usize) usize { [arg1] "{rdi}" (arg1), [arg2] "{rsi}" (arg2), [arg3] "{rdx}" (arg3) - : "rcx", "r11" + : "rcx", "r11", "memory" ); } @@ -46,7 +46,7 @@ 
pub fn syscall4(number: usize, arg1: usize, arg2: usize, arg3: usize, arg4: usiz [arg2] "{rsi}" (arg2), [arg3] "{rdx}" (arg3), [arg4] "{r10}" (arg4) - : "rcx", "r11" + : "rcx", "r11", "memory" ); } @@ -59,7 +59,7 @@ pub fn syscall5(number: usize, arg1: usize, arg2: usize, arg3: usize, arg4: usiz [arg3] "{rdx}" (arg3), [arg4] "{r10}" (arg4), [arg5] "{r8}" (arg5) - : "rcx", "r11" + : "rcx", "r11", "memory" ); } @@ -81,7 +81,7 @@ pub fn syscall6( [arg4] "{r10}" (arg4), [arg5] "{r8}" (arg5), [arg6] "{r9}" (arg6) - : "rcx", "r11" + : "rcx", "r11", "memory" ); } diff --git a/std/special/start.zig b/std/special/start.zig index 30298669e3..42dda53121 100644 --- a/std/special/start.zig +++ b/std/special/start.zig @@ -59,6 +59,11 @@ nakedcc fn _start() noreturn { : [argc] "=r" (-> [*]usize) ); }, + .riscv64 => { + argc_ptr = asm ("mov %[argc], sp" + : [argc] "=r" (-> [*]usize) + ); + }, else => @compileError("unsupported arch"), } // If LLVM inlines stack variables into _start, they will overwrite From 03a3b1ca39e73c7edf615d26bb7647d7565a09f7 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 18 Jul 2019 15:20:58 -0400 Subject: [PATCH 09/31] riscv workarounds for llvm not having good asm integration --- src/codegen.cpp | 4 ++ std/os/linux/riscv64.zig | 132 ++++++++++++++++++++++++++------------- std/special/c.zig | 5 +- std/special/start.zig | 2 +- 4 files changed, 99 insertions(+), 44 deletions(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index 2c5f2c96e5..d664a21ecc 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -8148,6 +8148,10 @@ static void init(CodeGen *g) { target_specific_cpu_args = ZigLLVMGetHostCPUName(); target_specific_features = ZigLLVMGetNativeFeatures(); } + } else if (target_is_riscv(g->zig_target)) { + // TODO https://github.com/ziglang/zig/issues/2883 + target_specific_cpu_args = ""; + target_specific_features = "+a"; } else { target_specific_cpu_args = ""; target_specific_features = ""; diff --git a/std/os/linux/riscv64.zig b/std/os/linux/riscv64.zig index b9264aa3db..5cda2afa0a 100644 --- a/std/os/linux/riscv64.zig +++ b/std/os/linux/riscv64.zig @@ -1,58 +1,96 @@ pub fn syscall0(number: usize) usize { - return asm volatile ("ecall" - : [ret] "={a0}" (-> usize) - : [number] "{a7}" (number) + return asm volatile ( + \\ mv a7, %[number] + \\ ecall + \\ mv %[ret], a0 + : [ret] "=r" (-> usize) + : [number] "r" (number) : "memory" ); } pub fn syscall1(number: usize, arg1: usize) usize { - return asm volatile ("ecall" - : [ret] "={a0}" (-> usize) - : [number] "{a7}" (number), - [arg1] "{a0}" (arg1) + return asm volatile ( + \\ mv a7, %[number] + \\ mv a0, %[arg1] + \\ ecall + \\ mv %[ret], a0 + : [ret] "=r" (-> usize) + : [number] "r" (number), + [arg1] "r" (arg1) + : "memory" ); } pub fn syscall2(number: usize, arg1: usize, arg2: usize) usize { - return asm volatile ("ecall" - : [ret] "={a0}" (-> usize) - : [number] "{a7}" (number), - [arg1] "{a0}" (arg1), - [arg2] "{a1}" (arg2) + return asm volatile ( + \\ mv a7, %[number] + \\ mv a0, %[arg1] + \\ mv a1, %[arg2] + \\ ecall + \\ mv %[ret], a0 + : [ret] "=r" (-> usize) + : [number] "r" (number), + [arg1] "r" (arg1), + [arg2] "r" (arg2) + : "memory" ); } pub fn syscall3(number: usize, arg1: usize, arg2: usize, arg3: usize) usize { - return asm volatile ("ecall" - : [ret] "={a0}" (-> usize) - : [number] "{a7}" (number), - [arg1] "{a0}" (arg1), - [arg2] "{a1}" (arg2), - [arg3] "{a2}" (arg3) + return asm volatile ( + \\ mv a7, %[number] + \\ mv a0, %[arg1] + \\ mv a1, %[arg2] + \\ mv a2, %[arg3] + \\ ecall + \\ mv %[ret], 
a0 + : [ret] "=r" (-> usize) + : [number] "r" (number), + [arg1] "r" (arg1), + [arg2] "r" (arg2), + [arg3] "r" (arg3) + : "memory" ); } pub fn syscall4(number: usize, arg1: usize, arg2: usize, arg3: usize, arg4: usize) usize { - return asm volatile ("ecall" - : [ret] "={a0}" (-> usize) - : [number] "{a7}" (number), - [arg1] "{a0}" (arg1), - [arg2] "{a1}" (arg2), - [arg3] "{a2}" (arg3), - [arg4] "{a3}" (arg4) + return asm volatile ( + \\ mv a7, %[number] + \\ mv a0, %[arg1] + \\ mv a1, %[arg2] + \\ mv a2, %[arg3] + \\ mv a3, %[arg4] + \\ ecall + \\ mv %[ret], a0 + : [ret] "=r" (-> usize) + : [number] "r" (number), + [arg1] "r" (arg1), + [arg2] "r" (arg2), + [arg3] "r" (arg3), + [arg4] "r" (arg4) + : "memory" ); } pub fn syscall5(number: usize, arg1: usize, arg2: usize, arg3: usize, arg4: usize, arg5: usize) usize { - return asm volatile ("ecall" - : [ret] "={a0}" (-> usize) - : [number] "{a7}" (number), - [arg1] "{a0}" (arg1), - [arg2] "{a1}" (arg2), - [arg3] "{a2}" (arg3), - [arg4] "{a3}" (arg4), - [arg5] "{a4}" (arg5) + return asm volatile ( + \\ mv a7, %[number] + \\ mv a0, %[arg1] + \\ mv a1, %[arg2] + \\ mv a2, %[arg3] + \\ mv a3, %[arg4] + \\ mv a4, %[arg5] + \\ ecall + \\ mv %[ret], a0 + : [ret] "=r" (-> usize) + : [number] "r" (number), + [arg1] "r" (arg1), + [arg2] "r" (arg2), + [arg3] "r" (arg3), + [arg4] "r" (arg4), + [arg5] "r" (arg5) + : "memory" ); } @@ -65,14 +103,24 @@ pub fn syscall6( arg5: usize, arg6: usize, ) usize { - return asm volatile ("ecall" - : [ret] "={a0}" (-> usize) - : [number] "{a7}" (number), - [arg1] "{a0}" (arg1), - [arg2] "{a1}" (arg2), - [arg3] "{a2}" (arg3), - [arg4] "{a3}" (arg4), - [arg5] "{a4}" (arg5), - [arg6] "{a5}" (arg6) + return asm volatile ( + \\ mv a7, %[number] + \\ mv a0, %[arg1] + \\ mv a1, %[arg2] + \\ mv a2, %[arg3] + \\ mv a3, %[arg4] + \\ mv a4, %[arg5] + \\ mv a5, %[arg6] + \\ ecall + \\ mv %[ret], a0 + : [ret] "=r" (-> usize) + : [number] "r" (number), + [arg1] "r" (arg1), + [arg2] "r" (arg2), + [arg3] "r" (arg3), + [arg4] "r" (arg4), + [arg5] "r" (arg5), + [arg6] "r" (arg6) + : "memory" ); } diff --git a/std/special/c.zig b/std/special/c.zig index 150127196b..fa039b3604 100644 --- a/std/special/c.zig +++ b/std/special/c.zig @@ -180,7 +180,10 @@ comptime { @export("__stack_chk_fail", __stack_chk_fail, builtin.GlobalLinkage.Strong); } if (builtin.os == builtin.Os.linux) { - @export("clone", clone, builtin.GlobalLinkage.Strong); + // TODO implement clone for riscv64 + if (builtin.arch != .riscv64) { + @export("clone", clone, builtin.GlobalLinkage.Strong); + } } } extern fn __stack_chk_fail() noreturn { diff --git a/std/special/start.zig b/std/special/start.zig index 42dda53121..0366e088d9 100644 --- a/std/special/start.zig +++ b/std/special/start.zig @@ -60,7 +60,7 @@ nakedcc fn _start() noreturn { ); }, .riscv64 => { - argc_ptr = asm ("mov %[argc], sp" + argc_ptr = asm ("mv %[argc], sp" : [argc] "=r" (-> [*]usize) ); }, From 70da0762f7aa2d800da4a238499fc3f31dc4d31f Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Thu, 18 Jul 2019 18:15:50 -0400 Subject: [PATCH 10/31] better riscv syscalls --- std/os/linux/riscv64.zig | 126 +++++++++++++-------------------------- 1 file changed, 42 insertions(+), 84 deletions(-) diff --git a/std/os/linux/riscv64.zig b/std/os/linux/riscv64.zig index 5cda2afa0a..3f5053c973 100644 --- a/std/os/linux/riscv64.zig +++ b/std/os/linux/riscv64.zig @@ -1,95 +1,62 @@ pub fn syscall0(number: usize) usize { - return asm volatile ( - \\ mv a7, %[number] - \\ ecall - \\ mv %[ret], a0 - : [ret] "=r" (-> usize) - : 
[number] "r" (number) + return asm volatile ("ecall" + : [ret] "={x10}" (-> usize) + : [number] "{x17}" (number) : "memory" ); } pub fn syscall1(number: usize, arg1: usize) usize { - return asm volatile ( - \\ mv a7, %[number] - \\ mv a0, %[arg1] - \\ ecall - \\ mv %[ret], a0 - : [ret] "=r" (-> usize) - : [number] "r" (number), - [arg1] "r" (arg1) + return asm volatile ("ecall" + : [ret] "={x10}" (-> usize) + : [number] "{x17}" (number), + [arg1] "{x10}" (arg1) : "memory" ); } pub fn syscall2(number: usize, arg1: usize, arg2: usize) usize { - return asm volatile ( - \\ mv a7, %[number] - \\ mv a0, %[arg1] - \\ mv a1, %[arg2] - \\ ecall - \\ mv %[ret], a0 - : [ret] "=r" (-> usize) - : [number] "r" (number), - [arg1] "r" (arg1), - [arg2] "r" (arg2) + return asm volatile ("ecall" + : [ret] "={x10}" (-> usize) + : [number] "{x17}" (number), + [arg1] "{x10}" (arg1), + [arg2] "{x11}" (arg2) : "memory" ); } pub fn syscall3(number: usize, arg1: usize, arg2: usize, arg3: usize) usize { - return asm volatile ( - \\ mv a7, %[number] - \\ mv a0, %[arg1] - \\ mv a1, %[arg2] - \\ mv a2, %[arg3] - \\ ecall - \\ mv %[ret], a0 - : [ret] "=r" (-> usize) - : [number] "r" (number), - [arg1] "r" (arg1), - [arg2] "r" (arg2), - [arg3] "r" (arg3) + return asm volatile ("ecall" + : [ret] "={x10}" (-> usize) + : [number] "{x17}" (number), + [arg1] "{x10}" (arg1), + [arg2] "{x11}" (arg2), + [arg3] "{x12}" (arg3) : "memory" ); } pub fn syscall4(number: usize, arg1: usize, arg2: usize, arg3: usize, arg4: usize) usize { - return asm volatile ( - \\ mv a7, %[number] - \\ mv a0, %[arg1] - \\ mv a1, %[arg2] - \\ mv a2, %[arg3] - \\ mv a3, %[arg4] - \\ ecall - \\ mv %[ret], a0 - : [ret] "=r" (-> usize) - : [number] "r" (number), - [arg1] "r" (arg1), - [arg2] "r" (arg2), - [arg3] "r" (arg3), - [arg4] "r" (arg4) + return asm volatile ("ecall" + : [ret] "={x10}" (-> usize) + : [number] "{x17}" (number), + [arg1] "{x10}" (arg1), + [arg2] "{x11}" (arg2), + [arg3] "{x12}" (arg3), + [arg4] "{x13}" (arg4) : "memory" ); } pub fn syscall5(number: usize, arg1: usize, arg2: usize, arg3: usize, arg4: usize, arg5: usize) usize { - return asm volatile ( - \\ mv a7, %[number] - \\ mv a0, %[arg1] - \\ mv a1, %[arg2] - \\ mv a2, %[arg3] - \\ mv a3, %[arg4] - \\ mv a4, %[arg5] - \\ ecall - \\ mv %[ret], a0 - : [ret] "=r" (-> usize) - : [number] "r" (number), - [arg1] "r" (arg1), - [arg2] "r" (arg2), - [arg3] "r" (arg3), - [arg4] "r" (arg4), - [arg5] "r" (arg5) + return asm volatile ("ecall" + : [ret] "={x10}" (-> usize) + : [number] "{x17}" (number), + [arg1] "{x10}" (arg1), + [arg2] "{x11}" (arg2), + [arg3] "{x12}" (arg3), + [arg4] "{x13}" (arg4), + [arg5] "{x14}" (arg5) : "memory" ); } @@ -103,24 +70,15 @@ pub fn syscall6( arg5: usize, arg6: usize, ) usize { - return asm volatile ( - \\ mv a7, %[number] - \\ mv a0, %[arg1] - \\ mv a1, %[arg2] - \\ mv a2, %[arg3] - \\ mv a3, %[arg4] - \\ mv a4, %[arg5] - \\ mv a5, %[arg6] - \\ ecall - \\ mv %[ret], a0 - : [ret] "=r" (-> usize) - : [number] "r" (number), - [arg1] "r" (arg1), - [arg2] "r" (arg2), - [arg3] "r" (arg3), - [arg4] "r" (arg4), - [arg5] "r" (arg5), - [arg6] "r" (arg6) + return asm volatile ("ecall" + : [ret] "={x10}" (-> usize) + : [number] "{x17}" (number), + [arg1] "{x10}" (arg1), + [arg2] "{x11}" (arg2), + [arg3] "{x12}" (arg3), + [arg4] "{x13}" (arg4), + [arg5] "{x14}" (arg5), + [arg6] "{x15}" (arg6) : "memory" ); } From 2117fbdae35dddf368c4ce5bb39cc73fa0f78d4c Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 19 Jul 2019 16:50:45 -0400 Subject: [PATCH 11/31] update C 
headers to llvm9 upstream commit 1931d3cb20a00da732c5210b123656632982fde0 --- lib/include/__clang_cuda_builtin_vars.h | 20 +- lib/include/__clang_cuda_cmath.h | 49 +- lib/include/__clang_cuda_complex_builtins.h | 20 +- lib/include/__clang_cuda_device_functions.h | 93 +- lib/include/__clang_cuda_intrinsics.h | 20 +- lib/include/__clang_cuda_libdevice_declares.h | 890 ++++--- .../__clang_cuda_math_forward_declares.h | 70 +- lib/include/__clang_cuda_runtime_wrapper.h | 32 +- lib/include/__stddef_max_align_t.h | 22 +- lib/include/__wmmintrin_aes.h | 20 +- lib/include/__wmmintrin_pclmul.h | 20 +- lib/include/adxintrin.h | 20 +- lib/include/altivec.h | 20 +- lib/include/ammintrin.h | 20 +- lib/include/arm64intr.h | 20 +- lib/include/arm_acle.h | 38 +- lib/include/arm_neon.h | 396 +-- lib/include/armintr.h | 20 +- lib/include/avx2intrin.h | 32 +- lib/include/avx512bf16intrin.h | 279 ++ lib/include/avx512bitalgintrin.h | 20 +- lib/include/avx512bwintrin.h | 44 +- lib/include/avx512cdintrin.h | 52 +- lib/include/avx512dqintrin.h | 20 +- lib/include/avx512erintrin.h | 20 +- lib/include/avx512fintrin.h | 115 +- lib/include/avx512ifmaintrin.h | 20 +- lib/include/avx512ifmavlintrin.h | 20 +- lib/include/avx512pfintrin.h | 20 +- lib/include/avx512vbmi2intrin.h | 20 +- lib/include/avx512vbmiintrin.h | 20 +- lib/include/avx512vbmivlintrin.h | 20 +- lib/include/avx512vlbf16intrin.h | 474 ++++ lib/include/avx512vlbitalgintrin.h | 20 +- lib/include/avx512vlbwintrin.h | 36 +- lib/include/avx512vlcdintrin.h | 86 +- lib/include/avx512vldqintrin.h | 52 +- lib/include/avx512vlintrin.h | 77 +- lib/include/avx512vlvbmi2intrin.h | 20 +- lib/include/avx512vlvnniintrin.h | 20 +- lib/include/avx512vlvp2intersectintrin.h | 121 + lib/include/avx512vnniintrin.h | 20 +- lib/include/avx512vp2intersectintrin.h | 77 + lib/include/avx512vpopcntdqintrin.h | 20 +- lib/include/avx512vpopcntdqvlintrin.h | 20 +- lib/include/avxintrin.h | 50 +- lib/include/bmi2intrin.h | 20 +- lib/include/bmiintrin.h | 20 +- lib/include/cetintrin.h | 20 +- lib/include/cldemoteintrin.h | 20 +- lib/include/clflushoptintrin.h | 20 +- lib/include/clwbintrin.h | 20 +- lib/include/clzerointrin.h | 20 +- lib/include/cpuid.h | 24 +- lib/include/emmintrin.h | 55 +- lib/include/enqcmdintrin.h | 63 + lib/include/f16cintrin.h | 26 +- lib/include/float.h | 28 +- lib/include/fma4intrin.h | 20 +- lib/include/fmaintrin.h | 20 +- lib/include/fxsrintrin.h | 20 +- lib/include/gfniintrin.h | 20 +- lib/include/htmintrin.h | 20 +- lib/include/htmxlintrin.h | 20 +- lib/include/ia32intrin.h | 320 ++- lib/include/immintrin.h | 62 +- lib/include/intrin.h | 42 +- lib/include/inttypes.h | 25 +- lib/include/invpcidintrin.h | 20 +- lib/include/iso646.h | 22 +- lib/include/limits.h | 22 +- lib/include/lwpintrin.h | 20 +- lib/include/lzcntintrin.h | 20 +- lib/include/mm3dnow.h | 20 +- lib/include/mm_malloc.h | 20 +- lib/include/mmintrin.h | 22 +- lib/include/module.modulemap | 21 +- lib/include/movdirintrin.h | 20 +- lib/include/msa.h | 20 +- lib/include/mwaitxintrin.h | 20 +- lib/include/nmmintrin.h | 20 +- lib/include/opencl-c-base.h | 578 ++++ lib/include/opencl-c.h | 698 +---- .../openmp_wrappers/__clang_openmp_math.h | 35 + .../__clang_openmp_math_declares.h | 33 + lib/include/openmp_wrappers/cmath | 16 + lib/include/openmp_wrappers/math.h | 17 + lib/include/pconfigintrin.h | 24 +- lib/include/pkuintrin.h | 20 +- lib/include/pmmintrin.h | 20 +- lib/include/popcntintrin.h | 52 +- lib/include/ppc_wrappers/emmintrin.h | 2318 +++++++++++++++++ lib/include/ppc_wrappers/mm_malloc.h 
| 44 + lib/include/ppc_wrappers/mmintrin.h | 1443 ++++++++++ lib/include/ppc_wrappers/xmmintrin.h | 1838 +++++++++++++ lib/include/prfchwintrin.h | 20 +- lib/include/ptwriteintrin.h | 20 +- lib/include/rdseedintrin.h | 20 +- lib/include/rtmintrin.h | 20 +- lib/include/s390intrin.h | 20 +- lib/include/sgxintrin.h | 24 +- lib/include/shaintrin.h | 20 +- lib/include/smmintrin.h | 20 +- lib/include/stdalign.h | 20 +- lib/include/stdarg.h | 22 +- lib/include/stdatomic.h | 20 +- lib/include/stdbool.h | 22 +- lib/include/stddef.h | 22 +- lib/include/stdint.h | 27 +- lib/include/stdnoreturn.h | 20 +- lib/include/tbmintrin.h | 20 +- lib/include/tgmath.h | 22 +- lib/include/tmmintrin.h | 20 +- lib/include/unwind.h | 24 +- lib/include/vadefs.h | 20 +- lib/include/vaesintrin.h | 20 +- lib/include/varargs.h | 20 +- lib/include/vecintrin.h | 426 ++- lib/include/vpclmulqdqintrin.h | 20 +- lib/include/waitpkgintrin.h | 20 +- lib/include/wbnoinvdintrin.h | 20 +- lib/include/wmmintrin.h | 20 +- lib/include/x86intrin.h | 20 +- lib/include/xmmintrin.h | 40 +- lib/include/xopintrin.h | 20 +- lib/include/xsavecintrin.h | 20 +- lib/include/xsaveintrin.h | 39 +- lib/include/xsaveoptintrin.h | 20 +- lib/include/xsavesintrin.h | 20 +- lib/include/xtestintrin.h | 20 +- 130 files changed, 9519 insertions(+), 3542 deletions(-) create mode 100644 lib/include/avx512bf16intrin.h create mode 100644 lib/include/avx512vlbf16intrin.h create mode 100644 lib/include/avx512vlvp2intersectintrin.h create mode 100644 lib/include/avx512vp2intersectintrin.h create mode 100644 lib/include/enqcmdintrin.h create mode 100644 lib/include/opencl-c-base.h create mode 100644 lib/include/openmp_wrappers/__clang_openmp_math.h create mode 100644 lib/include/openmp_wrappers/__clang_openmp_math_declares.h create mode 100644 lib/include/openmp_wrappers/cmath create mode 100644 lib/include/openmp_wrappers/math.h create mode 100644 lib/include/ppc_wrappers/emmintrin.h create mode 100644 lib/include/ppc_wrappers/mm_malloc.h create mode 100644 lib/include/ppc_wrappers/mmintrin.h create mode 100644 lib/include/ppc_wrappers/xmmintrin.h diff --git a/lib/include/__clang_cuda_builtin_vars.h b/lib/include/__clang_cuda_builtin_vars.h index 290c4b2984..2ba1521f25 100644 --- a/lib/include/__clang_cuda_builtin_vars.h +++ b/lib/include/__clang_cuda_builtin_vars.h @@ -1,22 +1,8 @@ /*===---- cuda_builtin_vars.h - CUDA built-in variables ---------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/__clang_cuda_cmath.h b/lib/include/__clang_cuda_cmath.h index 5331ba401a..834a2e3fd1 100644 --- a/lib/include/__clang_cuda_cmath.h +++ b/lib/include/__clang_cuda_cmath.h @@ -1,22 +1,8 @@ /*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -44,12 +30,32 @@ // implementation. Declaring in the global namespace and pulling into namespace // std covers all of the known knowns. +#ifdef _OPENMP +#define __DEVICE__ static __attribute__((always_inline)) +#else #define __DEVICE__ static __device__ __inline__ __attribute__((always_inline)) +#endif +// For C++ 17 we need to include noexcept attribute to be compatible +// with the header-defined version. This may be removed once +// variant is supported. +#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L +#define __NOEXCEPT noexcept +#else +#define __NOEXCEPT +#endif + +#if !(defined(_OPENMP) && defined(__cplusplus)) __DEVICE__ long long abs(long long __n) { return ::llabs(__n); } __DEVICE__ long abs(long __n) { return ::labs(__n); } __DEVICE__ float abs(float __x) { return ::fabsf(__x); } __DEVICE__ double abs(double __x) { return ::fabs(__x); } +#endif +// TODO: remove once variat is supported. 
+#if defined(_OPENMP) && defined(__cplusplus) +__DEVICE__ const float abs(const float __x) { return ::fabsf((float)__x); } +__DEVICE__ const double abs(const double __x) { return ::fabs((double)__x); } +#endif __DEVICE__ float acos(float __x) { return ::acosf(__x); } __DEVICE__ float asin(float __x) { return ::asinf(__x); } __DEVICE__ float atan(float __x) { return ::atanf(__x); } @@ -58,9 +64,11 @@ __DEVICE__ float ceil(float __x) { return ::ceilf(__x); } __DEVICE__ float cos(float __x) { return ::cosf(__x); } __DEVICE__ float cosh(float __x) { return ::coshf(__x); } __DEVICE__ float exp(float __x) { return ::expf(__x); } -__DEVICE__ float fabs(float __x) { return ::fabsf(__x); } +__DEVICE__ float fabs(float __x) __NOEXCEPT { return ::fabsf(__x); } __DEVICE__ float floor(float __x) { return ::floorf(__x); } __DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); } +// TODO: remove when variant is supported +#ifndef _OPENMP __DEVICE__ int fpclassify(float __x) { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x); @@ -69,6 +77,7 @@ __DEVICE__ int fpclassify(double __x) { return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, FP_ZERO, __x); } +#endif __DEVICE__ float frexp(float __arg, int *__exp) { return ::frexpf(__arg, __exp); } @@ -448,7 +457,10 @@ using ::remainderf; using ::remquof; using ::rintf; using ::roundf; +// TODO: remove once variant is supported +#ifndef _OPENMP using ::scalblnf; +#endif using ::scalbnf; using ::sinf; using ::sinhf; @@ -467,6 +479,7 @@ _GLIBCXX_END_NAMESPACE_VERSION } // namespace std #endif +#undef __NOEXCEPT #undef __DEVICE__ #endif diff --git a/lib/include/__clang_cuda_complex_builtins.h b/lib/include/__clang_cuda_complex_builtins.h index beef7deff8..576a958b16 100644 --- a/lib/include/__clang_cuda_complex_builtins.h +++ b/lib/include/__clang_cuda_complex_builtins.h @@ -1,22 +1,8 @@ /*===-- __clang_cuda_complex_builtins - CUDA impls of runtime complex fns ---=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/__clang_cuda_device_functions.h b/lib/include/__clang_cuda_device_functions.h index 67bbc68b16..50ad674f94 100644 --- a/lib/include/__clang_cuda_device_functions.h +++ b/lib/include/__clang_cuda_device_functions.h @@ -1,22 +1,8 @@ /*===---- __clang_cuda_device_functions.h - CUDA runtime support -----------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -24,15 +10,21 @@ #ifndef __CLANG_CUDA_DEVICE_FUNCTIONS_H__ #define __CLANG_CUDA_DEVICE_FUNCTIONS_H__ +#ifndef _OPENMP #if CUDA_VERSION < 9000 #error This file is intended to be used with CUDA-9+ only. #endif +#endif // __DEVICE__ is a helper macro with common set of attributes for the wrappers // we implement in this file. We need static in order to avoid emitting unused // functions and __forceinline__ helps inlining these wrappers at -O1. #pragma push_macro("__DEVICE__") +#ifdef _OPENMP +#define __DEVICE__ static __attribute__((always_inline)) +#else #define __DEVICE__ static __device__ __forceinline__ +#endif // libdevice provides fast low precision and slow full-recision implementations // for some functions. Which one gets selected depends on @@ -45,6 +37,15 @@ #define __FAST_OR_SLOW(fast, slow) slow #endif +// For C++ 17 we need to include noexcept attribute to be compatible +// with the header-defined version. This may be removed once +// variant is supported. 
+#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L +#define __NOEXCEPT noexcept +#else +#define __NOEXCEPT +#endif + __DEVICE__ int __all(int __a) { return __nvvm_vote_all(__a); } __DEVICE__ int __any(int __a) { return __nvvm_vote_any(__a); } __DEVICE__ unsigned int __ballot(int __a) { return __nvvm_vote_ballot(__a); } @@ -52,8 +53,13 @@ __DEVICE__ unsigned int __brev(unsigned int __a) { return __nv_brev(__a); } __DEVICE__ unsigned long long __brevll(unsigned long long __a) { return __nv_brevll(__a); } +#if defined(__cplusplus) __DEVICE__ void __brkpt() { asm volatile("brkpt;"); } __DEVICE__ void __brkpt(int __a) { __brkpt(); } +#else +__DEVICE__ void __attribute__((overloadable)) __brkpt(void) { asm volatile("brkpt;"); } +__DEVICE__ void __attribute__((overloadable)) __brkpt(int __a) { __brkpt(); } +#endif __DEVICE__ unsigned int __byte_perm(unsigned int __a, unsigned int __b, unsigned int __c) { return __nv_byte_perm(__a, __b, __c); @@ -237,6 +243,9 @@ __DEVICE__ int __ffs(int __a) { return __nv_ffs(__a); } __DEVICE__ int __ffsll(long long __a) { return __nv_ffsll(__a); } __DEVICE__ int __finite(double __a) { return __nv_isfinited(__a); } __DEVICE__ int __finitef(float __a) { return __nv_finitef(__a); } +#ifdef _MSC_VER +__DEVICE__ int __finitel(long double __a); +#endif __DEVICE__ int __float2int_rd(float __a) { return __nv_float2int_rd(__a); } __DEVICE__ int __float2int_rn(float __a) { return __nv_float2int_rn(__a); } __DEVICE__ int __float2int_ru(float __a) { return __nv_float2int_ru(__a); } @@ -445,8 +454,14 @@ __DEVICE__ float __int_as_float(int __a) { return __nv_int_as_float(__a); } __DEVICE__ int __isfinited(double __a) { return __nv_isfinited(__a); } __DEVICE__ int __isinf(double __a) { return __nv_isinfd(__a); } __DEVICE__ int __isinff(float __a) { return __nv_isinff(__a); } +#ifdef _MSC_VER +__DEVICE__ int __isinfl(long double __a); +#endif __DEVICE__ int __isnan(double __a) { return __nv_isnand(__a); } __DEVICE__ int __isnanf(float __a) { return __nv_isnanf(__a); } +#ifdef _MSC_VER +__DEVICE__ int __isnanl(long double __a); +#endif __DEVICE__ double __ll2double_rd(long long __a) { return __nv_ll2double_rd(__a); } @@ -520,8 +535,8 @@ __DEVICE__ unsigned int __sad(int __a, int __b, unsigned int __c) { __DEVICE__ float __saturatef(float __a) { return __nv_saturatef(__a); } __DEVICE__ int __signbitd(double __a) { return __nv_signbitd(__a); } __DEVICE__ int __signbitf(float __a) { return __nv_signbitf(__a); } -__DEVICE__ void __sincosf(float __a, float *__sptr, float *__cptr) { - return __nv_fast_sincosf(__a, __sptr, __cptr); +__DEVICE__ void __sincosf(float __a, float *__s, float *__c) { + return __nv_fast_sincosf(__a, __s, __c); } __DEVICE__ float __sinf(float __a) { return __nv_fast_sinf(__a); } __DEVICE__ int __syncthreads_and(int __a) { return __nvvm_bar0_and(__a); } @@ -1468,7 +1483,8 @@ __DEVICE__ unsigned int __vsubus4(unsigned int __a, unsigned int __b) { return r; } #endif // CUDA_VERSION >= 9020 -__DEVICE__ int abs(int __a) { return __nv_abs(__a); } +__DEVICE__ int abs(int __a) __NOEXCEPT { return __nv_abs(__a); } +__DEVICE__ double fabs(double __a) __NOEXCEPT { return __nv_fabs(__a); } __DEVICE__ double acos(double __a) { return __nv_acos(__a); } __DEVICE__ float acosf(float __a) { return __nv_acosf(__a); } __DEVICE__ double acosh(double __a) { return __nv_acosh(__a); } @@ -1487,8 +1503,10 @@ __DEVICE__ double cbrt(double __a) { return __nv_cbrt(__a); } __DEVICE__ float cbrtf(float __a) { return __nv_cbrtf(__a); } __DEVICE__ double ceil(double __a) 
{ return __nv_ceil(__a); } __DEVICE__ float ceilf(float __a) { return __nv_ceilf(__a); } +#ifndef _OPENMP __DEVICE__ int clock() { return __nvvm_read_ptx_sreg_clock(); } __DEVICE__ long long clock64() { return __nvvm_read_ptx_sreg_clock64(); } +#endif __DEVICE__ double copysign(double __a, double __b) { return __nv_copysign(__a, __b); } @@ -1525,7 +1543,6 @@ __DEVICE__ float exp2f(float __a) { return __nv_exp2f(__a); } __DEVICE__ float expf(float __a) { return __nv_expf(__a); } __DEVICE__ double expm1(double __a) { return __nv_expm1(__a); } __DEVICE__ float expm1f(float __a) { return __nv_expm1f(__a); } -__DEVICE__ double fabs(double __a) { return __nv_fabs(__a); } __DEVICE__ float fabsf(float __a) { return __nv_fabsf(__a); } __DEVICE__ double fdim(double __a, double __b) { return __nv_fdim(__a, __b); } __DEVICE__ float fdimf(float __a, float __b) { return __nv_fdimf(__a, __b); } @@ -1563,16 +1580,16 @@ __DEVICE__ double j1(double __a) { return __nv_j1(__a); } __DEVICE__ float j1f(float __a) { return __nv_j1f(__a); } __DEVICE__ double jn(int __n, double __a) { return __nv_jn(__n, __a); } __DEVICE__ float jnf(int __n, float __a) { return __nv_jnf(__n, __a); } -#if defined(__LP64__) -__DEVICE__ long labs(long __a) { return llabs(__a); }; +#if defined(__LP64__) || defined(_WIN64) +__DEVICE__ long labs(long __a) __NOEXCEPT { return __nv_llabs(__a); }; #else -__DEVICE__ long labs(long __a) { return __nv_abs(__a); }; +__DEVICE__ long labs(long __a) __NOEXCEPT { return __nv_abs(__a); }; #endif __DEVICE__ double ldexp(double __a, int __b) { return __nv_ldexp(__a, __b); } __DEVICE__ float ldexpf(float __a, int __b) { return __nv_ldexpf(__a, __b); } __DEVICE__ double lgamma(double __a) { return __nv_lgamma(__a); } __DEVICE__ float lgammaf(float __a) { return __nv_lgammaf(__a); } -__DEVICE__ long long llabs(long long __a) { return __nv_llabs(__a); } +__DEVICE__ long long llabs(long long __a) __NOEXCEPT { return __nv_llabs(__a); } __DEVICE__ long long llmax(long long __a, long long __b) { return __nv_llmax(__a, __b); } @@ -1597,7 +1614,7 @@ __DEVICE__ float logbf(float __a) { return __nv_logbf(__a); } __DEVICE__ float logf(float __a) { return __FAST_OR_SLOW(__nv_fast_logf, __nv_logf)(__a); } -#if defined(__LP64__) +#if defined(__LP64__) || defined(_WIN64) __DEVICE__ long lrint(double __a) { return llrint(__a); } __DEVICE__ long lrintf(float __a) { return __float2ll_rn(__a); } __DEVICE__ long lround(double __a) { return llround(__a); } @@ -1609,12 +1626,16 @@ __DEVICE__ long lround(double __a) { return round(__a); } __DEVICE__ long lroundf(float __a) { return roundf(__a); } #endif __DEVICE__ int max(int __a, int __b) { return __nv_max(__a, __b); } +// These functions shouldn't be declared when including this header +// for math function resolution purposes. 
+#ifndef _OPENMP __DEVICE__ void *memcpy(void *__a, const void *__b, size_t __c) { return __builtin_memcpy(__a, __b, __c); } __DEVICE__ void *memset(void *__a, int __b, size_t __c) { return __builtin_memset(__a, __b, __c); } +#endif __DEVICE__ int min(int __a, int __b) { return __nv_min(__a, __b); } __DEVICE__ double modf(double __a, double *__b) { return __nv_modf(__a, __b); } __DEVICE__ float modff(float __a, float *__b) { return __nv_modff(__a, __b); } @@ -1698,6 +1719,8 @@ __DEVICE__ double rsqrt(double __a) { return __nv_rsqrt(__a); } __DEVICE__ float rsqrtf(float __a) { return __nv_rsqrtf(__a); } __DEVICE__ double scalbn(double __a, int __b) { return __nv_scalbn(__a, __b); } __DEVICE__ float scalbnf(float __a, int __b) { return __nv_scalbnf(__a, __b); } +// TODO: remove once variant is supported +#ifndef _OPENMP __DEVICE__ double scalbln(double __a, long __b) { if (__b > INT_MAX) return __a > 0 ? HUGE_VAL : -HUGE_VAL; @@ -1712,18 +1735,19 @@ __DEVICE__ float scalblnf(float __a, long __b) { return __a > 0 ? 0.f : -0.f; return scalbnf(__a, (int)__b); } +#endif __DEVICE__ double sin(double __a) { return __nv_sin(__a); } -__DEVICE__ void sincos(double __a, double *__sptr, double *__cptr) { - return __nv_sincos(__a, __sptr, __cptr); +__DEVICE__ void sincos(double __a, double *__s, double *__c) { + return __nv_sincos(__a, __s, __c); } -__DEVICE__ void sincosf(float __a, float *__sptr, float *__cptr) { - return __FAST_OR_SLOW(__nv_fast_sincosf, __nv_sincosf)(__a, __sptr, __cptr); +__DEVICE__ void sincosf(float __a, float *__s, float *__c) { + return __FAST_OR_SLOW(__nv_fast_sincosf, __nv_sincosf)(__a, __s, __c); } -__DEVICE__ void sincospi(double __a, double *__sptr, double *__cptr) { - return __nv_sincospi(__a, __sptr, __cptr); +__DEVICE__ void sincospi(double __a, double *__s, double *__c) { + return __nv_sincospi(__a, __s, __c); } -__DEVICE__ void sincospif(float __a, float *__sptr, float *__cptr) { - return __nv_sincospif(__a, __sptr, __cptr); +__DEVICE__ void sincospif(float __a, float *__s, float *__c) { + return __nv_sincospif(__a, __s, __c); } __DEVICE__ float sinf(float __a) { return __FAST_OR_SLOW(__nv_fast_sinf, __nv_sinf)(__a); @@ -1763,6 +1787,7 @@ __DEVICE__ float y1f(float __a) { return __nv_y1f(__a); } __DEVICE__ double yn(int __a, double __b) { return __nv_yn(__a, __b); } __DEVICE__ float ynf(int __a, float __b) { return __nv_ynf(__a, __b); } +#undef __NOEXCEPT #pragma pop_macro("__DEVICE__") #pragma pop_macro("__FAST_OR_SLOW") #endif // __CLANG_CUDA_DEVICE_FUNCTIONS_H__ diff --git a/lib/include/__clang_cuda_intrinsics.h b/lib/include/__clang_cuda_intrinsics.h index 3c0cde94ed..2970d17f89 100644 --- a/lib/include/__clang_cuda_intrinsics.h +++ b/lib/include/__clang_cuda_intrinsics.h @@ -1,22 +1,8 @@ /*===--- __clang_cuda_intrinsics.h - Device-side CUDA intrinsic wrappers ---=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/__clang_cuda_libdevice_declares.h b/lib/include/__clang_cuda_libdevice_declares.h index 71df7f849d..4d70353394 100644 --- a/lib/include/__clang_cuda_libdevice_declares.h +++ b/lib/include/__clang_cuda_libdevice_declares.h @@ -1,22 +1,8 @@ /*===-- __clang_cuda_libdevice_declares.h - decls for libdevice functions --=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -24,443 +10,453 @@ #ifndef __CLANG_CUDA_LIBDEVICE_DECLARES_H__ #define __CLANG_CUDA_LIBDEVICE_DECLARES_H__ +#if defined(__cplusplus) extern "C" { +#endif -__device__ int __nv_abs(int __a); -__device__ double __nv_acos(double __a); -__device__ float __nv_acosf(float __a); -__device__ double __nv_acosh(double __a); -__device__ float __nv_acoshf(float __a); -__device__ double __nv_asin(double __a); -__device__ float __nv_asinf(float __a); -__device__ double __nv_asinh(double __a); -__device__ float __nv_asinhf(float __a); -__device__ double __nv_atan2(double __a, double __b); -__device__ float __nv_atan2f(float __a, float __b); -__device__ double __nv_atan(double __a); -__device__ float __nv_atanf(float __a); -__device__ double __nv_atanh(double __a); -__device__ float __nv_atanhf(float __a); -__device__ int __nv_brev(int __a); -__device__ long long __nv_brevll(long long __a); -__device__ int __nv_byte_perm(int __a, int __b, int __c); -__device__ double __nv_cbrt(double __a); -__device__ float __nv_cbrtf(float __a); -__device__ double __nv_ceil(double __a); -__device__ float __nv_ceilf(float __a); -__device__ int __nv_clz(int __a); -__device__ int __nv_clzll(long long __a); -__device__ double __nv_copysign(double __a, double __b); -__device__ float __nv_copysignf(float __a, float __b); -__device__ double __nv_cos(double __a); -__device__ float __nv_cosf(float __a); -__device__ double __nv_cosh(double __a); -__device__ float __nv_coshf(float __a); -__device__ double __nv_cospi(double __a); -__device__ float __nv_cospif(float __a); -__device__ double __nv_cyl_bessel_i0(double __a); -__device__ float __nv_cyl_bessel_i0f(float __a); -__device__ double __nv_cyl_bessel_i1(double __a); -__device__ float __nv_cyl_bessel_i1f(float __a); -__device__ double __nv_dadd_rd(double __a, double __b); -__device__ double __nv_dadd_rn(double __a, double __b); -__device__ double __nv_dadd_ru(double __a, double __b); -__device__ double __nv_dadd_rz(double __a, double __b); -__device__ double __nv_ddiv_rd(double __a, double __b); -__device__ double __nv_ddiv_rn(double __a, double __b); -__device__ double __nv_ddiv_ru(double __a, double __b); -__device__ double __nv_ddiv_rz(double __a, double __b); -__device__ double __nv_dmul_rd(double __a, double __b); -__device__ double __nv_dmul_rn(double __a, double __b); -__device__ double __nv_dmul_ru(double __a, double __b); -__device__ double __nv_dmul_rz(double __a, double __b); -__device__ float __nv_double2float_rd(double __a); -__device__ float __nv_double2float_rn(double __a); -__device__ float __nv_double2float_ru(double __a); -__device__ float __nv_double2float_rz(double __a); -__device__ int __nv_double2hiint(double __a); -__device__ int __nv_double2int_rd(double __a); -__device__ int __nv_double2int_rn(double __a); -__device__ int __nv_double2int_ru(double __a); -__device__ int __nv_double2int_rz(double __a); -__device__ long long __nv_double2ll_rd(double __a); -__device__ long long __nv_double2ll_rn(double __a); -__device__ long long __nv_double2ll_ru(double __a); -__device__ long long __nv_double2ll_rz(double __a); -__device__ int __nv_double2loint(double __a); -__device__ unsigned int __nv_double2uint_rd(double __a); -__device__ unsigned int __nv_double2uint_rn(double __a); -__device__ unsigned int __nv_double2uint_ru(double __a); -__device__ unsigned int __nv_double2uint_rz(double __a); -__device__ unsigned long long 
__nv_double2ull_rd(double __a); -__device__ unsigned long long __nv_double2ull_rn(double __a); -__device__ unsigned long long __nv_double2ull_ru(double __a); -__device__ unsigned long long __nv_double2ull_rz(double __a); -__device__ unsigned long long __nv_double_as_longlong(double __a); -__device__ double __nv_drcp_rd(double __a); -__device__ double __nv_drcp_rn(double __a); -__device__ double __nv_drcp_ru(double __a); -__device__ double __nv_drcp_rz(double __a); -__device__ double __nv_dsqrt_rd(double __a); -__device__ double __nv_dsqrt_rn(double __a); -__device__ double __nv_dsqrt_ru(double __a); -__device__ double __nv_dsqrt_rz(double __a); -__device__ double __nv_dsub_rd(double __a, double __b); -__device__ double __nv_dsub_rn(double __a, double __b); -__device__ double __nv_dsub_ru(double __a, double __b); -__device__ double __nv_dsub_rz(double __a, double __b); -__device__ double __nv_erfc(double __a); -__device__ float __nv_erfcf(float __a); -__device__ double __nv_erfcinv(double __a); -__device__ float __nv_erfcinvf(float __a); -__device__ double __nv_erfcx(double __a); -__device__ float __nv_erfcxf(float __a); -__device__ double __nv_erf(double __a); -__device__ float __nv_erff(float __a); -__device__ double __nv_erfinv(double __a); -__device__ float __nv_erfinvf(float __a); -__device__ double __nv_exp10(double __a); -__device__ float __nv_exp10f(float __a); -__device__ double __nv_exp2(double __a); -__device__ float __nv_exp2f(float __a); -__device__ double __nv_exp(double __a); -__device__ float __nv_expf(float __a); -__device__ double __nv_expm1(double __a); -__device__ float __nv_expm1f(float __a); -__device__ double __nv_fabs(double __a); -__device__ float __nv_fabsf(float __a); -__device__ float __nv_fadd_rd(float __a, float __b); -__device__ float __nv_fadd_rn(float __a, float __b); -__device__ float __nv_fadd_ru(float __a, float __b); -__device__ float __nv_fadd_rz(float __a, float __b); -__device__ float __nv_fast_cosf(float __a); -__device__ float __nv_fast_exp10f(float __a); -__device__ float __nv_fast_expf(float __a); -__device__ float __nv_fast_fdividef(float __a, float __b); -__device__ float __nv_fast_log10f(float __a); -__device__ float __nv_fast_log2f(float __a); -__device__ float __nv_fast_logf(float __a); -__device__ float __nv_fast_powf(float __a, float __b); -__device__ void __nv_fast_sincosf(float __a, float *__sptr, float *__cptr); -__device__ float __nv_fast_sinf(float __a); -__device__ float __nv_fast_tanf(float __a); -__device__ double __nv_fdim(double __a, double __b); -__device__ float __nv_fdimf(float __a, float __b); -__device__ float __nv_fdiv_rd(float __a, float __b); -__device__ float __nv_fdiv_rn(float __a, float __b); -__device__ float __nv_fdiv_ru(float __a, float __b); -__device__ float __nv_fdiv_rz(float __a, float __b); -__device__ int __nv_ffs(int __a); -__device__ int __nv_ffsll(long long __a); -__device__ int __nv_finitef(float __a); -__device__ unsigned short __nv_float2half_rn(float __a); -__device__ int __nv_float2int_rd(float __a); -__device__ int __nv_float2int_rn(float __a); -__device__ int __nv_float2int_ru(float __a); -__device__ int __nv_float2int_rz(float __a); -__device__ long long __nv_float2ll_rd(float __a); -__device__ long long __nv_float2ll_rn(float __a); -__device__ long long __nv_float2ll_ru(float __a); -__device__ long long __nv_float2ll_rz(float __a); -__device__ unsigned int __nv_float2uint_rd(float __a); -__device__ unsigned int __nv_float2uint_rn(float __a); -__device__ unsigned int __nv_float2uint_ru(float __a); 
-__device__ unsigned int __nv_float2uint_rz(float __a); -__device__ unsigned long long __nv_float2ull_rd(float __a); -__device__ unsigned long long __nv_float2ull_rn(float __a); -__device__ unsigned long long __nv_float2ull_ru(float __a); -__device__ unsigned long long __nv_float2ull_rz(float __a); -__device__ int __nv_float_as_int(float __a); -__device__ unsigned int __nv_float_as_uint(float __a); -__device__ double __nv_floor(double __a); -__device__ float __nv_floorf(float __a); -__device__ double __nv_fma(double __a, double __b, double __c); -__device__ float __nv_fmaf(float __a, float __b, float __c); -__device__ float __nv_fmaf_ieee_rd(float __a, float __b, float __c); -__device__ float __nv_fmaf_ieee_rn(float __a, float __b, float __c); -__device__ float __nv_fmaf_ieee_ru(float __a, float __b, float __c); -__device__ float __nv_fmaf_ieee_rz(float __a, float __b, float __c); -__device__ float __nv_fmaf_rd(float __a, float __b, float __c); -__device__ float __nv_fmaf_rn(float __a, float __b, float __c); -__device__ float __nv_fmaf_ru(float __a, float __b, float __c); -__device__ float __nv_fmaf_rz(float __a, float __b, float __c); -__device__ double __nv_fma_rd(double __a, double __b, double __c); -__device__ double __nv_fma_rn(double __a, double __b, double __c); -__device__ double __nv_fma_ru(double __a, double __b, double __c); -__device__ double __nv_fma_rz(double __a, double __b, double __c); -__device__ double __nv_fmax(double __a, double __b); -__device__ float __nv_fmaxf(float __a, float __b); -__device__ double __nv_fmin(double __a, double __b); -__device__ float __nv_fminf(float __a, float __b); -__device__ double __nv_fmod(double __a, double __b); -__device__ float __nv_fmodf(float __a, float __b); -__device__ float __nv_fmul_rd(float __a, float __b); -__device__ float __nv_fmul_rn(float __a, float __b); -__device__ float __nv_fmul_ru(float __a, float __b); -__device__ float __nv_fmul_rz(float __a, float __b); -__device__ float __nv_frcp_rd(float __a); -__device__ float __nv_frcp_rn(float __a); -__device__ float __nv_frcp_ru(float __a); -__device__ float __nv_frcp_rz(float __a); -__device__ double __nv_frexp(double __a, int *__b); -__device__ float __nv_frexpf(float __a, int *__b); -__device__ float __nv_frsqrt_rn(float __a); -__device__ float __nv_fsqrt_rd(float __a); -__device__ float __nv_fsqrt_rn(float __a); -__device__ float __nv_fsqrt_ru(float __a); -__device__ float __nv_fsqrt_rz(float __a); -__device__ float __nv_fsub_rd(float __a, float __b); -__device__ float __nv_fsub_rn(float __a, float __b); -__device__ float __nv_fsub_ru(float __a, float __b); -__device__ float __nv_fsub_rz(float __a, float __b); -__device__ int __nv_hadd(int __a, int __b); -__device__ float __nv_half2float(unsigned short __h); -__device__ double __nv_hiloint2double(int __a, int __b); -__device__ double __nv_hypot(double __a, double __b); -__device__ float __nv_hypotf(float __a, float __b); -__device__ int __nv_ilogb(double __a); -__device__ int __nv_ilogbf(float __a); -__device__ double __nv_int2double_rn(int __a); -__device__ float __nv_int2float_rd(int __a); -__device__ float __nv_int2float_rn(int __a); -__device__ float __nv_int2float_ru(int __a); -__device__ float __nv_int2float_rz(int __a); -__device__ float __nv_int_as_float(int __a); -__device__ int __nv_isfinited(double __a); -__device__ int __nv_isinfd(double __a); -__device__ int __nv_isinff(float __a); -__device__ int __nv_isnand(double __a); -__device__ int __nv_isnanf(float __a); -__device__ double __nv_j0(double __a); 
-__device__ float __nv_j0f(float __a); -__device__ double __nv_j1(double __a); -__device__ float __nv_j1f(float __a); -__device__ float __nv_jnf(int __a, float __b); -__device__ double __nv_jn(int __a, double __b); -__device__ double __nv_ldexp(double __a, int __b); -__device__ float __nv_ldexpf(float __a, int __b); -__device__ double __nv_lgamma(double __a); -__device__ float __nv_lgammaf(float __a); -__device__ double __nv_ll2double_rd(long long __a); -__device__ double __nv_ll2double_rn(long long __a); -__device__ double __nv_ll2double_ru(long long __a); -__device__ double __nv_ll2double_rz(long long __a); -__device__ float __nv_ll2float_rd(long long __a); -__device__ float __nv_ll2float_rn(long long __a); -__device__ float __nv_ll2float_ru(long long __a); -__device__ float __nv_ll2float_rz(long long __a); -__device__ long long __nv_llabs(long long __a); -__device__ long long __nv_llmax(long long __a, long long __b); -__device__ long long __nv_llmin(long long __a, long long __b); -__device__ long long __nv_llrint(double __a); -__device__ long long __nv_llrintf(float __a); -__device__ long long __nv_llround(double __a); -__device__ long long __nv_llroundf(float __a); -__device__ double __nv_log10(double __a); -__device__ float __nv_log10f(float __a); -__device__ double __nv_log1p(double __a); -__device__ float __nv_log1pf(float __a); -__device__ double __nv_log2(double __a); -__device__ float __nv_log2f(float __a); -__device__ double __nv_logb(double __a); -__device__ float __nv_logbf(float __a); -__device__ double __nv_log(double __a); -__device__ float __nv_logf(float __a); -__device__ double __nv_longlong_as_double(long long __a); -__device__ int __nv_max(int __a, int __b); -__device__ int __nv_min(int __a, int __b); -__device__ double __nv_modf(double __a, double *__b); -__device__ float __nv_modff(float __a, float *__b); -__device__ int __nv_mul24(int __a, int __b); -__device__ long long __nv_mul64hi(long long __a, long long __b); -__device__ int __nv_mulhi(int __a, int __b); -__device__ double __nv_nan(const signed char *__a); -__device__ float __nv_nanf(const signed char *__a); -__device__ double __nv_nearbyint(double __a); -__device__ float __nv_nearbyintf(float __a); -__device__ double __nv_nextafter(double __a, double __b); -__device__ float __nv_nextafterf(float __a, float __b); -__device__ double __nv_norm3d(double __a, double __b, double __c); -__device__ float __nv_norm3df(float __a, float __b, float __c); -__device__ double __nv_norm4d(double __a, double __b, double __c, double __d); -__device__ float __nv_norm4df(float __a, float __b, float __c, float __d); -__device__ double __nv_normcdf(double __a); -__device__ float __nv_normcdff(float __a); -__device__ double __nv_normcdfinv(double __a); -__device__ float __nv_normcdfinvf(float __a); -__device__ float __nv_normf(int __a, const float *__b); -__device__ double __nv_norm(int __a, const double *__b); -__device__ int __nv_popc(int __a); -__device__ int __nv_popcll(long long __a); -__device__ double __nv_pow(double __a, double __b); -__device__ float __nv_powf(float __a, float __b); -__device__ double __nv_powi(double __a, int __b); -__device__ float __nv_powif(float __a, int __b); -__device__ double __nv_rcbrt(double __a); -__device__ float __nv_rcbrtf(float __a); -__device__ double __nv_rcp64h(double __a); -__device__ double __nv_remainder(double __a, double __b); -__device__ float __nv_remainderf(float __a, float __b); -__device__ double __nv_remquo(double __a, double __b, int *__c); -__device__ float __nv_remquof(float 
__a, float __b, int *__c); -__device__ int __nv_rhadd(int __a, int __b); -__device__ double __nv_rhypot(double __a, double __b); -__device__ float __nv_rhypotf(float __a, float __b); -__device__ double __nv_rint(double __a); -__device__ float __nv_rintf(float __a); -__device__ double __nv_rnorm3d(double __a, double __b, double __c); -__device__ float __nv_rnorm3df(float __a, float __b, float __c); -__device__ double __nv_rnorm4d(double __a, double __b, double __c, double __d); -__device__ float __nv_rnorm4df(float __a, float __b, float __c, float __d); -__device__ float __nv_rnormf(int __a, const float *__b); -__device__ double __nv_rnorm(int __a, const double *__b); -__device__ double __nv_round(double __a); -__device__ float __nv_roundf(float __a); -__device__ double __nv_rsqrt(double __a); -__device__ float __nv_rsqrtf(float __a); -__device__ int __nv_sad(int __a, int __b, int __c); -__device__ float __nv_saturatef(float __a); -__device__ double __nv_scalbn(double __a, int __b); -__device__ float __nv_scalbnf(float __a, int __b); -__device__ int __nv_signbitd(double __a); -__device__ int __nv_signbitf(float __a); -__device__ void __nv_sincos(double __a, double *__b, double *__c); -__device__ void __nv_sincosf(float __a, float *__b, float *__c); -__device__ void __nv_sincospi(double __a, double *__b, double *__c); -__device__ void __nv_sincospif(float __a, float *__b, float *__c); -__device__ double __nv_sin(double __a); -__device__ float __nv_sinf(float __a); -__device__ double __nv_sinh(double __a); -__device__ float __nv_sinhf(float __a); -__device__ double __nv_sinpi(double __a); -__device__ float __nv_sinpif(float __a); -__device__ double __nv_sqrt(double __a); -__device__ float __nv_sqrtf(float __a); -__device__ double __nv_tan(double __a); -__device__ float __nv_tanf(float __a); -__device__ double __nv_tanh(double __a); -__device__ float __nv_tanhf(float __a); -__device__ double __nv_tgamma(double __a); -__device__ float __nv_tgammaf(float __a); -__device__ double __nv_trunc(double __a); -__device__ float __nv_truncf(float __a); -__device__ int __nv_uhadd(unsigned int __a, unsigned int __b); -__device__ double __nv_uint2double_rn(unsigned int __i); -__device__ float __nv_uint2float_rd(unsigned int __a); -__device__ float __nv_uint2float_rn(unsigned int __a); -__device__ float __nv_uint2float_ru(unsigned int __a); -__device__ float __nv_uint2float_rz(unsigned int __a); -__device__ float __nv_uint_as_float(unsigned int __a); -__device__ double __nv_ull2double_rd(unsigned long long __a); -__device__ double __nv_ull2double_rn(unsigned long long __a); -__device__ double __nv_ull2double_ru(unsigned long long __a); -__device__ double __nv_ull2double_rz(unsigned long long __a); -__device__ float __nv_ull2float_rd(unsigned long long __a); -__device__ float __nv_ull2float_rn(unsigned long long __a); -__device__ float __nv_ull2float_ru(unsigned long long __a); -__device__ float __nv_ull2float_rz(unsigned long long __a); -__device__ unsigned long long __nv_ullmax(unsigned long long __a, +#if defined(_OPENMP) +#define __DEVICE__ +#elif defined(__CUDA__) +#define __DEVICE__ __device__ +#endif + +__DEVICE__ int __nv_abs(int __a); +__DEVICE__ double __nv_acos(double __a); +__DEVICE__ float __nv_acosf(float __a); +__DEVICE__ double __nv_acosh(double __a); +__DEVICE__ float __nv_acoshf(float __a); +__DEVICE__ double __nv_asin(double __a); +__DEVICE__ float __nv_asinf(float __a); +__DEVICE__ double __nv_asinh(double __a); +__DEVICE__ float __nv_asinhf(float __a); +__DEVICE__ double __nv_atan2(double 
__a, double __b); +__DEVICE__ float __nv_atan2f(float __a, float __b); +__DEVICE__ double __nv_atan(double __a); +__DEVICE__ float __nv_atanf(float __a); +__DEVICE__ double __nv_atanh(double __a); +__DEVICE__ float __nv_atanhf(float __a); +__DEVICE__ int __nv_brev(int __a); +__DEVICE__ long long __nv_brevll(long long __a); +__DEVICE__ int __nv_byte_perm(int __a, int __b, int __c); +__DEVICE__ double __nv_cbrt(double __a); +__DEVICE__ float __nv_cbrtf(float __a); +__DEVICE__ double __nv_ceil(double __a); +__DEVICE__ float __nv_ceilf(float __a); +__DEVICE__ int __nv_clz(int __a); +__DEVICE__ int __nv_clzll(long long __a); +__DEVICE__ double __nv_copysign(double __a, double __b); +__DEVICE__ float __nv_copysignf(float __a, float __b); +__DEVICE__ double __nv_cos(double __a); +__DEVICE__ float __nv_cosf(float __a); +__DEVICE__ double __nv_cosh(double __a); +__DEVICE__ float __nv_coshf(float __a); +__DEVICE__ double __nv_cospi(double __a); +__DEVICE__ float __nv_cospif(float __a); +__DEVICE__ double __nv_cyl_bessel_i0(double __a); +__DEVICE__ float __nv_cyl_bessel_i0f(float __a); +__DEVICE__ double __nv_cyl_bessel_i1(double __a); +__DEVICE__ float __nv_cyl_bessel_i1f(float __a); +__DEVICE__ double __nv_dadd_rd(double __a, double __b); +__DEVICE__ double __nv_dadd_rn(double __a, double __b); +__DEVICE__ double __nv_dadd_ru(double __a, double __b); +__DEVICE__ double __nv_dadd_rz(double __a, double __b); +__DEVICE__ double __nv_ddiv_rd(double __a, double __b); +__DEVICE__ double __nv_ddiv_rn(double __a, double __b); +__DEVICE__ double __nv_ddiv_ru(double __a, double __b); +__DEVICE__ double __nv_ddiv_rz(double __a, double __b); +__DEVICE__ double __nv_dmul_rd(double __a, double __b); +__DEVICE__ double __nv_dmul_rn(double __a, double __b); +__DEVICE__ double __nv_dmul_ru(double __a, double __b); +__DEVICE__ double __nv_dmul_rz(double __a, double __b); +__DEVICE__ float __nv_double2float_rd(double __a); +__DEVICE__ float __nv_double2float_rn(double __a); +__DEVICE__ float __nv_double2float_ru(double __a); +__DEVICE__ float __nv_double2float_rz(double __a); +__DEVICE__ int __nv_double2hiint(double __a); +__DEVICE__ int __nv_double2int_rd(double __a); +__DEVICE__ int __nv_double2int_rn(double __a); +__DEVICE__ int __nv_double2int_ru(double __a); +__DEVICE__ int __nv_double2int_rz(double __a); +__DEVICE__ long long __nv_double2ll_rd(double __a); +__DEVICE__ long long __nv_double2ll_rn(double __a); +__DEVICE__ long long __nv_double2ll_ru(double __a); +__DEVICE__ long long __nv_double2ll_rz(double __a); +__DEVICE__ int __nv_double2loint(double __a); +__DEVICE__ unsigned int __nv_double2uint_rd(double __a); +__DEVICE__ unsigned int __nv_double2uint_rn(double __a); +__DEVICE__ unsigned int __nv_double2uint_ru(double __a); +__DEVICE__ unsigned int __nv_double2uint_rz(double __a); +__DEVICE__ unsigned long long __nv_double2ull_rd(double __a); +__DEVICE__ unsigned long long __nv_double2ull_rn(double __a); +__DEVICE__ unsigned long long __nv_double2ull_ru(double __a); +__DEVICE__ unsigned long long __nv_double2ull_rz(double __a); +__DEVICE__ unsigned long long __nv_double_as_longlong(double __a); +__DEVICE__ double __nv_drcp_rd(double __a); +__DEVICE__ double __nv_drcp_rn(double __a); +__DEVICE__ double __nv_drcp_ru(double __a); +__DEVICE__ double __nv_drcp_rz(double __a); +__DEVICE__ double __nv_dsqrt_rd(double __a); +__DEVICE__ double __nv_dsqrt_rn(double __a); +__DEVICE__ double __nv_dsqrt_ru(double __a); +__DEVICE__ double __nv_dsqrt_rz(double __a); +__DEVICE__ double __nv_dsub_rd(double __a, double 
__b); +__DEVICE__ double __nv_dsub_rn(double __a, double __b); +__DEVICE__ double __nv_dsub_ru(double __a, double __b); +__DEVICE__ double __nv_dsub_rz(double __a, double __b); +__DEVICE__ double __nv_erfc(double __a); +__DEVICE__ float __nv_erfcf(float __a); +__DEVICE__ double __nv_erfcinv(double __a); +__DEVICE__ float __nv_erfcinvf(float __a); +__DEVICE__ double __nv_erfcx(double __a); +__DEVICE__ float __nv_erfcxf(float __a); +__DEVICE__ double __nv_erf(double __a); +__DEVICE__ float __nv_erff(float __a); +__DEVICE__ double __nv_erfinv(double __a); +__DEVICE__ float __nv_erfinvf(float __a); +__DEVICE__ double __nv_exp10(double __a); +__DEVICE__ float __nv_exp10f(float __a); +__DEVICE__ double __nv_exp2(double __a); +__DEVICE__ float __nv_exp2f(float __a); +__DEVICE__ double __nv_exp(double __a); +__DEVICE__ float __nv_expf(float __a); +__DEVICE__ double __nv_expm1(double __a); +__DEVICE__ float __nv_expm1f(float __a); +__DEVICE__ double __nv_fabs(double __a); +__DEVICE__ float __nv_fabsf(float __a); +__DEVICE__ float __nv_fadd_rd(float __a, float __b); +__DEVICE__ float __nv_fadd_rn(float __a, float __b); +__DEVICE__ float __nv_fadd_ru(float __a, float __b); +__DEVICE__ float __nv_fadd_rz(float __a, float __b); +__DEVICE__ float __nv_fast_cosf(float __a); +__DEVICE__ float __nv_fast_exp10f(float __a); +__DEVICE__ float __nv_fast_expf(float __a); +__DEVICE__ float __nv_fast_fdividef(float __a, float __b); +__DEVICE__ float __nv_fast_log10f(float __a); +__DEVICE__ float __nv_fast_log2f(float __a); +__DEVICE__ float __nv_fast_logf(float __a); +__DEVICE__ float __nv_fast_powf(float __a, float __b); +__DEVICE__ void __nv_fast_sincosf(float __a, float *__s, float *__c); +__DEVICE__ float __nv_fast_sinf(float __a); +__DEVICE__ float __nv_fast_tanf(float __a); +__DEVICE__ double __nv_fdim(double __a, double __b); +__DEVICE__ float __nv_fdimf(float __a, float __b); +__DEVICE__ float __nv_fdiv_rd(float __a, float __b); +__DEVICE__ float __nv_fdiv_rn(float __a, float __b); +__DEVICE__ float __nv_fdiv_ru(float __a, float __b); +__DEVICE__ float __nv_fdiv_rz(float __a, float __b); +__DEVICE__ int __nv_ffs(int __a); +__DEVICE__ int __nv_ffsll(long long __a); +__DEVICE__ int __nv_finitef(float __a); +__DEVICE__ unsigned short __nv_float2half_rn(float __a); +__DEVICE__ int __nv_float2int_rd(float __a); +__DEVICE__ int __nv_float2int_rn(float __a); +__DEVICE__ int __nv_float2int_ru(float __a); +__DEVICE__ int __nv_float2int_rz(float __a); +__DEVICE__ long long __nv_float2ll_rd(float __a); +__DEVICE__ long long __nv_float2ll_rn(float __a); +__DEVICE__ long long __nv_float2ll_ru(float __a); +__DEVICE__ long long __nv_float2ll_rz(float __a); +__DEVICE__ unsigned int __nv_float2uint_rd(float __a); +__DEVICE__ unsigned int __nv_float2uint_rn(float __a); +__DEVICE__ unsigned int __nv_float2uint_ru(float __a); +__DEVICE__ unsigned int __nv_float2uint_rz(float __a); +__DEVICE__ unsigned long long __nv_float2ull_rd(float __a); +__DEVICE__ unsigned long long __nv_float2ull_rn(float __a); +__DEVICE__ unsigned long long __nv_float2ull_ru(float __a); +__DEVICE__ unsigned long long __nv_float2ull_rz(float __a); +__DEVICE__ int __nv_float_as_int(float __a); +__DEVICE__ unsigned int __nv_float_as_uint(float __a); +__DEVICE__ double __nv_floor(double __a); +__DEVICE__ float __nv_floorf(float __a); +__DEVICE__ double __nv_fma(double __a, double __b, double __c); +__DEVICE__ float __nv_fmaf(float __a, float __b, float __c); +__DEVICE__ float __nv_fmaf_ieee_rd(float __a, float __b, float __c); +__DEVICE__ float 
__nv_fmaf_ieee_rn(float __a, float __b, float __c); +__DEVICE__ float __nv_fmaf_ieee_ru(float __a, float __b, float __c); +__DEVICE__ float __nv_fmaf_ieee_rz(float __a, float __b, float __c); +__DEVICE__ float __nv_fmaf_rd(float __a, float __b, float __c); +__DEVICE__ float __nv_fmaf_rn(float __a, float __b, float __c); +__DEVICE__ float __nv_fmaf_ru(float __a, float __b, float __c); +__DEVICE__ float __nv_fmaf_rz(float __a, float __b, float __c); +__DEVICE__ double __nv_fma_rd(double __a, double __b, double __c); +__DEVICE__ double __nv_fma_rn(double __a, double __b, double __c); +__DEVICE__ double __nv_fma_ru(double __a, double __b, double __c); +__DEVICE__ double __nv_fma_rz(double __a, double __b, double __c); +__DEVICE__ double __nv_fmax(double __a, double __b); +__DEVICE__ float __nv_fmaxf(float __a, float __b); +__DEVICE__ double __nv_fmin(double __a, double __b); +__DEVICE__ float __nv_fminf(float __a, float __b); +__DEVICE__ double __nv_fmod(double __a, double __b); +__DEVICE__ float __nv_fmodf(float __a, float __b); +__DEVICE__ float __nv_fmul_rd(float __a, float __b); +__DEVICE__ float __nv_fmul_rn(float __a, float __b); +__DEVICE__ float __nv_fmul_ru(float __a, float __b); +__DEVICE__ float __nv_fmul_rz(float __a, float __b); +__DEVICE__ float __nv_frcp_rd(float __a); +__DEVICE__ float __nv_frcp_rn(float __a); +__DEVICE__ float __nv_frcp_ru(float __a); +__DEVICE__ float __nv_frcp_rz(float __a); +__DEVICE__ double __nv_frexp(double __a, int *__b); +__DEVICE__ float __nv_frexpf(float __a, int *__b); +__DEVICE__ float __nv_frsqrt_rn(float __a); +__DEVICE__ float __nv_fsqrt_rd(float __a); +__DEVICE__ float __nv_fsqrt_rn(float __a); +__DEVICE__ float __nv_fsqrt_ru(float __a); +__DEVICE__ float __nv_fsqrt_rz(float __a); +__DEVICE__ float __nv_fsub_rd(float __a, float __b); +__DEVICE__ float __nv_fsub_rn(float __a, float __b); +__DEVICE__ float __nv_fsub_ru(float __a, float __b); +__DEVICE__ float __nv_fsub_rz(float __a, float __b); +__DEVICE__ int __nv_hadd(int __a, int __b); +__DEVICE__ float __nv_half2float(unsigned short __h); +__DEVICE__ double __nv_hiloint2double(int __a, int __b); +__DEVICE__ double __nv_hypot(double __a, double __b); +__DEVICE__ float __nv_hypotf(float __a, float __b); +__DEVICE__ int __nv_ilogb(double __a); +__DEVICE__ int __nv_ilogbf(float __a); +__DEVICE__ double __nv_int2double_rn(int __a); +__DEVICE__ float __nv_int2float_rd(int __a); +__DEVICE__ float __nv_int2float_rn(int __a); +__DEVICE__ float __nv_int2float_ru(int __a); +__DEVICE__ float __nv_int2float_rz(int __a); +__DEVICE__ float __nv_int_as_float(int __a); +__DEVICE__ int __nv_isfinited(double __a); +__DEVICE__ int __nv_isinfd(double __a); +__DEVICE__ int __nv_isinff(float __a); +__DEVICE__ int __nv_isnand(double __a); +__DEVICE__ int __nv_isnanf(float __a); +__DEVICE__ double __nv_j0(double __a); +__DEVICE__ float __nv_j0f(float __a); +__DEVICE__ double __nv_j1(double __a); +__DEVICE__ float __nv_j1f(float __a); +__DEVICE__ float __nv_jnf(int __a, float __b); +__DEVICE__ double __nv_jn(int __a, double __b); +__DEVICE__ double __nv_ldexp(double __a, int __b); +__DEVICE__ float __nv_ldexpf(float __a, int __b); +__DEVICE__ double __nv_lgamma(double __a); +__DEVICE__ float __nv_lgammaf(float __a); +__DEVICE__ double __nv_ll2double_rd(long long __a); +__DEVICE__ double __nv_ll2double_rn(long long __a); +__DEVICE__ double __nv_ll2double_ru(long long __a); +__DEVICE__ double __nv_ll2double_rz(long long __a); +__DEVICE__ float __nv_ll2float_rd(long long __a); +__DEVICE__ float __nv_ll2float_rn(long long 
__a); +__DEVICE__ float __nv_ll2float_ru(long long __a); +__DEVICE__ float __nv_ll2float_rz(long long __a); +__DEVICE__ long long __nv_llabs(long long __a); +__DEVICE__ long long __nv_llmax(long long __a, long long __b); +__DEVICE__ long long __nv_llmin(long long __a, long long __b); +__DEVICE__ long long __nv_llrint(double __a); +__DEVICE__ long long __nv_llrintf(float __a); +__DEVICE__ long long __nv_llround(double __a); +__DEVICE__ long long __nv_llroundf(float __a); +__DEVICE__ double __nv_log10(double __a); +__DEVICE__ float __nv_log10f(float __a); +__DEVICE__ double __nv_log1p(double __a); +__DEVICE__ float __nv_log1pf(float __a); +__DEVICE__ double __nv_log2(double __a); +__DEVICE__ float __nv_log2f(float __a); +__DEVICE__ double __nv_logb(double __a); +__DEVICE__ float __nv_logbf(float __a); +__DEVICE__ double __nv_log(double __a); +__DEVICE__ float __nv_logf(float __a); +__DEVICE__ double __nv_longlong_as_double(long long __a); +__DEVICE__ int __nv_max(int __a, int __b); +__DEVICE__ int __nv_min(int __a, int __b); +__DEVICE__ double __nv_modf(double __a, double *__b); +__DEVICE__ float __nv_modff(float __a, float *__b); +__DEVICE__ int __nv_mul24(int __a, int __b); +__DEVICE__ long long __nv_mul64hi(long long __a, long long __b); +__DEVICE__ int __nv_mulhi(int __a, int __b); +__DEVICE__ double __nv_nan(const signed char *__a); +__DEVICE__ float __nv_nanf(const signed char *__a); +__DEVICE__ double __nv_nearbyint(double __a); +__DEVICE__ float __nv_nearbyintf(float __a); +__DEVICE__ double __nv_nextafter(double __a, double __b); +__DEVICE__ float __nv_nextafterf(float __a, float __b); +__DEVICE__ double __nv_norm3d(double __a, double __b, double __c); +__DEVICE__ float __nv_norm3df(float __a, float __b, float __c); +__DEVICE__ double __nv_norm4d(double __a, double __b, double __c, double __d); +__DEVICE__ float __nv_norm4df(float __a, float __b, float __c, float __d); +__DEVICE__ double __nv_normcdf(double __a); +__DEVICE__ float __nv_normcdff(float __a); +__DEVICE__ double __nv_normcdfinv(double __a); +__DEVICE__ float __nv_normcdfinvf(float __a); +__DEVICE__ float __nv_normf(int __a, const float *__b); +__DEVICE__ double __nv_norm(int __a, const double *__b); +__DEVICE__ int __nv_popc(int __a); +__DEVICE__ int __nv_popcll(long long __a); +__DEVICE__ double __nv_pow(double __a, double __b); +__DEVICE__ float __nv_powf(float __a, float __b); +__DEVICE__ double __nv_powi(double __a, int __b); +__DEVICE__ float __nv_powif(float __a, int __b); +__DEVICE__ double __nv_rcbrt(double __a); +__DEVICE__ float __nv_rcbrtf(float __a); +__DEVICE__ double __nv_rcp64h(double __a); +__DEVICE__ double __nv_remainder(double __a, double __b); +__DEVICE__ float __nv_remainderf(float __a, float __b); +__DEVICE__ double __nv_remquo(double __a, double __b, int *__c); +__DEVICE__ float __nv_remquof(float __a, float __b, int *__c); +__DEVICE__ int __nv_rhadd(int __a, int __b); +__DEVICE__ double __nv_rhypot(double __a, double __b); +__DEVICE__ float __nv_rhypotf(float __a, float __b); +__DEVICE__ double __nv_rint(double __a); +__DEVICE__ float __nv_rintf(float __a); +__DEVICE__ double __nv_rnorm3d(double __a, double __b, double __c); +__DEVICE__ float __nv_rnorm3df(float __a, float __b, float __c); +__DEVICE__ double __nv_rnorm4d(double __a, double __b, double __c, double __d); +__DEVICE__ float __nv_rnorm4df(float __a, float __b, float __c, float __d); +__DEVICE__ float __nv_rnormf(int __a, const float *__b); +__DEVICE__ double __nv_rnorm(int __a, const double *__b); +__DEVICE__ double __nv_round(double 
__a); +__DEVICE__ float __nv_roundf(float __a); +__DEVICE__ double __nv_rsqrt(double __a); +__DEVICE__ float __nv_rsqrtf(float __a); +__DEVICE__ int __nv_sad(int __a, int __b, int __c); +__DEVICE__ float __nv_saturatef(float __a); +__DEVICE__ double __nv_scalbn(double __a, int __b); +__DEVICE__ float __nv_scalbnf(float __a, int __b); +__DEVICE__ int __nv_signbitd(double __a); +__DEVICE__ int __nv_signbitf(float __a); +__DEVICE__ void __nv_sincos(double __a, double *__b, double *__c); +__DEVICE__ void __nv_sincosf(float __a, float *__b, float *__c); +__DEVICE__ void __nv_sincospi(double __a, double *__b, double *__c); +__DEVICE__ void __nv_sincospif(float __a, float *__b, float *__c); +__DEVICE__ double __nv_sin(double __a); +__DEVICE__ float __nv_sinf(float __a); +__DEVICE__ double __nv_sinh(double __a); +__DEVICE__ float __nv_sinhf(float __a); +__DEVICE__ double __nv_sinpi(double __a); +__DEVICE__ float __nv_sinpif(float __a); +__DEVICE__ double __nv_sqrt(double __a); +__DEVICE__ float __nv_sqrtf(float __a); +__DEVICE__ double __nv_tan(double __a); +__DEVICE__ float __nv_tanf(float __a); +__DEVICE__ double __nv_tanh(double __a); +__DEVICE__ float __nv_tanhf(float __a); +__DEVICE__ double __nv_tgamma(double __a); +__DEVICE__ float __nv_tgammaf(float __a); +__DEVICE__ double __nv_trunc(double __a); +__DEVICE__ float __nv_truncf(float __a); +__DEVICE__ int __nv_uhadd(unsigned int __a, unsigned int __b); +__DEVICE__ double __nv_uint2double_rn(unsigned int __i); +__DEVICE__ float __nv_uint2float_rd(unsigned int __a); +__DEVICE__ float __nv_uint2float_rn(unsigned int __a); +__DEVICE__ float __nv_uint2float_ru(unsigned int __a); +__DEVICE__ float __nv_uint2float_rz(unsigned int __a); +__DEVICE__ float __nv_uint_as_float(unsigned int __a); +__DEVICE__ double __nv_ull2double_rd(unsigned long long __a); +__DEVICE__ double __nv_ull2double_rn(unsigned long long __a); +__DEVICE__ double __nv_ull2double_ru(unsigned long long __a); +__DEVICE__ double __nv_ull2double_rz(unsigned long long __a); +__DEVICE__ float __nv_ull2float_rd(unsigned long long __a); +__DEVICE__ float __nv_ull2float_rn(unsigned long long __a); +__DEVICE__ float __nv_ull2float_ru(unsigned long long __a); +__DEVICE__ float __nv_ull2float_rz(unsigned long long __a); +__DEVICE__ unsigned long long __nv_ullmax(unsigned long long __a, unsigned long long __b); -__device__ unsigned long long __nv_ullmin(unsigned long long __a, +__DEVICE__ unsigned long long __nv_ullmin(unsigned long long __a, unsigned long long __b); -__device__ unsigned int __nv_umax(unsigned int __a, unsigned int __b); -__device__ unsigned int __nv_umin(unsigned int __a, unsigned int __b); -__device__ unsigned int __nv_umul24(unsigned int __a, unsigned int __b); -__device__ unsigned long long __nv_umul64hi(unsigned long long __a, +__DEVICE__ unsigned int __nv_umax(unsigned int __a, unsigned int __b); +__DEVICE__ unsigned int __nv_umin(unsigned int __a, unsigned int __b); +__DEVICE__ unsigned int __nv_umul24(unsigned int __a, unsigned int __b); +__DEVICE__ unsigned long long __nv_umul64hi(unsigned long long __a, unsigned long long __b); -__device__ unsigned int __nv_umulhi(unsigned int __a, unsigned int __b); -__device__ unsigned int __nv_urhadd(unsigned int __a, unsigned int __b); -__device__ unsigned int __nv_usad(unsigned int __a, unsigned int __b, +__DEVICE__ unsigned int __nv_umulhi(unsigned int __a, unsigned int __b); +__DEVICE__ unsigned int __nv_urhadd(unsigned int __a, unsigned int __b); +__DEVICE__ unsigned int __nv_usad(unsigned int __a, unsigned int __b, 
unsigned int __c); #if CUDA_VERSION >= 9000 && CUDA_VERSION < 9020 -__device__ int __nv_vabs2(int __a); -__device__ int __nv_vabs4(int __a); -__device__ int __nv_vabsdiffs2(int __a, int __b); -__device__ int __nv_vabsdiffs4(int __a, int __b); -__device__ int __nv_vabsdiffu2(int __a, int __b); -__device__ int __nv_vabsdiffu4(int __a, int __b); -__device__ int __nv_vabsss2(int __a); -__device__ int __nv_vabsss4(int __a); -__device__ int __nv_vadd2(int __a, int __b); -__device__ int __nv_vadd4(int __a, int __b); -__device__ int __nv_vaddss2(int __a, int __b); -__device__ int __nv_vaddss4(int __a, int __b); -__device__ int __nv_vaddus2(int __a, int __b); -__device__ int __nv_vaddus4(int __a, int __b); -__device__ int __nv_vavgs2(int __a, int __b); -__device__ int __nv_vavgs4(int __a, int __b); -__device__ int __nv_vavgu2(int __a, int __b); -__device__ int __nv_vavgu4(int __a, int __b); -__device__ int __nv_vcmpeq2(int __a, int __b); -__device__ int __nv_vcmpeq4(int __a, int __b); -__device__ int __nv_vcmpges2(int __a, int __b); -__device__ int __nv_vcmpges4(int __a, int __b); -__device__ int __nv_vcmpgeu2(int __a, int __b); -__device__ int __nv_vcmpgeu4(int __a, int __b); -__device__ int __nv_vcmpgts2(int __a, int __b); -__device__ int __nv_vcmpgts4(int __a, int __b); -__device__ int __nv_vcmpgtu2(int __a, int __b); -__device__ int __nv_vcmpgtu4(int __a, int __b); -__device__ int __nv_vcmples2(int __a, int __b); -__device__ int __nv_vcmples4(int __a, int __b); -__device__ int __nv_vcmpleu2(int __a, int __b); -__device__ int __nv_vcmpleu4(int __a, int __b); -__device__ int __nv_vcmplts2(int __a, int __b); -__device__ int __nv_vcmplts4(int __a, int __b); -__device__ int __nv_vcmpltu2(int __a, int __b); -__device__ int __nv_vcmpltu4(int __a, int __b); -__device__ int __nv_vcmpne2(int __a, int __b); -__device__ int __nv_vcmpne4(int __a, int __b); -__device__ int __nv_vhaddu2(int __a, int __b); -__device__ int __nv_vhaddu4(int __a, int __b); -__device__ int __nv_vmaxs2(int __a, int __b); -__device__ int __nv_vmaxs4(int __a, int __b); -__device__ int __nv_vmaxu2(int __a, int __b); -__device__ int __nv_vmaxu4(int __a, int __b); -__device__ int __nv_vmins2(int __a, int __b); -__device__ int __nv_vmins4(int __a, int __b); -__device__ int __nv_vminu2(int __a, int __b); -__device__ int __nv_vminu4(int __a, int __b); -__device__ int __nv_vneg2(int __a); -__device__ int __nv_vneg4(int __a); -__device__ int __nv_vnegss2(int __a); -__device__ int __nv_vnegss4(int __a); -__device__ int __nv_vsads2(int __a, int __b); -__device__ int __nv_vsads4(int __a, int __b); -__device__ int __nv_vsadu2(int __a, int __b); -__device__ int __nv_vsadu4(int __a, int __b); -__device__ int __nv_vseteq2(int __a, int __b); -__device__ int __nv_vseteq4(int __a, int __b); -__device__ int __nv_vsetges2(int __a, int __b); -__device__ int __nv_vsetges4(int __a, int __b); -__device__ int __nv_vsetgeu2(int __a, int __b); -__device__ int __nv_vsetgeu4(int __a, int __b); -__device__ int __nv_vsetgts2(int __a, int __b); -__device__ int __nv_vsetgts4(int __a, int __b); -__device__ int __nv_vsetgtu2(int __a, int __b); -__device__ int __nv_vsetgtu4(int __a, int __b); -__device__ int __nv_vsetles2(int __a, int __b); -__device__ int __nv_vsetles4(int __a, int __b); -__device__ int __nv_vsetleu2(int __a, int __b); -__device__ int __nv_vsetleu4(int __a, int __b); -__device__ int __nv_vsetlts2(int __a, int __b); -__device__ int __nv_vsetlts4(int __a, int __b); -__device__ int __nv_vsetltu2(int __a, int __b); -__device__ int __nv_vsetltu4(int __a, 
int __b); -__device__ int __nv_vsetne2(int __a, int __b); -__device__ int __nv_vsetne4(int __a, int __b); -__device__ int __nv_vsub2(int __a, int __b); -__device__ int __nv_vsub4(int __a, int __b); -__device__ int __nv_vsubss2(int __a, int __b); -__device__ int __nv_vsubss4(int __a, int __b); -__device__ int __nv_vsubus2(int __a, int __b); -__device__ int __nv_vsubus4(int __a, int __b); +__DEVICE__ int __nv_vabs2(int __a); +__DEVICE__ int __nv_vabs4(int __a); +__DEVICE__ int __nv_vabsdiffs2(int __a, int __b); +__DEVICE__ int __nv_vabsdiffs4(int __a, int __b); +__DEVICE__ int __nv_vabsdiffu2(int __a, int __b); +__DEVICE__ int __nv_vabsdiffu4(int __a, int __b); +__DEVICE__ int __nv_vabsss2(int __a); +__DEVICE__ int __nv_vabsss4(int __a); +__DEVICE__ int __nv_vadd2(int __a, int __b); +__DEVICE__ int __nv_vadd4(int __a, int __b); +__DEVICE__ int __nv_vaddss2(int __a, int __b); +__DEVICE__ int __nv_vaddss4(int __a, int __b); +__DEVICE__ int __nv_vaddus2(int __a, int __b); +__DEVICE__ int __nv_vaddus4(int __a, int __b); +__DEVICE__ int __nv_vavgs2(int __a, int __b); +__DEVICE__ int __nv_vavgs4(int __a, int __b); +__DEVICE__ int __nv_vavgu2(int __a, int __b); +__DEVICE__ int __nv_vavgu4(int __a, int __b); +__DEVICE__ int __nv_vcmpeq2(int __a, int __b); +__DEVICE__ int __nv_vcmpeq4(int __a, int __b); +__DEVICE__ int __nv_vcmpges2(int __a, int __b); +__DEVICE__ int __nv_vcmpges4(int __a, int __b); +__DEVICE__ int __nv_vcmpgeu2(int __a, int __b); +__DEVICE__ int __nv_vcmpgeu4(int __a, int __b); +__DEVICE__ int __nv_vcmpgts2(int __a, int __b); +__DEVICE__ int __nv_vcmpgts4(int __a, int __b); +__DEVICE__ int __nv_vcmpgtu2(int __a, int __b); +__DEVICE__ int __nv_vcmpgtu4(int __a, int __b); +__DEVICE__ int __nv_vcmples2(int __a, int __b); +__DEVICE__ int __nv_vcmples4(int __a, int __b); +__DEVICE__ int __nv_vcmpleu2(int __a, int __b); +__DEVICE__ int __nv_vcmpleu4(int __a, int __b); +__DEVICE__ int __nv_vcmplts2(int __a, int __b); +__DEVICE__ int __nv_vcmplts4(int __a, int __b); +__DEVICE__ int __nv_vcmpltu2(int __a, int __b); +__DEVICE__ int __nv_vcmpltu4(int __a, int __b); +__DEVICE__ int __nv_vcmpne2(int __a, int __b); +__DEVICE__ int __nv_vcmpne4(int __a, int __b); +__DEVICE__ int __nv_vhaddu2(int __a, int __b); +__DEVICE__ int __nv_vhaddu4(int __a, int __b); +__DEVICE__ int __nv_vmaxs2(int __a, int __b); +__DEVICE__ int __nv_vmaxs4(int __a, int __b); +__DEVICE__ int __nv_vmaxu2(int __a, int __b); +__DEVICE__ int __nv_vmaxu4(int __a, int __b); +__DEVICE__ int __nv_vmins2(int __a, int __b); +__DEVICE__ int __nv_vmins4(int __a, int __b); +__DEVICE__ int __nv_vminu2(int __a, int __b); +__DEVICE__ int __nv_vminu4(int __a, int __b); +__DEVICE__ int __nv_vneg2(int __a); +__DEVICE__ int __nv_vneg4(int __a); +__DEVICE__ int __nv_vnegss2(int __a); +__DEVICE__ int __nv_vnegss4(int __a); +__DEVICE__ int __nv_vsads2(int __a, int __b); +__DEVICE__ int __nv_vsads4(int __a, int __b); +__DEVICE__ int __nv_vsadu2(int __a, int __b); +__DEVICE__ int __nv_vsadu4(int __a, int __b); +__DEVICE__ int __nv_vseteq2(int __a, int __b); +__DEVICE__ int __nv_vseteq4(int __a, int __b); +__DEVICE__ int __nv_vsetges2(int __a, int __b); +__DEVICE__ int __nv_vsetges4(int __a, int __b); +__DEVICE__ int __nv_vsetgeu2(int __a, int __b); +__DEVICE__ int __nv_vsetgeu4(int __a, int __b); +__DEVICE__ int __nv_vsetgts2(int __a, int __b); +__DEVICE__ int __nv_vsetgts4(int __a, int __b); +__DEVICE__ int __nv_vsetgtu2(int __a, int __b); +__DEVICE__ int __nv_vsetgtu4(int __a, int __b); +__DEVICE__ int __nv_vsetles2(int __a, int __b); +__DEVICE__ 
int __nv_vsetles4(int __a, int __b); +__DEVICE__ int __nv_vsetleu2(int __a, int __b); +__DEVICE__ int __nv_vsetleu4(int __a, int __b); +__DEVICE__ int __nv_vsetlts2(int __a, int __b); +__DEVICE__ int __nv_vsetlts4(int __a, int __b); +__DEVICE__ int __nv_vsetltu2(int __a, int __b); +__DEVICE__ int __nv_vsetltu4(int __a, int __b); +__DEVICE__ int __nv_vsetne2(int __a, int __b); +__DEVICE__ int __nv_vsetne4(int __a, int __b); +__DEVICE__ int __nv_vsub2(int __a, int __b); +__DEVICE__ int __nv_vsub4(int __a, int __b); +__DEVICE__ int __nv_vsubss2(int __a, int __b); +__DEVICE__ int __nv_vsubss4(int __a, int __b); +__DEVICE__ int __nv_vsubus2(int __a, int __b); +__DEVICE__ int __nv_vsubus4(int __a, int __b); #endif // CUDA_VERSION -__device__ double __nv_y0(double __a); -__device__ float __nv_y0f(float __a); -__device__ double __nv_y1(double __a); -__device__ float __nv_y1f(float __a); -__device__ float __nv_ynf(int __a, float __b); -__device__ double __nv_yn(int __a, double __b); +__DEVICE__ double __nv_y0(double __a); +__DEVICE__ float __nv_y0f(float __a); +__DEVICE__ double __nv_y1(double __a); +__DEVICE__ float __nv_y1f(float __a); +__DEVICE__ float __nv_ynf(int __a, float __b); +__DEVICE__ double __nv_yn(int __a, double __b); +#if defined(__cplusplus) } // extern "C" +#endif #endif // __CLANG_CUDA_LIBDEVICE_DECLARES_H__ diff --git a/lib/include/__clang_cuda_math_forward_declares.h b/lib/include/__clang_cuda_math_forward_declares.h index c31b1f4cda..0afe4db556 100644 --- a/lib/include/__clang_cuda_math_forward_declares.h +++ b/lib/include/__clang_cuda_math_forward_declares.h @@ -1,22 +1,8 @@ /*===- __clang_math_forward_declares.h - Prototypes of __device__ math fns --=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -34,14 +20,37 @@ // would preclude the use of our own __device__ overloads for these functions. 
#pragma push_macro("__DEVICE__") +#ifdef _OPENMP +#define __DEVICE__ static __inline__ __attribute__((always_inline)) +#else #define __DEVICE__ \ static __inline__ __attribute__((always_inline)) __attribute__((device)) +#endif -__DEVICE__ double abs(double); -__DEVICE__ float abs(float); -__DEVICE__ int abs(int); +// For C++ 17 we need to include noexcept attribute to be compatible +// with the header-defined version. This may be removed once +// variant is supported. +#if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L +#define __NOEXCEPT noexcept +#else +#define __NOEXCEPT +#endif + +#if !(defined(_OPENMP) && defined(__cplusplus)) __DEVICE__ long abs(long); __DEVICE__ long long abs(long long); +__DEVICE__ double abs(double); +__DEVICE__ float abs(float); +#endif +// While providing the CUDA declarations and definitions for math functions, +// we may manually define additional functions. +// TODO: Once variant is supported the additional functions will have +// to be removed. +#if defined(_OPENMP) && defined(__cplusplus) +__DEVICE__ const double abs(const double); +__DEVICE__ const float abs(const float); +#endif +__DEVICE__ int abs(int) __NOEXCEPT; __DEVICE__ double acos(double); __DEVICE__ float acos(float); __DEVICE__ double acosh(double); @@ -76,8 +85,8 @@ __DEVICE__ double exp(double); __DEVICE__ float exp(float); __DEVICE__ double expm1(double); __DEVICE__ float expm1(float); -__DEVICE__ double fabs(double); -__DEVICE__ float fabs(float); +__DEVICE__ double fabs(double) __NOEXCEPT; +__DEVICE__ float fabs(float) __NOEXCEPT; __DEVICE__ double fdim(double, double); __DEVICE__ float fdim(float, float); __DEVICE__ double floor(double); @@ -98,12 +107,18 @@ __DEVICE__ double hypot(double, double); __DEVICE__ float hypot(float, float); __DEVICE__ int ilogb(double); __DEVICE__ int ilogb(float); +#ifdef _MSC_VER +__DEVICE__ bool isfinite(long double); +#endif __DEVICE__ bool isfinite(double); __DEVICE__ bool isfinite(float); __DEVICE__ bool isgreater(double, double); __DEVICE__ bool isgreaterequal(double, double); __DEVICE__ bool isgreaterequal(float, float); __DEVICE__ bool isgreater(float, float); +#ifdef _MSC_VER +__DEVICE__ bool isinf(long double); +#endif __DEVICE__ bool isinf(double); __DEVICE__ bool isinf(float); __DEVICE__ bool isless(double, double); @@ -112,18 +127,21 @@ __DEVICE__ bool islessequal(float, float); __DEVICE__ bool isless(float, float); __DEVICE__ bool islessgreater(double, double); __DEVICE__ bool islessgreater(float, float); +#ifdef _MSC_VER +__DEVICE__ bool isnan(long double); +#endif __DEVICE__ bool isnan(double); __DEVICE__ bool isnan(float); __DEVICE__ bool isnormal(double); __DEVICE__ bool isnormal(float); __DEVICE__ bool isunordered(double, double); __DEVICE__ bool isunordered(float, float); -__DEVICE__ long labs(long); +__DEVICE__ long labs(long) __NOEXCEPT; __DEVICE__ double ldexp(double, int); __DEVICE__ float ldexp(float, int); __DEVICE__ double lgamma(double); __DEVICE__ float lgamma(float); -__DEVICE__ long long llabs(long long); +__DEVICE__ long long llabs(long long) __NOEXCEPT; __DEVICE__ long long llrint(double); __DEVICE__ long long llrint(float); __DEVICE__ double log10(double); @@ -134,6 +152,9 @@ __DEVICE__ double log2(double); __DEVICE__ float log2(float); __DEVICE__ double logb(double); __DEVICE__ float logb(float); +#if defined(_OPENMP) && defined(__cplusplus) +__DEVICE__ long double log(long double); +#endif __DEVICE__ double log(double); __DEVICE__ float log(float); __DEVICE__ long lrint(double); @@ -281,6 +302,7 @@ 
_GLIBCXX_END_NAMESPACE_VERSION } // namespace std #endif +#undef __NOEXCEPT #pragma pop_macro("__DEVICE__") #endif diff --git a/lib/include/__clang_cuda_runtime_wrapper.h b/lib/include/__clang_cuda_runtime_wrapper.h index f05c0454a8..3e362dd967 100644 --- a/lib/include/__clang_cuda_runtime_wrapper.h +++ b/lib/include/__clang_cuda_runtime_wrapper.h @@ -1,22 +1,8 @@ /*===---- __clang_cuda_runtime_wrapper.h - CUDA runtime support -------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -62,7 +48,7 @@ #include "cuda.h" #if !defined(CUDA_VERSION) #error "cuda.h did not define CUDA_VERSION" -#elif CUDA_VERSION < 7000 || CUDA_VERSION > 10000 +#elif CUDA_VERSION < 7000 || CUDA_VERSION > 10010 #error "Unsupported CUDA version!" #endif @@ -426,5 +412,15 @@ __device__ inline __cuda_builtin_gridDim_t::operator dim3() const { #pragma pop_macro("__USE_FAST_MATH__") #pragma pop_macro("__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__") +// CUDA runtime uses this undocumented function to access kernel launch +// configuration. The declaration is in crt/device_functions.h but that file +// includes a lot of other stuff we don't want. Instead, we'll provide our own +// declaration for it here. 
+#if CUDA_VERSION >= 9020 +extern "C" unsigned __cudaPushCallConfiguration(dim3 gridDim, dim3 blockDim, + size_t sharedMem = 0, + void *stream = 0); +#endif + #endif // __CUDA__ #endif // __CLANG_CUDA_RUNTIME_WRAPPER_H__ diff --git a/lib/include/__stddef_max_align_t.h b/lib/include/__stddef_max_align_t.h index 1e10ca9865..e3b439285d 100644 --- a/lib/include/__stddef_max_align_t.h +++ b/lib/include/__stddef_max_align_t.h @@ -1,24 +1,8 @@ /*===---- __stddef_max_align_t.h - Definition of max_align_t for modules ---=== * - * Copyright (c) 2014 Chandler Carruth - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/__wmmintrin_aes.h b/lib/include/__wmmintrin_aes.h index 70c355efc4..f540319c7f 100644 --- a/lib/include/__wmmintrin_aes.h +++ b/lib/include/__wmmintrin_aes.h @@ -1,22 +1,8 @@ /*===---- __wmmintrin_aes.h - AES intrinsics -------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/__wmmintrin_pclmul.h b/lib/include/__wmmintrin_pclmul.h index e0f928796a..fef4b93dbb 100644 --- a/lib/include/__wmmintrin_pclmul.h +++ b/lib/include/__wmmintrin_pclmul.h @@ -1,22 +1,8 @@ /*===---- __wmmintrin_pclmul.h - PCMUL intrinsics ---------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/adxintrin.h b/lib/include/adxintrin.h index d6c454db85..72b9ed08f4 100644 --- a/lib/include/adxintrin.h +++ b/lib/include/adxintrin.h @@ -1,22 +1,8 @@ /*===---- adxintrin.h - ADX intrinsics -------------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/altivec.h b/lib/include/altivec.h index 2dc6adb900..4008440b2b 100644 --- a/lib/include/altivec.h +++ b/lib/include/altivec.h @@ -1,22 +1,8 @@ /*===---- altivec.h - Standard header for type generic math ---------------===*\ * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * \*===----------------------------------------------------------------------===*/ diff --git a/lib/include/ammintrin.h b/lib/include/ammintrin.h index 680b4465ea..3806be6ebc 100644 --- a/lib/include/ammintrin.h +++ b/lib/include/ammintrin.h @@ -1,22 +1,8 @@ /*===---- ammintrin.h - SSE4a intrinsics -----------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/arm64intr.h b/lib/include/arm64intr.h index be52283618..4943b2db69 100644 --- a/lib/include/arm64intr.h +++ b/lib/include/arm64intr.h @@ -1,22 +1,8 @@ /*===---- arm64intr.h - ARM64 Windows intrinsics -------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/arm_acle.h b/lib/include/arm_acle.h index ab25897982..096cc261af 100644 --- a/lib/include/arm_acle.h +++ b/lib/include/arm_acle.h @@ -1,22 +1,8 @@ /*===---- arm_acle.h - ARM Non-Neon intrinsics -----------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -611,6 +597,14 @@ __crc32cd(uint32_t __a, uint64_t __b) { } #endif +/* Armv8.3-A Javascript conversion intrinsic */ +#if __ARM_64BIT_STATE && defined(__ARM_FEATURE_JCVT) +static __inline__ int32_t __attribute__((__always_inline__, __nodebug__)) +__jcvt(double __a) { + return __builtin_arm_jcvt(__a); +} +#endif + /* 10.1 Special register intrinsics */ #define __arm_rsr(sysreg) __builtin_arm_rsr(sysreg) #define __arm_rsr64(sysreg) __builtin_arm_rsr64(sysreg) @@ -619,6 +613,16 @@ __crc32cd(uint32_t __a, uint64_t __b) { #define __arm_wsr64(sysreg, v) __builtin_arm_wsr64(sysreg, v) #define __arm_wsrp(sysreg, v) __builtin_arm_wsrp(sysreg, v) +// Memory Tagging Extensions (MTE) Intrinsics +#if __ARM_FEATURE_MEMORY_TAGGING +#define __arm_mte_create_random_tag(__ptr, __mask) __builtin_arm_irg(__ptr, __mask) +#define __arm_mte_increment_tag(__ptr, __tag_offset) __builtin_arm_addg(__ptr, __tag_offset) +#define __arm_mte_exclude_tag(__ptr, __excluded) __builtin_arm_gmi(__ptr, __excluded) +#define __arm_mte_get_tag(__ptr) __builtin_arm_ldg(__ptr) +#define __arm_mte_set_tag(__ptr) __builtin_arm_stg(__ptr) +#define __arm_mte_ptrdiff(__ptra, __ptrb) __builtin_arm_subp(__ptra, __ptrb) +#endif + #if defined(__cplusplus) } #endif diff --git a/lib/include/arm_neon.h b/lib/include/arm_neon.h index d6765b36d6..694bdfc9ce 100644 --- a/lib/include/arm_neon.h +++ b/lib/include/arm_neon.h @@ -44247,13 +44247,13 @@ __ai float32x2_t vfms_f32(float32x2_t __p0, float32x2_t __p1, float32x2_t __p2) #endif #if defined(__ARM_FEATURE_FP16FML) && defined(__aarch64__) #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vfmlalq_high_u32(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { +__ai float32x4_t vfmlalq_high_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vfmlalq_high_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #else -__ai float32x4_t vfmlalq_high_u32(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { +__ai float32x4_t vfmlalq_high_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); @@ -44262,7 +44262,7 @@ __ai float32x4_t vfmlalq_high_u32(float32x4_t __p0, float16x8_t __p1, float16x8_ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } -__ai float32x4_t __noswap_vfmlalq_high_u32(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { +__ai float32x4_t __noswap_vfmlalq_high_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vfmlalq_high_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; @@ -44270,13 +44270,13 @@ __ai float32x4_t __noswap_vfmlalq_high_u32(float32x4_t __p0, float16x8_t __p1, f #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vfmlal_high_u32(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { +__ai float32x2_t vfmlal_high_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vfmlal_high_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } #else -__ai float32x2_t vfmlal_high_u32(float32x2_t __p0, float16x4_t __p1, 
float16x4_t __p2) { +__ai float32x2_t vfmlal_high_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); @@ -44285,7 +44285,7 @@ __ai float32x2_t vfmlal_high_u32(float32x2_t __p0, float16x4_t __p1, float16x4_t __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } -__ai float32x2_t __noswap_vfmlal_high_u32(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { +__ai float32x2_t __noswap_vfmlal_high_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vfmlal_high_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; @@ -44293,13 +44293,13 @@ __ai float32x2_t __noswap_vfmlal_high_u32(float32x2_t __p0, float16x4_t __p1, fl #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vfmlalq_low_u32(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { +__ai float32x4_t vfmlalq_low_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vfmlalq_low_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #else -__ai float32x4_t vfmlalq_low_u32(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { +__ai float32x4_t vfmlalq_low_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); @@ -44308,7 +44308,7 @@ __ai float32x4_t vfmlalq_low_u32(float32x4_t __p0, float16x8_t __p1, float16x8_t __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } -__ai float32x4_t __noswap_vfmlalq_low_u32(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { +__ai float32x4_t __noswap_vfmlalq_low_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vfmlalq_low_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; @@ -44316,13 +44316,13 @@ __ai float32x4_t __noswap_vfmlalq_low_u32(float32x4_t __p0, float16x8_t __p1, fl #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vfmlal_low_u32(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { +__ai float32x2_t vfmlal_low_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vfmlal_low_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } #else -__ai float32x2_t vfmlal_low_u32(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { +__ai float32x2_t vfmlal_low_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); @@ -44331,7 +44331,7 @@ __ai float32x2_t vfmlal_low_u32(float32x2_t __p0, float16x4_t __p1, float16x4_t __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } -__ai float32x2_t __noswap_vfmlal_low_u32(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { +__ai float32x2_t __noswap_vfmlal_low_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; __ret = (float32x2_t) 
__builtin_neon_vfmlal_low_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; @@ -44339,13 +44339,13 @@ __ai float32x2_t __noswap_vfmlal_low_u32(float32x2_t __p0, float16x4_t __p1, flo #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vfmlslq_high_u32(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { +__ai float32x4_t vfmlslq_high_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vfmlslq_high_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #else -__ai float32x4_t vfmlslq_high_u32(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { +__ai float32x4_t vfmlslq_high_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); @@ -44354,7 +44354,7 @@ __ai float32x4_t vfmlslq_high_u32(float32x4_t __p0, float16x8_t __p1, float16x8_ __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } -__ai float32x4_t __noswap_vfmlslq_high_u32(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { +__ai float32x4_t __noswap_vfmlslq_high_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vfmlslq_high_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; @@ -44362,13 +44362,13 @@ __ai float32x4_t __noswap_vfmlslq_high_u32(float32x4_t __p0, float16x8_t __p1, f #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vfmlsl_high_u32(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { +__ai float32x2_t vfmlsl_high_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vfmlsl_high_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } #else -__ai float32x2_t vfmlsl_high_u32(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { +__ai float32x2_t vfmlsl_high_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); @@ -44377,7 +44377,7 @@ __ai float32x2_t vfmlsl_high_u32(float32x2_t __p0, float16x4_t __p1, float16x4_t __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } -__ai float32x2_t __noswap_vfmlsl_high_u32(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { +__ai float32x2_t __noswap_vfmlsl_high_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vfmlsl_high_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; @@ -44385,13 +44385,13 @@ __ai float32x2_t __noswap_vfmlsl_high_u32(float32x2_t __p0, float16x4_t __p1, fl #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x4_t vfmlslq_low_u32(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { +__ai float32x4_t vfmlslq_low_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vfmlslq_low_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; } #else -__ai float32x4_t vfmlslq_low_u32(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { +__ai float32x4_t vfmlslq_low_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { 
float32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); float16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); float16x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); @@ -44400,7 +44400,7 @@ __ai float32x4_t vfmlslq_low_u32(float32x4_t __p0, float16x8_t __p1, float16x8_t __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); return __ret; } -__ai float32x4_t __noswap_vfmlslq_low_u32(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { +__ai float32x4_t __noswap_vfmlslq_low_f16(float32x4_t __p0, float16x8_t __p1, float16x8_t __p2) { float32x4_t __ret; __ret = (float32x4_t) __builtin_neon_vfmlslq_low_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 41); return __ret; @@ -44408,13 +44408,13 @@ __ai float32x4_t __noswap_vfmlslq_low_u32(float32x4_t __p0, float16x8_t __p1, fl #endif #ifdef __LITTLE_ENDIAN__ -__ai float32x2_t vfmlsl_low_u32(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { +__ai float32x2_t vfmlsl_low_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vfmlsl_low_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; } #else -__ai float32x2_t vfmlsl_low_u32(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { +__ai float32x2_t vfmlsl_low_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); float16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); float16x4_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 3, 2, 1, 0); @@ -44423,7 +44423,7 @@ __ai float32x2_t vfmlsl_low_u32(float32x2_t __p0, float16x4_t __p1, float16x4_t __ret = __builtin_shufflevector(__ret, __ret, 1, 0); return __ret; } -__ai float32x2_t __noswap_vfmlsl_low_u32(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { +__ai float32x2_t __noswap_vfmlsl_low_f16(float32x2_t __p0, float16x4_t __p1, float16x4_t __p2) { float32x2_t __ret; __ret = (float32x2_t) __builtin_neon_vfmlsl_low_v((int8x8_t)__p0, (int8x8_t)__p1, (int8x8_t)__p2, 9); return __ret; @@ -64095,15 +64095,15 @@ __ai uint8x16_t vqtbl1q_u8(uint8x16_t __p0, uint8x16_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vqtbl1q_s8(int8x16_t __p0, int8x16_t __p1) { +__ai int8x16_t vqtbl1q_s8(int8x16_t __p0, uint8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqtbl1q_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #else -__ai int8x16_t vqtbl1q_s8(int8x16_t __p0, int8x16_t __p1) { +__ai int8x16_t vqtbl1q_s8(int8x16_t __p0, uint8x16_t __p1) { int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqtbl1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); @@ -64129,15 +64129,15 @@ __ai uint8x8_t vqtbl1_u8(uint8x16_t __p0, uint8x8_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vqtbl1_s8(int8x16_t __p0, int8x8_t __p1) { +__ai int8x8_t vqtbl1_s8(int8x16_t __p0, uint8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqtbl1_v((int8x16_t)__p0, (int8x8_t)__p1, 0); return __ret; } #else 
-__ai int8x8_t vqtbl1_s8(int8x16_t __p0, int8x8_t __p1) { +__ai int8x8_t vqtbl1_s8(int8x16_t __p0, uint8x8_t __p1) { int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqtbl1_v((int8x16_t)__rev0, (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); @@ -64203,17 +64203,17 @@ __ai uint8x16_t vqtbl2q_u8(uint8x16x2_t __p0, uint8x16_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vqtbl2q_s8(int8x16x2_t __p0, int8x16_t __p1) { +__ai int8x16_t vqtbl2q_s8(int8x16x2_t __p0, uint8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqtbl2q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p1, 32); return __ret; } #else -__ai int8x16_t vqtbl2q_s8(int8x16x2_t __p0, int8x16_t __p1) { +__ai int8x16_t vqtbl2q_s8(int8x16x2_t __p0, uint8x16_t __p1) { int8x16x2_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqtbl2q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); @@ -64241,17 +64241,17 @@ __ai uint8x8_t vqtbl2_u8(uint8x16x2_t __p0, uint8x8_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vqtbl2_s8(int8x16x2_t __p0, int8x8_t __p1) { +__ai int8x8_t vqtbl2_s8(int8x16x2_t __p0, uint8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqtbl2_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x8_t)__p1, 0); return __ret; } #else -__ai int8x8_t vqtbl2_s8(int8x16x2_t __p0, int8x8_t __p1) { +__ai int8x8_t vqtbl2_s8(int8x16x2_t __p0, uint8x8_t __p1) { int8x16x2_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqtbl2_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); @@ -64320,18 +64320,18 @@ __ai uint8x16_t vqtbl3q_u8(uint8x16x3_t __p0, uint8x16_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vqtbl3q_s8(int8x16x3_t __p0, int8x16_t __p1) { +__ai int8x16_t vqtbl3q_s8(int8x16x3_t __p0, uint8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqtbl3q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p1, 32); return __ret; } #else -__ai int8x16_t vqtbl3q_s8(int8x16x3_t __p0, int8x16_t __p1) { +__ai int8x16_t vqtbl3q_s8(int8x16x3_t __p0, 
uint8x16_t __p1) { int8x16x3_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqtbl3q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); @@ -64360,18 +64360,18 @@ __ai uint8x8_t vqtbl3_u8(uint8x16x3_t __p0, uint8x8_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vqtbl3_s8(int8x16x3_t __p0, int8x8_t __p1) { +__ai int8x8_t vqtbl3_s8(int8x16x3_t __p0, uint8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqtbl3_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x8_t)__p1, 0); return __ret; } #else -__ai int8x8_t vqtbl3_s8(int8x16x3_t __p0, int8x8_t __p1) { +__ai int8x8_t vqtbl3_s8(int8x16x3_t __p0, uint8x8_t __p1) { int8x16x3_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqtbl3_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); @@ -64443,19 +64443,19 @@ __ai uint8x16_t vqtbl4q_u8(uint8x16x4_t __p0, uint8x16_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vqtbl4q_s8(int8x16x4_t __p0, int8x16_t __p1) { +__ai int8x16_t vqtbl4q_s8(int8x16x4_t __p0, uint8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqtbl4q_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p0.val[3], (int8x16_t)__p1, 32); return __ret; } #else -__ai int8x16_t vqtbl4q_s8(int8x16x4_t __p0, int8x16_t __p1) { +__ai int8x16_t vqtbl4q_s8(int8x16x4_t __p0, uint8x16_t __p1) { int8x16x4_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 
0); int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqtbl4q_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev0.val[3], (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); @@ -64485,19 +64485,19 @@ __ai uint8x8_t vqtbl4_u8(uint8x16x4_t __p0, uint8x8_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vqtbl4_s8(int8x16x4_t __p0, int8x8_t __p1) { +__ai int8x8_t vqtbl4_s8(int8x16x4_t __p0, uint8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqtbl4_v((int8x16_t)__p0.val[0], (int8x16_t)__p0.val[1], (int8x16_t)__p0.val[2], (int8x16_t)__p0.val[3], (int8x8_t)__p1, 0); return __ret; } #else -__ai int8x8_t vqtbl4_s8(int8x16x4_t __p0, int8x8_t __p1) { +__ai int8x8_t vqtbl4_s8(int8x16x4_t __p0, uint8x8_t __p1) { int8x16x4_t __rev0; __rev0.val[0] = __builtin_shufflevector(__p0.val[0], __p0.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[1] = __builtin_shufflevector(__p0.val[1], __p0.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[2] = __builtin_shufflevector(__p0.val[2], __p0.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev0.val[3] = __builtin_shufflevector(__p0.val[3], __p0.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqtbl4_v((int8x16_t)__rev0.val[0], (int8x16_t)__rev0.val[1], (int8x16_t)__rev0.val[2], (int8x16_t)__rev0.val[3], (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); @@ -64560,16 +64560,16 @@ __ai uint8x16_t vqtbx1q_u8(uint8x16_t __p0, uint8x16_t __p1, uint8x16_t __p2) { #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vqtbx1q_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { +__ai int8x16_t vqtbx1q_s8(int8x16_t __p0, int8x16_t __p1, uint8x16_t __p2) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqtbx1q_v((int8x16_t)__p0, (int8x16_t)__p1, (int8x16_t)__p2, 32); return __ret; } #else -__ai int8x16_t vqtbx1q_s8(int8x16_t __p0, int8x16_t __p1, int8x16_t __p2) { +__ai int8x16_t vqtbx1q_s8(int8x16_t __p0, int8x16_t __p1, uint8x16_t __p2) { int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqtbx1q_v((int8x16_t)__rev0, (int8x16_t)__rev1, (int8x16_t)__rev2, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); @@ -64596,16 +64596,16 @@ __ai uint8x8_t vqtbx1_u8(uint8x8_t __p0, uint8x16_t __p1, uint8x8_t __p2) { #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vqtbx1_s8(int8x8_t __p0, int8x16_t __p1, int8x8_t __p2) { +__ai int8x8_t vqtbx1_s8(int8x8_t __p0, int8x16_t __p1, uint8x8_t __p2) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqtbx1_v((int8x8_t)__p0, (int8x16_t)__p1, (int8x8_t)__p2, 0); return __ret; } #else -__ai int8x8_t vqtbx1_s8(int8x8_t __p0, int8x16_t __p1, 
int8x8_t __p2) { +__ai int8x8_t vqtbx1_s8(int8x8_t __p0, int8x16_t __p1, uint8x8_t __p2) { int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqtbx1_v((int8x8_t)__rev0, (int8x16_t)__rev1, (int8x8_t)__rev2, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); @@ -64674,18 +64674,18 @@ __ai uint8x16_t vqtbx2q_u8(uint8x16_t __p0, uint8x16x2_t __p1, uint8x16_t __p2) #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vqtbx2q_s8(int8x16_t __p0, int8x16x2_t __p1, int8x16_t __p2) { +__ai int8x16_t vqtbx2q_s8(int8x16_t __p0, int8x16x2_t __p1, uint8x16_t __p2) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqtbx2q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p2, 32); return __ret; } #else -__ai int8x16_t vqtbx2q_s8(int8x16_t __p0, int8x16x2_t __p1, int8x16_t __p2) { +__ai int8x16_t vqtbx2q_s8(int8x16_t __p0, int8x16x2_t __p1, uint8x16_t __p2) { int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16x2_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqtbx2q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev2, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); @@ -64714,18 +64714,18 @@ __ai uint8x8_t vqtbx2_u8(uint8x8_t __p0, uint8x16x2_t __p1, uint8x8_t __p2) { #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vqtbx2_s8(int8x8_t __p0, int8x16x2_t __p1, int8x8_t __p2) { +__ai int8x8_t vqtbx2_s8(int8x8_t __p0, int8x16x2_t __p1, uint8x8_t __p2) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqtbx2_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x8_t)__p2, 0); return __ret; } #else -__ai int8x8_t vqtbx2_s8(int8x8_t __p0, int8x16x2_t __p1, int8x8_t __p2) { +__ai int8x8_t vqtbx2_s8(int8x8_t __p0, int8x16x2_t __p1, uint8x8_t __p2) { int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x16x2_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqtbx2_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x8_t)__rev2, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); @@ -64797,19 +64797,19 @@ __ai 
uint8x16_t vqtbx3q_u8(uint8x16_t __p0, uint8x16x3_t __p1, uint8x16_t __p2) #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vqtbx3q_s8(int8x16_t __p0, int8x16x3_t __p1, int8x16_t __p2) { +__ai int8x16_t vqtbx3q_s8(int8x16_t __p0, int8x16x3_t __p1, uint8x16_t __p2) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqtbx3q_v((int8x16_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p2, 32); return __ret; } #else -__ai int8x16_t vqtbx3q_s8(int8x16_t __p0, int8x16x3_t __p1, int8x16_t __p2) { +__ai int8x16_t vqtbx3q_s8(int8x16_t __p0, int8x16x3_t __p1, uint8x16_t __p2) { int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16x3_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqtbx3q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev2, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); @@ -64839,19 +64839,19 @@ __ai uint8x8_t vqtbx3_u8(uint8x8_t __p0, uint8x16x3_t __p1, uint8x8_t __p2) { #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vqtbx3_s8(int8x8_t __p0, int8x16x3_t __p1, int8x8_t __p2) { +__ai int8x8_t vqtbx3_s8(int8x8_t __p0, int8x16x3_t __p1, uint8x8_t __p2) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqtbx3_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x8_t)__p2, 0); return __ret; } #else -__ai int8x8_t vqtbx3_s8(int8x8_t __p0, int8x16x3_t __p1, int8x8_t __p2) { +__ai int8x8_t vqtbx3_s8(int8x8_t __p0, int8x16x3_t __p1, uint8x8_t __p2) { int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x16x3_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqtbx3_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x8_t)__rev2, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); @@ -64926,20 +64926,20 @@ __ai uint8x16_t vqtbx4q_u8(uint8x16_t __p0, uint8x16x4_t __p1, uint8x16_t __p2) #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vqtbx4q_s8(int8x16_t __p0, int8x16x4_t __p1, int8x16_t __p2) { +__ai int8x16_t vqtbx4q_s8(int8x16_t __p0, int8x16x4_t __p1, uint8x16_t __p2) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqtbx4q_v((int8x16_t)__p0, 
(int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p1.val[3], (int8x16_t)__p2, 32); return __ret; } #else -__ai int8x16_t vqtbx4q_s8(int8x16_t __p0, int8x16x4_t __p1, int8x16_t __p2) { +__ai int8x16_t vqtbx4q_s8(int8x16_t __p0, int8x16x4_t __p1, uint8x16_t __p2) { int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16x4_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vqtbx4q_v((int8x16_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], (int8x16_t)__rev2, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); @@ -64970,20 +64970,20 @@ __ai uint8x8_t vqtbx4_u8(uint8x8_t __p0, uint8x16x4_t __p1, uint8x8_t __p2) { #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vqtbx4_s8(int8x8_t __p0, int8x16x4_t __p1, int8x8_t __p2) { +__ai int8x8_t vqtbx4_s8(int8x8_t __p0, int8x16x4_t __p1, uint8x8_t __p2) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqtbx4_v((int8x8_t)__p0, (int8x16_t)__p1.val[0], (int8x16_t)__p1.val[1], (int8x16_t)__p1.val[2], (int8x16_t)__p1.val[3], (int8x8_t)__p2, 0); return __ret; } #else -__ai int8x8_t vqtbx4_s8(int8x8_t __p0, int8x16x4_t __p1, int8x8_t __p2) { +__ai int8x8_t vqtbx4_s8(int8x8_t __p0, int8x16x4_t __p1, uint8x8_t __p2) { int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); int8x16x4_t __rev1; __rev1.val[0] = __builtin_shufflevector(__p1.val[0], __p1.val[0], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[1] = __builtin_shufflevector(__p1.val[1], __p1.val[1], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[2] = __builtin_shufflevector(__p1.val[2], __p1.val[2], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); __rev1.val[3] = __builtin_shufflevector(__p1.val[3], __p1.val[3], 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev2; __rev2 = __builtin_shufflevector(__p2, __p2, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vqtbx4_v((int8x8_t)__rev0, (int8x16_t)__rev1.val[0], (int8x16_t)__rev1.val[1], (int8x16_t)__rev1.val[2], (int8x16_t)__rev1.val[3], (int8x8_t)__rev2, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); @@ -66293,13 +66293,13 @@ __ai int64_t vshld_s64(int64_t __p0, int64_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8_t vsqaddb_u8(uint8_t __p0, uint8_t __p1) { +__ai uint8_t vsqaddb_u8(uint8_t __p0, int8_t __p1) { uint8_t __ret; __ret = (uint8_t) __builtin_neon_vsqaddb_u8(__p0, __p1); return __ret; } #else -__ai uint8_t vsqaddb_u8(uint8_t __p0, uint8_t __p1) { +__ai uint8_t 
vsqaddb_u8(uint8_t __p0, int8_t __p1) { uint8_t __ret; __ret = (uint8_t) __builtin_neon_vsqaddb_u8(__p0, __p1); return __ret; @@ -66307,13 +66307,13 @@ __ai uint8_t vsqaddb_u8(uint8_t __p0, uint8_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32_t vsqadds_u32(uint32_t __p0, uint32_t __p1) { +__ai uint32_t vsqadds_u32(uint32_t __p0, int32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vsqadds_u32(__p0, __p1); return __ret; } #else -__ai uint32_t vsqadds_u32(uint32_t __p0, uint32_t __p1) { +__ai uint32_t vsqadds_u32(uint32_t __p0, int32_t __p1) { uint32_t __ret; __ret = (uint32_t) __builtin_neon_vsqadds_u32(__p0, __p1); return __ret; @@ -66321,13 +66321,13 @@ __ai uint32_t vsqadds_u32(uint32_t __p0, uint32_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64_t vsqaddd_u64(uint64_t __p0, uint64_t __p1) { +__ai uint64_t vsqaddd_u64(uint64_t __p0, int64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vsqaddd_u64(__p0, __p1); return __ret; } #else -__ai uint64_t vsqaddd_u64(uint64_t __p0, uint64_t __p1) { +__ai uint64_t vsqaddd_u64(uint64_t __p0, int64_t __p1) { uint64_t __ret; __ret = (uint64_t) __builtin_neon_vsqaddd_u64(__p0, __p1); return __ret; @@ -66335,13 +66335,13 @@ __ai uint64_t vsqaddd_u64(uint64_t __p0, uint64_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16_t vsqaddh_u16(uint16_t __p0, uint16_t __p1) { +__ai uint16_t vsqaddh_u16(uint16_t __p0, int16_t __p1) { uint16_t __ret; __ret = (uint16_t) __builtin_neon_vsqaddh_u16(__p0, __p1); return __ret; } #else -__ai uint16_t vsqaddh_u16(uint16_t __p0, uint16_t __p1) { +__ai uint16_t vsqaddh_u16(uint16_t __p0, int16_t __p1) { uint16_t __ret; __ret = (uint16_t) __builtin_neon_vsqaddh_u16(__p0, __p1); return __ret; @@ -66349,15 +66349,15 @@ __ai uint16_t vsqaddh_u16(uint16_t __p0, uint16_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x16_t vsqaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { +__ai uint8x16_t vsqaddq_u8(uint8x16_t __p0, int8x16_t __p1) { uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vsqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 48); return __ret; } #else -__ai uint8x16_t vsqaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { +__ai uint8x16_t vsqaddq_u8(uint8x16_t __p0, int8x16_t __p1) { uint8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); uint8x16_t __ret; __ret = (uint8x16_t) __builtin_neon_vsqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 48); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); @@ -66366,15 +66366,15 @@ __ai uint8x16_t vsqaddq_u8(uint8x16_t __p0, uint8x16_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x4_t vsqaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { +__ai uint32x4_t vsqaddq_u32(uint32x4_t __p0, int32x4_t __p1) { uint32x4_t __ret; __ret = (uint32x4_t) __builtin_neon_vsqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 50); return __ret; } #else -__ai uint32x4_t vsqaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { +__ai uint32x4_t vsqaddq_u32(uint32x4_t __p0, int32x4_t __p1) { uint32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint32x4_t __ret; __ret 
= (uint32x4_t) __builtin_neon_vsqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 50); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); @@ -66383,15 +66383,15 @@ __ai uint32x4_t vsqaddq_u32(uint32x4_t __p0, uint32x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x2_t vsqaddq_u64(uint64x2_t __p0, uint64x2_t __p1) { +__ai uint64x2_t vsqaddq_u64(uint64x2_t __p0, int64x2_t __p1) { uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vsqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 51); return __ret; } #else -__ai uint64x2_t vsqaddq_u64(uint64x2_t __p0, uint64x2_t __p1) { +__ai uint64x2_t vsqaddq_u64(uint64x2_t __p0, int64x2_t __p1) { uint64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); uint64x2_t __ret; __ret = (uint64x2_t) __builtin_neon_vsqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 51); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); @@ -66400,15 +66400,15 @@ __ai uint64x2_t vsqaddq_u64(uint64x2_t __p0, uint64x2_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x8_t vsqaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { +__ai uint16x8_t vsqaddq_u16(uint16x8_t __p0, int16x8_t __p1) { uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vsqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 49); return __ret; } #else -__ai uint16x8_t vsqaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { +__ai uint16x8_t vsqaddq_u16(uint16x8_t __p0, int16x8_t __p1) { uint16x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint16x8_t __ret; __ret = (uint16x8_t) __builtin_neon_vsqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 49); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); @@ -66417,15 +66417,15 @@ __ai uint16x8_t vsqaddq_u16(uint16x8_t __p0, uint16x8_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai uint8x8_t vsqadd_u8(uint8x8_t __p0, uint8x8_t __p1) { +__ai uint8x8_t vsqadd_u8(uint8x8_t __p0, int8x8_t __p1) { uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vsqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 16); return __ret; } #else -__ai uint8x8_t vsqadd_u8(uint8x8_t __p0, uint8x8_t __p1) { +__ai uint8x8_t vsqadd_u8(uint8x8_t __p0, int8x8_t __p1) { uint8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); uint8x8_t __ret; __ret = (uint8x8_t) __builtin_neon_vsqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 16); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); @@ -66434,15 +66434,15 @@ __ai uint8x8_t vsqadd_u8(uint8x8_t __p0, uint8x8_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai uint32x2_t vsqadd_u32(uint32x2_t __p0, uint32x2_t __p1) { +__ai uint32x2_t vsqadd_u32(uint32x2_t __p0, int32x2_t __p1) { uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vsqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 18); return __ret; } #else -__ai uint32x2_t vsqadd_u32(uint32x2_t __p0, uint32x2_t __p1) { +__ai uint32x2_t vsqadd_u32(uint32x2_t __p0, int32x2_t __p1) { uint32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + int32x2_t __rev1; 
__rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); uint32x2_t __ret; __ret = (uint32x2_t) __builtin_neon_vsqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 18); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); @@ -66451,13 +66451,13 @@ __ai uint32x2_t vsqadd_u32(uint32x2_t __p0, uint32x2_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai uint64x1_t vsqadd_u64(uint64x1_t __p0, uint64x1_t __p1) { +__ai uint64x1_t vsqadd_u64(uint64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vsqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; } #else -__ai uint64x1_t vsqadd_u64(uint64x1_t __p0, uint64x1_t __p1) { +__ai uint64x1_t vsqadd_u64(uint64x1_t __p0, int64x1_t __p1) { uint64x1_t __ret; __ret = (uint64x1_t) __builtin_neon_vsqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 19); return __ret; @@ -66465,15 +66465,15 @@ __ai uint64x1_t vsqadd_u64(uint64x1_t __p0, uint64x1_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai uint16x4_t vsqadd_u16(uint16x4_t __p0, uint16x4_t __p1) { +__ai uint16x4_t vsqadd_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vsqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 17); return __ret; } #else -__ai uint16x4_t vsqadd_u16(uint16x4_t __p0, uint16x4_t __p1) { +__ai uint16x4_t vsqadd_u16(uint16x4_t __p0, int16x4_t __p1) { uint16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); uint16x4_t __ret; __ret = (uint16x4_t) __builtin_neon_vsqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 17); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); @@ -68919,13 +68919,13 @@ __ai int64_t vtstd_s64(int64_t __p0, int64_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai int8_t vuqaddb_s8(int8_t __p0, int8_t __p1) { +__ai int8_t vuqaddb_s8(int8_t __p0, uint8_t __p1) { int8_t __ret; __ret = (int8_t) __builtin_neon_vuqaddb_s8(__p0, __p1); return __ret; } #else -__ai int8_t vuqaddb_s8(int8_t __p0, int8_t __p1) { +__ai int8_t vuqaddb_s8(int8_t __p0, uint8_t __p1) { int8_t __ret; __ret = (int8_t) __builtin_neon_vuqaddb_s8(__p0, __p1); return __ret; @@ -68933,13 +68933,13 @@ __ai int8_t vuqaddb_s8(int8_t __p0, int8_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai int32_t vuqadds_s32(int32_t __p0, int32_t __p1) { +__ai int32_t vuqadds_s32(int32_t __p0, uint32_t __p1) { int32_t __ret; __ret = (int32_t) __builtin_neon_vuqadds_s32(__p0, __p1); return __ret; } #else -__ai int32_t vuqadds_s32(int32_t __p0, int32_t __p1) { +__ai int32_t vuqadds_s32(int32_t __p0, uint32_t __p1) { int32_t __ret; __ret = (int32_t) __builtin_neon_vuqadds_s32(__p0, __p1); return __ret; @@ -68947,13 +68947,13 @@ __ai int32_t vuqadds_s32(int32_t __p0, int32_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai int64_t vuqaddd_s64(int64_t __p0, int64_t __p1) { +__ai int64_t vuqaddd_s64(int64_t __p0, uint64_t __p1) { int64_t __ret; __ret = (int64_t) __builtin_neon_vuqaddd_s64(__p0, __p1); return __ret; } #else -__ai int64_t vuqaddd_s64(int64_t __p0, int64_t __p1) { +__ai int64_t vuqaddd_s64(int64_t __p0, uint64_t __p1) { int64_t __ret; __ret = (int64_t) __builtin_neon_vuqaddd_s64(__p0, __p1); return __ret; @@ -68961,13 +68961,13 @@ __ai int64_t vuqaddd_s64(int64_t __p0, int64_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai int16_t vuqaddh_s16(int16_t __p0, int16_t __p1) { +__ai int16_t vuqaddh_s16(int16_t __p0, uint16_t __p1) { int16_t __ret; __ret = (int16_t) __builtin_neon_vuqaddh_s16(__p0, __p1); return 
__ret; } #else -__ai int16_t vuqaddh_s16(int16_t __p0, int16_t __p1) { +__ai int16_t vuqaddh_s16(int16_t __p0, uint16_t __p1) { int16_t __ret; __ret = (int16_t) __builtin_neon_vuqaddh_s16(__p0, __p1); return __ret; @@ -68975,15 +68975,15 @@ __ai int16_t vuqaddh_s16(int16_t __p0, int16_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x16_t vuqaddq_s8(int8x16_t __p0, int8x16_t __p1) { +__ai int8x16_t vuqaddq_s8(int8x16_t __p0, uint8x16_t __p1) { int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vuqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 32); return __ret; } #else -__ai int8x16_t vuqaddq_s8(int8x16_t __p0, int8x16_t __p1) { +__ai int8x16_t vuqaddq_s8(int8x16_t __p0, uint8x16_t __p1) { int8x16_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); - int8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x16_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); int8x16_t __ret; __ret = (int8x16_t) __builtin_neon_vuqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 32); __ret = __builtin_shufflevector(__ret, __ret, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); @@ -68992,15 +68992,15 @@ __ai int8x16_t vuqaddq_s8(int8x16_t __p0, int8x16_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x4_t vuqaddq_s32(int32x4_t __p0, int32x4_t __p1) { +__ai int32x4_t vuqaddq_s32(int32x4_t __p0, uint32x4_t __p1) { int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vuqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 34); return __ret; } #else -__ai int32x4_t vuqaddq_s32(int32x4_t __p0, int32x4_t __p1) { +__ai int32x4_t vuqaddq_s32(int32x4_t __p0, uint32x4_t __p1) { int32x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint32x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int32x4_t __ret; __ret = (int32x4_t) __builtin_neon_vuqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 34); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); @@ -69009,15 +69009,15 @@ __ai int32x4_t vuqaddq_s32(int32x4_t __p0, int32x4_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x2_t vuqaddq_s64(int64x2_t __p0, int64x2_t __p1) { +__ai int64x2_t vuqaddq_s64(int64x2_t __p0, uint64x2_t __p1) { int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vuqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 35); return __ret; } #else -__ai int64x2_t vuqaddq_s64(int64x2_t __p0, int64x2_t __p1) { +__ai int64x2_t vuqaddq_s64(int64x2_t __p0, uint64x2_t __p1) { int64x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint64x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); int64x2_t __ret; __ret = (int64x2_t) __builtin_neon_vuqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 35); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); @@ -69026,15 +69026,15 @@ __ai int64x2_t vuqaddq_s64(int64x2_t __p0, int64x2_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x8_t vuqaddq_s16(int16x8_t __p0, int16x8_t __p1) { +__ai int16x8_t vuqaddq_s16(int16x8_t __p0, uint16x8_t __p1) { int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vuqaddq_v((int8x16_t)__p0, (int8x16_t)__p1, 33); return __ret; } #else -__ai int16x8_t vuqaddq_s16(int16x8_t __p0, int16x8_t __p1) { +__ai int16x8_t vuqaddq_s16(int16x8_t __p0, uint16x8_t __p1) { int16x8_t __rev0; __rev0 = 
__builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint16x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int16x8_t __ret; __ret = (int16x8_t) __builtin_neon_vuqaddq_v((int8x16_t)__rev0, (int8x16_t)__rev1, 33); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); @@ -69043,15 +69043,15 @@ __ai int16x8_t vuqaddq_s16(int16x8_t __p0, int16x8_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai int8x8_t vuqadd_s8(int8x8_t __p0, int8x8_t __p1) { +__ai int8x8_t vuqadd_s8(int8x8_t __p0, uint8x8_t __p1) { int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vuqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 0); return __ret; } #else -__ai int8x8_t vuqadd_s8(int8x8_t __p0, int8x8_t __p1) { +__ai int8x8_t vuqadd_s8(int8x8_t __p0, uint8x8_t __p1) { int8x8_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 7, 6, 5, 4, 3, 2, 1, 0); - int8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); + uint8x8_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 7, 6, 5, 4, 3, 2, 1, 0); int8x8_t __ret; __ret = (int8x8_t) __builtin_neon_vuqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 0); __ret = __builtin_shufflevector(__ret, __ret, 7, 6, 5, 4, 3, 2, 1, 0); @@ -69060,15 +69060,15 @@ __ai int8x8_t vuqadd_s8(int8x8_t __p0, int8x8_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai int32x2_t vuqadd_s32(int32x2_t __p0, int32x2_t __p1) { +__ai int32x2_t vuqadd_s32(int32x2_t __p0, uint32x2_t __p1) { int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vuqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 2); return __ret; } #else -__ai int32x2_t vuqadd_s32(int32x2_t __p0, int32x2_t __p1) { +__ai int32x2_t vuqadd_s32(int32x2_t __p0, uint32x2_t __p1) { int32x2_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 1, 0); - int32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); + uint32x2_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 1, 0); int32x2_t __ret; __ret = (int32x2_t) __builtin_neon_vuqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 2); __ret = __builtin_shufflevector(__ret, __ret, 1, 0); @@ -69077,13 +69077,13 @@ __ai int32x2_t vuqadd_s32(int32x2_t __p0, int32x2_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai int64x1_t vuqadd_s64(int64x1_t __p0, int64x1_t __p1) { +__ai int64x1_t vuqadd_s64(int64x1_t __p0, uint64x1_t __p1) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vuqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; } #else -__ai int64x1_t vuqadd_s64(int64x1_t __p0, int64x1_t __p1) { +__ai int64x1_t vuqadd_s64(int64x1_t __p0, uint64x1_t __p1) { int64x1_t __ret; __ret = (int64x1_t) __builtin_neon_vuqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 3); return __ret; @@ -69091,15 +69091,15 @@ __ai int64x1_t vuqadd_s64(int64x1_t __p0, int64x1_t __p1) { #endif #ifdef __LITTLE_ENDIAN__ -__ai int16x4_t vuqadd_s16(int16x4_t __p0, int16x4_t __p1) { +__ai int16x4_t vuqadd_s16(int16x4_t __p0, uint16x4_t __p1) { int16x4_t __ret; __ret = (int16x4_t) __builtin_neon_vuqadd_v((int8x8_t)__p0, (int8x8_t)__p1, 1); return __ret; } #else -__ai int16x4_t vuqadd_s16(int16x4_t __p0, int16x4_t __p1) { +__ai int16x4_t vuqadd_s16(int16x4_t __p0, uint16x4_t __p1) { int16x4_t __rev0; __rev0 = __builtin_shufflevector(__p0, __p0, 3, 2, 1, 0); - int16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); + uint16x4_t __rev1; __rev1 = __builtin_shufflevector(__p1, __p1, 3, 2, 1, 0); int16x4_t __ret; __ret = (int16x4_t) 
__builtin_neon_vuqadd_v((int8x8_t)__rev0, (int8x8_t)__rev1, 1); __ret = __builtin_shufflevector(__ret, __ret, 3, 2, 1, 0); @@ -71912,16 +71912,16 @@ int16x8_t __reint2_263 = __noswap_vsetq_lane_s16(*(int16_t *) &__reint_263, *(in #if defined(__ARM_FEATURE_FP16FML) && defined(__aarch64__) #ifdef __LITTLE_ENDIAN__ -#define vfmlalq_lane_high_u32(__p0_264, __p1_264, __p2_264, __p3_264) __extension__ ({ \ +#define vfmlalq_lane_high_f16(__p0_264, __p1_264, __p2_264, __p3_264) __extension__ ({ \ float32x4_t __s0_264 = __p0_264; \ float16x8_t __s1_264 = __p1_264; \ float16x4_t __s2_264 = __p2_264; \ float32x4_t __ret_264; \ - __ret_264 = vfmlalq_high_u32(__s0_264, __s1_264, (float16x8_t) {vget_lane_f16(__s2_264, __p3_264), vget_lane_f16(__s2_264, __p3_264), vget_lane_f16(__s2_264, __p3_264), vget_lane_f16(__s2_264, __p3_264), vget_lane_f16(__s2_264, __p3_264), vget_lane_f16(__s2_264, __p3_264), vget_lane_f16(__s2_264, __p3_264), vget_lane_f16(__s2_264, __p3_264)}); \ + __ret_264 = vfmlalq_high_f16(__s0_264, __s1_264, (float16x8_t) {vget_lane_f16(__s2_264, __p3_264), vget_lane_f16(__s2_264, __p3_264), vget_lane_f16(__s2_264, __p3_264), vget_lane_f16(__s2_264, __p3_264), vget_lane_f16(__s2_264, __p3_264), vget_lane_f16(__s2_264, __p3_264), vget_lane_f16(__s2_264, __p3_264), vget_lane_f16(__s2_264, __p3_264)}); \ __ret_264; \ }) #else -#define vfmlalq_lane_high_u32(__p0_265, __p1_265, __p2_265, __p3_265) __extension__ ({ \ +#define vfmlalq_lane_high_f16(__p0_265, __p1_265, __p2_265, __p3_265) __extension__ ({ \ float32x4_t __s0_265 = __p0_265; \ float16x8_t __s1_265 = __p1_265; \ float16x4_t __s2_265 = __p2_265; \ @@ -71929,23 +71929,23 @@ int16x8_t __reint2_263 = __noswap_vsetq_lane_s16(*(int16_t *) &__reint_263, *(in float16x8_t __rev1_265; __rev1_265 = __builtin_shufflevector(__s1_265, __s1_265, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x4_t __rev2_265; __rev2_265 = __builtin_shufflevector(__s2_265, __s2_265, 3, 2, 1, 0); \ float32x4_t __ret_265; \ - __ret_265 = __noswap_vfmlalq_high_u32(__rev0_265, __rev1_265, (float16x8_t) {__noswap_vget_lane_f16(__rev2_265, __p3_265), __noswap_vget_lane_f16(__rev2_265, __p3_265), __noswap_vget_lane_f16(__rev2_265, __p3_265), __noswap_vget_lane_f16(__rev2_265, __p3_265), __noswap_vget_lane_f16(__rev2_265, __p3_265), __noswap_vget_lane_f16(__rev2_265, __p3_265), __noswap_vget_lane_f16(__rev2_265, __p3_265), __noswap_vget_lane_f16(__rev2_265, __p3_265)}); \ + __ret_265 = __noswap_vfmlalq_high_f16(__rev0_265, __rev1_265, (float16x8_t) {__noswap_vget_lane_f16(__rev2_265, __p3_265), __noswap_vget_lane_f16(__rev2_265, __p3_265), __noswap_vget_lane_f16(__rev2_265, __p3_265), __noswap_vget_lane_f16(__rev2_265, __p3_265), __noswap_vget_lane_f16(__rev2_265, __p3_265), __noswap_vget_lane_f16(__rev2_265, __p3_265), __noswap_vget_lane_f16(__rev2_265, __p3_265), __noswap_vget_lane_f16(__rev2_265, __p3_265)}); \ __ret_265 = __builtin_shufflevector(__ret_265, __ret_265, 3, 2, 1, 0); \ __ret_265; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlal_lane_high_u32(__p0_266, __p1_266, __p2_266, __p3_266) __extension__ ({ \ +#define vfmlal_lane_high_f16(__p0_266, __p1_266, __p2_266, __p3_266) __extension__ ({ \ float32x2_t __s0_266 = __p0_266; \ float16x4_t __s1_266 = __p1_266; \ float16x4_t __s2_266 = __p2_266; \ float32x2_t __ret_266; \ - __ret_266 = vfmlal_high_u32(__s0_266, __s1_266, (float16x4_t) {vget_lane_f16(__s2_266, __p3_266), vget_lane_f16(__s2_266, __p3_266), vget_lane_f16(__s2_266, __p3_266), vget_lane_f16(__s2_266, __p3_266)}); \ + __ret_266 = vfmlal_high_f16(__s0_266, 
__s1_266, (float16x4_t) {vget_lane_f16(__s2_266, __p3_266), vget_lane_f16(__s2_266, __p3_266), vget_lane_f16(__s2_266, __p3_266), vget_lane_f16(__s2_266, __p3_266)}); \ __ret_266; \ }) #else -#define vfmlal_lane_high_u32(__p0_267, __p1_267, __p2_267, __p3_267) __extension__ ({ \ +#define vfmlal_lane_high_f16(__p0_267, __p1_267, __p2_267, __p3_267) __extension__ ({ \ float32x2_t __s0_267 = __p0_267; \ float16x4_t __s1_267 = __p1_267; \ float16x4_t __s2_267 = __p2_267; \ @@ -71953,23 +71953,23 @@ int16x8_t __reint2_263 = __noswap_vsetq_lane_s16(*(int16_t *) &__reint_263, *(in float16x4_t __rev1_267; __rev1_267 = __builtin_shufflevector(__s1_267, __s1_267, 3, 2, 1, 0); \ float16x4_t __rev2_267; __rev2_267 = __builtin_shufflevector(__s2_267, __s2_267, 3, 2, 1, 0); \ float32x2_t __ret_267; \ - __ret_267 = __noswap_vfmlal_high_u32(__rev0_267, __rev1_267, (float16x4_t) {__noswap_vget_lane_f16(__rev2_267, __p3_267), __noswap_vget_lane_f16(__rev2_267, __p3_267), __noswap_vget_lane_f16(__rev2_267, __p3_267), __noswap_vget_lane_f16(__rev2_267, __p3_267)}); \ + __ret_267 = __noswap_vfmlal_high_f16(__rev0_267, __rev1_267, (float16x4_t) {__noswap_vget_lane_f16(__rev2_267, __p3_267), __noswap_vget_lane_f16(__rev2_267, __p3_267), __noswap_vget_lane_f16(__rev2_267, __p3_267), __noswap_vget_lane_f16(__rev2_267, __p3_267)}); \ __ret_267 = __builtin_shufflevector(__ret_267, __ret_267, 1, 0); \ __ret_267; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlalq_lane_low_u32(__p0_268, __p1_268, __p2_268, __p3_268) __extension__ ({ \ +#define vfmlalq_lane_low_f16(__p0_268, __p1_268, __p2_268, __p3_268) __extension__ ({ \ float32x4_t __s0_268 = __p0_268; \ float16x8_t __s1_268 = __p1_268; \ float16x4_t __s2_268 = __p2_268; \ float32x4_t __ret_268; \ - __ret_268 = vfmlalq_low_u32(__s0_268, __s1_268, (float16x8_t) {vget_lane_f16(__s2_268, __p3_268), vget_lane_f16(__s2_268, __p3_268), vget_lane_f16(__s2_268, __p3_268), vget_lane_f16(__s2_268, __p3_268), vget_lane_f16(__s2_268, __p3_268), vget_lane_f16(__s2_268, __p3_268), vget_lane_f16(__s2_268, __p3_268), vget_lane_f16(__s2_268, __p3_268)}); \ + __ret_268 = vfmlalq_low_f16(__s0_268, __s1_268, (float16x8_t) {vget_lane_f16(__s2_268, __p3_268), vget_lane_f16(__s2_268, __p3_268), vget_lane_f16(__s2_268, __p3_268), vget_lane_f16(__s2_268, __p3_268), vget_lane_f16(__s2_268, __p3_268), vget_lane_f16(__s2_268, __p3_268), vget_lane_f16(__s2_268, __p3_268), vget_lane_f16(__s2_268, __p3_268)}); \ __ret_268; \ }) #else -#define vfmlalq_lane_low_u32(__p0_269, __p1_269, __p2_269, __p3_269) __extension__ ({ \ +#define vfmlalq_lane_low_f16(__p0_269, __p1_269, __p2_269, __p3_269) __extension__ ({ \ float32x4_t __s0_269 = __p0_269; \ float16x8_t __s1_269 = __p1_269; \ float16x4_t __s2_269 = __p2_269; \ @@ -71977,23 +71977,23 @@ int16x8_t __reint2_263 = __noswap_vsetq_lane_s16(*(int16_t *) &__reint_263, *(in float16x8_t __rev1_269; __rev1_269 = __builtin_shufflevector(__s1_269, __s1_269, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x4_t __rev2_269; __rev2_269 = __builtin_shufflevector(__s2_269, __s2_269, 3, 2, 1, 0); \ float32x4_t __ret_269; \ - __ret_269 = __noswap_vfmlalq_low_u32(__rev0_269, __rev1_269, (float16x8_t) {__noswap_vget_lane_f16(__rev2_269, __p3_269), __noswap_vget_lane_f16(__rev2_269, __p3_269), __noswap_vget_lane_f16(__rev2_269, __p3_269), __noswap_vget_lane_f16(__rev2_269, __p3_269), __noswap_vget_lane_f16(__rev2_269, __p3_269), __noswap_vget_lane_f16(__rev2_269, __p3_269), __noswap_vget_lane_f16(__rev2_269, __p3_269), __noswap_vget_lane_f16(__rev2_269, __p3_269)}); \ + __ret_269 
= __noswap_vfmlalq_low_f16(__rev0_269, __rev1_269, (float16x8_t) {__noswap_vget_lane_f16(__rev2_269, __p3_269), __noswap_vget_lane_f16(__rev2_269, __p3_269), __noswap_vget_lane_f16(__rev2_269, __p3_269), __noswap_vget_lane_f16(__rev2_269, __p3_269), __noswap_vget_lane_f16(__rev2_269, __p3_269), __noswap_vget_lane_f16(__rev2_269, __p3_269), __noswap_vget_lane_f16(__rev2_269, __p3_269), __noswap_vget_lane_f16(__rev2_269, __p3_269)}); \ __ret_269 = __builtin_shufflevector(__ret_269, __ret_269, 3, 2, 1, 0); \ __ret_269; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlal_lane_low_u32(__p0_270, __p1_270, __p2_270, __p3_270) __extension__ ({ \ +#define vfmlal_lane_low_f16(__p0_270, __p1_270, __p2_270, __p3_270) __extension__ ({ \ float32x2_t __s0_270 = __p0_270; \ float16x4_t __s1_270 = __p1_270; \ float16x4_t __s2_270 = __p2_270; \ float32x2_t __ret_270; \ - __ret_270 = vfmlal_low_u32(__s0_270, __s1_270, (float16x4_t) {vget_lane_f16(__s2_270, __p3_270), vget_lane_f16(__s2_270, __p3_270), vget_lane_f16(__s2_270, __p3_270), vget_lane_f16(__s2_270, __p3_270)}); \ + __ret_270 = vfmlal_low_f16(__s0_270, __s1_270, (float16x4_t) {vget_lane_f16(__s2_270, __p3_270), vget_lane_f16(__s2_270, __p3_270), vget_lane_f16(__s2_270, __p3_270), vget_lane_f16(__s2_270, __p3_270)}); \ __ret_270; \ }) #else -#define vfmlal_lane_low_u32(__p0_271, __p1_271, __p2_271, __p3_271) __extension__ ({ \ +#define vfmlal_lane_low_f16(__p0_271, __p1_271, __p2_271, __p3_271) __extension__ ({ \ float32x2_t __s0_271 = __p0_271; \ float16x4_t __s1_271 = __p1_271; \ float16x4_t __s2_271 = __p2_271; \ @@ -72001,23 +72001,23 @@ int16x8_t __reint2_263 = __noswap_vsetq_lane_s16(*(int16_t *) &__reint_263, *(in float16x4_t __rev1_271; __rev1_271 = __builtin_shufflevector(__s1_271, __s1_271, 3, 2, 1, 0); \ float16x4_t __rev2_271; __rev2_271 = __builtin_shufflevector(__s2_271, __s2_271, 3, 2, 1, 0); \ float32x2_t __ret_271; \ - __ret_271 = __noswap_vfmlal_low_u32(__rev0_271, __rev1_271, (float16x4_t) {__noswap_vget_lane_f16(__rev2_271, __p3_271), __noswap_vget_lane_f16(__rev2_271, __p3_271), __noswap_vget_lane_f16(__rev2_271, __p3_271), __noswap_vget_lane_f16(__rev2_271, __p3_271)}); \ + __ret_271 = __noswap_vfmlal_low_f16(__rev0_271, __rev1_271, (float16x4_t) {__noswap_vget_lane_f16(__rev2_271, __p3_271), __noswap_vget_lane_f16(__rev2_271, __p3_271), __noswap_vget_lane_f16(__rev2_271, __p3_271), __noswap_vget_lane_f16(__rev2_271, __p3_271)}); \ __ret_271 = __builtin_shufflevector(__ret_271, __ret_271, 1, 0); \ __ret_271; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlalq_laneq_high_u32(__p0_272, __p1_272, __p2_272, __p3_272) __extension__ ({ \ +#define vfmlalq_laneq_high_f16(__p0_272, __p1_272, __p2_272, __p3_272) __extension__ ({ \ float32x4_t __s0_272 = __p0_272; \ float16x8_t __s1_272 = __p1_272; \ float16x8_t __s2_272 = __p2_272; \ float32x4_t __ret_272; \ - __ret_272 = vfmlalq_high_u32(__s0_272, __s1_272, (float16x8_t) {vgetq_lane_f16(__s2_272, __p3_272), vgetq_lane_f16(__s2_272, __p3_272), vgetq_lane_f16(__s2_272, __p3_272), vgetq_lane_f16(__s2_272, __p3_272), vgetq_lane_f16(__s2_272, __p3_272), vgetq_lane_f16(__s2_272, __p3_272), vgetq_lane_f16(__s2_272, __p3_272), vgetq_lane_f16(__s2_272, __p3_272)}); \ + __ret_272 = vfmlalq_high_f16(__s0_272, __s1_272, (float16x8_t) {vgetq_lane_f16(__s2_272, __p3_272), vgetq_lane_f16(__s2_272, __p3_272), vgetq_lane_f16(__s2_272, __p3_272), vgetq_lane_f16(__s2_272, __p3_272), vgetq_lane_f16(__s2_272, __p3_272), vgetq_lane_f16(__s2_272, __p3_272), vgetq_lane_f16(__s2_272, __p3_272), 
vgetq_lane_f16(__s2_272, __p3_272)}); \ __ret_272; \ }) #else -#define vfmlalq_laneq_high_u32(__p0_273, __p1_273, __p2_273, __p3_273) __extension__ ({ \ +#define vfmlalq_laneq_high_f16(__p0_273, __p1_273, __p2_273, __p3_273) __extension__ ({ \ float32x4_t __s0_273 = __p0_273; \ float16x8_t __s1_273 = __p1_273; \ float16x8_t __s2_273 = __p2_273; \ @@ -72025,23 +72025,23 @@ int16x8_t __reint2_263 = __noswap_vsetq_lane_s16(*(int16_t *) &__reint_263, *(in float16x8_t __rev1_273; __rev1_273 = __builtin_shufflevector(__s1_273, __s1_273, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev2_273; __rev2_273 = __builtin_shufflevector(__s2_273, __s2_273, 7, 6, 5, 4, 3, 2, 1, 0); \ float32x4_t __ret_273; \ - __ret_273 = __noswap_vfmlalq_high_u32(__rev0_273, __rev1_273, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_273, __p3_273), __noswap_vgetq_lane_f16(__rev2_273, __p3_273), __noswap_vgetq_lane_f16(__rev2_273, __p3_273), __noswap_vgetq_lane_f16(__rev2_273, __p3_273), __noswap_vgetq_lane_f16(__rev2_273, __p3_273), __noswap_vgetq_lane_f16(__rev2_273, __p3_273), __noswap_vgetq_lane_f16(__rev2_273, __p3_273), __noswap_vgetq_lane_f16(__rev2_273, __p3_273)}); \ + __ret_273 = __noswap_vfmlalq_high_f16(__rev0_273, __rev1_273, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_273, __p3_273), __noswap_vgetq_lane_f16(__rev2_273, __p3_273), __noswap_vgetq_lane_f16(__rev2_273, __p3_273), __noswap_vgetq_lane_f16(__rev2_273, __p3_273), __noswap_vgetq_lane_f16(__rev2_273, __p3_273), __noswap_vgetq_lane_f16(__rev2_273, __p3_273), __noswap_vgetq_lane_f16(__rev2_273, __p3_273), __noswap_vgetq_lane_f16(__rev2_273, __p3_273)}); \ __ret_273 = __builtin_shufflevector(__ret_273, __ret_273, 3, 2, 1, 0); \ __ret_273; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlal_laneq_high_u32(__p0_274, __p1_274, __p2_274, __p3_274) __extension__ ({ \ +#define vfmlal_laneq_high_f16(__p0_274, __p1_274, __p2_274, __p3_274) __extension__ ({ \ float32x2_t __s0_274 = __p0_274; \ float16x4_t __s1_274 = __p1_274; \ float16x8_t __s2_274 = __p2_274; \ float32x2_t __ret_274; \ - __ret_274 = vfmlal_high_u32(__s0_274, __s1_274, (float16x4_t) {vgetq_lane_f16(__s2_274, __p3_274), vgetq_lane_f16(__s2_274, __p3_274), vgetq_lane_f16(__s2_274, __p3_274), vgetq_lane_f16(__s2_274, __p3_274)}); \ + __ret_274 = vfmlal_high_f16(__s0_274, __s1_274, (float16x4_t) {vgetq_lane_f16(__s2_274, __p3_274), vgetq_lane_f16(__s2_274, __p3_274), vgetq_lane_f16(__s2_274, __p3_274), vgetq_lane_f16(__s2_274, __p3_274)}); \ __ret_274; \ }) #else -#define vfmlal_laneq_high_u32(__p0_275, __p1_275, __p2_275, __p3_275) __extension__ ({ \ +#define vfmlal_laneq_high_f16(__p0_275, __p1_275, __p2_275, __p3_275) __extension__ ({ \ float32x2_t __s0_275 = __p0_275; \ float16x4_t __s1_275 = __p1_275; \ float16x8_t __s2_275 = __p2_275; \ @@ -72049,23 +72049,23 @@ int16x8_t __reint2_263 = __noswap_vsetq_lane_s16(*(int16_t *) &__reint_263, *(in float16x4_t __rev1_275; __rev1_275 = __builtin_shufflevector(__s1_275, __s1_275, 3, 2, 1, 0); \ float16x8_t __rev2_275; __rev2_275 = __builtin_shufflevector(__s2_275, __s2_275, 7, 6, 5, 4, 3, 2, 1, 0); \ float32x2_t __ret_275; \ - __ret_275 = __noswap_vfmlal_high_u32(__rev0_275, __rev1_275, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_275, __p3_275), __noswap_vgetq_lane_f16(__rev2_275, __p3_275), __noswap_vgetq_lane_f16(__rev2_275, __p3_275), __noswap_vgetq_lane_f16(__rev2_275, __p3_275)}); \ + __ret_275 = __noswap_vfmlal_high_f16(__rev0_275, __rev1_275, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_275, __p3_275), __noswap_vgetq_lane_f16(__rev2_275, 
__p3_275), __noswap_vgetq_lane_f16(__rev2_275, __p3_275), __noswap_vgetq_lane_f16(__rev2_275, __p3_275)}); \ __ret_275 = __builtin_shufflevector(__ret_275, __ret_275, 1, 0); \ __ret_275; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlalq_laneq_low_u32(__p0_276, __p1_276, __p2_276, __p3_276) __extension__ ({ \ +#define vfmlalq_laneq_low_f16(__p0_276, __p1_276, __p2_276, __p3_276) __extension__ ({ \ float32x4_t __s0_276 = __p0_276; \ float16x8_t __s1_276 = __p1_276; \ float16x8_t __s2_276 = __p2_276; \ float32x4_t __ret_276; \ - __ret_276 = vfmlalq_low_u32(__s0_276, __s1_276, (float16x8_t) {vgetq_lane_f16(__s2_276, __p3_276), vgetq_lane_f16(__s2_276, __p3_276), vgetq_lane_f16(__s2_276, __p3_276), vgetq_lane_f16(__s2_276, __p3_276), vgetq_lane_f16(__s2_276, __p3_276), vgetq_lane_f16(__s2_276, __p3_276), vgetq_lane_f16(__s2_276, __p3_276), vgetq_lane_f16(__s2_276, __p3_276)}); \ + __ret_276 = vfmlalq_low_f16(__s0_276, __s1_276, (float16x8_t) {vgetq_lane_f16(__s2_276, __p3_276), vgetq_lane_f16(__s2_276, __p3_276), vgetq_lane_f16(__s2_276, __p3_276), vgetq_lane_f16(__s2_276, __p3_276), vgetq_lane_f16(__s2_276, __p3_276), vgetq_lane_f16(__s2_276, __p3_276), vgetq_lane_f16(__s2_276, __p3_276), vgetq_lane_f16(__s2_276, __p3_276)}); \ __ret_276; \ }) #else -#define vfmlalq_laneq_low_u32(__p0_277, __p1_277, __p2_277, __p3_277) __extension__ ({ \ +#define vfmlalq_laneq_low_f16(__p0_277, __p1_277, __p2_277, __p3_277) __extension__ ({ \ float32x4_t __s0_277 = __p0_277; \ float16x8_t __s1_277 = __p1_277; \ float16x8_t __s2_277 = __p2_277; \ @@ -72073,23 +72073,23 @@ int16x8_t __reint2_263 = __noswap_vsetq_lane_s16(*(int16_t *) &__reint_263, *(in float16x8_t __rev1_277; __rev1_277 = __builtin_shufflevector(__s1_277, __s1_277, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev2_277; __rev2_277 = __builtin_shufflevector(__s2_277, __s2_277, 7, 6, 5, 4, 3, 2, 1, 0); \ float32x4_t __ret_277; \ - __ret_277 = __noswap_vfmlalq_low_u32(__rev0_277, __rev1_277, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_277, __p3_277), __noswap_vgetq_lane_f16(__rev2_277, __p3_277), __noswap_vgetq_lane_f16(__rev2_277, __p3_277), __noswap_vgetq_lane_f16(__rev2_277, __p3_277), __noswap_vgetq_lane_f16(__rev2_277, __p3_277), __noswap_vgetq_lane_f16(__rev2_277, __p3_277), __noswap_vgetq_lane_f16(__rev2_277, __p3_277), __noswap_vgetq_lane_f16(__rev2_277, __p3_277)}); \ + __ret_277 = __noswap_vfmlalq_low_f16(__rev0_277, __rev1_277, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_277, __p3_277), __noswap_vgetq_lane_f16(__rev2_277, __p3_277), __noswap_vgetq_lane_f16(__rev2_277, __p3_277), __noswap_vgetq_lane_f16(__rev2_277, __p3_277), __noswap_vgetq_lane_f16(__rev2_277, __p3_277), __noswap_vgetq_lane_f16(__rev2_277, __p3_277), __noswap_vgetq_lane_f16(__rev2_277, __p3_277), __noswap_vgetq_lane_f16(__rev2_277, __p3_277)}); \ __ret_277 = __builtin_shufflevector(__ret_277, __ret_277, 3, 2, 1, 0); \ __ret_277; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlal_laneq_low_u32(__p0_278, __p1_278, __p2_278, __p3_278) __extension__ ({ \ +#define vfmlal_laneq_low_f16(__p0_278, __p1_278, __p2_278, __p3_278) __extension__ ({ \ float32x2_t __s0_278 = __p0_278; \ float16x4_t __s1_278 = __p1_278; \ float16x8_t __s2_278 = __p2_278; \ float32x2_t __ret_278; \ - __ret_278 = vfmlal_low_u32(__s0_278, __s1_278, (float16x4_t) {vgetq_lane_f16(__s2_278, __p3_278), vgetq_lane_f16(__s2_278, __p3_278), vgetq_lane_f16(__s2_278, __p3_278), vgetq_lane_f16(__s2_278, __p3_278)}); \ + __ret_278 = vfmlal_low_f16(__s0_278, __s1_278, (float16x4_t) {vgetq_lane_f16(__s2_278, 
__p3_278), vgetq_lane_f16(__s2_278, __p3_278), vgetq_lane_f16(__s2_278, __p3_278), vgetq_lane_f16(__s2_278, __p3_278)}); \ __ret_278; \ }) #else -#define vfmlal_laneq_low_u32(__p0_279, __p1_279, __p2_279, __p3_279) __extension__ ({ \ +#define vfmlal_laneq_low_f16(__p0_279, __p1_279, __p2_279, __p3_279) __extension__ ({ \ float32x2_t __s0_279 = __p0_279; \ float16x4_t __s1_279 = __p1_279; \ float16x8_t __s2_279 = __p2_279; \ @@ -72097,23 +72097,23 @@ int16x8_t __reint2_263 = __noswap_vsetq_lane_s16(*(int16_t *) &__reint_263, *(in float16x4_t __rev1_279; __rev1_279 = __builtin_shufflevector(__s1_279, __s1_279, 3, 2, 1, 0); \ float16x8_t __rev2_279; __rev2_279 = __builtin_shufflevector(__s2_279, __s2_279, 7, 6, 5, 4, 3, 2, 1, 0); \ float32x2_t __ret_279; \ - __ret_279 = __noswap_vfmlal_low_u32(__rev0_279, __rev1_279, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_279, __p3_279), __noswap_vgetq_lane_f16(__rev2_279, __p3_279), __noswap_vgetq_lane_f16(__rev2_279, __p3_279), __noswap_vgetq_lane_f16(__rev2_279, __p3_279)}); \ + __ret_279 = __noswap_vfmlal_low_f16(__rev0_279, __rev1_279, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_279, __p3_279), __noswap_vgetq_lane_f16(__rev2_279, __p3_279), __noswap_vgetq_lane_f16(__rev2_279, __p3_279), __noswap_vgetq_lane_f16(__rev2_279, __p3_279)}); \ __ret_279 = __builtin_shufflevector(__ret_279, __ret_279, 1, 0); \ __ret_279; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlslq_lane_high_u32(__p0_280, __p1_280, __p2_280, __p3_280) __extension__ ({ \ +#define vfmlslq_lane_high_f16(__p0_280, __p1_280, __p2_280, __p3_280) __extension__ ({ \ float32x4_t __s0_280 = __p0_280; \ float16x8_t __s1_280 = __p1_280; \ float16x4_t __s2_280 = __p2_280; \ float32x4_t __ret_280; \ - __ret_280 = vfmlslq_high_u32(__s0_280, __s1_280, (float16x8_t) {vget_lane_f16(__s2_280, __p3_280), vget_lane_f16(__s2_280, __p3_280), vget_lane_f16(__s2_280, __p3_280), vget_lane_f16(__s2_280, __p3_280), vget_lane_f16(__s2_280, __p3_280), vget_lane_f16(__s2_280, __p3_280), vget_lane_f16(__s2_280, __p3_280), vget_lane_f16(__s2_280, __p3_280)}); \ + __ret_280 = vfmlslq_high_f16(__s0_280, __s1_280, (float16x8_t) {vget_lane_f16(__s2_280, __p3_280), vget_lane_f16(__s2_280, __p3_280), vget_lane_f16(__s2_280, __p3_280), vget_lane_f16(__s2_280, __p3_280), vget_lane_f16(__s2_280, __p3_280), vget_lane_f16(__s2_280, __p3_280), vget_lane_f16(__s2_280, __p3_280), vget_lane_f16(__s2_280, __p3_280)}); \ __ret_280; \ }) #else -#define vfmlslq_lane_high_u32(__p0_281, __p1_281, __p2_281, __p3_281) __extension__ ({ \ +#define vfmlslq_lane_high_f16(__p0_281, __p1_281, __p2_281, __p3_281) __extension__ ({ \ float32x4_t __s0_281 = __p0_281; \ float16x8_t __s1_281 = __p1_281; \ float16x4_t __s2_281 = __p2_281; \ @@ -72121,23 +72121,23 @@ int16x8_t __reint2_263 = __noswap_vsetq_lane_s16(*(int16_t *) &__reint_263, *(in float16x8_t __rev1_281; __rev1_281 = __builtin_shufflevector(__s1_281, __s1_281, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x4_t __rev2_281; __rev2_281 = __builtin_shufflevector(__s2_281, __s2_281, 3, 2, 1, 0); \ float32x4_t __ret_281; \ - __ret_281 = __noswap_vfmlslq_high_u32(__rev0_281, __rev1_281, (float16x8_t) {__noswap_vget_lane_f16(__rev2_281, __p3_281), __noswap_vget_lane_f16(__rev2_281, __p3_281), __noswap_vget_lane_f16(__rev2_281, __p3_281), __noswap_vget_lane_f16(__rev2_281, __p3_281), __noswap_vget_lane_f16(__rev2_281, __p3_281), __noswap_vget_lane_f16(__rev2_281, __p3_281), __noswap_vget_lane_f16(__rev2_281, __p3_281), __noswap_vget_lane_f16(__rev2_281, __p3_281)}); \ + __ret_281 = 
__noswap_vfmlslq_high_f16(__rev0_281, __rev1_281, (float16x8_t) {__noswap_vget_lane_f16(__rev2_281, __p3_281), __noswap_vget_lane_f16(__rev2_281, __p3_281), __noswap_vget_lane_f16(__rev2_281, __p3_281), __noswap_vget_lane_f16(__rev2_281, __p3_281), __noswap_vget_lane_f16(__rev2_281, __p3_281), __noswap_vget_lane_f16(__rev2_281, __p3_281), __noswap_vget_lane_f16(__rev2_281, __p3_281), __noswap_vget_lane_f16(__rev2_281, __p3_281)}); \ __ret_281 = __builtin_shufflevector(__ret_281, __ret_281, 3, 2, 1, 0); \ __ret_281; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlsl_lane_high_u32(__p0_282, __p1_282, __p2_282, __p3_282) __extension__ ({ \ +#define vfmlsl_lane_high_f16(__p0_282, __p1_282, __p2_282, __p3_282) __extension__ ({ \ float32x2_t __s0_282 = __p0_282; \ float16x4_t __s1_282 = __p1_282; \ float16x4_t __s2_282 = __p2_282; \ float32x2_t __ret_282; \ - __ret_282 = vfmlsl_high_u32(__s0_282, __s1_282, (float16x4_t) {vget_lane_f16(__s2_282, __p3_282), vget_lane_f16(__s2_282, __p3_282), vget_lane_f16(__s2_282, __p3_282), vget_lane_f16(__s2_282, __p3_282)}); \ + __ret_282 = vfmlsl_high_f16(__s0_282, __s1_282, (float16x4_t) {vget_lane_f16(__s2_282, __p3_282), vget_lane_f16(__s2_282, __p3_282), vget_lane_f16(__s2_282, __p3_282), vget_lane_f16(__s2_282, __p3_282)}); \ __ret_282; \ }) #else -#define vfmlsl_lane_high_u32(__p0_283, __p1_283, __p2_283, __p3_283) __extension__ ({ \ +#define vfmlsl_lane_high_f16(__p0_283, __p1_283, __p2_283, __p3_283) __extension__ ({ \ float32x2_t __s0_283 = __p0_283; \ float16x4_t __s1_283 = __p1_283; \ float16x4_t __s2_283 = __p2_283; \ @@ -72145,23 +72145,23 @@ int16x8_t __reint2_263 = __noswap_vsetq_lane_s16(*(int16_t *) &__reint_263, *(in float16x4_t __rev1_283; __rev1_283 = __builtin_shufflevector(__s1_283, __s1_283, 3, 2, 1, 0); \ float16x4_t __rev2_283; __rev2_283 = __builtin_shufflevector(__s2_283, __s2_283, 3, 2, 1, 0); \ float32x2_t __ret_283; \ - __ret_283 = __noswap_vfmlsl_high_u32(__rev0_283, __rev1_283, (float16x4_t) {__noswap_vget_lane_f16(__rev2_283, __p3_283), __noswap_vget_lane_f16(__rev2_283, __p3_283), __noswap_vget_lane_f16(__rev2_283, __p3_283), __noswap_vget_lane_f16(__rev2_283, __p3_283)}); \ + __ret_283 = __noswap_vfmlsl_high_f16(__rev0_283, __rev1_283, (float16x4_t) {__noswap_vget_lane_f16(__rev2_283, __p3_283), __noswap_vget_lane_f16(__rev2_283, __p3_283), __noswap_vget_lane_f16(__rev2_283, __p3_283), __noswap_vget_lane_f16(__rev2_283, __p3_283)}); \ __ret_283 = __builtin_shufflevector(__ret_283, __ret_283, 1, 0); \ __ret_283; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlslq_lane_low_u32(__p0_284, __p1_284, __p2_284, __p3_284) __extension__ ({ \ +#define vfmlslq_lane_low_f16(__p0_284, __p1_284, __p2_284, __p3_284) __extension__ ({ \ float32x4_t __s0_284 = __p0_284; \ float16x8_t __s1_284 = __p1_284; \ float16x4_t __s2_284 = __p2_284; \ float32x4_t __ret_284; \ - __ret_284 = vfmlslq_low_u32(__s0_284, __s1_284, (float16x8_t) {vget_lane_f16(__s2_284, __p3_284), vget_lane_f16(__s2_284, __p3_284), vget_lane_f16(__s2_284, __p3_284), vget_lane_f16(__s2_284, __p3_284), vget_lane_f16(__s2_284, __p3_284), vget_lane_f16(__s2_284, __p3_284), vget_lane_f16(__s2_284, __p3_284), vget_lane_f16(__s2_284, __p3_284)}); \ + __ret_284 = vfmlslq_low_f16(__s0_284, __s1_284, (float16x8_t) {vget_lane_f16(__s2_284, __p3_284), vget_lane_f16(__s2_284, __p3_284), vget_lane_f16(__s2_284, __p3_284), vget_lane_f16(__s2_284, __p3_284), vget_lane_f16(__s2_284, __p3_284), vget_lane_f16(__s2_284, __p3_284), vget_lane_f16(__s2_284, __p3_284), vget_lane_f16(__s2_284, 
__p3_284)}); \ __ret_284; \ }) #else -#define vfmlslq_lane_low_u32(__p0_285, __p1_285, __p2_285, __p3_285) __extension__ ({ \ +#define vfmlslq_lane_low_f16(__p0_285, __p1_285, __p2_285, __p3_285) __extension__ ({ \ float32x4_t __s0_285 = __p0_285; \ float16x8_t __s1_285 = __p1_285; \ float16x4_t __s2_285 = __p2_285; \ @@ -72169,23 +72169,23 @@ int16x8_t __reint2_263 = __noswap_vsetq_lane_s16(*(int16_t *) &__reint_263, *(in float16x8_t __rev1_285; __rev1_285 = __builtin_shufflevector(__s1_285, __s1_285, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x4_t __rev2_285; __rev2_285 = __builtin_shufflevector(__s2_285, __s2_285, 3, 2, 1, 0); \ float32x4_t __ret_285; \ - __ret_285 = __noswap_vfmlslq_low_u32(__rev0_285, __rev1_285, (float16x8_t) {__noswap_vget_lane_f16(__rev2_285, __p3_285), __noswap_vget_lane_f16(__rev2_285, __p3_285), __noswap_vget_lane_f16(__rev2_285, __p3_285), __noswap_vget_lane_f16(__rev2_285, __p3_285), __noswap_vget_lane_f16(__rev2_285, __p3_285), __noswap_vget_lane_f16(__rev2_285, __p3_285), __noswap_vget_lane_f16(__rev2_285, __p3_285), __noswap_vget_lane_f16(__rev2_285, __p3_285)}); \ + __ret_285 = __noswap_vfmlslq_low_f16(__rev0_285, __rev1_285, (float16x8_t) {__noswap_vget_lane_f16(__rev2_285, __p3_285), __noswap_vget_lane_f16(__rev2_285, __p3_285), __noswap_vget_lane_f16(__rev2_285, __p3_285), __noswap_vget_lane_f16(__rev2_285, __p3_285), __noswap_vget_lane_f16(__rev2_285, __p3_285), __noswap_vget_lane_f16(__rev2_285, __p3_285), __noswap_vget_lane_f16(__rev2_285, __p3_285), __noswap_vget_lane_f16(__rev2_285, __p3_285)}); \ __ret_285 = __builtin_shufflevector(__ret_285, __ret_285, 3, 2, 1, 0); \ __ret_285; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlsl_lane_low_u32(__p0_286, __p1_286, __p2_286, __p3_286) __extension__ ({ \ +#define vfmlsl_lane_low_f16(__p0_286, __p1_286, __p2_286, __p3_286) __extension__ ({ \ float32x2_t __s0_286 = __p0_286; \ float16x4_t __s1_286 = __p1_286; \ float16x4_t __s2_286 = __p2_286; \ float32x2_t __ret_286; \ - __ret_286 = vfmlsl_low_u32(__s0_286, __s1_286, (float16x4_t) {vget_lane_f16(__s2_286, __p3_286), vget_lane_f16(__s2_286, __p3_286), vget_lane_f16(__s2_286, __p3_286), vget_lane_f16(__s2_286, __p3_286)}); \ + __ret_286 = vfmlsl_low_f16(__s0_286, __s1_286, (float16x4_t) {vget_lane_f16(__s2_286, __p3_286), vget_lane_f16(__s2_286, __p3_286), vget_lane_f16(__s2_286, __p3_286), vget_lane_f16(__s2_286, __p3_286)}); \ __ret_286; \ }) #else -#define vfmlsl_lane_low_u32(__p0_287, __p1_287, __p2_287, __p3_287) __extension__ ({ \ +#define vfmlsl_lane_low_f16(__p0_287, __p1_287, __p2_287, __p3_287) __extension__ ({ \ float32x2_t __s0_287 = __p0_287; \ float16x4_t __s1_287 = __p1_287; \ float16x4_t __s2_287 = __p2_287; \ @@ -72193,23 +72193,23 @@ int16x8_t __reint2_263 = __noswap_vsetq_lane_s16(*(int16_t *) &__reint_263, *(in float16x4_t __rev1_287; __rev1_287 = __builtin_shufflevector(__s1_287, __s1_287, 3, 2, 1, 0); \ float16x4_t __rev2_287; __rev2_287 = __builtin_shufflevector(__s2_287, __s2_287, 3, 2, 1, 0); \ float32x2_t __ret_287; \ - __ret_287 = __noswap_vfmlsl_low_u32(__rev0_287, __rev1_287, (float16x4_t) {__noswap_vget_lane_f16(__rev2_287, __p3_287), __noswap_vget_lane_f16(__rev2_287, __p3_287), __noswap_vget_lane_f16(__rev2_287, __p3_287), __noswap_vget_lane_f16(__rev2_287, __p3_287)}); \ + __ret_287 = __noswap_vfmlsl_low_f16(__rev0_287, __rev1_287, (float16x4_t) {__noswap_vget_lane_f16(__rev2_287, __p3_287), __noswap_vget_lane_f16(__rev2_287, __p3_287), __noswap_vget_lane_f16(__rev2_287, __p3_287), __noswap_vget_lane_f16(__rev2_287, __p3_287)}); 
\ __ret_287 = __builtin_shufflevector(__ret_287, __ret_287, 1, 0); \ __ret_287; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlslq_laneq_high_u32(__p0_288, __p1_288, __p2_288, __p3_288) __extension__ ({ \ +#define vfmlslq_laneq_high_f16(__p0_288, __p1_288, __p2_288, __p3_288) __extension__ ({ \ float32x4_t __s0_288 = __p0_288; \ float16x8_t __s1_288 = __p1_288; \ float16x8_t __s2_288 = __p2_288; \ float32x4_t __ret_288; \ - __ret_288 = vfmlslq_high_u32(__s0_288, __s1_288, (float16x8_t) {vgetq_lane_f16(__s2_288, __p3_288), vgetq_lane_f16(__s2_288, __p3_288), vgetq_lane_f16(__s2_288, __p3_288), vgetq_lane_f16(__s2_288, __p3_288), vgetq_lane_f16(__s2_288, __p3_288), vgetq_lane_f16(__s2_288, __p3_288), vgetq_lane_f16(__s2_288, __p3_288), vgetq_lane_f16(__s2_288, __p3_288)}); \ + __ret_288 = vfmlslq_high_f16(__s0_288, __s1_288, (float16x8_t) {vgetq_lane_f16(__s2_288, __p3_288), vgetq_lane_f16(__s2_288, __p3_288), vgetq_lane_f16(__s2_288, __p3_288), vgetq_lane_f16(__s2_288, __p3_288), vgetq_lane_f16(__s2_288, __p3_288), vgetq_lane_f16(__s2_288, __p3_288), vgetq_lane_f16(__s2_288, __p3_288), vgetq_lane_f16(__s2_288, __p3_288)}); \ __ret_288; \ }) #else -#define vfmlslq_laneq_high_u32(__p0_289, __p1_289, __p2_289, __p3_289) __extension__ ({ \ +#define vfmlslq_laneq_high_f16(__p0_289, __p1_289, __p2_289, __p3_289) __extension__ ({ \ float32x4_t __s0_289 = __p0_289; \ float16x8_t __s1_289 = __p1_289; \ float16x8_t __s2_289 = __p2_289; \ @@ -72217,23 +72217,23 @@ int16x8_t __reint2_263 = __noswap_vsetq_lane_s16(*(int16_t *) &__reint_263, *(in float16x8_t __rev1_289; __rev1_289 = __builtin_shufflevector(__s1_289, __s1_289, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev2_289; __rev2_289 = __builtin_shufflevector(__s2_289, __s2_289, 7, 6, 5, 4, 3, 2, 1, 0); \ float32x4_t __ret_289; \ - __ret_289 = __noswap_vfmlslq_high_u32(__rev0_289, __rev1_289, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_289, __p3_289), __noswap_vgetq_lane_f16(__rev2_289, __p3_289), __noswap_vgetq_lane_f16(__rev2_289, __p3_289), __noswap_vgetq_lane_f16(__rev2_289, __p3_289), __noswap_vgetq_lane_f16(__rev2_289, __p3_289), __noswap_vgetq_lane_f16(__rev2_289, __p3_289), __noswap_vgetq_lane_f16(__rev2_289, __p3_289), __noswap_vgetq_lane_f16(__rev2_289, __p3_289)}); \ + __ret_289 = __noswap_vfmlslq_high_f16(__rev0_289, __rev1_289, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_289, __p3_289), __noswap_vgetq_lane_f16(__rev2_289, __p3_289), __noswap_vgetq_lane_f16(__rev2_289, __p3_289), __noswap_vgetq_lane_f16(__rev2_289, __p3_289), __noswap_vgetq_lane_f16(__rev2_289, __p3_289), __noswap_vgetq_lane_f16(__rev2_289, __p3_289), __noswap_vgetq_lane_f16(__rev2_289, __p3_289), __noswap_vgetq_lane_f16(__rev2_289, __p3_289)}); \ __ret_289 = __builtin_shufflevector(__ret_289, __ret_289, 3, 2, 1, 0); \ __ret_289; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlsl_laneq_high_u32(__p0_290, __p1_290, __p2_290, __p3_290) __extension__ ({ \ +#define vfmlsl_laneq_high_f16(__p0_290, __p1_290, __p2_290, __p3_290) __extension__ ({ \ float32x2_t __s0_290 = __p0_290; \ float16x4_t __s1_290 = __p1_290; \ float16x8_t __s2_290 = __p2_290; \ float32x2_t __ret_290; \ - __ret_290 = vfmlsl_high_u32(__s0_290, __s1_290, (float16x4_t) {vgetq_lane_f16(__s2_290, __p3_290), vgetq_lane_f16(__s2_290, __p3_290), vgetq_lane_f16(__s2_290, __p3_290), vgetq_lane_f16(__s2_290, __p3_290)}); \ + __ret_290 = vfmlsl_high_f16(__s0_290, __s1_290, (float16x4_t) {vgetq_lane_f16(__s2_290, __p3_290), vgetq_lane_f16(__s2_290, __p3_290), vgetq_lane_f16(__s2_290, __p3_290), 
vgetq_lane_f16(__s2_290, __p3_290)}); \ __ret_290; \ }) #else -#define vfmlsl_laneq_high_u32(__p0_291, __p1_291, __p2_291, __p3_291) __extension__ ({ \ +#define vfmlsl_laneq_high_f16(__p0_291, __p1_291, __p2_291, __p3_291) __extension__ ({ \ float32x2_t __s0_291 = __p0_291; \ float16x4_t __s1_291 = __p1_291; \ float16x8_t __s2_291 = __p2_291; \ @@ -72241,23 +72241,23 @@ int16x8_t __reint2_263 = __noswap_vsetq_lane_s16(*(int16_t *) &__reint_263, *(in float16x4_t __rev1_291; __rev1_291 = __builtin_shufflevector(__s1_291, __s1_291, 3, 2, 1, 0); \ float16x8_t __rev2_291; __rev2_291 = __builtin_shufflevector(__s2_291, __s2_291, 7, 6, 5, 4, 3, 2, 1, 0); \ float32x2_t __ret_291; \ - __ret_291 = __noswap_vfmlsl_high_u32(__rev0_291, __rev1_291, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_291, __p3_291), __noswap_vgetq_lane_f16(__rev2_291, __p3_291), __noswap_vgetq_lane_f16(__rev2_291, __p3_291), __noswap_vgetq_lane_f16(__rev2_291, __p3_291)}); \ + __ret_291 = __noswap_vfmlsl_high_f16(__rev0_291, __rev1_291, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_291, __p3_291), __noswap_vgetq_lane_f16(__rev2_291, __p3_291), __noswap_vgetq_lane_f16(__rev2_291, __p3_291), __noswap_vgetq_lane_f16(__rev2_291, __p3_291)}); \ __ret_291 = __builtin_shufflevector(__ret_291, __ret_291, 1, 0); \ __ret_291; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlslq_laneq_low_u32(__p0_292, __p1_292, __p2_292, __p3_292) __extension__ ({ \ +#define vfmlslq_laneq_low_f16(__p0_292, __p1_292, __p2_292, __p3_292) __extension__ ({ \ float32x4_t __s0_292 = __p0_292; \ float16x8_t __s1_292 = __p1_292; \ float16x8_t __s2_292 = __p2_292; \ float32x4_t __ret_292; \ - __ret_292 = vfmlslq_low_u32(__s0_292, __s1_292, (float16x8_t) {vgetq_lane_f16(__s2_292, __p3_292), vgetq_lane_f16(__s2_292, __p3_292), vgetq_lane_f16(__s2_292, __p3_292), vgetq_lane_f16(__s2_292, __p3_292), vgetq_lane_f16(__s2_292, __p3_292), vgetq_lane_f16(__s2_292, __p3_292), vgetq_lane_f16(__s2_292, __p3_292), vgetq_lane_f16(__s2_292, __p3_292)}); \ + __ret_292 = vfmlslq_low_f16(__s0_292, __s1_292, (float16x8_t) {vgetq_lane_f16(__s2_292, __p3_292), vgetq_lane_f16(__s2_292, __p3_292), vgetq_lane_f16(__s2_292, __p3_292), vgetq_lane_f16(__s2_292, __p3_292), vgetq_lane_f16(__s2_292, __p3_292), vgetq_lane_f16(__s2_292, __p3_292), vgetq_lane_f16(__s2_292, __p3_292), vgetq_lane_f16(__s2_292, __p3_292)}); \ __ret_292; \ }) #else -#define vfmlslq_laneq_low_u32(__p0_293, __p1_293, __p2_293, __p3_293) __extension__ ({ \ +#define vfmlslq_laneq_low_f16(__p0_293, __p1_293, __p2_293, __p3_293) __extension__ ({ \ float32x4_t __s0_293 = __p0_293; \ float16x8_t __s1_293 = __p1_293; \ float16x8_t __s2_293 = __p2_293; \ @@ -72265,23 +72265,23 @@ int16x8_t __reint2_263 = __noswap_vsetq_lane_s16(*(int16_t *) &__reint_263, *(in float16x8_t __rev1_293; __rev1_293 = __builtin_shufflevector(__s1_293, __s1_293, 7, 6, 5, 4, 3, 2, 1, 0); \ float16x8_t __rev2_293; __rev2_293 = __builtin_shufflevector(__s2_293, __s2_293, 7, 6, 5, 4, 3, 2, 1, 0); \ float32x4_t __ret_293; \ - __ret_293 = __noswap_vfmlslq_low_u32(__rev0_293, __rev1_293, (float16x8_t) {__noswap_vgetq_lane_f16(__rev2_293, __p3_293), __noswap_vgetq_lane_f16(__rev2_293, __p3_293), __noswap_vgetq_lane_f16(__rev2_293, __p3_293), __noswap_vgetq_lane_f16(__rev2_293, __p3_293), __noswap_vgetq_lane_f16(__rev2_293, __p3_293), __noswap_vgetq_lane_f16(__rev2_293, __p3_293), __noswap_vgetq_lane_f16(__rev2_293, __p3_293), __noswap_vgetq_lane_f16(__rev2_293, __p3_293)}); \ + __ret_293 = __noswap_vfmlslq_low_f16(__rev0_293, __rev1_293, (float16x8_t) 
{__noswap_vgetq_lane_f16(__rev2_293, __p3_293), __noswap_vgetq_lane_f16(__rev2_293, __p3_293), __noswap_vgetq_lane_f16(__rev2_293, __p3_293), __noswap_vgetq_lane_f16(__rev2_293, __p3_293), __noswap_vgetq_lane_f16(__rev2_293, __p3_293), __noswap_vgetq_lane_f16(__rev2_293, __p3_293), __noswap_vgetq_lane_f16(__rev2_293, __p3_293), __noswap_vgetq_lane_f16(__rev2_293, __p3_293)}); \ __ret_293 = __builtin_shufflevector(__ret_293, __ret_293, 3, 2, 1, 0); \ __ret_293; \ }) #endif #ifdef __LITTLE_ENDIAN__ -#define vfmlsl_laneq_low_u32(__p0_294, __p1_294, __p2_294, __p3_294) __extension__ ({ \ +#define vfmlsl_laneq_low_f16(__p0_294, __p1_294, __p2_294, __p3_294) __extension__ ({ \ float32x2_t __s0_294 = __p0_294; \ float16x4_t __s1_294 = __p1_294; \ float16x8_t __s2_294 = __p2_294; \ float32x2_t __ret_294; \ - __ret_294 = vfmlsl_low_u32(__s0_294, __s1_294, (float16x4_t) {vgetq_lane_f16(__s2_294, __p3_294), vgetq_lane_f16(__s2_294, __p3_294), vgetq_lane_f16(__s2_294, __p3_294), vgetq_lane_f16(__s2_294, __p3_294)}); \ + __ret_294 = vfmlsl_low_f16(__s0_294, __s1_294, (float16x4_t) {vgetq_lane_f16(__s2_294, __p3_294), vgetq_lane_f16(__s2_294, __p3_294), vgetq_lane_f16(__s2_294, __p3_294), vgetq_lane_f16(__s2_294, __p3_294)}); \ __ret_294; \ }) #else -#define vfmlsl_laneq_low_u32(__p0_295, __p1_295, __p2_295, __p3_295) __extension__ ({ \ +#define vfmlsl_laneq_low_f16(__p0_295, __p1_295, __p2_295, __p3_295) __extension__ ({ \ float32x2_t __s0_295 = __p0_295; \ float16x4_t __s1_295 = __p1_295; \ float16x8_t __s2_295 = __p2_295; \ @@ -72289,7 +72289,7 @@ int16x8_t __reint2_263 = __noswap_vsetq_lane_s16(*(int16_t *) &__reint_263, *(in float16x4_t __rev1_295; __rev1_295 = __builtin_shufflevector(__s1_295, __s1_295, 3, 2, 1, 0); \ float16x8_t __rev2_295; __rev2_295 = __builtin_shufflevector(__s2_295, __s2_295, 7, 6, 5, 4, 3, 2, 1, 0); \ float32x2_t __ret_295; \ - __ret_295 = __noswap_vfmlsl_low_u32(__rev0_295, __rev1_295, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_295, __p3_295), __noswap_vgetq_lane_f16(__rev2_295, __p3_295), __noswap_vgetq_lane_f16(__rev2_295, __p3_295), __noswap_vgetq_lane_f16(__rev2_295, __p3_295)}); \ + __ret_295 = __noswap_vfmlsl_low_f16(__rev0_295, __rev1_295, (float16x4_t) {__noswap_vgetq_lane_f16(__rev2_295, __p3_295), __noswap_vgetq_lane_f16(__rev2_295, __p3_295), __noswap_vgetq_lane_f16(__rev2_295, __p3_295), __noswap_vgetq_lane_f16(__rev2_295, __p3_295)}); \ __ret_295 = __builtin_shufflevector(__ret_295, __ret_295, 1, 0); \ __ret_295; \ }) diff --git a/lib/include/armintr.h b/lib/include/armintr.h index 933afcbb91..300ed4ee47 100644 --- a/lib/include/armintr.h +++ b/lib/include/armintr.h @@ -1,22 +1,8 @@ /*===---- armintr.h - ARM Windows intrinsics -------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/avx2intrin.h b/lib/include/avx2intrin.h index 9688a96fde..162e83ea2f 100644 --- a/lib/include/avx2intrin.h +++ b/lib/include/avx2intrin.h @@ -1,22 +1,8 @@ /*===---- avx2intrin.h - AVX2 intrinsics -----------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -146,21 +132,13 @@ _mm256_andnot_si256(__m256i __a, __m256i __b) static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_avg_epu8(__m256i __a, __m256i __b) { - typedef unsigned short __v32hu __attribute__((__vector_size__(64))); - return (__m256i)__builtin_convertvector( - ((__builtin_convertvector((__v32qu)__a, __v32hu) + - __builtin_convertvector((__v32qu)__b, __v32hu)) + 1) - >> 1, __v32qu); + return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_avg_epu16(__m256i __a, __m256i __b) { - typedef unsigned int __v16su __attribute__((__vector_size__(64))); - return (__m256i)__builtin_convertvector( - ((__builtin_convertvector((__v16hu)__a, __v16su) + - __builtin_convertvector((__v16hu)__b, __v16su)) + 1) - >> 1, __v16hu); + return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 diff --git a/lib/include/avx512bf16intrin.h b/lib/include/avx512bf16intrin.h new file mode 100644 index 0000000000..d1d87e72f1 --- /dev/null +++ b/lib/include/avx512bf16intrin.h @@ -0,0 +1,279 @@ +/*===------------ avx512bf16intrin.h - AVX512_BF16 intrinsics --------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __AVX512BF16INTRIN_H +#define __AVX512BF16INTRIN_H + +typedef short __m512bh __attribute__((__vector_size__(64), __aligned__(64))); +typedef short __m256bh __attribute__((__vector_size__(32), __aligned__(32))); +typedef unsigned short __bfloat16; + +#define __DEFAULT_FN_ATTRS512 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16"), \ + __min_vector_width__(512))) +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512bf16"))) + +/// Convert One BF16 Data to One Single Float Data. +/// +/// \headerfile +/// +/// This intrinsic does not correspond to a specific instruction. +/// +/// \param __A +/// A bfloat data. +/// \returns A float data whose sign field and exponent field keep unchanged, +/// and fraction field is extended to 23 bits. +static __inline__ float __DEFAULT_FN_ATTRS _mm_cvtsbh_ss(__bfloat16 __A) { + return __builtin_ia32_cvtsbf162ss_32(__A); +} + +/// Convert Two Packed Single Data to One Packed BF16 Data. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VCVTNE2PS2BF16 instructions. +/// +/// \param __A +/// A 512-bit vector of [16 x float]. +/// \param __B +/// A 512-bit vector of [16 x float]. +/// \returns A 512-bit vector of [32 x bfloat] whose lower 256 bits come from +/// conversion of __B, and higher 256 bits come from conversion of __A. +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_cvtne2ps_pbh(__m512 __A, __m512 __B) { + return (__m512bh)__builtin_ia32_cvtne2ps2bf16_512((__v16sf) __A, + (__v16sf) __B); +} + +/// Convert Two Packed Single Data to One Packed BF16 Data. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VCVTNE2PS2BF16 instructions. +/// +/// \param __A +/// A 512-bit vector of [16 x float]. +/// \param __B +/// A 512-bit vector of [16 x float]. 
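/* [Editor's note] Aside on the avx2intrin.h hunk above, not part of any header:
 * the removed open-coded _mm256_avg_epu8/_mm256_avg_epu16 bodies and their new
 * __builtin_ia32_pavgb256/__builtin_ia32_pavgw256 replacements compute the same
 * rounding average. A scalar model of one element (names are illustrative only): */
static __inline__ unsigned char zig_note_avg_u8(unsigned char __a, unsigned char __b) {
  /* widen so the sum cannot overflow, add 1, then halve */
  return (unsigned char)(((unsigned int)__a + (unsigned int)__b + 1) >> 1);
}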
+/// \param __W +/// A 512-bit vector of [32 x bfloat]. +/// \param __U +/// A 32-bit mask value specifying what is chosen for each element. +/// A 1 means conversion of __A or __B. A 0 means element from __W. +/// \returns A 512-bit vector of [32 x bfloat] whose lower 256 bits come from +/// conversion of __B, and higher 256 bits come from conversion of __A. +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtne2ps_pbh(__m512bh __W, __mmask32 __U, __m512 __A, __m512 __B) { + return (__m512bh)__builtin_ia32_selectw_512((__mmask32)__U, + (__v32hi)_mm512_cvtne2ps_pbh(__A, __B), + (__v32hi)__W); +} + +/// Convert Two Packed Single Data to One Packed BF16 Data. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VCVTNE2PS2BF16 instructions. +/// +/// \param __A +/// A 512-bit vector of [16 x float]. +/// \param __B +/// A 512-bit vector of [16 x float]. +/// \param __U +/// A 32-bit mask value specifying what is chosen for each element. +/// A 1 means conversion of __A or __B. A 0 means element is zero. +/// \returns A 512-bit vector of [32 x bfloat] whose lower 256 bits come from +/// conversion of __B, and higher 256 bits come from conversion of __A. +static __inline__ __m512bh __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtne2ps_pbh(__mmask32 __U, __m512 __A, __m512 __B) { + return (__m512bh)__builtin_ia32_selectw_512((__mmask32)__U, + (__v32hi)_mm512_cvtne2ps_pbh(__A, __B), + (__v32hi)_mm512_setzero_si512()); +} + +/// Convert Packed Single Data to Packed BF16 Data. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VCVTNEPS2BF16 instructions. +/// +/// \param __A +/// A 512-bit vector of [16 x float]. +/// \returns A 256-bit vector of [16 x bfloat] come from conversion of __A. +static __inline__ __m256bh __DEFAULT_FN_ATTRS512 +_mm512_cvtneps_pbh(__m512 __A) { + return (__m256bh)__builtin_ia32_cvtneps2bf16_512_mask((__v16sf)__A, + (__v16hi)_mm256_undefined_si256(), + (__mmask16)-1); +} + +/// Convert Packed Single Data to Packed BF16 Data. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VCVTNEPS2BF16 instructions. +/// +/// \param __A +/// A 512-bit vector of [16 x float]. +/// \param __W +/// A 256-bit vector of [16 x bfloat]. +/// \param __U +/// A 16-bit mask value specifying what is chosen for each element. +/// A 1 means conversion of __A. A 0 means element from __W. +/// \returns A 256-bit vector of [16 x bfloat] come from conversion of __A. +static __inline__ __m256bh __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtneps_pbh(__m256bh __W, __mmask16 __U, __m512 __A) { + return (__m256bh)__builtin_ia32_cvtneps2bf16_512_mask((__v16sf)__A, + (__v16hi)__W, + (__mmask16)__U); +} + +/// Convert Packed Single Data to Packed BF16 Data. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VCVTNEPS2BF16 instructions. +/// +/// \param __A +/// A 512-bit vector of [16 x float]. +/// \param __U +/// A 16-bit mask value specifying what is chosen for each element. +/// A 1 means conversion of __A. A 0 means element is zero. +/// \returns A 256-bit vector of [16 x bfloat] come from conversion of __A. +static __inline__ __m256bh __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtneps_pbh(__mmask16 __U, __m512 __A) { + return (__m256bh)__builtin_ia32_cvtneps2bf16_512_mask((__v16sf)__A, + (__v16hi)_mm256_setzero_si256(), + (__mmask16)__U); +} + +/// Dot Product of BF16 Pairs Accumulated into Packed Single Precision. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VDPBF16PS instructions. 
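/* [Editor's note] Not upstream text: the _mask_/_maskz_ pairs in this new header all
 * follow the usual AVX-512 write-mask convention. Per destination lane i, with __U
 * the mask and __W the merge source:
 *   _mask_  variant: dst[i] = ((__U >> i) & 1) ? op(...)[i] : __W[i]
 *   _maskz_ variant: dst[i] = ((__U >> i) & 1) ? op(...)[i] : 0
 */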
+/// +/// \param __A +/// A 512-bit vector of [32 x bfloat]. +/// \param __B +/// A 512-bit vector of [32 x bfloat]. +/// \param __D +/// A 512-bit vector of [16 x float]. +/// \returns A 512-bit vector of [16 x float] comes from Dot Product of +/// __A, __B and __D +static __inline__ __m512 __DEFAULT_FN_ATTRS512 +_mm512_dpbf16_ps(__m512 __D, __m512bh __A, __m512bh __B) { + return (__m512)__builtin_ia32_dpbf16ps_512((__v16sf) __D, + (__v16si) __A, + (__v16si) __B); +} + +/// Dot Product of BF16 Pairs Accumulated into Packed Single Precision. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VDPBF16PS instructions. +/// +/// \param __A +/// A 512-bit vector of [32 x bfloat]. +/// \param __B +/// A 512-bit vector of [32 x bfloat]. +/// \param __D +/// A 512-bit vector of [16 x float]. +/// \param __U +/// A 16-bit mask value specifying what is chosen for each element. +/// A 1 means __A and __B's dot product accumulated with __D. A 0 means __D. +/// \returns A 512-bit vector of [16 x float] comes from Dot Product of +/// __A, __B and __D +static __inline__ __m512 __DEFAULT_FN_ATTRS512 +_mm512_mask_dpbf16_ps(__m512 __D, __mmask16 __U, __m512bh __A, __m512bh __B) { + return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, + (__v16sf)_mm512_dpbf16_ps(__D, __A, __B), + (__v16sf)__D); +} + +/// Dot Product of BF16 Pairs Accumulated into Packed Single Precision. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VDPBF16PS instructions. +/// +/// \param __A +/// A 512-bit vector of [32 x bfloat]. +/// \param __B +/// A 512-bit vector of [32 x bfloat]. +/// \param __D +/// A 512-bit vector of [16 x float]. +/// \param __U +/// A 16-bit mask value specifying what is chosen for each element. +/// A 1 means __A and __B's dot product accumulated with __D. A 0 means 0. +/// \returns A 512-bit vector of [16 x float] comes from Dot Product of +/// __A, __B and __D +static __inline__ __m512 __DEFAULT_FN_ATTRS512 +_mm512_maskz_dpbf16_ps(__mmask16 __U, __m512 __D, __m512bh __A, __m512bh __B) { + return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, + (__v16sf)_mm512_dpbf16_ps(__D, __A, __B), + (__v16sf)_mm512_setzero_si512()); +} + +/// Convert Packed BF16 Data to Packed float Data. +/// +/// \headerfile +/// +/// \param __A +/// A 256-bit vector of [16 x bfloat]. +/// \returns A 512-bit vector of [16 x float] come from convertion of __A +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpbh_ps(__m256bh __A) { + return _mm512_castsi512_ps((__m512i)_mm512_slli_epi32( + (__m512i)_mm512_cvtepi16_epi32((__m256i)__A), 16)); +} + +/// Convert Packed BF16 Data to Packed float Data using zeroing mask. +/// +/// \headerfile +/// +/// \param __U +/// A 16-bit mask. Elements are zeroed out when the corresponding mask +/// bit is not set. +/// \param __A +/// A 256-bit vector of [16 x bfloat]. +/// \returns A 512-bit vector of [16 x float] come from convertion of __A +static __inline__ __m512 __DEFAULT_FN_ATTRS512 +_mm512_maskz_cvtpbh_ps(__mmask16 __U, __m256bh __A) { + return _mm512_castsi512_ps((__m512i)_mm512_slli_epi32( + (__m512i)_mm512_maskz_cvtepi16_epi32((__mmask16)__U, (__m256i)__A), 16)); +} + +/// Convert Packed BF16 Data to Packed float Data using merging mask. +/// +/// \headerfile +/// +/// \param __S +/// A 512-bit vector of [16 x float]. Elements are copied from __S when +/// the corresponding mask bit is not set. +/// \param __U +/// A 16-bit mask. +/// \param __A +/// A 256-bit vector of [16 x bfloat]. 
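/* [Editor's note] A minimal usage sketch of the 512-bit BF16 intrinsics declared
 * above; it is not part of the upstream header. The helper and variable names are
 * hypothetical, and a caller would build with avx512bf16 enabled. */
static __inline__ __m512 zig_note_bf16_dot_step(__m512 __acc, __m512 __a_lo, __m512 __a_hi,
                                                __m512 __b_lo, __m512 __b_hi) {
  /* Pack two float vectors into one [32 x bfloat]: low 256 bits from the second
   * argument, high 256 bits from the first, matching the \returns text above. */
  __m512bh __a = _mm512_cvtne2ps_pbh(__a_hi, __a_lo);
  __m512bh __b = _mm512_cvtne2ps_pbh(__b_hi, __b_lo);
  /* Accumulate the pairwise bf16 products into the float accumulator. */
  return _mm512_dpbf16_ps(__acc, __a, __b);
}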
+/// \returns A 512-bit vector of [16 x float] come from convertion of __A +static __inline__ __m512 __DEFAULT_FN_ATTRS512 +_mm512_mask_cvtpbh_ps(__m512 __S, __mmask16 __U, __m256bh __A) { + return _mm512_castsi512_ps((__m512i)_mm512_mask_slli_epi32( + (__m512i)__S, (__mmask16)__U, + (__m512i)_mm512_cvtepi16_epi32((__m256i)__A), 16)); +} + +#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS512 + +#endif diff --git a/lib/include/avx512bitalgintrin.h b/lib/include/avx512bitalgintrin.h index 56046f8c49..d4411d156b 100644 --- a/lib/include/avx512bitalgintrin.h +++ b/lib/include/avx512bitalgintrin.h @@ -1,23 +1,9 @@ /*===------------- avx512bitalgintrin.h - BITALG intrinsics ------------------=== * * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/avx512bwintrin.h b/lib/include/avx512bwintrin.h index a90a255376..cb2e07619c 100644 --- a/lib/include/avx512bwintrin.h +++ b/lib/include/avx512bwintrin.h @@ -1,23 +1,9 @@ /*===------------- avx512bwintrin.h - AVX512BW intrinsics ------------------=== * * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+ * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -719,11 +705,7 @@ _mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_avg_epu8 (__m512i __A, __m512i __B) { - typedef unsigned short __v64hu __attribute__((__vector_size__(128))); - return (__m512i)__builtin_convertvector( - ((__builtin_convertvector((__v64qu) __A, __v64hu) + - __builtin_convertvector((__v64qu) __B, __v64hu)) + 1) - >> 1, __v64qu); + return (__m512i)__builtin_ia32_pavgb512((__v64qi)__A, (__v64qi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -746,11 +728,7 @@ _mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B) static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_avg_epu16 (__m512i __A, __m512i __B) { - typedef unsigned int __v32su __attribute__((__vector_size__(128))); - return (__m512i)__builtin_convertvector( - ((__builtin_convertvector((__v32hu) __A, __v32su) + - __builtin_convertvector((__v32hu) __B, __v32su)) + 1) - >> 1, __v32hu); + return (__m512i)__builtin_ia32_pavgw512((__v32hi)__A, (__v32hi)__B); } static __inline__ __m512i __DEFAULT_FN_ATTRS512 @@ -1733,14 +1711,14 @@ _mm512_maskz_set1_epi8 (__mmask64 __M, char __A) (__v64qi) _mm512_setzero_si512()); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS512 +static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_kunpackd (__mmask64 __A, __mmask64 __B) { return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A, (__mmask64) __B); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS512 +static __inline__ __mmask32 __DEFAULT_FN_ATTRS _mm512_kunpackw (__mmask32 __A, __mmask32 __B) { return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, @@ -1751,7 +1729,7 @@ static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi16 (void const *__P) { struct __loadu_epi16 { - __m512i __v; + __m512i_u __v; } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_epi16*)__P)->__v; } @@ -1777,7 +1755,7 @@ static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi8 (void const *__P) { struct __loadu_epi8 { - __m512i __v; + __m512i_u __v; } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_epi8*)__P)->__v; } @@ -1803,7 +1781,7 @@ static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi16 (void *__P, __m512i __A) { struct __storeu_epi16 { - __m512i __v; + __m512i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_epi16*)__P)->__v = __A; } @@ -1820,7 +1798,7 @@ static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi8 (void *__P, __m512i __A) { struct __storeu_epi8 { - __m512i __v; + __m512i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_epi8*)__P)->__v = __A; } diff --git a/lib/include/avx512cdintrin.h b/lib/include/avx512cdintrin.h index e63902743c..bfdba84aa2 100644 --- a/lib/include/avx512cdintrin.h +++ b/lib/include/avx512cdintrin.h @@ -1,23 +1,9 @@ /*===------------- avx512cdintrin.h - AVX512CD intrinsics ------------------=== * * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, 
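/* [Editor's note] Aside on the loadu/storeu hunks in avx512bwintrin.h above, not part
 * of any header: wrapping the new 1-byte-aligned vector types in a packed,
 * __may_alias__ struct is the idiom that lets these helpers move a 64-byte vector
 * through any address -- the 1-byte alignment drops the alignment requirement and
 * __may_alias__ sidesteps strict aliasing. A standalone sketch of the same idiom
 * (type and function names are illustrative only): */
typedef long long zig_note_v512_u __attribute__((__vector_size__(64), __aligned__(1)));
static __inline__ zig_note_v512_u zig_note_loadu_512(void const *__P) {
  struct zig_note_loadu {
    zig_note_v512_u __v;
  } __attribute__((__packed__, __may_alias__));
  return ((struct zig_note_loadu const *)__P)->__v;
}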
subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -34,49 +20,45 @@ static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_conflict_epi64 (__m512i __A) { - return (__m512i) __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A, - (__v8di) _mm512_setzero_si512 (), - (__mmask8) -1); + return (__m512i) __builtin_ia32_vpconflictdi_512 ((__v8di) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_conflict_epi64 (__m512i __W, __mmask8 __U, __m512i __A) { - return (__m512i) __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A, - (__v8di) __W, - (__mmask8) __U); + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, + (__v8di)_mm512_conflict_epi64(__A), + (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_conflict_epi64 (__mmask8 __U, __m512i __A) { - return (__m512i) __builtin_ia32_vpconflictdi_512_mask ((__v8di) __A, - (__v8di) _mm512_setzero_si512 (), - (__mmask8) __U); + return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, + (__v8di)_mm512_conflict_epi64(__A), + (__v8di)_mm512_setzero_si512 ()); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_conflict_epi32 (__m512i __A) { - return (__m512i) __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A, - (__v16si) _mm512_setzero_si512 (), - (__mmask16) -1); + return (__m512i) __builtin_ia32_vpconflictsi_512 ((__v16si) __A); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_conflict_epi32 (__m512i __W, __mmask16 __U, __m512i __A) { - return (__m512i) __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A, - (__v16si) __W, - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, + (__v16si)_mm512_conflict_epi32(__A), + (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A) { - return (__m512i) __builtin_ia32_vpconflictsi_512_mask ((__v16si) __A, - (__v16si) _mm512_setzero_si512 (), - (__mmask16) __U); + return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, + (__v16si)_mm512_conflict_epi32(__A), + (__v16si)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS diff --git a/lib/include/avx512dqintrin.h b/lib/include/avx512dqintrin.h index 6e6c293af2..337256c50f 100644 --- a/lib/include/avx512dqintrin.h +++ b/lib/include/avx512dqintrin.h @@ -1,22 +1,8 @@ /*===---- avx512dqintrin.h - AVX512DQ intrinsics ---------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, 
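/* [Editor's note] Aside on the avx512cdintrin.h hunk above, not part of any header:
 * the dedicated __builtin_ia32_vpconflict*_mask builtins are replaced by the plain
 * builtin plus a per-lane select, so each masked form is now just
 * "select(__U, unmasked conflict, fallback)". A scalar model of the select step
 * (illustrative only): */
static __inline__ long long zig_note_select_lane64(unsigned char __U, int __lane,
                                                   long long __op_result,
                                                   long long __fallback) {
  return ((__U >> __lane) & 1) ? __op_result : __fallback;
}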
publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/avx512erintrin.h b/lib/include/avx512erintrin.h index 6348275c8d..8570061699 100644 --- a/lib/include/avx512erintrin.h +++ b/lib/include/avx512erintrin.h @@ -1,22 +1,8 @@ /*===---- avx512erintrin.h - AVX512ER intrinsics ---------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/avx512fintrin.h b/lib/include/avx512fintrin.h index 1c19993ff1..132761f9ef 100644 --- a/lib/include/avx512fintrin.h +++ b/lib/include/avx512fintrin.h @@ -1,22 +1,8 @@ /*===---- avx512fintrin.h - AVX512F intrinsics -----------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -40,9 +26,13 @@ typedef unsigned short __v32hu __attribute__((__vector_size__(64))); typedef unsigned long long __v8du __attribute__((__vector_size__(64))); typedef unsigned int __v16su __attribute__((__vector_size__(64))); -typedef float __m512 __attribute__((__vector_size__(64))); -typedef double __m512d __attribute__((__vector_size__(64))); -typedef long long __m512i __attribute__((__vector_size__(64))); +typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64))); +typedef double __m512d __attribute__((__vector_size__(64), __aligned__(64))); +typedef long long __m512i __attribute__((__vector_size__(64), __aligned__(64))); + +typedef float __m512_u __attribute__((__vector_size__(64), __aligned__(1))); +typedef double __m512d_u __attribute__((__vector_size__(64), __aligned__(1))); +typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1))); typedef unsigned char __mmask8; typedef unsigned short __mmask16; @@ -1991,12 +1981,12 @@ _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) { #define _mm512_mask_add_round_pd(W, U, A, B, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_add_round_pd((A), (B), (R)), \ - (__v8df)(__m512d)(W)); + (__v8df)(__m512d)(W)) #define _mm512_maskz_add_round_pd(U, A, B, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_add_round_pd((A), (B), (R)), \ - (__v8df)_mm512_setzero_pd()); + (__v8df)_mm512_setzero_pd()) #define _mm512_add_round_ps(A, B, R) \ (__m512)__builtin_ia32_addps512((__v16sf)(__m512)(A), \ @@ -2005,12 +1995,12 @@ _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) { #define _mm512_mask_add_round_ps(W, U, A, B, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ 
(__v16sf)_mm512_add_round_ps((A), (B), (R)), \ - (__v16sf)(__m512)(W)); + (__v16sf)(__m512)(W)) #define _mm512_maskz_add_round_ps(U, A, B, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_add_round_ps((A), (B), (R)), \ - (__v16sf)_mm512_setzero_ps()); + (__v16sf)_mm512_setzero_ps()) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { @@ -2106,12 +2096,12 @@ _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) { #define _mm512_mask_sub_round_pd(W, U, A, B, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_sub_round_pd((A), (B), (R)), \ - (__v8df)(__m512d)(W)); + (__v8df)(__m512d)(W)) #define _mm512_maskz_sub_round_pd(U, A, B, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_sub_round_pd((A), (B), (R)), \ - (__v8df)_mm512_setzero_pd()); + (__v8df)_mm512_setzero_pd()) #define _mm512_sub_round_ps(A, B, R) \ (__m512)__builtin_ia32_subps512((__v16sf)(__m512)(A), \ @@ -2120,12 +2110,12 @@ _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) { #define _mm512_mask_sub_round_ps(W, U, A, B, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \ - (__v16sf)(__m512)(W)); + (__v16sf)(__m512)(W)) #define _mm512_maskz_sub_round_ps(U, A, B, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \ - (__v16sf)_mm512_setzero_ps()); + (__v16sf)_mm512_setzero_ps()) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { @@ -2221,12 +2211,12 @@ _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) { #define _mm512_mask_mul_round_pd(W, U, A, B, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_mul_round_pd((A), (B), (R)), \ - (__v8df)(__m512d)(W)); + (__v8df)(__m512d)(W)) #define _mm512_maskz_mul_round_pd(U, A, B, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_mul_round_pd((A), (B), (R)), \ - (__v8df)_mm512_setzero_pd()); + (__v8df)_mm512_setzero_pd()) #define _mm512_mul_round_ps(A, B, R) \ (__m512)__builtin_ia32_mulps512((__v16sf)(__m512)(A), \ @@ -2235,12 +2225,12 @@ _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) { #define _mm512_mask_mul_round_ps(W, U, A, B, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \ - (__v16sf)(__m512)(W)); + (__v16sf)(__m512)(W)) #define _mm512_maskz_mul_round_ps(U, A, B, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \ - (__v16sf)_mm512_setzero_ps()); + (__v16sf)_mm512_setzero_ps()) static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { @@ -2349,12 +2339,12 @@ _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) { #define _mm512_mask_div_round_pd(W, U, A, B, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_div_round_pd((A), (B), (R)), \ - (__v8df)(__m512d)(W)); + (__v8df)(__m512d)(W)) #define _mm512_maskz_div_round_pd(U, A, B, R) \ (__m512d)__builtin_ia32_selectpd_512((__mmask8)(U), \ (__v8df)_mm512_div_round_pd((A), (B), (R)), \ - (__v8df)_mm512_setzero_pd()); + (__v8df)_mm512_setzero_pd()) #define _mm512_div_round_ps(A, B, R) \ (__m512)__builtin_ia32_divps512((__v16sf)(__m512)(A), \ @@ -2363,12 +2353,12 @@ _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) { #define _mm512_mask_div_round_ps(W, U, A, B, R) \ 
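/* [Editor's note] Not upstream text: the trailing semicolons removed from these
 * *_round_* macros previously made each expansion a statement rather than an
 * expression, so a use such as (hypothetical operands; __rnd stands for whatever
 * rounding constant the caller would normally pass)
 *
 *   __m512d __r = __use_mask ? _mm512_mask_add_round_pd(__w, __u, __a, __b, __rnd)
 *                            : _mm512_maskz_add_round_pd(__u, __a, __b, __rnd);
 *
 * failed to compile because a stray ';' ended up inside the conditional expression.
 * Dropping the semicolons keeps every expansion usable in expression context. */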
(__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_div_round_ps((A), (B), (R)), \ - (__v16sf)(__m512)(W)); + (__v16sf)(__m512)(W)) #define _mm512_maskz_div_round_ps(U, A, B, R) \ (__m512)__builtin_ia32_selectps_512((__mmask16)(U), \ (__v16sf)_mm512_div_round_ps((A), (B), (R)), \ - (__v16sf)_mm512_setzero_ps()); + (__v16sf)_mm512_setzero_ps()) #define _mm512_roundscale_ps(A, B) \ (__m512)__builtin_ia32_rndscaleps_mask((__v16sf)(__m512)(A), (int)(B), \ @@ -3789,20 +3779,9 @@ _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A) (__v16hi)_mm256_setzero_si256(), \ (__mmask16)(W)) -#define _mm512_cvtps_ph(A, I) \ - (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ - (__v16hi)_mm256_setzero_si256(), \ - (__mmask16)-1) - -#define _mm512_mask_cvtps_ph(U, W, A, I) \ - (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ - (__v16hi)(__m256i)(U), \ - (__mmask16)(W)) - -#define _mm512_maskz_cvtps_ph(W, A, I) \ - (__m256i)__builtin_ia32_vcvtps2ph512_mask((__v16sf)(__m512)(A), (int)(I), \ - (__v16hi)_mm256_setzero_si256(), \ - (__mmask16)(W)) +#define _mm512_cvtps_ph _mm512_cvt_roundps_ph +#define _mm512_mask_cvtps_ph _mm512_mask_cvt_roundps_ph +#define _mm512_maskz_cvtps_ph _mm512_maskz_cvt_roundps_ph #define _mm512_cvt_roundph_ps(A, R) \ (__m512)__builtin_ia32_vcvtph2ps512_mask((__v16hi)(__m256i)(A), \ @@ -4324,7 +4303,7 @@ static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_si512 (void const *__P) { struct __loadu_si512 { - __m512i __v; + __m512i_u __v; } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_si512*)__P)->__v; } @@ -4333,7 +4312,7 @@ static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi32 (void const *__P) { struct __loadu_epi32 { - __m512i __v; + __m512i_u __v; } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_epi32*)__P)->__v; } @@ -4360,7 +4339,7 @@ static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_epi64 (void const *__P) { struct __loadu_epi64 { - __m512i __v; + __m512i_u __v; } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_epi64*)__P)->__v; } @@ -4420,7 +4399,7 @@ static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_loadu_pd(void const *__p) { struct __loadu_pd { - __m512d __v; + __m512d_u __v; } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_pd*)__p)->__v; } @@ -4429,7 +4408,7 @@ static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_loadu_ps(void const *__p) { struct __loadu_ps { - __m512 __v; + __m512_u __v; } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_ps*)__p)->__v; } @@ -4504,7 +4483,7 @@ static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi64 (void *__P, __m512i __A) { struct __storeu_epi64 { - __m512i __v; + __m512i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_epi64*)__P)->__v = __A; } @@ -4520,7 +4499,7 @@ static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_si512 (void *__P, __m512i __A) { struct __storeu_si512 { - __m512i __v; + __m512i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_si512*)__P)->__v = __A; } @@ -4529,7 +4508,7 @@ static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_epi32 (void *__P, __m512i __A) { struct __storeu_epi32 { - __m512i __v; + __m512i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_epi32*)__P)->__v = __A; } @@ -4551,7 +4530,7 @@ static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_pd(void *__P, __m512d __A) { struct __storeu_pd { - __m512d __v; + __m512d_u 
__v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_pd*)__P)->__v = __A; } @@ -4567,7 +4546,7 @@ static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_ps(void *__P, __m512 __A) { struct __storeu_ps { - __m512 __v; + __m512_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_ps*)__P)->__v = __A; } @@ -9329,7 +9308,7 @@ _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A) __v2du __t6 = __t4 op __t5; \ __v2du __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \ __v2du __t8 = __t6 op __t7; \ - return __t8[0]; + return __t8[0] static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) { _mm512_mask_reduce_operator(+); @@ -9381,7 +9360,7 @@ _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) { __m128d __t6 = __t4 op __t5; \ __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \ __m128d __t8 = __t6 op __t7; \ - return __t8[0]; + return __t8[0] static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) { _mm512_mask_reduce_operator(+); @@ -9415,7 +9394,7 @@ _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) { __v4su __t8 = __t6 op __t7; \ __v4su __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \ __v4su __t10 = __t8 op __t9; \ - return __t10[0]; + return __t10[0] static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi32(__m512i __W) { @@ -9473,7 +9452,7 @@ _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) { __m128 __t8 = __t6 op __t7; \ __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \ __m128 __t10 = __t8 op __t9; \ - return __t10[0]; + return __t10[0] static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_add_ps(__m512 __W) { @@ -9505,7 +9484,7 @@ _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) { __m512i __t4 = _mm512_##op(__t2, __t3); \ __m512i __t5 = (__m512i)__builtin_shufflevector((__v8di)__t4, (__v8di)__t4, 1, 0, 3, 2, 5, 4, 7, 6); \ __v8di __t6 = (__v8di)_mm512_##op(__t4, __t5); \ - return __t6[0]; + return __t6[0] static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi64(__m512i __V) { @@ -9563,7 +9542,7 @@ _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) { __m128i __t8 = _mm_##op(__t6, __t7); \ __m128i __t9 = (__m128i)__builtin_shufflevector((__v4si)__t8, (__v4si)__t8, 1, 0, 3, 2); \ __v4si __t10 = (__v4si)_mm_##op(__t8, __t9); \ - return __t10[0]; + return __t10[0] static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi32(__m512i __V) { @@ -9619,7 +9598,7 @@ _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) { __m128d __t6 = _mm_##op(__t4, __t5); \ __m128d __t7 = __builtin_shufflevector(__t6, __t6, 1, 0); \ __m128d __t8 = _mm_##op(__t6, __t7); \ - return __t8[0]; + return __t8[0] static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_max_pd(__m512d __V) { @@ -9655,7 +9634,7 @@ _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) { __m128 __t8 = _mm_##op(__t6, __t7); \ __m128 __t9 = __builtin_shufflevector(__t8, __t8, 1, 0, 3, 2); \ __m128 __t10 = _mm_##op(__t8, __t9); \ - return __t10[0]; + return __t10[0] static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_max_ps(__m512 __V) { diff --git a/lib/include/avx512ifmaintrin.h b/lib/include/avx512ifmaintrin.h index 159713049c..5f7da52f1f 100644 --- a/lib/include/avx512ifmaintrin.h +++ b/lib/include/avx512ifmaintrin.h @@ -1,23 +1,9 @@ /*===------------- avx512ifmaintrin.h - IFMA intrinsics ------------------=== * * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation 
files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/avx512ifmavlintrin.h b/lib/include/avx512ifmavlintrin.h index afdea888c5..5889401d10 100644 --- a/lib/include/avx512ifmavlintrin.h +++ b/lib/include/avx512ifmavlintrin.h @@ -1,23 +1,9 @@ /*===------------- avx512ifmavlintrin.h - IFMA intrinsics ------------------=== * * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/avx512pfintrin.h b/lib/include/avx512pfintrin.h index 73b2234fb4..b8bcf49c6b 100644 --- a/lib/include/avx512pfintrin.h +++ b/lib/include/avx512pfintrin.h @@ -1,23 +1,9 @@ /*===------------- avx512pfintrin.h - PF intrinsics ------------------------=== * * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/avx512vbmi2intrin.h b/lib/include/avx512vbmi2intrin.h index 5324252429..a23144616c 100644 --- a/lib/include/avx512vbmi2intrin.h +++ b/lib/include/avx512vbmi2intrin.h @@ -1,23 +1,9 @@ /*===------------- avx512vbmi2intrin.h - VBMI2 intrinsics ------------------=== * * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/avx512vbmiintrin.h b/lib/include/avx512vbmiintrin.h index 5463d90155..c0e0f94d48 100644 --- a/lib/include/avx512vbmiintrin.h +++ b/lib/include/avx512vbmiintrin.h @@ -1,23 +1,9 @@ /*===------------- avx512vbmiintrin.h - VBMI intrinsics ------------------=== * * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/avx512vbmivlintrin.h b/lib/include/avx512vbmivlintrin.h index b5d5aa9af5..c5b96ae8ad 100644 --- a/lib/include/avx512vbmivlintrin.h +++ b/lib/include/avx512vbmivlintrin.h @@ -1,23 +1,9 @@ /*===------------- avx512vbmivlintrin.h - VBMI intrinsics ------------------=== * * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/avx512vlbf16intrin.h b/lib/include/avx512vlbf16intrin.h new file mode 100644 index 0000000000..1b1a744bcd --- /dev/null +++ b/lib/include/avx512vlbf16intrin.h @@ -0,0 +1,474 @@ +/*===--------- avx512vlbf16intrin.h - AVX512_BF16 intrinsics ---------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __AVX512VLBF16INTRIN_H +#define __AVX512VLBF16INTRIN_H + +typedef short __m128bh __attribute__((__vector_size__(16), __aligned__(16))); + +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl, avx512bf16"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl, avx512bf16"), __min_vector_width__(256))) + +/// Convert Two Packed Single Data to One Packed BF16 Data. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VCVTNE2PS2BF16 instructions. +/// +/// \param __A +/// A 128-bit vector of [4 x float]. +/// \param __B +/// A 128-bit vector of [4 x float]. +/// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from +/// conversion of __B, and higher 64 bits come from conversion of __A. +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_cvtne2ps_pbh(__m128 __A, __m128 __B) { + return (__m128bh)__builtin_ia32_cvtne2ps2bf16_128((__v4sf) __A, + (__v4sf) __B); +} + +/// Convert Two Packed Single Data to One Packed BF16 Data. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VCVTNE2PS2BF16 instructions. +/// +/// \param __A +/// A 128-bit vector of [4 x float]. +/// \param __B +/// A 128-bit vector of [4 x float]. +/// \param __W +/// A 128-bit vector of [8 x bfloat]. +/// \param __U +/// A 8-bit mask value specifying what is chosen for each element. +/// A 1 means conversion of __A or __B. A 0 means element from __W. +/// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from +/// conversion of __B, and higher 64 bits come from conversion of __A. +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_cvtne2ps_pbh(__m128bh __W, __mmask8 __U, __m128 __A, __m128 __B) { + return (__m128bh)__builtin_ia32_selectw_128((__mmask8)__U, + (__v8hi)_mm_cvtne2ps_pbh(__A, __B), + (__v8hi)__W); +} + +/// Convert Two Packed Single Data to One Packed BF16 Data. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VCVTNE2PS2BF16 instructions. +/// +/// \param __A +/// A 128-bit vector of [4 x float]. +/// \param __B +/// A 128-bit vector of [4 x float]. +/// \param __U +/// A 8-bit mask value specifying what is chosen for each element. +/// A 1 means conversion of __A or __B. A 0 means element is zero. +/// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from +/// conversion of __B, and higher 64 bits come from conversion of __A. 
+static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtne2ps_pbh(__mmask8 __U, __m128 __A, __m128 __B) { + return (__m128bh)__builtin_ia32_selectw_128((__mmask8)__U, + (__v8hi)_mm_cvtne2ps_pbh(__A, __B), + (__v8hi)_mm_setzero_si128()); +} + +/// Convert Two Packed Single Data to One Packed BF16 Data. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VCVTNE2PS2BF16 instructions. +/// +/// \param __A +/// A 256-bit vector of [8 x float]. +/// \param __B +/// A 256-bit vector of [8 x float]. +/// \returns A 256-bit vector of [16 x bfloat] whose lower 128 bits come from +/// conversion of __B, and higher 128 bits come from conversion of __A. +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_cvtne2ps_pbh(__m256 __A, __m256 __B) { + return (__m256bh)__builtin_ia32_cvtne2ps2bf16_256((__v8sf) __A, + (__v8sf) __B); +} + +/// Convert Two Packed Single Data to One Packed BF16 Data. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VCVTNE2PS2BF16 instructions. +/// +/// \param __A +/// A 256-bit vector of [8 x float]. +/// \param __B +/// A 256-bit vector of [8 x float]. +/// \param __W +/// A 256-bit vector of [16 x bfloat]. +/// \param __U +/// A 16-bit mask value specifying what is chosen for each element. +/// A 1 means conversion of __A or __B. A 0 means element from __W. +/// \returns A 256-bit vector of [16 x bfloat] whose lower 128 bits come from +/// conversion of __B, and higher 128 bits come from conversion of __A. +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_mask_cvtne2ps_pbh(__m256bh __W, __mmask16 __U, __m256 __A, __m256 __B) { + return (__m256bh)__builtin_ia32_selectw_256((__mmask16)__U, + (__v16hi)_mm256_cvtne2ps_pbh(__A, __B), + (__v16hi)__W); +} + +/// Convert Two Packed Single Data to One Packed BF16 Data. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VCVTNE2PS2BF16 instructions. +/// +/// \param __A +/// A 256-bit vector of [8 x float]. +/// \param __B +/// A 256-bit vector of [8 x float]. +/// \param __U +/// A 16-bit mask value specifying what is chosen for each element. +/// A 1 means conversion of __A or __B. A 0 means element is zero. +/// \returns A 256-bit vector of [16 x bfloat] whose lower 128 bits come from +/// conversion of __B, and higher 128 bits come from conversion of __A. +static __inline__ __m256bh __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtne2ps_pbh(__mmask16 __U, __m256 __A, __m256 __B) { + return (__m256bh)__builtin_ia32_selectw_256((__mmask16)__U, + (__v16hi)_mm256_cvtne2ps_pbh(__A, __B), + (__v16hi)_mm256_setzero_si256()); +} + +/// Convert Packed Single Data to Packed BF16 Data. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VCVTNEPS2BF16 instructions. +/// +/// \param __A +/// A 128-bit vector of [4 x float]. +/// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from +/// conversion of __A, and higher 64 bits are 0. +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_cvtneps_pbh(__m128 __A) { + return (__m128bh)__builtin_ia32_cvtneps2bf16_128_mask((__v4sf) __A, + (__v8hi)_mm_undefined_si128(), + (__mmask8)-1); +} + +/// Convert Packed Single Data to Packed BF16 Data. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VCVTNEPS2BF16 instructions. +/// +/// \param __A +/// A 128-bit vector of [4 x float]. +/// \param __W +/// A 128-bit vector of [8 x bfloat]. +/// \param __U +/// A 4-bit mask value specifying what is chosen for each element. +/// A 1 means conversion of __A. A 0 means element from __W. 
+/// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from +/// conversion of __A, and higher 64 bits are 0. +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_mask_cvtneps_pbh(__m128bh __W, __mmask8 __U, __m128 __A) { + return (__m128bh)__builtin_ia32_cvtneps2bf16_128_mask((__v4sf) __A, + (__v8hi)__W, + (__mmask8)__U); +} + +/// Convert Packed Single Data to Packed BF16 Data. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VCVTNEPS2BF16 instructions. +/// +/// \param __A +/// A 128-bit vector of [4 x float]. +/// \param __U +/// A 4-bit mask value specifying what is chosen for each element. +/// A 1 means conversion of __A. A 0 means element is zero. +/// \returns A 128-bit vector of [8 x bfloat] whose lower 64 bits come from +/// conversion of __A, and higher 64 bits are 0. +static __inline__ __m128bh __DEFAULT_FN_ATTRS128 +_mm_maskz_cvtneps_pbh(__mmask8 __U, __m128 __A) { + return (__m128bh)__builtin_ia32_cvtneps2bf16_128_mask((__v4sf) __A, + (__v8hi)_mm_setzero_si128(), + (__mmask8)__U); +} + +/// Convert Packed Single Data to Packed BF16 Data. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VCVTNEPS2BF16 instructions. +/// +/// \param __A +/// A 256-bit vector of [8 x float]. +/// \returns A 128-bit vector of [8 x bfloat] comes from conversion of __A. +static __inline__ __m128bh __DEFAULT_FN_ATTRS256 +_mm256_cvtneps_pbh(__m256 __A) { + return (__m128bh)__builtin_ia32_cvtneps2bf16_256_mask((__v8sf)__A, + (__v8hi)_mm_undefined_si128(), + (__mmask8)-1); +} + +/// Convert Packed Single Data to Packed BF16 Data. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VCVTNEPS2BF16 instructions. +/// +/// \param __A +/// A 256-bit vector of [8 x float]. +/// \param __W +/// A 256-bit vector of [8 x bfloat]. +/// \param __U +/// A 8-bit mask value specifying what is chosen for each element. +/// A 1 means conversion of __A. A 0 means element from __W. +/// \returns A 128-bit vector of [8 x bfloat] comes from conversion of __A. +static __inline__ __m128bh __DEFAULT_FN_ATTRS256 +_mm256_mask_cvtneps_pbh(__m128bh __W, __mmask8 __U, __m256 __A) { + return (__m128bh)__builtin_ia32_cvtneps2bf16_256_mask((__v8sf)__A, + (__v8hi)__W, + (__mmask8)__U); +} + +/// Convert Packed Single Data to Packed BF16 Data. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VCVTNEPS2BF16 instructions. +/// +/// \param __A +/// A 256-bit vector of [8 x float]. +/// \param __U +/// A 8-bit mask value specifying what is chosen for each element. +/// A 1 means conversion of __A. A 0 means element is zero. +/// \returns A 128-bit vector of [8 x bfloat] comes from conversion of __A. +static __inline__ __m128bh __DEFAULT_FN_ATTRS256 +_mm256_maskz_cvtneps_pbh(__mmask8 __U, __m256 __A) { + return (__m128bh)__builtin_ia32_cvtneps2bf16_256_mask((__v8sf)__A, + (__v8hi)_mm_setzero_si128(), + (__mmask8)__U); +} + +/// Dot Product of BF16 Pairs Accumulated into Packed Single Precision. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VDPBF16PS instructions. +/// +/// \param __A +/// A 128-bit vector of [8 x bfloat]. +/// \param __B +/// A 128-bit vector of [8 x bfloat]. +/// \param __D +/// A 128-bit vector of [4 x float]. 
+/// \returns A 128-bit vector of [4 x float] comes from Dot Product of +/// __A, __B and __D +static __inline__ __m128 __DEFAULT_FN_ATTRS128 +_mm_dpbf16_ps(__m128 __D, __m128bh __A, __m128bh __B) { + return (__m128)__builtin_ia32_dpbf16ps_128((__v4sf)__D, + (__v4si)__A, + (__v4si)__B); +} + +/// Dot Product of BF16 Pairs Accumulated into Packed Single Precision. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VDPBF16PS instructions. +/// +/// \param __A +/// A 128-bit vector of [8 x bfloat]. +/// \param __B +/// A 128-bit vector of [8 x bfloat]. +/// \param __D +/// A 128-bit vector of [4 x float]. +/// \param __U +/// A 8-bit mask value specifying what is chosen for each element. +/// A 1 means __A and __B's dot product accumulated with __D. A 0 means __D. +/// \returns A 128-bit vector of [4 x float] comes from Dot Product of +/// __A, __B and __D +static __inline__ __m128 __DEFAULT_FN_ATTRS128 +_mm_mask_dpbf16_ps(__m128 __D, __mmask8 __U, __m128bh __A, __m128bh __B) { + return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, + (__v4sf)_mm_dpbf16_ps(__D, __A, __B), + (__v4sf)__D); +} + +/// Dot Product of BF16 Pairs Accumulated into Packed Single Precision. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VDPBF16PS instructions. +/// +/// \param __A +/// A 128-bit vector of [8 x bfloat]. +/// \param __B +/// A 128-bit vector of [8 x bfloat]. +/// \param __D +/// A 128-bit vector of [4 x float]. +/// \param __U +/// A 8-bit mask value specifying what is chosen for each element. +/// A 1 means __A and __B's dot product accumulated with __D. A 0 means 0. +/// \returns A 128-bit vector of [4 x float] comes from Dot Product of +/// __A, __B and __D +static __inline__ __m128 __DEFAULT_FN_ATTRS128 +_mm_maskz_dpbf16_ps(__mmask8 __U, __m128 __D, __m128bh __A, __m128bh __B) { + return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, + (__v4sf)_mm_dpbf16_ps(__D, __A, __B), + (__v4sf)_mm_setzero_si128()); +} + +/// Dot Product of BF16 Pairs Accumulated into Packed Single Precision. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VDPBF16PS instructions. +/// +/// \param __A +/// A 256-bit vector of [16 x bfloat]. +/// \param __B +/// A 256-bit vector of [16 x bfloat]. +/// \param __D +/// A 256-bit vector of [8 x float]. +/// \returns A 256-bit vector of [8 x float] comes from Dot Product of +/// __A, __B and __D +static __inline__ __m256 __DEFAULT_FN_ATTRS256 +_mm256_dpbf16_ps(__m256 __D, __m256bh __A, __m256bh __B) { + return (__m256)__builtin_ia32_dpbf16ps_256((__v8sf)__D, + (__v8si)__A, + (__v8si)__B); +} + +/// Dot Product of BF16 Pairs Accumulated into Packed Single Precision. +/// +/// \headerfile +/// +/// This intrinsic corresponds to the VDPBF16PS instructions. +/// +/// \param __A +/// A 256-bit vector of [16 x bfloat]. +/// \param __B +/// A 256-bit vector of [16 x bfloat]. +/// \param __D +/// A 256-bit vector of [8 x float]. +/// \param __U +/// A 16-bit mask value specifying what is chosen for each element. +/// A 1 means __A and __B's dot product accumulated with __D. A 0 means __D. +/// \returns A 256-bit vector of [8 x float] comes from Dot Product of +/// __A, __B and __D +static __inline__ __m256 __DEFAULT_FN_ATTRS256 +_mm256_mask_dpbf16_ps(__m256 __D, __mmask8 __U, __m256bh __A, __m256bh __B) { + return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, + (__v8sf)_mm256_dpbf16_ps(__D, __A, __B), + (__v8sf)__D); +} + +/// Dot Product of BF16 Pairs Accumulated into Packed Single Precision. 
+///
+/// \headerfile
+///
+/// This intrinsic corresponds to the VDPBF16PS instructions.
+///
+/// \param __A
+/// A 256-bit vector of [16 x bfloat].
+/// \param __B
+/// A 256-bit vector of [16 x bfloat].
+/// \param __D
+/// A 256-bit vector of [8 x float].
+/// \param __U
+/// A 8-bit mask value specifying what is chosen for each element.
+/// A 1 means __A and __B's dot product accumulated with __D. A 0 means 0.
+/// \returns A 256-bit vector of [8 x float] comes from Dot Product of
+/// __A, __B and __D
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
+_mm256_maskz_dpbf16_ps(__mmask8 __U, __m256 __D, __m256bh __A, __m256bh __B) {
+ return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
+ (__v8sf)_mm256_dpbf16_ps(__D, __A, __B),
+ (__v8sf)_mm256_setzero_si256());
+}
+
+/// Convert One Single float Data to One BF16 Data.
+///
+/// \headerfile
+///
+/// This intrinsic corresponds to the VCVTNEPS2BF16 instructions.
+///
+/// \param __A
+/// A float data.
+/// \returns A bf16 data whose sign field and exponent field are kept unchanged,
+/// and whose fraction field is truncated to 7 bits.
+static __inline__ __bfloat16 __DEFAULT_FN_ATTRS128 _mm_cvtness_sbh(float __A) {
+ __v4sf __V = {__A, 0, 0, 0};
+ __v8hi __R = __builtin_ia32_cvtneps2bf16_128_mask(
+ (__v4sf)__V, (__v8hi)_mm_undefined_si128(), (__mmask8)-1);
+ return __R[0];
+}
+
+/// Convert Packed BF16 Data to Packed float Data.
+///
+/// \headerfile
+///
+/// \param __A
+/// A 128-bit vector of [8 x bfloat].
+/// \returns A 256-bit vector of [8 x float] comes from conversion of __A
+static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtpbh_ps(__m128bh __A) {
+ return _mm256_castsi256_ps((__m256i)_mm256_slli_epi32(
+ (__m256i)_mm256_cvtepi16_epi32((__m128i)__A), 16));
+}
+
+/// Convert Packed BF16 Data to Packed float Data using zeroing mask.
+///
+/// \headerfile
+///
+/// \param __U
+/// A 8-bit mask. Elements are zeroed out when the corresponding mask
+/// bit is not set.
+/// \param __A
+/// A 128-bit vector of [8 x bfloat].
+/// \returns A 256-bit vector of [8 x float] comes from conversion of __A
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
+_mm256_maskz_cvtpbh_ps(__mmask8 __U, __m128bh __A) {
+ return _mm256_castsi256_ps((__m256i)_mm256_slli_epi32(
+ (__m256i)_mm256_maskz_cvtepi16_epi32((__mmask8)__U, (__m128i)__A), 16));
+}
+
+/// Convert Packed BF16 Data to Packed float Data using merging mask.
+///
+/// \headerfile
+///
+/// \param __S
+/// A 256-bit vector of [8 x float]. Elements are copied from __S when
+/// the corresponding mask bit is not set.
+/// \param __U
+/// A 8-bit mask. Elements are copied from __S when the corresponding
+/// mask bit is not set.
+/// \param __A
+/// A 128-bit vector of [8 x bfloat].
+/// \returns A 256-bit vector of [8 x float] comes from conversion of __A
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
+_mm256_mask_cvtpbh_ps(__m256 __S, __mmask8 __U, __m128bh __A) {
+ return _mm256_castsi256_ps((__m256i)_mm256_mask_slli_epi32(
+ (__m256i)__S, (__mmask8)__U, (__m256i)_mm256_cvtepi16_epi32((__m128i)__A),
+ 16));
+}
+
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
+
+#endif
diff --git a/lib/include/avx512vlbitalgintrin.h b/lib/include/avx512vlbitalgintrin.h
index 64860b2925..5154eae14c 100644
--- a/lib/include/avx512vlbitalgintrin.h
+++ b/lib/include/avx512vlbitalgintrin.h
@@ -1,23 +1,9 @@
 /*===---- avx512vlbitalgintrin.h - BITALG intrinsics -----------------------===
 *
 *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
diff --git a/lib/include/avx512vlbwintrin.h b/lib/include/avx512vlbwintrin.h
index 87e0023e8b..ead09466bc 100644
--- a/lib/include/avx512vlbwintrin.h
+++ b/lib/include/avx512vlbwintrin.h
@@ -1,22 +1,8 @@
 /*===---- avx512vlbwintrin.h - AVX512VL and AVX512BW intrinsics ------------===
 *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -2301,7 +2287,7 @@ static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi16 (void const *__P) { struct __loadu_epi16 { - __m128i __v; + __m128i_u __v; } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_epi16*)__P)->__v; } @@ -2327,7 +2313,7 @@ static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi16 (void const *__P) { struct __loadu_epi16 { - __m256i __v; + __m256i_u __v; } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_epi16*)__P)->__v; } @@ -2353,7 +2339,7 @@ static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi8 (void const *__P) { struct __loadu_epi8 { - __m128i __v; + __m128i_u __v; } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_epi8*)__P)->__v; } @@ -2379,7 +2365,7 @@ static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi8 (void const *__P) { struct __loadu_epi8 { - __m256i __v; + __m256i_u __v; } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_epi8*)__P)->__v; } @@ -2405,7 +2391,7 @@ static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi16 (void *__P, __m128i __A) { struct __storeu_epi16 { - __m128i __v; + __m128i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_epi16*)__P)->__v = __A; } @@ -2422,7 +2408,7 @@ static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi16 (void *__P, __m256i __A) { struct __storeu_epi16 { - __m256i __v; + __m256i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_epi16*)__P)->__v = __A; } @@ -2439,7 +2425,7 @@ static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi8 (void *__P, __m128i __A) { struct __storeu_epi8 { - __m128i __v; + __m128i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_epi8*)__P)->__v = __A; } @@ -2456,7 +2442,7 @@ static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi8 (void *__P, __m256i __A) { struct __storeu_epi8 { - __m256i __v; + __m256i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_epi8*)__P)->__v = __A; } diff --git a/lib/include/avx512vlcdintrin.h b/lib/include/avx512vlcdintrin.h index 903a7c2549..cc8b72528d 100644 --- a/lib/include/avx512vlcdintrin.h +++ b/lib/include/avx512vlcdintrin.h @@ -1,22 +1,8 @@ /*===---- avx512vlcdintrin.h - AVX512VL and AVX512CD intrinsics ------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -60,99 +46,89 @@ _mm256_broadcastmw_epi32 (__mmask16 __A) static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_conflict_epi64 (__m128i __A) { - return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A, - (__v2di) _mm_undefined_si128 (), - (__mmask8) -1); + return (__m128i) __builtin_ia32_vpconflictdi_128 ((__v2di) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { - return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A, - (__v2di) __W, - (__mmask8) __U); + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, + (__v2di)_mm_conflict_epi64(__A), + (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A) { - return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A, - (__v2di) - _mm_setzero_si128 (), - (__mmask8) __U); + return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, + (__v2di)_mm_conflict_epi64(__A), + (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_conflict_epi64 (__m256i __A) { - return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A, - (__v4di) _mm256_undefined_si256 (), - (__mmask8) -1); + return (__m256i) __builtin_ia32_vpconflictdi_256 ((__v4di) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { - return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A, - (__v4di) __W, - (__mmask8) __U); + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, + (__v4di)_mm256_conflict_epi64(__A), + (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A) { - return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A, - (__v4di) _mm256_setzero_si256 (), - (__mmask8) __U); + return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, + (__v4di)_mm256_conflict_epi64(__A), + (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_conflict_epi32 (__m128i __A) { - return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A, - (__v4si) _mm_undefined_si128 (), - (__mmask8) -1); + return (__m128i) __builtin_ia32_vpconflictsi_128 ((__v4si) __A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { - return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A, - (__v4si) __W, - (__mmask8) __U); + return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, + (__v4si)_mm_conflict_epi32(__A), + (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A) { - return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A, - (__v4si) _mm_setzero_si128 (), - (__mmask8) __U); + return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, + (__v4si)_mm_conflict_epi32(__A), + (__v4si)_mm_setzero_si128()); } static 
__inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_conflict_epi32 (__m256i __A) { - return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A, - (__v8si) _mm256_undefined_si256 (), - (__mmask8) -1); + return (__m256i) __builtin_ia32_vpconflictsi_256 ((__v8si) __A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { - return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A, - (__v8si) __W, - (__mmask8) __U); + return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, + (__v8si)_mm256_conflict_epi32(__A), + (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A) { - return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A, - (__v8si) - _mm256_setzero_si256 (), - (__mmask8) __U); + return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, + (__v8si)_mm256_conflict_epi32(__A), + (__v8si)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 diff --git a/lib/include/avx512vldqintrin.h b/lib/include/avx512vldqintrin.h index 9d13846e89..95ba574ea8 100644 --- a/lib/include/avx512vldqintrin.h +++ b/lib/include/avx512vldqintrin.h @@ -1,22 +1,8 @@ /*===---- avx512vldqintrin.h - AVX512VL and AVX512DQ intrinsics ------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -523,23 +509,21 @@ _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) { static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_ps (__m256i __A) { - return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, - (__v4sf) _mm_setzero_ps(), - (__mmask8) -1); + return (__m128)__builtin_convertvector((__v4di)__A, __v4sf); } static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) { - return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, - (__v4sf) __W, - (__mmask8) __U); + return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, + (__v4sf)_mm256_cvtepi64_ps(__A), + (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) { - return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, - (__v4sf) _mm_setzero_ps(), - (__mmask8) __U); + return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, + (__v4sf)_mm256_cvtepi64_ps(__A), + (__v4sf)_mm_setzero_ps()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 @@ -771,23 +755,21 @@ _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) { static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_cvtepu64_ps (__m256i __A) { - return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, - (__v4sf) _mm_setzero_ps(), - (__mmask8) -1); + return (__m128)__builtin_convertvector((__v4du)__A, __v4sf); } static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) { - return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, - (__v4sf) __W, - (__mmask8) __U); + return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, + (__v4sf)_mm256_cvtepu64_ps(__A), + (__v4sf)__W); } static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) { - return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, - (__v4sf) _mm_setzero_ps(), - (__mmask8) __U); + return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, + (__v4sf)_mm256_cvtepu64_ps(__A), + (__v4sf)_mm_setzero_ps()); } #define _mm_range_pd(A, B, C) \ diff --git a/lib/include/avx512vlintrin.h b/lib/include/avx512vlintrin.h index a2cdc0a96e..9494fc8a6e 100644 --- a/lib/include/avx512vlintrin.h +++ b/lib/include/avx512vlintrin.h @@ -1,22 +1,8 @@ /*===---- avx512vlintrin.h - AVX512VL intrinsics ---------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -5513,7 +5499,7 @@ static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi64 (void const *__P) { struct __loadu_epi64 { - __m128i __v; + __m128i_u __v; } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_epi64*)__P)->__v; } @@ -5539,7 +5525,7 @@ static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi64 (void const *__P) { struct __loadu_epi64 { - __m256i __v; + __m256i_u __v; } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_epi64*)__P)->__v; } @@ -5565,7 +5551,7 @@ static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_loadu_epi32 (void const *__P) { struct __loadu_epi32 { - __m128i __v; + __m128i_u __v; } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_epi32*)__P)->__v; } @@ -5591,7 +5577,7 @@ static __inline __m256i __DEFAULT_FN_ATTRS256 _mm256_loadu_epi32 (void const *__P) { struct __loadu_epi32 { - __m256i __v; + __m256i_u __v; } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_epi32*)__P)->__v; } @@ -5717,7 +5703,7 @@ static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi64 (void *__P, __m128i __A) { struct __storeu_epi64 { - __m128i __v; + __m128i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_epi64*)__P)->__v = __A; } @@ -5734,7 +5720,7 @@ static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi64 (void *__P, __m256i __A) { struct __storeu_epi64 { - __m256i __v; + __m256i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_epi64*)__P)->__v = __A; } @@ -5751,7 +5737,7 @@ static __inline void __DEFAULT_FN_ATTRS128 _mm_storeu_epi32 (void *__P, __m128i __A) { struct __storeu_epi32 { - __m128i __v; + __m128i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_epi32*)__P)->__v = __A; } @@ -5768,7 +5754,7 @@ static __inline void __DEFAULT_FN_ATTRS256 _mm256_storeu_epi32 (void *__P, __m256i __A) { struct __storeu_epi32 { - __m256i __v; + __m256i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_epi32*)__P)->__v = __A; } @@ -7000,7 +6986,7 @@ _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128 +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi32_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, @@ -7023,7 +7009,7 @@ _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS128 +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); @@ -7581,7 +7567,7 @@ _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A) __M); } -static __inline__ void __DEFAULT_FN_ATTRS256 +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { 
__builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); @@ -8425,22 +8411,6 @@ _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A) (__mmask8) __U); } -static __inline __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A) -{ - return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION, - (__v8hi) __W, - (__mmask8) __U); -} - -static __inline __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A) -{ - return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION, - (__v8hi) _mm_setzero_si128 (), - (__mmask8) __U); -} - #define _mm_mask_cvt_roundps_ph(W, U, A, I) \ (__m128i)__builtin_ia32_vcvtps2ph_mask((__v4sf)(__m128)(A), (int)(I), \ (__v8hi)(__m128i)(W), \ @@ -8451,21 +8421,9 @@ _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A) (__v8hi)_mm_setzero_si128(), \ (__mmask8)(U)) -static __inline __m128i __DEFAULT_FN_ATTRS256 -_mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A) -{ - return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION, - (__v8hi) __W, - (__mmask8) __U); -} +#define _mm_mask_cvtps_ph _mm_mask_cvt_roundps_ph +#define _mm_maskz_cvtps_ph _mm_maskz_cvt_roundps_ph -static __inline __m128i __DEFAULT_FN_ATTRS256 -_mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A) -{ - return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION, - (__v8hi) _mm_setzero_si128(), - (__mmask8) __U); -} #define _mm256_mask_cvt_roundps_ph(W, U, A, I) \ (__m128i)__builtin_ia32_vcvtps2ph256_mask((__v8sf)(__m256)(A), (int)(I), \ (__v8hi)(__m128i)(W), \ @@ -8476,6 +8434,9 @@ _mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A) (__v8hi)_mm_setzero_si128(), \ (__mmask8)(U)) +#define _mm256_mask_cvtps_ph _mm256_mask_cvt_roundps_ph +#define _mm256_maskz_cvtps_ph _mm256_maskz_cvt_roundps_ph + #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 diff --git a/lib/include/avx512vlvbmi2intrin.h b/lib/include/avx512vlvbmi2intrin.h index 632d14fb55..a40f926de7 100644 --- a/lib/include/avx512vlvbmi2intrin.h +++ b/lib/include/avx512vlvbmi2intrin.h @@ -1,23 +1,9 @@ /*===------------- avx512vlvbmi2intrin.h - VBMI2 intrinsics -----------------=== * * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/avx512vlvnniintrin.h b/lib/include/avx512vlvnniintrin.h index 62382268ec..b7c8fa08c6 100644 --- a/lib/include/avx512vlvnniintrin.h +++ b/lib/include/avx512vlvnniintrin.h @@ -1,23 +1,9 @@ /*===------------- avx512vlvnniintrin.h - VNNI intrinsics ------------------=== * * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/avx512vlvp2intersectintrin.h b/lib/include/avx512vlvp2intersectintrin.h new file mode 100644 index 0000000000..3e0815e5d4 --- /dev/null +++ b/lib/include/avx512vlvp2intersectintrin.h @@ -0,0 +1,121 @@ +/*===------ avx512vlvp2intersectintrin.h - VL VP2INTERSECT intrinsics ------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." 
+#endif
+
+#ifndef _AVX512VLVP2INTERSECT_H
+#define _AVX512VLVP2INTERSECT_H
+
+#define __DEFAULT_FN_ATTRS128 \
+ __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vp2intersect"), \
+ __min_vector_width__(128)))
+
+#define __DEFAULT_FN_ATTRS256 \
+ __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vp2intersect"), \
+ __min_vector_width__(256)))
+/// Store, in an even/odd pair of mask registers, the indicators of the
+/// locations of value matches between dwords in operands __a and __b.
+///
+/// \headerfile
+///
+/// This intrinsic corresponds to the VP2INTERSECTD instruction.
+///
+/// \param __a
+/// A 256-bit vector of [8 x i32].
+/// \param __b
+/// A 256-bit vector of [8 x i32]
+/// \param __m0
+/// A pointer to an 8-bit mask
+/// \param __m1
+/// A pointer to an 8-bit mask
+static __inline__ void __DEFAULT_FN_ATTRS256
+_mm256_2intersect_epi32(__m256i __a, __m256i __b, __mmask8 *__m0, __mmask8 *__m1) {
+ __builtin_ia32_vp2intersect_d_256((__v8si)__a, (__v8si)__b, __m0, __m1);
+}
+
+/// Store, in an even/odd pair of mask registers, the indicators of the
+/// locations of value matches between quadwords in operands __a and __b.
+///
+/// \headerfile
+///
+/// This intrinsic corresponds to the VP2INTERSECTQ instruction.
+///
+/// \param __a
+/// A 256-bit vector of [4 x i64].
+/// \param __b
+/// A 256-bit vector of [4 x i64]
+/// \param __m0
+/// A pointer to an 8-bit mask
+/// \param __m1
+/// A pointer to an 8-bit mask
+static __inline__ void __DEFAULT_FN_ATTRS256
+_mm256_2intersect_epi64(__m256i __a, __m256i __b, __mmask8 *__m0, __mmask8 *__m1) {
+ __builtin_ia32_vp2intersect_q_256((__v4di)__a, (__v4di)__b, __m0, __m1);
+}
+
+/// Store, in an even/odd pair of mask registers, the indicators of the
+/// locations of value matches between dwords in operands __a and __b.
+///
+/// \headerfile
+///
+/// This intrinsic corresponds to the VP2INTERSECTD instruction.
+///
+/// \param __a
+/// A 128-bit vector of [4 x i32].
+/// \param __b
+/// A 128-bit vector of [4 x i32]
+/// \param __m0
+/// A pointer to an 8-bit mask
+/// \param __m1
+/// A pointer to an 8-bit mask
+static __inline__ void __DEFAULT_FN_ATTRS128
+_mm_2intersect_epi32(__m128i __a, __m128i __b, __mmask8 *__m0, __mmask8 *__m1) {
+ __builtin_ia32_vp2intersect_d_128((__v4si)__a, (__v4si)__b, __m0, __m1);
+}
+
+/// Store, in an even/odd pair of mask registers, the indicators of the
+/// locations of value matches between quadwords in operands __a and __b.
+///
+/// \headerfile
+///
+/// This intrinsic corresponds to the VP2INTERSECTQ instruction.
+///
+/// \param __a
+/// A 128-bit vector of [2 x i64].
+/// \param __b
+/// A 128-bit vector of [2 x i64]
+/// \param __m0
+/// A pointer to an 8-bit mask
+/// \param __m1
+/// A pointer to an 8-bit mask
+static __inline__ void __DEFAULT_FN_ATTRS128
+_mm_2intersect_epi64(__m128i __a, __m128i __b, __mmask8 *__m0, __mmask8 *__m1) {
+ __builtin_ia32_vp2intersect_q_128((__v2di)__a, (__v2di)__b, __m0, __m1);
+}
+
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
+
+#endif
diff --git a/lib/include/avx512vnniintrin.h b/lib/include/avx512vnniintrin.h
index 620ef5a789..9935a119aa 100644
--- a/lib/include/avx512vnniintrin.h
+++ b/lib/include/avx512vnniintrin.h
@@ -1,23 +1,9 @@
 /*===------------- avx512vnniintrin.h - VNNI intrinsics ------------------===
 *
 *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
diff --git a/lib/include/avx512vp2intersectintrin.h b/lib/include/avx512vp2intersectintrin.h
new file mode 100644
index 0000000000..5d3cb48cfd
--- /dev/null
+++ b/lib/include/avx512vp2intersectintrin.h
@@ -0,0 +1,77 @@
+/*===------- avx512vp2intersectintrin.h - VP2INTERSECT intrinsics -----------===
+ *
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error "Never use directly; include instead."
+#endif
+
+#ifndef _AVX512VP2INTERSECT_H
+#define _AVX512VP2INTERSECT_H
+
+#define __DEFAULT_FN_ATTRS \
+ __attribute__((__always_inline__, __nodebug__, __target__("avx512vp2intersect"), \
+ __min_vector_width__(512)))
+
+/// Store, in an even/odd pair of mask registers, the indicators of the
+/// locations of value matches between dwords in operands __a and __b.
+///
+/// \headerfile
+///
+/// This intrinsic corresponds to the VP2INTERSECTD instruction.
+///
+/// \param __a
+/// A 512-bit vector of [16 x i32].
+/// \param __b
+/// A 512-bit vector of [16 x i32]
+/// \param __m0
+/// A pointer to a 16-bit mask
+/// \param __m1
+/// A pointer to a 16-bit mask
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_2intersect_epi32(__m512i __a, __m512i __b, __mmask16 *__m0, __mmask16 *__m1) {
+ __builtin_ia32_vp2intersect_d_512((__v16si)__a, (__v16si)__b, __m0, __m1);
+}
+
+/// Store, in an even/odd pair of mask registers, the indicators of the
+/// locations of value matches between quadwords in operands __a and __b.
+///
+/// \headerfile
+///
+/// This intrinsic corresponds to the VP2INTERSECTQ instruction.
+///
+/// \param __a
+/// A 512-bit vector of [8 x i64].
+/// \param __b
+/// A 512-bit vector of [8 x i64]
+/// \param __m0
+/// A pointer to an 8-bit mask
+/// \param __m1
+/// A pointer to an 8-bit mask
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_2intersect_epi64(__m512i __a, __m512i __b, __mmask8 *__m0, __mmask8 *__m1) {
+ __builtin_ia32_vp2intersect_q_512((__v8di)__a, (__v8di)__b, __m0, __m1);
+}
+
+#undef __DEFAULT_FN_ATTRS
+
+#endif
diff --git a/lib/include/avx512vpopcntdqintrin.h b/lib/include/avx512vpopcntdqintrin.h
index c99f594569..bb435e6233 100644
--- a/lib/include/avx512vpopcntdqintrin.h
+++ b/lib/include/avx512vpopcntdqintrin.h
@@ -1,23 +1,9 @@
 /*===----- avx512vpopcntdqintrin.h - AVX512VPOPCNTDQ intrinsics-------------===
 *
 *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/avx512vpopcntdqvlintrin.h b/lib/include/avx512vpopcntdqvlintrin.h index 681a75fa07..a3cb9b6bcc 100644 --- a/lib/include/avx512vpopcntdqvlintrin.h +++ b/lib/include/avx512vpopcntdqvlintrin.h @@ -1,23 +1,9 @@ /*===---- avx512vpopcntdqintrin.h - AVX512VPOPCNTDQ intrinsics -------------=== * * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/avxintrin.h b/lib/include/avxintrin.h index cb15396b3f..a01240b9d1 100644 --- a/lib/include/avxintrin.h +++ b/lib/include/avxintrin.h @@ -1,22 +1,8 @@ /*===---- avxintrin.h - AVX intrinsics -------------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -45,9 +31,13 @@ typedef unsigned char __v32qu __attribute__ ((__vector_size__ (32))); * appear in the interface though. */ typedef signed char __v32qs __attribute__((__vector_size__(32))); -typedef float __m256 __attribute__ ((__vector_size__ (32))); -typedef double __m256d __attribute__((__vector_size__(32))); -typedef long long __m256i __attribute__((__vector_size__(32))); +typedef float __m256 __attribute__ ((__vector_size__ (32), __aligned__(32))); +typedef double __m256d __attribute__((__vector_size__(32), __aligned__(32))); +typedef long long __m256i __attribute__((__vector_size__(32), __aligned__(32))); + +typedef float __m256_u __attribute__ ((__vector_size__ (32), __aligned__(1))); +typedef double __m256d_u __attribute__((__vector_size__(32), __aligned__(1))); +typedef long long __m256i_u __attribute__((__vector_size__(32), __aligned__(1))); /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(256))) @@ -3113,7 +3103,7 @@ static __inline __m256d __DEFAULT_FN_ATTRS _mm256_loadu_pd(double const *__p) { struct __loadu_pd { - __m256d __v; + __m256d_u __v; } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_pd*)__p)->__v; } @@ -3133,7 +3123,7 @@ static __inline __m256 __DEFAULT_FN_ATTRS _mm256_loadu_ps(float const *__p) { struct __loadu_ps { - __m256 __v; + __m256_u __v; } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_ps*)__p)->__v; } @@ -3166,10 +3156,10 @@ _mm256_load_si256(__m256i const *__p) /// A pointer to a 256-bit integer vector containing integer values. /// \returns A 256-bit integer vector containing the moved values. static __inline __m256i __DEFAULT_FN_ATTRS -_mm256_loadu_si256(__m256i const *__p) +_mm256_loadu_si256(__m256i_u const *__p) { struct __loadu_si256 { - __m256i __v; + __m256i_u __v; } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_si256*)__p)->__v; } @@ -3246,7 +3236,7 @@ static __inline void __DEFAULT_FN_ATTRS _mm256_storeu_pd(double *__p, __m256d __a) { struct __storeu_pd { - __m256d __v; + __m256d_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_pd*)__p)->__v = __a; } @@ -3266,7 +3256,7 @@ static __inline void __DEFAULT_FN_ATTRS _mm256_storeu_ps(float *__p, __m256 __a) { struct __storeu_ps { - __m256 __v; + __m256_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_ps*)__p)->__v = __a; } @@ -3301,10 +3291,10 @@ _mm256_store_si256(__m256i *__p, __m256i __a) /// \param __a /// A 256-bit integer vector containing the values to be moved. static __inline void __DEFAULT_FN_ATTRS -_mm256_storeu_si256(__m256i *__p, __m256i __a) +_mm256_storeu_si256(__m256i_u *__p, __m256i __a) { struct __storeu_si256 { - __m256i __v; + __m256i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_si256*)__p)->__v = __a; } @@ -4834,7 +4824,7 @@ _mm256_loadu2_m128d(double const *__addr_hi, double const *__addr_lo) /// address of the memory location does not have to be aligned. /// \returns A 256-bit integer vector containing the concatenated result. 
static __inline __m256i __DEFAULT_FN_ATTRS -_mm256_loadu2_m128i(__m128i const *__addr_hi, __m128i const *__addr_lo) +_mm256_loadu2_m128i(__m128i_u const *__addr_hi, __m128i_u const *__addr_lo) { __m256i __v256 = _mm256_castsi128_si256(_mm_loadu_si128(__addr_lo)); return _mm256_insertf128_si256(__v256, _mm_loadu_si128(__addr_hi), 1); @@ -4918,7 +4908,7 @@ _mm256_storeu2_m128d(double *__addr_hi, double *__addr_lo, __m256d __a) /// \param __a /// A 256-bit integer vector. static __inline void __DEFAULT_FN_ATTRS -_mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo, __m256i __a) +_mm256_storeu2_m128i(__m128i_u *__addr_hi, __m128i_u *__addr_lo, __m256i __a) { __m128i __v128; diff --git a/lib/include/bmi2intrin.h b/lib/include/bmi2intrin.h index fdae82cf2b..0b56aed5f4 100644 --- a/lib/include/bmi2intrin.h +++ b/lib/include/bmi2intrin.h @@ -1,22 +1,8 @@ /*===---- bmi2intrin.h - BMI2 intrinsics -----------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/bmiintrin.h b/lib/include/bmiintrin.h index 56c20b78d3..b7af62f609 100644 --- a/lib/include/bmiintrin.h +++ b/lib/include/bmiintrin.h @@ -1,22 +1,8 @@ /*===---- bmiintrin.h - BMI intrinsics -------------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/cetintrin.h b/lib/include/cetintrin.h index 120c95424d..4290e9d735 100644 --- a/lib/include/cetintrin.h +++ b/lib/include/cetintrin.h @@ -1,22 +1,8 @@ /*===---- cetintrin.h - CET intrinsic --------------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/cldemoteintrin.h b/lib/include/cldemoteintrin.h index fa78148ebf..2413e7dea7 100644 --- a/lib/include/cldemoteintrin.h +++ b/lib/include/cldemoteintrin.h @@ -1,22 +1,8 @@ /*===---- cldemoteintrin.h - CLDEMOTE intrinsic ----------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/clflushoptintrin.h b/lib/include/clflushoptintrin.h index 79bb4589fc..060eb36f30 100644 --- a/lib/include/clflushoptintrin.h +++ b/lib/include/clflushoptintrin.h @@ -1,22 +1,8 @@ /*===---- clflushoptintrin.h - CLFLUSHOPT intrinsic ------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/clwbintrin.h b/lib/include/clwbintrin.h index c09286ba67..3360d203f7 100644 --- a/lib/include/clwbintrin.h +++ b/lib/include/clwbintrin.h @@ -1,22 +1,8 @@ /*===---- clwbintrin.h - CLWB intrinsic ------------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/clzerointrin.h b/lib/include/clzerointrin.h index f4e920839b..a180984a3f 100644 --- a/lib/include/clzerointrin.h +++ b/lib/include/clzerointrin.h @@ -1,22 +1,8 @@ /*===----------------------- clzerointrin.h - CLZERO ----------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/cpuid.h b/lib/include/cpuid.h index fce6af52dd..02ffac26c0 100644 --- a/lib/include/cpuid.h +++ b/lib/include/cpuid.h @@ -1,22 +1,8 @@ /*===---- cpuid.h - X86 cpu model detection --------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -191,6 +177,7 @@ #define bit_CLDEMOTE 0x02000000 #define bit_MOVDIRI 0x08000000 #define bit_MOVDIR64B 0x10000000 +#define bit_ENQCMD 0x20000000 /* Features in %edx for leaf 7 sub-leaf 0 */ #define bit_AVX5124VNNIW 0x00000004 @@ -198,6 +185,9 @@ #define bit_PCONFIG 0x00040000 #define bit_IBT 0x00100000 +/* Features in %eax for leaf 7 sub-leaf 1 */ +#define bit_AVX512BF16 0x00000020 + /* Features in %eax for leaf 13 sub-leaf 1 */ #define bit_XSAVEOPT 0x00000001 #define bit_XSAVEC 0x00000002 diff --git a/lib/include/emmintrin.h b/lib/include/emmintrin.h index 6d61f97199..3d55f5f271 100644 --- a/lib/include/emmintrin.h +++ b/lib/include/emmintrin.h @@ -1,22 +1,8 @@ /*===---- emmintrin.h - SSE2 intrinsics ------------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -26,8 +12,11 @@ #include -typedef double __m128d __attribute__((__vector_size__(16))); -typedef long long __m128i __attribute__((__vector_size__(16))); +typedef double __m128d __attribute__((__vector_size__(16), __aligned__(16))); +typedef long long __m128i __attribute__((__vector_size__(16), __aligned__(16))); + +typedef double __m128d_u __attribute__((__vector_size__(16), __aligned__(1))); +typedef long long __m128i_u __attribute__((__vector_size__(16), __aligned__(1))); /* Type defines. 
*/ typedef double __v2df __attribute__ ((__vector_size__ (16))); @@ -1652,7 +1641,7 @@ static __inline__ __m128d __DEFAULT_FN_ATTRS _mm_loadu_pd(double const *__dp) { struct __loadu_pd { - __m128d __v; + __m128d_u __v; } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_pd*)__dp)->__v; } @@ -2042,7 +2031,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_pd(double *__dp, __m128d __a) { struct __storeu_pd { - __m128d __v; + __m128d_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_pd*)__dp)->__v = __a; } @@ -2316,11 +2305,7 @@ _mm_adds_epu16(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a, __m128i __b) { - typedef unsigned short __v16hu __attribute__ ((__vector_size__ (32))); - return (__m128i)__builtin_convertvector( - ((__builtin_convertvector((__v16qu)__a, __v16hu) + - __builtin_convertvector((__v16qu)__b, __v16hu)) + 1) - >> 1, __v16qu); + return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b); } /// Computes the rounded avarages of corresponding elements of two @@ -2340,11 +2325,7 @@ _mm_avg_epu8(__m128i __a, __m128i __b) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a, __m128i __b) { - typedef unsigned int __v8su __attribute__ ((__vector_size__ (32))); - return (__m128i)__builtin_convertvector( - ((__builtin_convertvector((__v8hu)__a, __v8su) + - __builtin_convertvector((__v8hu)__b, __v8su)) + 1) - >> 1, __v8hu); + return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b); } /// Multiplies the corresponding elements of two 128-bit signed [8 x i16] @@ -3564,10 +3545,10 @@ _mm_load_si128(__m128i const *__p) /// A pointer to a memory location containing integer values. /// \returns A 128-bit integer vector containing the moved values. static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_loadu_si128(__m128i const *__p) +_mm_loadu_si128(__m128i_u const *__p) { struct __loadu_si128 { - __m128i __v; + __m128i_u __v; } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_si128*)__p)->__v; } @@ -3585,7 +3566,7 @@ _mm_loadu_si128(__m128i const *__p) /// \returns A 128-bit vector of [2 x i64]. The lower order bits contain the /// moved value. The higher order bits are cleared. static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_loadl_epi64(__m128i const *__p) +_mm_loadl_epi64(__m128i_u const *__p) { struct __mm_loadl_epi64_struct { long long __u; @@ -4027,10 +4008,10 @@ _mm_store_si128(__m128i *__p, __m128i __b) /// \param __b /// A 128-bit integer vector containing the values to be moved. static __inline__ void __DEFAULT_FN_ATTRS -_mm_storeu_si128(__m128i *__p, __m128i __b) +_mm_storeu_si128(__m128i_u *__p, __m128i __b) { struct __storeu_si128 { - __m128i __v; + __m128i_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_si128*)__p)->__v = __b; } @@ -4139,7 +4120,7 @@ _mm_maskmoveu_si128(__m128i __d, __m128i __n, char *__p) /// A 128-bit integer vector of [2 x i64]. The lower 64 bits contain the /// value to be stored. 
static __inline__ void __DEFAULT_FN_ATTRS -_mm_storel_epi64(__m128i *__p, __m128i __a) +_mm_storel_epi64(__m128i_u *__p, __m128i __a) { struct __mm_storel_epi64_struct { long long __u; diff --git a/lib/include/enqcmdintrin.h b/lib/include/enqcmdintrin.h new file mode 100644 index 0000000000..30af67f6b4 --- /dev/null +++ b/lib/include/enqcmdintrin.h @@ -0,0 +1,63 @@ +/*===------------------ enqcmdintrin.h - enqcmd intrinsics -----------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __ENQCMDINTRIN_H +#define __ENQCMDINTRIN_H + +/* Define the default attributes for the functions in this file */ +#define _DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("enqcmd"))) + +/// Reads 64-byte command pointed by \a __src, formats 64-byte enqueue store +/// data, and performs 64-byte enqueue store to memory pointed by \a __dst. +/// This intrinsics may only be used in User mode. +/// +/// \headerfile +/// +/// This intrinsics corresponds to the ENQCMD instruction. +/// +/// \param __dst +/// Pointer to the destination of the enqueue store. +/// \param __src +/// Pointer to 64-byte command data. +/// \returns If the command data is successfully written to \a __dst then 0 is +/// returned. Otherwise 1 is returned. +static __inline__ int _DEFAULT_FN_ATTRS +_enqcmd (void *__dst, const void *__src) +{ + return __builtin_ia32_enqcmd(__dst, __src); +} + +/// Reads 64-byte command pointed by \a __src, formats 64-byte enqueue store +/// data, and performs 64-byte enqueue store to memory pointed by \a __dst +/// This intrinsic may only be used in Privileged mode. +/// +/// \headerfile +/// +/// This intrinsics corresponds to the ENQCMDS instruction. +/// +/// \param __dst +/// Pointer to the destination of the enqueue store. +/// \param __src +/// Pointer to 64-byte command data. +/// \returns If the command data is successfully written to \a __dst then 0 is +/// returned. Otherwise 1 is returned. +static __inline__ int _DEFAULT_FN_ATTRS +_enqcmds (void *__dst, const void *__src) +{ + return __builtin_ia32_enqcmds(__dst, __src); +} + +#undef _DEFAULT_FN_ATTRS + +#endif /* __ENQCMDINTRIN_H */ diff --git a/lib/include/f16cintrin.h b/lib/include/f16cintrin.h index 3d35f28eb3..109b604ada 100644 --- a/lib/include/f16cintrin.h +++ b/lib/include/f16cintrin.h @@ -1,22 +1,8 @@ /*===---- f16cintrin.h - F16C intrinsics -----------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -52,9 +38,9 @@ static __inline float __DEFAULT_FN_ATTRS128 _cvtsh_ss(unsigned short __a) { - __v8hi v = {(short)__a, 0, 0, 0, 0, 0, 0, 0}; - __v4sf r = __builtin_ia32_vcvtph2ps(v); - return r[0]; + __v8hi __v = {(short)__a, 0, 0, 0, 0, 0, 0, 0}; + __v4sf __r = __builtin_ia32_vcvtph2ps(__v); + return __r[0]; } /// Converts a 32-bit single-precision float value to a 16-bit diff --git a/lib/include/float.h b/lib/include/float.h index 56215cd624..ed610b24aa 100644 --- a/lib/include/float.h +++ b/lib/include/float.h @@ -1,22 +1,8 @@ /*===---- float.h - Characteristics of floating point types ----------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -51,7 +37,7 @@ # undef FLT_MANT_DIG # undef DBL_MANT_DIG # undef LDBL_MANT_DIG -# if __STDC_VERSION__ >= 199901L || !defined(__STRICT_ANSI__) +# if __STDC_VERSION__ >= 199901L || !defined(__STRICT_ANSI__) || __cplusplus >= 201103L # undef DECIMAL_DIG # endif # undef FLT_DIG @@ -78,7 +64,7 @@ # undef FLT_MIN # undef DBL_MIN # undef LDBL_MIN -# if __STDC_VERSION__ >= 201112L || !defined(__STRICT_ANSI__) +# if __STDC_VERSION__ >= 201112L || !defined(__STRICT_ANSI__) || __cplusplus >= 201703L # undef FLT_TRUE_MIN # undef DBL_TRUE_MIN # undef LDBL_TRUE_MIN @@ -101,7 +87,7 @@ #define DBL_MANT_DIG __DBL_MANT_DIG__ #define LDBL_MANT_DIG __LDBL_MANT_DIG__ -#if __STDC_VERSION__ >= 199901L || !defined(__STRICT_ANSI__) +#if __STDC_VERSION__ >= 199901L || !defined(__STRICT_ANSI__) || __cplusplus >= 201103L # define DECIMAL_DIG __DECIMAL_DIG__ #endif @@ -137,7 +123,7 @@ #define DBL_MIN __DBL_MIN__ #define LDBL_MIN __LDBL_MIN__ -#if __STDC_VERSION__ >= 201112L || !defined(__STRICT_ANSI__) +#if __STDC_VERSION__ >= 201112L || !defined(__STRICT_ANSI__) || __cplusplus >= 201703L # define FLT_TRUE_MIN __FLT_DENORM_MIN__ # define DBL_TRUE_MIN __DBL_DENORM_MIN__ # define LDBL_TRUE_MIN __LDBL_DENORM_MIN__ diff --git a/lib/include/fma4intrin.h b/lib/include/fma4intrin.h index 7bae2f4a31..694801b3e8 100644 --- a/lib/include/fma4intrin.h +++ b/lib/include/fma4intrin.h @@ -1,22 +1,8 @@ /*===---- fma4intrin.h - FMA4 intrinsics -----------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/fmaintrin.h b/lib/include/fmaintrin.h index 094d13afea..d889b7c5e2 100644 --- a/lib/include/fmaintrin.h +++ b/lib/include/fmaintrin.h @@ -1,22 +1,8 @@ /*===---- fmaintrin.h - FMA intrinsics -------------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/fxsrintrin.h b/lib/include/fxsrintrin.h index 704b5ad60a..afee6aa976 100644 --- a/lib/include/fxsrintrin.h +++ b/lib/include/fxsrintrin.h @@ -1,22 +1,8 @@ /*===---- fxsrintrin.h - FXSR intrinsic ------------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/gfniintrin.h b/lib/include/gfniintrin.h index 804d4f3d06..9bff0fcb60 100644 --- a/lib/include/gfniintrin.h +++ b/lib/include/gfniintrin.h @@ -1,23 +1,9 @@ /*===----------------- gfniintrin.h - GFNI intrinsics ----------------------=== * * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/htmintrin.h b/lib/include/htmintrin.h index 69c8d7bb57..49c2b98607 100644 --- a/lib/include/htmintrin.h +++ b/lib/include/htmintrin.h @@ -1,22 +1,8 @@ /*===---- htmintrin.h - Standard header for PowerPC HTM ---------------===*\ * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * \*===----------------------------------------------------------------------===*/ diff --git a/lib/include/htmxlintrin.h b/lib/include/htmxlintrin.h index 049dbd61df..6ef6f4b342 100644 --- a/lib/include/htmxlintrin.h +++ b/lib/include/htmxlintrin.h @@ -1,22 +1,8 @@ /*===---- htmxlintrin.h - XL compiler HTM execution intrinsics-------------===*\ * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * \*===----------------------------------------------------------------------===*/ diff --git a/lib/include/ia32intrin.h b/lib/include/ia32intrin.h index f8972e3053..8e38df7318 100644 --- a/lib/include/ia32intrin.h +++ b/lib/include/ia32intrin.h @@ -1,22 +1,8 @@ /* ===-------- ia32intrin.h ---------------------------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -28,6 +14,160 @@ #ifndef __IA32INTRIN_H #define __IA32INTRIN_H +/** Find the first set bit starting from the lsb. 
Result is undefined if + * input is 0. + * + * \headerfile + * + * This intrinsic corresponds to the BSF instruction or the + * TZCNT instruction. + * + * \param __A + * A 32-bit integer operand. + * \returns A 32-bit integer containing the bit number. + */ +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +__bsfd(int __A) { + return __builtin_ctz(__A); +} + +/** Find the first set bit starting from the msb. Result is undefined if + * input is 0. + * + * \headerfile + * + * This intrinsic corresponds to the BSR instruction or the + * LZCNT instruction and an XOR . + * + * \param __A + * A 32-bit integer operand. + * \returns A 32-bit integer containing the bit number. + */ +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +__bsrd(int __A) { + return 31 - __builtin_clz(__A); +} + +/** Swaps the bytes in the input. Converting little endian to big endian or + * vice versa. + * + * \headerfile + * + * This intrinsic corresponds to the BSWAP instruction. + * + * \param __A + * A 32-bit integer operand. + * \returns A 32-bit integer containing the swapped bytes. + */ +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +__bswapd(int __A) { + return __builtin_bswap32(__A); +} + +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +_bswap(int __A) { + return __builtin_bswap32(__A); +} + +#define _bit_scan_forward(A) __bsfd((A)) +#define _bit_scan_reverse(A) __bsrd((A)) + +#ifdef __x86_64__ +/** Find the first set bit starting from the lsb. Result is undefined if + * input is 0. + * + * \headerfile + * + * This intrinsic corresponds to the BSF instruction or the + * TZCNT instruction. + * + * \param __A + * A 64-bit integer operand. + * \returns A 32-bit integer containing the bit number. + */ +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +__bsfq(long long __A) { + return __builtin_ctzll(__A); +} + +/** Find the first set bit starting from the msb. Result is undefined if + * input is 0. + * + * \headerfile + * + * This intrinsic corresponds to the BSR instruction or the + * LZCNT instruction and an XOR . + * + * \param __A + * A 64-bit integer operand. + * \returns A 32-bit integer containing the bit number. + */ +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +__bsrq(long long __A) { + return 63 - __builtin_clzll(__A); +} + +/** Swaps the bytes in the input. Converting little endian to big endian or + * vice versa. + * + * \headerfile + * + * This intrinsic corresponds to the BSWAP instruction. + * + * \param __A + * A 64-bit integer operand. + * \returns A 64-bit integer containing the swapped bytes. + */ +static __inline__ long long __attribute__((__always_inline__, __nodebug__)) +__bswapq(long long __A) { + return __builtin_bswap64(__A); +} + +#define _bswap64(A) __bswapq((A)) +#endif + +/** Counts the number of bits in the source operand having a value of 1. + * + * \headerfile + * + * This intrinsic corresponds to the POPCNT instruction or a + * a sequence of arithmetic and logic ops to calculate it. + * + * \param __A + * An unsigned 32-bit integer operand. + * \returns A 32-bit integer containing the number of bits with value 1 in the + * source operand. + */ +static __inline__ int __attribute__((__always_inline__, __nodebug__)) +__popcntd(unsigned int __A) +{ + return __builtin_popcount(__A); +} + +#define _popcnt32(A) __popcntd((A)) + +#ifdef __x86_64__ +/** Counts the number of bits in the source operand having a value of 1. 
+ * + * \headerfile + * + * This intrinsic corresponds to the POPCNT instruction or a + * a sequence of arithmetic and logic ops to calculate it. + * + * \param __A + * An unsigned 64-bit integer operand. + * \returns A 64-bit integer containing the number of bits with value 1 in the + * source operand. + */ +static __inline__ long long __attribute__((__always_inline__, __nodebug__)) +__popcntq(unsigned long long __A) +{ + return __builtin_popcountll(__A); +} + +#define _popcnt64(A) __popcntq((A)) +#endif /* __x86_64__ */ + #ifdef __x86_64__ static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) __readeflags(void) @@ -55,6 +195,92 @@ __writeeflags(unsigned int __f) } #endif /* !__x86_64__ */ +/** Adds the unsigned integer operand to the CRC-32C checksum of the + * unsigned char operand. + * + * \headerfile + * + * This intrinsic corresponds to the CRC32B instruction. + * + * \param __C + * An unsigned integer operand to add to the CRC-32C checksum of operand + * \a __D. + * \param __D + * An unsigned 8-bit integer operand used to compute the CRC-32C checksum. + * \returns The result of adding operand \a __C to the CRC-32C checksum of + * operand \a __D. + */ +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2"))) +__crc32b(unsigned int __C, unsigned char __D) +{ + return __builtin_ia32_crc32qi(__C, __D); +} + +/** Adds the unsigned integer operand to the CRC-32C checksum of the + * unsigned short operand. + * + * \headerfile + * + * This intrinsic corresponds to the CRC32W instruction. + * + * \param __C + * An unsigned integer operand to add to the CRC-32C checksum of operand + * \a __D. + * \param __D + * An unsigned 16-bit integer operand used to compute the CRC-32C checksum. + * \returns The result of adding operand \a __C to the CRC-32C checksum of + * operand \a __D. + */ +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2"))) +__crc32w(unsigned int __C, unsigned short __D) +{ + return __builtin_ia32_crc32hi(__C, __D); +} + +/** Adds the unsigned integer operand to the CRC-32C checksum of the + * second unsigned integer operand. + * + * \headerfile + * + * This intrinsic corresponds to the CRC32D instruction. + * + * \param __C + * An unsigned integer operand to add to the CRC-32C checksum of operand + * \a __D. + * \param __D + * An unsigned 32-bit integer operand used to compute the CRC-32C checksum. + * \returns The result of adding operand \a __C to the CRC-32C checksum of + * operand \a __D. + */ +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("sse4.2"))) +__crc32d(unsigned int __C, unsigned int __D) +{ + return __builtin_ia32_crc32si(__C, __D); +} + +#ifdef __x86_64__ +/** Adds the unsigned integer operand to the CRC-32C checksum of the + * unsigned 64-bit integer operand. + * + * \headerfile + * + * This intrinsic corresponds to the CRC32Q instruction. + * + * \param __C + * An unsigned integer operand to add to the CRC-32C checksum of operand + * \a __D. + * \param __D + * An unsigned 64-bit integer operand used to compute the CRC-32C checksum. + * \returns The result of adding operand \a __C to the CRC-32C checksum of + * operand \a __D. 
+ */ +static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__, __target__("sse4.2"))) +__crc32q(unsigned long long __C, unsigned long long __D) +{ + return __builtin_ia32_crc32di(__C, __D); +} +#endif /* __x86_64__ */ + static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) __rdpmc(int __A) { return __builtin_ia32_rdpmc(__A); @@ -75,4 +301,64 @@ _wbinvd(void) { __builtin_ia32_wbinvd(); } +static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) +__rolb(unsigned char __X, int __C) { + return __builtin_rotateleft8(__X, __C); +} + +static __inline__ unsigned char __attribute__((__always_inline__, __nodebug__)) +__rorb(unsigned char __X, int __C) { + return __builtin_rotateright8(__X, __C); +} + +static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) +__rolw(unsigned short __X, int __C) { + return __builtin_rotateleft16(__X, __C); +} + +static __inline__ unsigned short __attribute__((__always_inline__, __nodebug__)) +__rorw(unsigned short __X, int __C) { + return __builtin_rotateright16(__X, __C); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__rold(unsigned int __X, int __C) { + return __builtin_rotateleft32(__X, __C); +} + +static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__)) +__rord(unsigned int __X, int __C) { + return __builtin_rotateright32(__X, __C); +} + +#ifdef __x86_64__ +static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +__rolq(unsigned long long __X, int __C) { + return __builtin_rotateleft64(__X, __C); +} + +static __inline__ unsigned long long __attribute__((__always_inline__, __nodebug__)) +__rorq(unsigned long long __X, int __C) { + return __builtin_rotateright64(__X, __C); +} +#endif /* __x86_64__ */ + +#ifndef _MSC_VER +/* These are already provided as builtins for MSVC. */ +/* Select the correct function based on the size of long. */ +#ifdef __LP64__ +#define _lrotl(a,b) __rolq((a), (b)) +#define _lrotr(a,b) __rorq((a), (b)) +#else +#define _lrotl(a,b) __rold((a), (b)) +#define _lrotr(a,b) __rord((a), (b)) +#endif +#define _rotl(a,b) __rold((a), (b)) +#define _rotr(a,b) __rord((a), (b)) +#endif // _MSC_VER + +/* These are not builtins so need to be provided in all modes. */ +#define _rotwl(a,b) __rolw((a), (b)) +#define _rotwr(a,b) __rorw((a), (b)) + #endif /* __IA32INTRIN_H */ diff --git a/lib/include/immintrin.h b/lib/include/immintrin.h index 7d0722ec76..7555ad82fa 100644 --- a/lib/include/immintrin.h +++ b/lib/include/immintrin.h @@ -1,22 +1,8 @@ /*===---- immintrin.h - Intel intrinsics -----------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -195,6 +181,15 @@ #include #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BF16__) +#include +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || \ + (defined(__AVX512VL__) && defined(__AVX512BF16__)) +#include +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__PKU__) #include #endif @@ -241,18 +236,6 @@ _rdrand64_step(unsigned long long *__p) #endif #endif /* __RDRND__ */ -/* __bit_scan_forward */ -static __inline__ int __attribute__((__always_inline__, __nodebug__)) -_bit_scan_forward(int __A) { - return __builtin_ctz(__A); -} - -/* __bit_scan_reverse */ -static __inline__ int __attribute__((__always_inline__, __nodebug__)) -_bit_scan_reverse(int __A) { - return 31 - __builtin_clz(__A); -} - #if !defined(_MSC_VER) || __has_feature(modules) || defined(__FSGSBASE__) #ifdef __x86_64__ static __inline__ unsigned int __attribute__((__always_inline__, __nodebug__, __target__("fsgsbase"))) @@ -378,9 +361,8 @@ _storebe_i64(void * __P, long long __D) { #include #endif -#if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVE__) +/* No feature check desired due to internal MSC_VER checks */ #include -#endif #if !defined(_MSC_VER) || __has_feature(modules) || defined(__XSAVEOPT__) #include @@ -439,7 +421,21 @@ _storebe_i64(void * __P, long long __D) { #include #endif -#ifdef _MSC_VER +#if !defined(_MSC_VER) || __has_feature(modules) || \ + defined(__AVX512VP2INTERSECT__) +#include +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || \ + (defined(__AVX512VL__) && defined(__AVX512VP2INTERSECT__)) +#include +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__ENQCMD__) +#include +#endif + +#if defined(_MSC_VER) && __has_extension(gnu_asm) /* Define the default attributes for these intrinsics */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__)) #ifdef __cplusplus @@ -521,6 +517,6 @@ _InterlockedCompareExchange64_HLERelease(__int64 volatile *_Destination, #undef __DEFAULT_FN_ATTRS -#endif /* _MSC_VER */ +#endif /* defined(_MSC_VER) && __has_extension(gnu_asm) */ #endif /* __IMMINTRIN_H */ diff --git a/lib/include/intrin.h b/lib/include/intrin.h index 966258bab4..9786ba147f 100644 --- a/lib/include/intrin.h +++ b/lib/include/intrin.h @@ -1,22 +1,8 @@ /* ===-------- intrin.h ---------------------------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -200,10 +186,6 @@ __attribute__((__deprecated__("use other intrinsics or C++11 atomics instead"))) _WriteBarrier(void); unsigned __int32 xbegin(void); void _xend(void); -static __inline__ -#define _XCR_XFEATURE_ENABLED_MASK 0 -unsigned __int64 __cdecl _xgetbv(unsigned int); -void __cdecl _xsetbv(unsigned int, unsigned __int64); /* These additional intrinsics are turned on in x64/amd64/x86_64 mode. */ #ifdef __x86_64__ @@ -539,12 +521,6 @@ __cpuidex(int __info[4], int __level, int __ecx) { __asm__ ("cpuid" : "=a"(__info[0]), "=b" (__info[1]), "=c"(__info[2]), "=d"(__info[3]) : "a"(__level), "c"(__ecx)); } -static __inline__ unsigned __int64 __cdecl __DEFAULT_FN_ATTRS -_xgetbv(unsigned int __xcr_no) { - unsigned int __eax, __edx; - __asm__ ("xgetbv" : "=a" (__eax), "=d" (__edx) : "c" (__xcr_no)); - return ((unsigned __int64)__edx << 32) | __eax; -} static __inline__ void __DEFAULT_FN_ATTRS __halt(void) { __asm__ volatile ("hlt"); @@ -567,15 +543,9 @@ long _InterlockedAdd(long volatile *Addend, long Value); __int64 _ReadStatusReg(int); void _WriteStatusReg(int, __int64); -static inline unsigned short _byteswap_ushort (unsigned short val) { - return __builtin_bswap16(val); -} -static inline unsigned long _byteswap_ulong (unsigned long val) { - return __builtin_bswap32(val); -} -static inline unsigned __int64 _byteswap_uint64 (unsigned __int64 val) { - return __builtin_bswap64(val); -} +unsigned short __cdecl _byteswap_ushort(unsigned short val); +unsigned long __cdecl _byteswap_ulong (unsigned long val); +unsigned __int64 __cdecl _byteswap_uint64(unsigned __int64 val); #endif /*----------------------------------------------------------------------------*\ diff --git a/lib/include/inttypes.h b/lib/include/inttypes.h index 1d8eabab0f..1c894c4aca 100644 --- a/lib/include/inttypes.h +++ b/lib/include/inttypes.h @@ -1,27 +1,18 @@ /*===---- inttypes.h - Standard header for integer printf macros ----------===*\ * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * \*===----------------------------------------------------------------------===*/ #ifndef __CLANG_INTTYPES_H +// AIX system headers need inttypes.h to be re-enterable while _STD_TYPES_T +// is defined until an inclusion of it without _STD_TYPES_T occurs, in which +// case the header guard macro is defined. +#if !defined(_AIX) || !defined(_STD_TYPES_T) #define __CLANG_INTTYPES_H +#endif #if defined(_MSC_VER) && _MSC_VER < 1800 #error MSVC does not have inttypes.h prior to Visual Studio 2013 diff --git a/lib/include/invpcidintrin.h b/lib/include/invpcidintrin.h index c30a19fa3d..48dae0a86f 100644 --- a/lib/include/invpcidintrin.h +++ b/lib/include/invpcidintrin.h @@ -1,22 +1,8 @@ /*===------------- invpcidintrin.h - INVPCID intrinsic ---------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/iso646.h b/lib/include/iso646.h index dca13c5bab..e0a20c6f18 100644 --- a/lib/include/iso646.h +++ b/lib/include/iso646.h @@ -1,24 +1,8 @@ /*===---- iso646.h - Standard header for alternate spellings of operators---=== * - * Copyright (c) 2008 Eli Friedman - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/limits.h b/lib/include/limits.h index f04187ced2..c653580bac 100644 --- a/lib/include/limits.h +++ b/lib/include/limits.h @@ -1,24 +1,8 @@ /*===---- limits.h - Standard header for integer sizes --------------------===*\ * - * Copyright (c) 2009 Chris Lattner - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * \*===----------------------------------------------------------------------===*/ diff --git a/lib/include/lwpintrin.h b/lib/include/lwpintrin.h index 0b28d73582..d8ab0db037 100644 --- a/lib/include/lwpintrin.h +++ b/lib/include/lwpintrin.h @@ -1,22 +1,8 @@ /*===---- lwpintrin.h - LWP intrinsics -------------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/lzcntintrin.h b/lib/include/lzcntintrin.h index 35c1651cc4..f4ddce9d0e 100644 --- a/lib/include/lzcntintrin.h +++ b/lib/include/lzcntintrin.h @@ -1,22 +1,8 @@ /*===---- lzcntintrin.h - LZCNT intrinsics ---------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/mm3dnow.h b/lib/include/mm3dnow.h index b0288757a3..22ab13aa33 100644 --- a/lib/include/mm3dnow.h +++ b/lib/include/mm3dnow.h @@ -1,22 +1,8 @@ /*===---- mm3dnow.h - 3DNow! intrinsics ------------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/mm_malloc.h b/lib/include/mm_malloc.h index 305afd31ad..0ea32517ae 100644 --- a/lib/include/mm_malloc.h +++ b/lib/include/mm_malloc.h @@ -1,22 +1,8 @@ /*===---- mm_malloc.h - Allocating and Freeing Aligned Memory Blocks -------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/mmintrin.h b/lib/include/mmintrin.h index a73539942a..79a8b55016 100644 --- a/lib/include/mmintrin.h +++ b/lib/include/mmintrin.h @@ -1,22 +1,8 @@ /*===---- mmintrin.h - MMX intrinsics --------------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -24,7 +10,7 @@ #ifndef __MMINTRIN_H #define __MMINTRIN_H -typedef long long __m64 __attribute__((__vector_size__(8))); +typedef long long __m64 __attribute__((__vector_size__(8), __aligned__(8))); typedef long long __v1di __attribute__((__vector_size__(8))); typedef int __v2si __attribute__((__vector_size__(8))); diff --git a/lib/include/module.modulemap b/lib/include/module.modulemap index 1d1af57fd0..7954a77a41 100644 --- a/lib/include/module.modulemap +++ b/lib/include/module.modulemap @@ -1,22 +1,8 @@ /*===---- module.modulemap - intrinsics module map -------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -168,4 +154,5 @@ module _Builtin_stddef_max_align_t [system] [extern_c] { module opencl_c { requires opencl header "opencl-c.h" + header "opencl-c-base.h" } diff --git a/lib/include/movdirintrin.h b/lib/include/movdirintrin.h index ec20c53709..30c4d02c83 100644 --- a/lib/include/movdirintrin.h +++ b/lib/include/movdirintrin.h @@ -1,22 +1,8 @@ /*===------------------------- movdirintrin.h ------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/msa.h b/lib/include/msa.h index da680f5ca9..19ea6071aa 100644 --- a/lib/include/msa.h +++ b/lib/include/msa.h @@ -1,22 +1,8 @@ /*===---- msa.h - MIPS MSA intrinsics --------------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/mwaitxintrin.h b/lib/include/mwaitxintrin.h index 2921eadfa5..bca395b0e0 100644 --- a/lib/include/mwaitxintrin.h +++ b/lib/include/mwaitxintrin.h @@ -1,22 +1,8 @@ /*===---- mwaitxintrin.h - MONITORX/MWAITX intrinsics ----------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/nmmintrin.h b/lib/include/nmmintrin.h index 348fb8c7c1..672aea4966 100644 --- a/lib/include/nmmintrin.h +++ b/lib/include/nmmintrin.h @@ -1,22 +1,8 @@ /*===---- nmmintrin.h - SSE4 intrinsics ------------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/opencl-c-base.h b/lib/include/opencl-c-base.h new file mode 100644 index 0000000000..a82954ddd3 --- /dev/null +++ b/lib/include/opencl-c-base.h @@ -0,0 +1,578 @@ +//===----- opencl-c-base.h - OpenCL C language base definitions -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _OPENCL_BASE_H_ +#define _OPENCL_BASE_H_ + +// built-in scalar data types: + +/** + * An unsigned 8-bit integer. + */ +typedef unsigned char uchar; + +/** + * An unsigned 16-bit integer. + */ +typedef unsigned short ushort; + +/** + * An unsigned 32-bit integer. + */ +typedef unsigned int uint; + +/** + * An unsigned 64-bit integer. + */ +typedef unsigned long ulong; + +/** + * The unsigned integer type of the result of the sizeof operator. This + * is a 32-bit unsigned integer if CL_DEVICE_ADDRESS_BITS + * defined in table 4.3 is 32-bits and is a 64-bit unsigned integer if + * CL_DEVICE_ADDRESS_BITS is 64-bits. + */ +typedef __SIZE_TYPE__ size_t; + +/** + * A signed integer type that is the result of subtracting two pointers. + * This is a 32-bit signed integer if CL_DEVICE_ADDRESS_BITS + * defined in table 4.3 is 32-bits and is a 64-bit signed integer if + * CL_DEVICE_ADDRESS_BITS is 64-bits. 
+ */ +typedef __PTRDIFF_TYPE__ ptrdiff_t; + +/** + * A signed integer type with the property that any valid pointer to + * void can be converted to this type, then converted back to pointer + * to void, and the result will compare equal to the original pointer. + */ +typedef __INTPTR_TYPE__ intptr_t; + +/** + * An unsigned integer type with the property that any valid pointer to + * void can be converted to this type, then converted back to pointer + * to void, and the result will compare equal to the original pointer. + */ +typedef __UINTPTR_TYPE__ uintptr_t; + +// built-in vector data types: +typedef char char2 __attribute__((ext_vector_type(2))); +typedef char char3 __attribute__((ext_vector_type(3))); +typedef char char4 __attribute__((ext_vector_type(4))); +typedef char char8 __attribute__((ext_vector_type(8))); +typedef char char16 __attribute__((ext_vector_type(16))); +typedef uchar uchar2 __attribute__((ext_vector_type(2))); +typedef uchar uchar3 __attribute__((ext_vector_type(3))); +typedef uchar uchar4 __attribute__((ext_vector_type(4))); +typedef uchar uchar8 __attribute__((ext_vector_type(8))); +typedef uchar uchar16 __attribute__((ext_vector_type(16))); +typedef short short2 __attribute__((ext_vector_type(2))); +typedef short short3 __attribute__((ext_vector_type(3))); +typedef short short4 __attribute__((ext_vector_type(4))); +typedef short short8 __attribute__((ext_vector_type(8))); +typedef short short16 __attribute__((ext_vector_type(16))); +typedef ushort ushort2 __attribute__((ext_vector_type(2))); +typedef ushort ushort3 __attribute__((ext_vector_type(3))); +typedef ushort ushort4 __attribute__((ext_vector_type(4))); +typedef ushort ushort8 __attribute__((ext_vector_type(8))); +typedef ushort ushort16 __attribute__((ext_vector_type(16))); +typedef int int2 __attribute__((ext_vector_type(2))); +typedef int int3 __attribute__((ext_vector_type(3))); +typedef int int4 __attribute__((ext_vector_type(4))); +typedef int int8 __attribute__((ext_vector_type(8))); +typedef int int16 __attribute__((ext_vector_type(16))); +typedef uint uint2 __attribute__((ext_vector_type(2))); +typedef uint uint3 __attribute__((ext_vector_type(3))); +typedef uint uint4 __attribute__((ext_vector_type(4))); +typedef uint uint8 __attribute__((ext_vector_type(8))); +typedef uint uint16 __attribute__((ext_vector_type(16))); +typedef long long2 __attribute__((ext_vector_type(2))); +typedef long long3 __attribute__((ext_vector_type(3))); +typedef long long4 __attribute__((ext_vector_type(4))); +typedef long long8 __attribute__((ext_vector_type(8))); +typedef long long16 __attribute__((ext_vector_type(16))); +typedef ulong ulong2 __attribute__((ext_vector_type(2))); +typedef ulong ulong3 __attribute__((ext_vector_type(3))); +typedef ulong ulong4 __attribute__((ext_vector_type(4))); +typedef ulong ulong8 __attribute__((ext_vector_type(8))); +typedef ulong ulong16 __attribute__((ext_vector_type(16))); +typedef float float2 __attribute__((ext_vector_type(2))); +typedef float float3 __attribute__((ext_vector_type(3))); +typedef float float4 __attribute__((ext_vector_type(4))); +typedef float float8 __attribute__((ext_vector_type(8))); +typedef float float16 __attribute__((ext_vector_type(16))); +#ifdef cl_khr_fp16 +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +typedef half half2 __attribute__((ext_vector_type(2))); +typedef half half3 __attribute__((ext_vector_type(3))); +typedef half half4 __attribute__((ext_vector_type(4))); +typedef half half8 __attribute__((ext_vector_type(8))); +typedef half half16 
__attribute__((ext_vector_type(16))); +#endif +#ifdef cl_khr_fp64 +#if __OPENCL_C_VERSION__ < CL_VERSION_1_2 +#pragma OPENCL EXTENSION cl_khr_fp64 : enable +#endif +typedef double double2 __attribute__((ext_vector_type(2))); +typedef double double3 __attribute__((ext_vector_type(3))); +typedef double double4 __attribute__((ext_vector_type(4))); +typedef double double8 __attribute__((ext_vector_type(8))); +typedef double double16 __attribute__((ext_vector_type(16))); +#endif + +#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#define NULL ((void*)0) +#endif + +/** + * Value of maximum non-infinite single-precision floating-point + * number. + */ +#define MAXFLOAT 0x1.fffffep127f + +/** + * A positive float constant expression. HUGE_VALF evaluates + * to +infinity. Used as an error value returned by the built-in + * math functions. + */ +#define HUGE_VALF (__builtin_huge_valf()) + +/** + * A positive double constant expression. HUGE_VAL evaluates + * to +infinity. Used as an error value returned by the built-in + * math functions. + */ +#define HUGE_VAL (__builtin_huge_val()) + +/** + * A constant expression of type float representing positive or + * unsigned infinity. + */ +#define INFINITY (__builtin_inff()) + +/** + * A constant expression of type float representing a quiet NaN. + */ +#define NAN as_float(INT_MAX) + +#define FP_ILOGB0 INT_MIN +#define FP_ILOGBNAN INT_MAX + +#define FLT_DIG 6 +#define FLT_MANT_DIG 24 +#define FLT_MAX_10_EXP +38 +#define FLT_MAX_EXP +128 +#define FLT_MIN_10_EXP -37 +#define FLT_MIN_EXP -125 +#define FLT_RADIX 2 +#define FLT_MAX 0x1.fffffep127f +#define FLT_MIN 0x1.0p-126f +#define FLT_EPSILON 0x1.0p-23f + +#define M_E_F 2.71828182845904523536028747135266250f +#define M_LOG2E_F 1.44269504088896340735992468100189214f +#define M_LOG10E_F 0.434294481903251827651128918916605082f +#define M_LN2_F 0.693147180559945309417232121458176568f +#define M_LN10_F 2.30258509299404568401799145468436421f +#define M_PI_F 3.14159265358979323846264338327950288f +#define M_PI_2_F 1.57079632679489661923132169163975144f +#define M_PI_4_F 0.785398163397448309615660845819875721f +#define M_1_PI_F 0.318309886183790671537767526745028724f +#define M_2_PI_F 0.636619772367581343075535053490057448f +#define M_2_SQRTPI_F 1.12837916709551257389615890312154517f +#define M_SQRT2_F 1.41421356237309504880168872420969808f +#define M_SQRT1_2_F 0.707106781186547524400844362104849039f + +#define DBL_DIG 15 +#define DBL_MANT_DIG 53 +#define DBL_MAX_10_EXP +308 +#define DBL_MAX_EXP +1024 +#define DBL_MIN_10_EXP -307 +#define DBL_MIN_EXP -1021 +#define DBL_RADIX 2 +#define DBL_MAX 0x1.fffffffffffffp1023 +#define DBL_MIN 0x1.0p-1022 +#define DBL_EPSILON 0x1.0p-52 + +#define M_E 0x1.5bf0a8b145769p+1 +#define M_LOG2E 0x1.71547652b82fep+0 +#define M_LOG10E 0x1.bcb7b1526e50ep-2 +#define M_LN2 0x1.62e42fefa39efp-1 +#define M_LN10 0x1.26bb1bbb55516p+1 +#define M_PI 0x1.921fb54442d18p+1 +#define M_PI_2 0x1.921fb54442d18p+0 +#define M_PI_4 0x1.921fb54442d18p-1 +#define M_1_PI 0x1.45f306dc9c883p-2 +#define M_2_PI 0x1.45f306dc9c883p-1 +#define M_2_SQRTPI 0x1.20dd750429b6dp+0 +#define M_SQRT2 0x1.6a09e667f3bcdp+0 +#define M_SQRT1_2 0x1.6a09e667f3bcdp-1 + +#ifdef cl_khr_fp16 + +#define HALF_DIG 3 +#define HALF_MANT_DIG 11 +#define HALF_MAX_10_EXP +4 +#define HALF_MAX_EXP +16 +#define HALF_MIN_10_EXP -4 +#define HALF_MIN_EXP -13 +#define HALF_RADIX 2 +#define HALF_MAX ((0x1.ffcp15h)) +#define HALF_MIN ((0x1.0p-14h)) +#define HALF_EPSILON ((0x1.0p-10h)) + +#define M_E_H 2.71828182845904523536028747135266250h +#define 
M_LOG2E_H 1.44269504088896340735992468100189214h +#define M_LOG10E_H 0.434294481903251827651128918916605082h +#define M_LN2_H 0.693147180559945309417232121458176568h +#define M_LN10_H 2.30258509299404568401799145468436421h +#define M_PI_H 3.14159265358979323846264338327950288h +#define M_PI_2_H 1.57079632679489661923132169163975144h +#define M_PI_4_H 0.785398163397448309615660845819875721h +#define M_1_PI_H 0.318309886183790671537767526745028724h +#define M_2_PI_H 0.636619772367581343075535053490057448h +#define M_2_SQRTPI_H 1.12837916709551257389615890312154517h +#define M_SQRT2_H 1.41421356237309504880168872420969808h +#define M_SQRT1_2_H 0.707106781186547524400844362104849039h + +#endif //cl_khr_fp16 + +#define CHAR_BIT 8 +#define SCHAR_MAX 127 +#define SCHAR_MIN (-128) +#define UCHAR_MAX 255 +#define CHAR_MAX SCHAR_MAX +#define CHAR_MIN SCHAR_MIN +#define USHRT_MAX 65535 +#define SHRT_MAX 32767 +#define SHRT_MIN (-32768) +#define UINT_MAX 0xffffffff +#define INT_MAX 2147483647 +#define INT_MIN (-2147483647-1) +#define ULONG_MAX 0xffffffffffffffffUL +#define LONG_MAX 0x7fffffffffffffffL +#define LONG_MIN (-0x7fffffffffffffffL-1) + +// OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions + +// Flag type and values for barrier, mem_fence, read_mem_fence, write_mem_fence +typedef uint cl_mem_fence_flags; + +/** + * Queue a memory fence to ensure correct + * ordering of memory operations to local memory + */ +#define CLK_LOCAL_MEM_FENCE 0x01 + +/** + * Queue a memory fence to ensure correct + * ordering of memory operations to global memory + */ +#define CLK_GLOBAL_MEM_FENCE 0x02 + +#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 + +typedef enum memory_scope { + memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, + memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, + memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, + memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, +#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) + memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP +#endif +} memory_scope; + +#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 + +#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +/** + * Queue a memory fence to ensure correct ordering of memory + * operations between work-items of a work-group to + * image memory. + */ +#define CLK_IMAGE_MEM_FENCE 0x04 + +#ifndef ATOMIC_VAR_INIT +#define ATOMIC_VAR_INIT(x) (x) +#endif //ATOMIC_VAR_INIT +#define ATOMIC_FLAG_INIT 0 + +// enum values aligned with what clang uses in EmitAtomicExpr() +typedef enum memory_order +{ + memory_order_relaxed = __ATOMIC_RELAXED, + memory_order_acquire = __ATOMIC_ACQUIRE, + memory_order_release = __ATOMIC_RELEASE, + memory_order_acq_rel = __ATOMIC_ACQ_REL, + memory_order_seq_cst = __ATOMIC_SEQ_CST +} memory_order; + +#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 + +// OpenCL v1.1 s6.11.3, v1.2 s6.12.14, v2.0 s6.13.14 - Image Read and Write Functions + +// These values need to match the runtime equivalent +// +// Addressing Mode. +// +#define CLK_ADDRESS_NONE 0 +#define CLK_ADDRESS_CLAMP_TO_EDGE 2 +#define CLK_ADDRESS_CLAMP 4 +#define CLK_ADDRESS_REPEAT 6 +#define CLK_ADDRESS_MIRRORED_REPEAT 8 + +// +// Coordination Normalization +// +#define CLK_NORMALIZED_COORDS_FALSE 0 +#define CLK_NORMALIZED_COORDS_TRUE 1 + +// +// Filtering Mode. 
+// +#define CLK_FILTER_NEAREST 0x10 +#define CLK_FILTER_LINEAR 0x20 + +#ifdef cl_khr_gl_msaa_sharing +#pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable +#endif //cl_khr_gl_msaa_sharing + +// +// Channel Datatype. +// +#define CLK_SNORM_INT8 0x10D0 +#define CLK_SNORM_INT16 0x10D1 +#define CLK_UNORM_INT8 0x10D2 +#define CLK_UNORM_INT16 0x10D3 +#define CLK_UNORM_SHORT_565 0x10D4 +#define CLK_UNORM_SHORT_555 0x10D5 +#define CLK_UNORM_INT_101010 0x10D6 +#define CLK_SIGNED_INT8 0x10D7 +#define CLK_SIGNED_INT16 0x10D8 +#define CLK_SIGNED_INT32 0x10D9 +#define CLK_UNSIGNED_INT8 0x10DA +#define CLK_UNSIGNED_INT16 0x10DB +#define CLK_UNSIGNED_INT32 0x10DC +#define CLK_HALF_FLOAT 0x10DD +#define CLK_FLOAT 0x10DE +#define CLK_UNORM_INT24 0x10DF + +// Channel order, numbering must be aligned with cl_channel_order in cl.h +// +#define CLK_R 0x10B0 +#define CLK_A 0x10B1 +#define CLK_RG 0x10B2 +#define CLK_RA 0x10B3 +#define CLK_RGB 0x10B4 +#define CLK_RGBA 0x10B5 +#define CLK_BGRA 0x10B6 +#define CLK_ARGB 0x10B7 +#define CLK_INTENSITY 0x10B8 +#define CLK_LUMINANCE 0x10B9 +#define CLK_Rx 0x10BA +#define CLK_RGx 0x10BB +#define CLK_RGBx 0x10BC +#define CLK_DEPTH 0x10BD +#define CLK_DEPTH_STENCIL 0x10BE +#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#define CLK_sRGB 0x10BF +#define CLK_sRGBx 0x10C0 +#define CLK_sRGBA 0x10C1 +#define CLK_sBGRA 0x10C2 +#define CLK_ABGR 0x10C3 +#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 + +// OpenCL v2.0 s6.13.16 - Pipe Functions +#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 +#define CLK_NULL_RESERVE_ID (__builtin_astype(((void*)(__SIZE_MAX__)), reserve_id_t)) +#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 + + +// OpenCL v2.0 s6.13.17 - Enqueue Kernels +#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 + +#define CL_COMPLETE 0x0 +#define CL_RUNNING 0x1 +#define CL_SUBMITTED 0x2 +#define CL_QUEUED 0x3 + +#define CLK_SUCCESS 0 +#define CLK_ENQUEUE_FAILURE -101 +#define CLK_INVALID_QUEUE -102 +#define CLK_INVALID_NDRANGE -160 +#define CLK_INVALID_EVENT_WAIT_LIST -57 +#define CLK_DEVICE_QUEUE_FULL -161 +#define CLK_INVALID_ARG_SIZE -51 +#define CLK_EVENT_ALLOCATION_FAILURE -100 +#define CLK_OUT_OF_RESOURCES -5 + +#define CLK_NULL_QUEUE 0 +#define CLK_NULL_EVENT (__builtin_astype(((void*)(__SIZE_MAX__)), clk_event_t)) + +// execution model related definitions +#define CLK_ENQUEUE_FLAGS_NO_WAIT 0x0 +#define CLK_ENQUEUE_FLAGS_WAIT_KERNEL 0x1 +#define CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP 0x2 + +typedef int kernel_enqueue_flags_t; +typedef int clk_profiling_info; + +// Profiling info name (see capture_event_profiling_info) +#define CLK_PROFILING_COMMAND_EXEC_TIME 0x1 + +#define MAX_WORK_DIM 3 + +typedef struct { + unsigned int workDimension; + size_t globalWorkOffset[MAX_WORK_DIM]; + size_t globalWorkSize[MAX_WORK_DIM]; + size_t localWorkSize[MAX_WORK_DIM]; +} ndrange_t; + +#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 + +#ifdef cl_intel_device_side_avc_motion_estimation +#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : begin + +#define CLK_AVC_ME_MAJOR_16x16_INTEL 0x0 +#define CLK_AVC_ME_MAJOR_16x8_INTEL 0x1 +#define CLK_AVC_ME_MAJOR_8x16_INTEL 0x2 +#define CLK_AVC_ME_MAJOR_8x8_INTEL 0x3 + +#define CLK_AVC_ME_MINOR_8x8_INTEL 0x0 +#define CLK_AVC_ME_MINOR_8x4_INTEL 0x1 +#define CLK_AVC_ME_MINOR_4x8_INTEL 0x2 +#define CLK_AVC_ME_MINOR_4x4_INTEL 0x3 + +#define CLK_AVC_ME_MAJOR_FORWARD_INTEL 0x0 +#define CLK_AVC_ME_MAJOR_BACKWARD_INTEL 0x1 +#define CLK_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2 + +#define CLK_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0 +#define 
CLK_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E +#define CLK_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D +#define CLK_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B +#define CLK_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77 +#define CLK_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F +#define CLK_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F +#define CLK_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F + +#define CLK_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0 +#define CLK_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1 +#define CLK_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2 + +#define CLK_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0 +#define CLK_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1 +#define CLK_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2 +#define CLK_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3 +#define CLK_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4 +#define CLK_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5 +#define CLK_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6 +#define CLK_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7 +#define CLK_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8 + +#define CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0 +#define CLK_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2 + +#define CLK_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0 +#define CLK_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1 +#define CLK_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3 + +#define CLK_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0 +#define CLK_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1 +#define CLK_AVC_ME_COST_PRECISION_PEL_INTEL 0x2 +#define CLK_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3 + +#define CLK_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10 +#define CLK_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15 +#define CLK_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20 +#define CLK_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B +#define CLK_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30 + +#define CLK_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0 +#define CLK_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2 +#define CLK_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4 +#define CLK_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8 + +#define CLK_AVC_ME_INTRA_16x16_INTEL 0x0 +#define CLK_AVC_ME_INTRA_8x8_INTEL 0x1 +#define CLK_AVC_ME_INTRA_4x4_INTEL 0x2 + +#define CLK_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0 +#define CLK_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000 + +#define CLK_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL (0x1 << 24) +#define CLK_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL (0x2 << 24) +#define CLK_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL (0x3 << 24) +#define CLK_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL (0x55 << 24) +#define CLK_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL (0xAA << 24) +#define CLK_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL (0xFF << 24) +#define CLK_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL (0x1 << 24) +#define CLK_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL (0x2 << 24) +#define CLK_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL (0x1 << 26) +#define CLK_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL (0x2 << 26) +#define CLK_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL (0x1 << 28) +#define CLK_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL (0x2 << 28) +#define CLK_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL (0x1 << 30) +#define CLK_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL (0x2 << 30) + +#define CLK_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00 +#define CLK_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80 + +#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_ALL_INTEL 0x0 +#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6 +#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5 +#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3 + +#define CLK_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60 +#define 
CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10 +#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8 +#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4 + +#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0 +#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 +#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2 +#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3 +#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4 +#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4 +#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5 +#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6 +#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7 +#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8 +#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0 +#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 +#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2 +#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3 + +#define CLK_AVC_ME_FRAME_FORWARD_INTEL 0x1 +#define CLK_AVC_ME_FRAME_BACKWARD_INTEL 0x2 +#define CLK_AVC_ME_FRAME_DUAL_INTEL 0x3 + +#define CLK_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0 +#define CLK_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1 + +#define CLK_AVC_ME_INITIALIZE_INTEL 0x0 + +#define CLK_AVC_IME_PAYLOAD_INITIALIZE_INTEL 0x0 +#define CLK_AVC_REF_PAYLOAD_INITIALIZE_INTEL 0x0 +#define CLK_AVC_SIC_PAYLOAD_INITIALIZE_INTEL 0x0 + +#define CLK_AVC_IME_RESULT_INITIALIZE_INTEL 0x0 +#define CLK_AVC_REF_RESULT_INITIALIZE_INTEL 0x0 +#define CLK_AVC_SIC_RESULT_INITIALIZE_INTEL 0x0 + +#define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0 +#define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0 +#define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0 +#define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0 + +#pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : end +#endif // cl_intel_device_side_avc_motion_estimation + +#endif //_OPENCL_BASE_H_ diff --git a/lib/include/opencl-c.h b/lib/include/opencl-c.h index 3d3dfb7490..4207c53cce 100644 --- a/lib/include/opencl-c.h +++ b/lib/include/opencl-c.h @@ -1,15 +1,16 @@ //===--- opencl-c.h - OpenCL C language builtin function header -----------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef _OPENCL_H_ #define _OPENCL_H_ +#include "opencl-c-base.h" + #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 #ifndef cl_khr_depth_images #define cl_khr_depth_images @@ -23,9 +24,6 @@ #endif //__OPENCL_C_VERSION__ < CL_VERSION_2_0 #if __OPENCL_C_VERSION__ >= CL_VERSION_1_2 -#ifndef cl_intel_planar_yuv -#define cl_intel_planar_yuv -#endif // cl_intel_planar_yuv #pragma OPENCL EXTENSION cl_intel_planar_yuv : begin #pragma OPENCL EXTENSION cl_intel_planar_yuv : end #endif // __OPENCL_C_VERSION__ >= CL_VERSION_1_2 @@ -37,255 +35,6 @@ #define __purefn __attribute__((pure)) #define __cnfn __attribute__((const)) -// built-in scalar data types: - -/** - * An unsigned 8-bit integer. 
- */ -typedef unsigned char uchar; - -/** - * An unsigned 16-bit integer. - */ -typedef unsigned short ushort; - -/** - * An unsigned 32-bit integer. - */ -typedef unsigned int uint; - -/** - * An unsigned 64-bit integer. - */ -typedef unsigned long ulong; - -/** - * The unsigned integer type of the result of the sizeof operator. This - * is a 32-bit unsigned integer if CL_DEVICE_ADDRESS_BITS - * defined in table 4.3 is 32-bits and is a 64-bit unsigned integer if - * CL_DEVICE_ADDRESS_BITS is 64-bits. - */ -typedef __SIZE_TYPE__ size_t; - -/** - * A signed integer type that is the result of subtracting two pointers. - * This is a 32-bit signed integer if CL_DEVICE_ADDRESS_BITS - * defined in table 4.3 is 32-bits and is a 64-bit signed integer if - * CL_DEVICE_ADDRESS_BITS is 64-bits. - */ -typedef __PTRDIFF_TYPE__ ptrdiff_t; - -/** -* A signed integer type with the property that any valid pointer to -* void can be converted to this type, then converted back to pointer -* to void, and the result will compare equal to the original pointer. -*/ -typedef __INTPTR_TYPE__ intptr_t; - -/** -* An unsigned integer type with the property that any valid pointer to -* void can be converted to this type, then converted back to pointer -* to void, and the result will compare equal to the original pointer. -*/ -typedef __UINTPTR_TYPE__ uintptr_t; - -// built-in vector data types: -typedef char char2 __attribute__((ext_vector_type(2))); -typedef char char3 __attribute__((ext_vector_type(3))); -typedef char char4 __attribute__((ext_vector_type(4))); -typedef char char8 __attribute__((ext_vector_type(8))); -typedef char char16 __attribute__((ext_vector_type(16))); -typedef uchar uchar2 __attribute__((ext_vector_type(2))); -typedef uchar uchar3 __attribute__((ext_vector_type(3))); -typedef uchar uchar4 __attribute__((ext_vector_type(4))); -typedef uchar uchar8 __attribute__((ext_vector_type(8))); -typedef uchar uchar16 __attribute__((ext_vector_type(16))); -typedef short short2 __attribute__((ext_vector_type(2))); -typedef short short3 __attribute__((ext_vector_type(3))); -typedef short short4 __attribute__((ext_vector_type(4))); -typedef short short8 __attribute__((ext_vector_type(8))); -typedef short short16 __attribute__((ext_vector_type(16))); -typedef ushort ushort2 __attribute__((ext_vector_type(2))); -typedef ushort ushort3 __attribute__((ext_vector_type(3))); -typedef ushort ushort4 __attribute__((ext_vector_type(4))); -typedef ushort ushort8 __attribute__((ext_vector_type(8))); -typedef ushort ushort16 __attribute__((ext_vector_type(16))); -typedef int int2 __attribute__((ext_vector_type(2))); -typedef int int3 __attribute__((ext_vector_type(3))); -typedef int int4 __attribute__((ext_vector_type(4))); -typedef int int8 __attribute__((ext_vector_type(8))); -typedef int int16 __attribute__((ext_vector_type(16))); -typedef uint uint2 __attribute__((ext_vector_type(2))); -typedef uint uint3 __attribute__((ext_vector_type(3))); -typedef uint uint4 __attribute__((ext_vector_type(4))); -typedef uint uint8 __attribute__((ext_vector_type(8))); -typedef uint uint16 __attribute__((ext_vector_type(16))); -typedef long long2 __attribute__((ext_vector_type(2))); -typedef long long3 __attribute__((ext_vector_type(3))); -typedef long long4 __attribute__((ext_vector_type(4))); -typedef long long8 __attribute__((ext_vector_type(8))); -typedef long long16 __attribute__((ext_vector_type(16))); -typedef ulong ulong2 __attribute__((ext_vector_type(2))); -typedef ulong ulong3 __attribute__((ext_vector_type(3))); -typedef 
ulong ulong4 __attribute__((ext_vector_type(4))); -typedef ulong ulong8 __attribute__((ext_vector_type(8))); -typedef ulong ulong16 __attribute__((ext_vector_type(16))); -typedef float float2 __attribute__((ext_vector_type(2))); -typedef float float3 __attribute__((ext_vector_type(3))); -typedef float float4 __attribute__((ext_vector_type(4))); -typedef float float8 __attribute__((ext_vector_type(8))); -typedef float float16 __attribute__((ext_vector_type(16))); -#ifdef cl_khr_fp16 -#pragma OPENCL EXTENSION cl_khr_fp16 : enable -typedef half half2 __attribute__((ext_vector_type(2))); -typedef half half3 __attribute__((ext_vector_type(3))); -typedef half half4 __attribute__((ext_vector_type(4))); -typedef half half8 __attribute__((ext_vector_type(8))); -typedef half half16 __attribute__((ext_vector_type(16))); -#endif -#ifdef cl_khr_fp64 -#if __OPENCL_C_VERSION__ < CL_VERSION_1_2 -#pragma OPENCL EXTENSION cl_khr_fp64 : enable -#endif -typedef double double2 __attribute__((ext_vector_type(2))); -typedef double double3 __attribute__((ext_vector_type(3))); -typedef double double4 __attribute__((ext_vector_type(4))); -typedef double double8 __attribute__((ext_vector_type(8))); -typedef double double16 __attribute__((ext_vector_type(16))); -#endif - -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -#define NULL ((void*)0) -#endif - -/** - * Value of maximum non-infinite single-precision floating-point - * number. - */ -#define MAXFLOAT 0x1.fffffep127f - -/** - * A positive float constant expression. HUGE_VALF evaluates - * to +infinity. Used as an error value returned by the built-in - * math functions. - */ -#define HUGE_VALF (__builtin_huge_valf()) - -/** - * A positive double constant expression. HUGE_VAL evaluates - * to +infinity. Used as an error value returned by the built-in - * math functions. - */ -#define HUGE_VAL (__builtin_huge_val()) - -/** - * A constant expression of type float representing positive or - * unsigned infinity. - */ -#define INFINITY (__builtin_inff()) - -/** - * A constant expression of type float representing a quiet NaN. 
- */ -#define NAN as_float(INT_MAX) - -#define FP_ILOGB0 INT_MIN -#define FP_ILOGBNAN INT_MAX - -#define FLT_DIG 6 -#define FLT_MANT_DIG 24 -#define FLT_MAX_10_EXP +38 -#define FLT_MAX_EXP +128 -#define FLT_MIN_10_EXP -37 -#define FLT_MIN_EXP -125 -#define FLT_RADIX 2 -#define FLT_MAX 0x1.fffffep127f -#define FLT_MIN 0x1.0p-126f -#define FLT_EPSILON 0x1.0p-23f - -#define M_E_F 2.71828182845904523536028747135266250f -#define M_LOG2E_F 1.44269504088896340735992468100189214f -#define M_LOG10E_F 0.434294481903251827651128918916605082f -#define M_LN2_F 0.693147180559945309417232121458176568f -#define M_LN10_F 2.30258509299404568401799145468436421f -#define M_PI_F 3.14159265358979323846264338327950288f -#define M_PI_2_F 1.57079632679489661923132169163975144f -#define M_PI_4_F 0.785398163397448309615660845819875721f -#define M_1_PI_F 0.318309886183790671537767526745028724f -#define M_2_PI_F 0.636619772367581343075535053490057448f -#define M_2_SQRTPI_F 1.12837916709551257389615890312154517f -#define M_SQRT2_F 1.41421356237309504880168872420969808f -#define M_SQRT1_2_F 0.707106781186547524400844362104849039f - -#define DBL_DIG 15 -#define DBL_MANT_DIG 53 -#define DBL_MAX_10_EXP +308 -#define DBL_MAX_EXP +1024 -#define DBL_MIN_10_EXP -307 -#define DBL_MIN_EXP -1021 -#define DBL_RADIX 2 -#define DBL_MAX 0x1.fffffffffffffp1023 -#define DBL_MIN 0x1.0p-1022 -#define DBL_EPSILON 0x1.0p-52 - -#define M_E 0x1.5bf0a8b145769p+1 -#define M_LOG2E 0x1.71547652b82fep+0 -#define M_LOG10E 0x1.bcb7b1526e50ep-2 -#define M_LN2 0x1.62e42fefa39efp-1 -#define M_LN10 0x1.26bb1bbb55516p+1 -#define M_PI 0x1.921fb54442d18p+1 -#define M_PI_2 0x1.921fb54442d18p+0 -#define M_PI_4 0x1.921fb54442d18p-1 -#define M_1_PI 0x1.45f306dc9c883p-2 -#define M_2_PI 0x1.45f306dc9c883p-1 -#define M_2_SQRTPI 0x1.20dd750429b6dp+0 -#define M_SQRT2 0x1.6a09e667f3bcdp+0 -#define M_SQRT1_2 0x1.6a09e667f3bcdp-1 - -#ifdef cl_khr_fp16 - -#define HALF_DIG 3 -#define HALF_MANT_DIG 11 -#define HALF_MAX_10_EXP +4 -#define HALF_MAX_EXP +16 -#define HALF_MIN_10_EXP -4 -#define HALF_MIN_EXP -13 -#define HALF_RADIX 2 -#define HALF_MAX ((0x1.ffcp15h)) -#define HALF_MIN ((0x1.0p-14h)) -#define HALF_EPSILON ((0x1.0p-10h)) - -#define M_E_H 2.71828182845904523536028747135266250h -#define M_LOG2E_H 1.44269504088896340735992468100189214h -#define M_LOG10E_H 0.434294481903251827651128918916605082h -#define M_LN2_H 0.693147180559945309417232121458176568h -#define M_LN10_H 2.30258509299404568401799145468436421h -#define M_PI_H 3.14159265358979323846264338327950288h -#define M_PI_2_H 1.57079632679489661923132169163975144h -#define M_PI_4_H 0.785398163397448309615660845819875721h -#define M_1_PI_H 0.318309886183790671537767526745028724h -#define M_2_PI_H 0.636619772367581343075535053490057448h -#define M_2_SQRTPI_H 1.12837916709551257389615890312154517h -#define M_SQRT2_H 1.41421356237309504880168872420969808h -#define M_SQRT1_2_H 0.707106781186547524400844362104849039h - -#endif //cl_khr_fp16 - -#define CHAR_BIT 8 -#define SCHAR_MAX 127 -#define SCHAR_MIN (-128) -#define UCHAR_MAX 255 -#define CHAR_MAX SCHAR_MAX -#define CHAR_MIN SCHAR_MIN -#define USHRT_MAX 65535 -#define SHRT_MAX 32767 -#define SHRT_MIN (-32768) -#define UINT_MAX 0xffffffff -#define INT_MAX 2147483647 -#define INT_MIN (-2147483647-1) -#define ULONG_MAX 0xffffffffffffffffUL -#define LONG_MAX 0x7fffffffffffffffL -#define LONG_MIN (-0x7fffffffffffffffL-1) // OpenCL v1.1/1.2/2.0 s6.2.3 - Explicit conversions @@ -9598,8 +9347,6 @@ long8 __ovld __cnfn clamp(long8 x, long8 minval, long8 maxval); ulong8 
__ovld __cnfn clamp(ulong8 x, ulong8 minval, ulong8 maxval); long16 __ovld __cnfn clamp(long16 x, long16 minval, long16 maxval); ulong16 __ovld __cnfn clamp(ulong16 x, ulong16 minval, ulong16 maxval); -char __ovld __cnfn clamp(char x, char minval, char maxval); -uchar __ovld __cnfn clamp(uchar x, uchar minval, uchar maxval); char2 __ovld __cnfn clamp(char2 x, char minval, char maxval); uchar2 __ovld __cnfn clamp(uchar2 x, uchar minval, uchar maxval); char3 __ovld __cnfn clamp(char3 x, char minval, char maxval); @@ -9610,8 +9357,6 @@ char8 __ovld __cnfn clamp(char8 x, char minval, char maxval); uchar8 __ovld __cnfn clamp(uchar8 x, uchar minval, uchar maxval); char16 __ovld __cnfn clamp(char16 x, char minval, char maxval); uchar16 __ovld __cnfn clamp(uchar16 x, uchar minval, uchar maxval); -short __ovld __cnfn clamp(short x, short minval, short maxval); -ushort __ovld __cnfn clamp(ushort x, ushort minval, ushort maxval); short2 __ovld __cnfn clamp(short2 x, short minval, short maxval); ushort2 __ovld __cnfn clamp(ushort2 x, ushort minval, ushort maxval); short3 __ovld __cnfn clamp(short3 x, short minval, short maxval); @@ -9622,8 +9367,6 @@ short8 __ovld __cnfn clamp(short8 x, short minval, short maxval); ushort8 __ovld __cnfn clamp(ushort8 x, ushort minval, ushort maxval); short16 __ovld __cnfn clamp(short16 x, short minval, short maxval); ushort16 __ovld __cnfn clamp(ushort16 x, ushort minval, ushort maxval); -int __ovld __cnfn clamp(int x, int minval, int maxval); -uint __ovld __cnfn clamp(uint x, uint minval, uint maxval); int2 __ovld __cnfn clamp(int2 x, int minval, int maxval); uint2 __ovld __cnfn clamp(uint2 x, uint minval, uint maxval); int3 __ovld __cnfn clamp(int3 x, int minval, int maxval); @@ -9634,8 +9377,6 @@ int8 __ovld __cnfn clamp(int8 x, int minval, int maxval); uint8 __ovld __cnfn clamp(uint8 x, uint minval, uint maxval); int16 __ovld __cnfn clamp(int16 x, int minval, int maxval); uint16 __ovld __cnfn clamp(uint16 x, uint minval, uint maxval); -long __ovld __cnfn clamp(long x, long minval, long maxval); -ulong __ovld __cnfn clamp(ulong x, ulong minval, ulong maxval); long2 __ovld __cnfn clamp(long2 x, long minval, long maxval); ulong2 __ovld __cnfn clamp(ulong2 x, ulong minval, ulong maxval); long3 __ovld __cnfn clamp(long3 x, long minval, long maxval); @@ -9911,8 +9652,6 @@ long8 __ovld __cnfn max(long8 x, long8 y); ulong8 __ovld __cnfn max(ulong8 x, ulong8 y); long16 __ovld __cnfn max(long16 x, long16 y); ulong16 __ovld __cnfn max(ulong16 x, ulong16 y); -char __ovld __cnfn max(char x, char y); -uchar __ovld __cnfn max(uchar x, uchar y); char2 __ovld __cnfn max(char2 x, char y); uchar2 __ovld __cnfn max(uchar2 x, uchar y); char3 __ovld __cnfn max(char3 x, char y); @@ -9923,8 +9662,6 @@ char8 __ovld __cnfn max(char8 x, char y); uchar8 __ovld __cnfn max(uchar8 x, uchar y); char16 __ovld __cnfn max(char16 x, char y); uchar16 __ovld __cnfn max(uchar16 x, uchar y); -short __ovld __cnfn max(short x, short y); -ushort __ovld __cnfn max(ushort x, ushort y); short2 __ovld __cnfn max(short2 x, short y); ushort2 __ovld __cnfn max(ushort2 x, ushort y); short3 __ovld __cnfn max(short3 x, short y); @@ -9935,8 +9672,6 @@ short8 __ovld __cnfn max(short8 x, short y); ushort8 __ovld __cnfn max(ushort8 x, ushort y); short16 __ovld __cnfn max(short16 x, short y); ushort16 __ovld __cnfn max(ushort16 x, ushort y); -int __ovld __cnfn max(int x, int y); -uint __ovld __cnfn max(uint x, uint y); int2 __ovld __cnfn max(int2 x, int y); uint2 __ovld __cnfn max(uint2 x, uint y); int3 __ovld __cnfn 
max(int3 x, int y); @@ -9947,8 +9682,6 @@ int8 __ovld __cnfn max(int8 x, int y); uint8 __ovld __cnfn max(uint8 x, uint y); int16 __ovld __cnfn max(int16 x, int y); uint16 __ovld __cnfn max(uint16 x, uint y); -long __ovld __cnfn max(long x, long y); -ulong __ovld __cnfn max(ulong x, ulong y); long2 __ovld __cnfn max(long2 x, long y); ulong2 __ovld __cnfn max(ulong2 x, ulong y); long3 __ovld __cnfn max(long3 x, long y); @@ -10011,8 +9744,6 @@ long8 __ovld __cnfn min(long8 x, long8 y); ulong8 __ovld __cnfn min(ulong8 x, ulong8 y); long16 __ovld __cnfn min(long16 x, long16 y); ulong16 __ovld __cnfn min(ulong16 x, ulong16 y); -char __ovld __cnfn min(char x, char y); -uchar __ovld __cnfn min(uchar x, uchar y); char2 __ovld __cnfn min(char2 x, char y); uchar2 __ovld __cnfn min(uchar2 x, uchar y); char3 __ovld __cnfn min(char3 x, char y); @@ -10023,8 +9754,6 @@ char8 __ovld __cnfn min(char8 x, char y); uchar8 __ovld __cnfn min(uchar8 x, uchar y); char16 __ovld __cnfn min(char16 x, char y); uchar16 __ovld __cnfn min(uchar16 x, uchar y); -short __ovld __cnfn min(short x, short y); -ushort __ovld __cnfn min(ushort x, ushort y); short2 __ovld __cnfn min(short2 x, short y); ushort2 __ovld __cnfn min(ushort2 x, ushort y); short3 __ovld __cnfn min(short3 x, short y); @@ -10035,8 +9764,6 @@ short8 __ovld __cnfn min(short8 x, short y); ushort8 __ovld __cnfn min(ushort8 x, ushort y); short16 __ovld __cnfn min(short16 x, short y); ushort16 __ovld __cnfn min(ushort16 x, ushort y); -int __ovld __cnfn min(int x, int y); -uint __ovld __cnfn min(uint x, uint y); int2 __ovld __cnfn min(int2 x, int y); uint2 __ovld __cnfn min(uint2 x, uint y); int3 __ovld __cnfn min(int3 x, int y); @@ -10047,8 +9774,6 @@ int8 __ovld __cnfn min(int8 x, int y); uint8 __ovld __cnfn min(uint8 x, uint y); int16 __ovld __cnfn min(int16 x, int y); uint16 __ovld __cnfn min(uint16 x, uint y); -long __ovld __cnfn min(long x, long y); -ulong __ovld __cnfn min(ulong x, ulong y); long2 __ovld __cnfn min(long2 x, long y); ulong2 __ovld __cnfn min(ulong2 x, ulong y); long3 __ovld __cnfn min(long3 x, long y); @@ -10627,7 +10352,6 @@ half3 __ovld __cnfn step(half3 edge, half3 x); half4 __ovld __cnfn step(half4 edge, half4 x); half8 __ovld __cnfn step(half8 edge, half8 x); half16 __ovld __cnfn step(half16 edge, half16 x); -half __ovld __cnfn step(half edge, half x); half2 __ovld __cnfn step(half edge, half2 x); half3 __ovld __cnfn step(half edge, half3 x); half4 __ovld __cnfn step(half edge, half4 x); @@ -10679,7 +10403,6 @@ half3 __ovld __cnfn smoothstep(half3 edge0, half3 edge1, half3 x); half4 __ovld __cnfn smoothstep(half4 edge0, half4 edge1, half4 x); half8 __ovld __cnfn smoothstep(half8 edge0, half8 edge1, half8 x); half16 __ovld __cnfn smoothstep(half16 edge0, half16 edge1, half16 x); -half __ovld __cnfn smoothstep(half edge0, half edge1, half x); half2 __ovld __cnfn smoothstep(half edge0, half edge1, half2 x); half3 __ovld __cnfn smoothstep(half edge0, half edge1, half3 x); half4 __ovld __cnfn smoothstep(half edge0, half edge1, half4 x); @@ -12777,30 +12500,6 @@ void __ovld vstorea_half16_rtn(double16 data,size_t offset, __private half *p); // OpenCL v1.1 s6.11.8, v1.2 s6.12.8, v2.0 s6.13.8 - Synchronization Functions -// Flag type and values for barrier, mem_fence, read_mem_fence, write_mem_fence -typedef uint cl_mem_fence_flags; - -/** - * Queue a memory fence to ensure correct - * ordering of memory operations to local memory - */ -#define CLK_LOCAL_MEM_FENCE 0x01 - -/** - * Queue a memory fence to ensure correct - * ordering of memory 
operations to global memory - */ -#define CLK_GLOBAL_MEM_FENCE 0x02 - -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -/** - * Queue a memory fence to ensure correct ordering of memory - * operations between work-items of a work-group to - * image memory. - */ -#define CLK_IMAGE_MEM_FENCE 0x04 -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 - /** * All work-items in a work-group executing the kernel * on a processor must execute this function before any @@ -12834,17 +12533,6 @@ typedef uint cl_mem_fence_flags; void __ovld __conv barrier(cl_mem_fence_flags flags); #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 - -typedef enum memory_scope { - memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM, - memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP, - memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE, - memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES, -#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups) - memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP -#endif -} memory_scope; - void __ovld __conv work_group_barrier(cl_mem_fence_flags flags, memory_scope scope); void __ovld __conv work_group_barrier(cl_mem_fence_flags flags); #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 @@ -13341,6 +13029,10 @@ int __ovld atomic_add(volatile __global int *p, int val); unsigned int __ovld atomic_add(volatile __global unsigned int *p, unsigned int val); int __ovld atomic_add(volatile __local int *p, int val); unsigned int __ovld atomic_add(volatile __local unsigned int *p, unsigned int val); +#ifdef __OPENCL_CPP_VERSION__ +int __ovld atomic_add(volatile int *p, int val); +unsigned int __ovld atomic_add(volatile unsigned int *p, unsigned int val); +#endif #if defined(cl_khr_global_int32_base_atomics) int __ovld atom_add(volatile __global int *p, int val); @@ -13367,6 +13059,10 @@ int __ovld atomic_sub(volatile __global int *p, int val); unsigned int __ovld atomic_sub(volatile __global unsigned int *p, unsigned int val); int __ovld atomic_sub(volatile __local int *p, int val); unsigned int __ovld atomic_sub(volatile __local unsigned int *p, unsigned int val); +#ifdef __OPENCL_CPP_VERSION__ +int __ovld atomic_sub(volatile int *p, int val); +unsigned int __ovld atomic_sub(volatile unsigned int *p, unsigned int val); +#endif #if defined(cl_khr_global_int32_base_atomics) int __ovld atom_sub(volatile __global int *p, int val); @@ -13395,6 +13091,11 @@ int __ovld atomic_xchg(volatile __local int *p, int val); unsigned int __ovld atomic_xchg(volatile __local unsigned int *p, unsigned int val); float __ovld atomic_xchg(volatile __global float *p, float val); float __ovld atomic_xchg(volatile __local float *p, float val); +#ifdef __OPENCL_CPP_VERSION__ +int __ovld atomic_xchg(volatile int *p, int val); +unsigned int __ovld atomic_xchg(volatile unsigned int *p, unsigned int val); +float __ovld atomic_xchg(volatile float *p, float val); +#endif #if defined(cl_khr_global_int32_base_atomics) int __ovld atom_xchg(volatile __global int *p, int val); @@ -13422,6 +13123,10 @@ int __ovld atomic_inc(volatile __global int *p); unsigned int __ovld atomic_inc(volatile __global unsigned int *p); int __ovld atomic_inc(volatile __local int *p); unsigned int __ovld atomic_inc(volatile __local unsigned int *p); +#ifdef __OPENCL_CPP_VERSION__ +int __ovld atomic_inc(volatile int *p); +unsigned int __ovld atomic_inc(volatile unsigned int *p); +#endif #if defined(cl_khr_global_int32_base_atomics) int __ovld atom_inc(volatile __global int *p); @@ -13449,6 +13154,10 @@ int __ovld 
atomic_dec(volatile __global int *p); unsigned int __ovld atomic_dec(volatile __global unsigned int *p); int __ovld atomic_dec(volatile __local int *p); unsigned int __ovld atomic_dec(volatile __local unsigned int *p); +#ifdef __OPENCL_CPP_VERSION__ +int __ovld atomic_dec(volatile int *p); +unsigned int __ovld atomic_dec(volatile unsigned int *p); +#endif #if defined(cl_khr_global_int32_base_atomics) int __ovld atom_dec(volatile __global int *p); @@ -13477,6 +13186,10 @@ int __ovld atomic_cmpxchg(volatile __global int *p, int cmp, int val); unsigned int __ovld atomic_cmpxchg(volatile __global unsigned int *p, unsigned int cmp, unsigned int val); int __ovld atomic_cmpxchg(volatile __local int *p, int cmp, int val); unsigned int __ovld atomic_cmpxchg(volatile __local unsigned int *p, unsigned int cmp, unsigned int val); +#ifdef __OPENCL_CPP_VERSION__ +int __ovld atomic_cmpxchg(volatile int *p, int cmp, int val); +unsigned int __ovld atomic_cmpxchg(volatile unsigned int *p, unsigned int cmp, unsigned int val); +#endif #if defined(cl_khr_global_int32_base_atomics) int __ovld atom_cmpxchg(volatile __global int *p, int cmp, int val); @@ -13505,6 +13218,10 @@ int __ovld atomic_min(volatile __global int *p, int val); unsigned int __ovld atomic_min(volatile __global unsigned int *p, unsigned int val); int __ovld atomic_min(volatile __local int *p, int val); unsigned int __ovld atomic_min(volatile __local unsigned int *p, unsigned int val); +#ifdef __OPENCL_CPP_VERSION__ +int __ovld atomic_min(volatile int *p, int val); +unsigned int __ovld atomic_min(volatile unsigned int *p, unsigned int val); +#endif #if defined(cl_khr_global_int32_extended_atomics) int __ovld atom_min(volatile __global int *p, int val); @@ -13533,6 +13250,10 @@ int __ovld atomic_max(volatile __global int *p, int val); unsigned int __ovld atomic_max(volatile __global unsigned int *p, unsigned int val); int __ovld atomic_max(volatile __local int *p, int val); unsigned int __ovld atomic_max(volatile __local unsigned int *p, unsigned int val); +#ifdef __OPENCL_CPP_VERSION__ +int __ovld atomic_max(volatile int *p, int val); +unsigned int __ovld atomic_max(volatile unsigned int *p, unsigned int val); +#endif #if defined(cl_khr_global_int32_extended_atomics) int __ovld atom_max(volatile __global int *p, int val); @@ -13560,6 +13281,10 @@ int __ovld atomic_and(volatile __global int *p, int val); unsigned int __ovld atomic_and(volatile __global unsigned int *p, unsigned int val); int __ovld atomic_and(volatile __local int *p, int val); unsigned int __ovld atomic_and(volatile __local unsigned int *p, unsigned int val); +#ifdef __OPENCL_CPP_VERSION__ +int __ovld atomic_and(volatile int *p, int val); +unsigned int __ovld atomic_and(volatile unsigned int *p, unsigned int val); +#endif #if defined(cl_khr_global_int32_extended_atomics) int __ovld atom_and(volatile __global int *p, int val); @@ -13587,6 +13312,10 @@ int __ovld atomic_or(volatile __global int *p, int val); unsigned int __ovld atomic_or(volatile __global unsigned int *p, unsigned int val); int __ovld atomic_or(volatile __local int *p, int val); unsigned int __ovld atomic_or(volatile __local unsigned int *p, unsigned int val); +#ifdef __OPENCL_CPP_VERSION__ +int __ovld atomic_or(volatile int *p, int val); +unsigned int __ovld atomic_or(volatile unsigned int *p, unsigned int val); +#endif #if defined(cl_khr_global_int32_extended_atomics) int __ovld atom_or(volatile __global int *p, int val); @@ -13614,6 +13343,10 @@ int __ovld atomic_xor(volatile __global int *p, int val); unsigned 
int __ovld atomic_xor(volatile __global unsigned int *p, unsigned int val); int __ovld atomic_xor(volatile __local int *p, int val); unsigned int __ovld atomic_xor(volatile __local unsigned int *p, unsigned int val); +#ifdef __OPENCL_CPP_VERSION__ +int __ovld atomic_xor(volatile int *p, int val); +unsigned int __ovld atomic_xor(volatile unsigned int *p, unsigned int val); +#endif #if defined(cl_khr_global_int32_extended_atomics) int __ovld atom_xor(volatile __global int *p, int val); @@ -13639,20 +13372,6 @@ unsigned long __ovld atom_xor(volatile __local unsigned long *p, unsigned long v // OpenCL v2.0 s6.13.11 - Atomics Functions #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -#ifndef ATOMIC_VAR_INIT -#define ATOMIC_VAR_INIT(x) (x) -#endif //ATOMIC_VAR_INIT -#define ATOMIC_FLAG_INIT 0 - -// enum values aligned with what clang uses in EmitAtomicExpr() -typedef enum memory_order -{ - memory_order_relaxed = __ATOMIC_RELAXED, - memory_order_acquire = __ATOMIC_ACQUIRE, - memory_order_release = __ATOMIC_RELEASE, - memory_order_acq_rel = __ATOMIC_ACQ_REL, - memory_order_seq_cst = __ATOMIC_SEQ_CST -} memory_order; // double atomics support requires extensions cl_khr_int64_base_atomics and cl_khr_int64_extended_atomics #if defined(cl_khr_int64_base_atomics) && defined(cl_khr_int64_extended_atomics) @@ -14470,33 +14189,11 @@ half16 __ovld __cnfn shuffle2(half16 x, half16 y, ushort16 mask); #if __OPENCL_C_VERSION__ >= CL_VERSION_1_2 // OpenCL v1.2 s6.12.13, v2.0 s6.13.13 - printf -int printf(__constant const char* st, ...); +int printf(__constant const char* st, ...) __attribute__((format(printf, 1, 2))); #endif // OpenCL v1.1 s6.11.3, v1.2 s6.12.14, v2.0 s6.13.14 - Image Read and Write Functions -// These values need to match the runtime equivalent -// -// Addressing Mode. -// -#define CLK_ADDRESS_NONE 0 -#define CLK_ADDRESS_CLAMP_TO_EDGE 2 -#define CLK_ADDRESS_CLAMP 4 -#define CLK_ADDRESS_REPEAT 6 -#define CLK_ADDRESS_MIRRORED_REPEAT 8 - -// -// Coordination Normalization -// -#define CLK_NORMALIZED_COORDS_FALSE 0 -#define CLK_NORMALIZED_COORDS_TRUE 1 - -// -// Filtering Mode. 
-// -#define CLK_FILTER_NEAREST 0x10 -#define CLK_FILTER_LINEAR 0x20 - #ifdef cl_khr_gl_msaa_sharing #pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable #endif //cl_khr_gl_msaa_sharing @@ -14712,30 +14409,6 @@ float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY); uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY); -float4 __purefn __ovld read_imagef(read_only image1d_t image, sampler_t sampler, float coord, float lod); -int4 __purefn __ovld read_imagei(read_only image1d_t image, sampler_t sampler, float coord, float lod); -uint4 __purefn __ovld read_imageui(read_only image1d_t image, sampler_t sampler, float coord, float lod); - -float4 __purefn __ovld read_imagef(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod); -int4 __purefn __ovld read_imagei(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod); -uint4 __purefn __ovld read_imageui(read_only image1d_array_t image_array, sampler_t sampler, float2 coord, float lod); - -float4 __purefn __ovld read_imagef(read_only image2d_t image, sampler_t sampler, float2 coord, float lod); -int4 __purefn __ovld read_imagei(read_only image2d_t image, sampler_t sampler, float2 coord, float lod); -uint4 __purefn __ovld read_imageui(read_only image2d_t image, sampler_t sampler, float2 coord, float lod); - -float __purefn __ovld read_imagef(read_only image2d_depth_t image, sampler_t sampler, float2 coord, float lod); - -float4 __purefn __ovld read_imagef(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod); -int4 __purefn __ovld read_imagei(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod); -uint4 __purefn __ovld read_imageui(read_only image2d_array_t image_array, sampler_t sampler, float4 coord, float lod); - -float __purefn __ovld read_imagef(read_only image2d_array_depth_t image, sampler_t sampler, float4 coord, float lod); - -float4 __purefn __ovld read_imagef(read_only image3d_t image, sampler_t sampler, float4 coord, float lod); -int4 __purefn __ovld read_imagei(read_only image3d_t image, sampler_t sampler, float4 coord, float lod); -uint4 __purefn __ovld read_imageui(read_only image3d_t image, sampler_t sampler, float4 coord, float lod); - #endif //cl_khr_mipmap_image #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 @@ -14895,29 +14568,6 @@ float4 __purefn __ovld read_imagef(read_write image3d_t image, sampler_t sampler int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY); uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float4 gradientX, float4 gradientY); -float4 __purefn __ovld read_imagef(read_write image1d_t image, sampler_t sampler, float coord, float lod); -int4 __purefn __ovld read_imagei(read_write image1d_t image, sampler_t sampler, float coord, float lod); -uint4 __purefn __ovld read_imageui(read_write image1d_t image, sampler_t sampler, float coord, float lod); - -float4 __purefn __ovld read_imagef(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod); -int4 __purefn __ovld read_imagei(read_write image1d_array_t image_array, sampler_t sampler, float2 coord, float lod); -uint4 __purefn __ovld read_imageui(read_write 
image1d_array_t image_array, sampler_t sampler, float2 coord, float lod); - -float4 __purefn __ovld read_imagef(read_write image2d_t image, sampler_t sampler, float2 coord, float lod); -int4 __purefn __ovld read_imagei(read_write image2d_t image, sampler_t sampler, float2 coord, float lod); -uint4 __purefn __ovld read_imageui(read_write image2d_t image, sampler_t sampler, float2 coord, float lod); - -float __purefn __ovld read_imagef(read_write image2d_depth_t image, sampler_t sampler, float2 coord, float lod); - -float4 __purefn __ovld read_imagef(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod); -int4 __purefn __ovld read_imagei(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod); -uint4 __purefn __ovld read_imageui(read_write image2d_array_t image_array, sampler_t sampler, float4 coord, float lod); - -float __purefn __ovld read_imagef(read_write image2d_array_depth_t image, sampler_t sampler, float4 coord, float lod); - -float4 __purefn __ovld read_imagef(read_write image3d_t image, sampler_t sampler, float4 coord, float lod); -int4 __purefn __ovld read_imagei(read_write image3d_t image, sampler_t sampler, float4 coord, float lod); -uint4 __purefn __ovld read_imageui(read_write image3d_t image, sampler_t sampler, float4 coord, float lod); #endif //cl_khr_mipmap_image #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 @@ -15332,26 +14982,6 @@ int __ovld get_image_num_mip_levels(read_write image2d_depth_t image); * CLK_FLOAT */ -// -// Channel Datatype. -// -#define CLK_SNORM_INT8 0x10D0 -#define CLK_SNORM_INT16 0x10D1 -#define CLK_UNORM_INT8 0x10D2 -#define CLK_UNORM_INT16 0x10D3 -#define CLK_UNORM_SHORT_565 0x10D4 -#define CLK_UNORM_SHORT_555 0x10D5 -#define CLK_UNORM_INT_101010 0x10D6 -#define CLK_SIGNED_INT8 0x10D7 -#define CLK_SIGNED_INT16 0x10D8 -#define CLK_SIGNED_INT32 0x10D9 -#define CLK_UNSIGNED_INT8 0x10DA -#define CLK_UNSIGNED_INT16 0x10DB -#define CLK_UNSIGNED_INT32 0x10DC -#define CLK_HALF_FLOAT 0x10DD -#define CLK_FLOAT 0x10DE -#define CLK_UNORM_INT24 0x10DF - int __ovld __cnfn get_image_channel_data_type(read_only image1d_t image); int __ovld __cnfn get_image_channel_data_type(read_only image1d_buffer_t image); int __ovld __cnfn get_image_channel_data_type(read_only image2d_t image); @@ -15423,30 +15053,6 @@ int __ovld __cnfn get_image_channel_data_type(read_write image2d_array_msaa_dept * CLK_INTENSITY * CLK_LUMINANCE */ -// Channel order, numbering must be aligned with cl_channel_order in cl.h -// -#define CLK_R 0x10B0 -#define CLK_A 0x10B1 -#define CLK_RG 0x10B2 -#define CLK_RA 0x10B3 -#define CLK_RGB 0x10B4 -#define CLK_RGBA 0x10B5 -#define CLK_BGRA 0x10B6 -#define CLK_ARGB 0x10B7 -#define CLK_INTENSITY 0x10B8 -#define CLK_LUMINANCE 0x10B9 -#define CLK_Rx 0x10BA -#define CLK_RGx 0x10BB -#define CLK_RGBx 0x10BC -#define CLK_DEPTH 0x10BD -#define CLK_DEPTH_STENCIL 0x10BE -#if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -#define CLK_sRGB 0x10BF -#define CLK_sRGBx 0x10C0 -#define CLK_sRGBA 0x10C1 -#define CLK_sBGRA 0x10C2 -#define CLK_ABGR 0x10C3 -#endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 int __ovld __cnfn get_image_channel_order(read_only image1d_t image); int __ovld __cnfn get_image_channel_order(read_only image1d_buffer_t image); @@ -15605,20 +15211,17 @@ size_t __ovld __cnfn get_image_array_size(read_write image2d_array_msaa_depth_t #if defined(cl_khr_gl_msaa_sharing) int __ovld get_image_num_samples(read_only image2d_msaa_t image); int __ovld get_image_num_samples(read_only image2d_msaa_depth_t image); 
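(Editorial aside, not part of the upstream patch: the get_image_num_samples
overloads being de-duplicated just below are the queries a kernel uses to
discover how many samples a multisample image carries.  A minimal sketch of
typical usage, assuming a device that exposes cl_khr_gl_msaa_sharing; the
kernel name and output layout are illustrative only.)

    // Hypothetical OpenCL C kernel: average every sample of one pixel, using
    // get_image_num_samples() and the sample-indexed read_imagef() overload
    // declared elsewhere in this header.
    #pragma OPENCL EXTENSION cl_khr_gl_msaa_sharing : enable

    __kernel void resolve_msaa(read_only image2d_msaa_t img,
                               __global float4 *out) {
        int2 pos = (int2)(get_global_id(0), get_global_id(1));
        int nsamples = get_image_num_samples(img);
        float4 acc = (float4)(0.0f);
        for (int s = 0; s < nsamples; ++s)
            acc += read_imagef(img, pos, s);   // one read per sample
        out[pos.y * get_global_size(0) + pos.x] = acc / (float)nsamples;
    }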
-int __ovld get_image_num_samples(read_only image2d_array_msaa_depth_t image); int __ovld get_image_num_samples(read_only image2d_array_msaa_t image); int __ovld get_image_num_samples(read_only image2d_array_msaa_depth_t image); int __ovld get_image_num_samples(write_only image2d_msaa_t image); int __ovld get_image_num_samples(write_only image2d_msaa_depth_t image); -int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image); int __ovld get_image_num_samples(write_only image2d_array_msaa_t image); int __ovld get_image_num_samples(write_only image2d_array_msaa_depth_t image); #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 int __ovld get_image_num_samples(read_write image2d_msaa_t image); int __ovld get_image_num_samples(read_write image2d_msaa_depth_t image); -int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image); int __ovld get_image_num_samples(read_write image2d_array_msaa_t image); int __ovld get_image_num_samples(read_write image2d_array_msaa_depth_t image); #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 @@ -15728,7 +15331,6 @@ double __ovld __conv work_group_scan_inclusive_max(double x); // OpenCL v2.0 s6.13.16 - Pipe Functions #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -#define CLK_NULL_RESERVE_ID (__builtin_astype(((void*)(__SIZE_MAX__)), reserve_id_t)) bool __ovld is_valid_reserve_id(reserve_id_t reserve_id); #endif //__OPENCL_C_VERSION__ >= CL_VERSION_2_0 @@ -15736,44 +15338,6 @@ bool __ovld is_valid_reserve_id(reserve_id_t reserve_id); // OpenCL v2.0 s6.13.17 - Enqueue Kernels #if __OPENCL_C_VERSION__ >= CL_VERSION_2_0 -#define CL_COMPLETE 0x0 -#define CL_RUNNING 0x1 -#define CL_SUBMITTED 0x2 -#define CL_QUEUED 0x3 - -#define CLK_SUCCESS 0 -#define CLK_ENQUEUE_FAILURE -101 -#define CLK_INVALID_QUEUE -102 -#define CLK_INVALID_NDRANGE -160 -#define CLK_INVALID_EVENT_WAIT_LIST -57 -#define CLK_DEVICE_QUEUE_FULL -161 -#define CLK_INVALID_ARG_SIZE -51 -#define CLK_EVENT_ALLOCATION_FAILURE -100 -#define CLK_OUT_OF_RESOURCES -5 - -#define CLK_NULL_QUEUE 0 -#define CLK_NULL_EVENT (__builtin_astype(((void*)(__SIZE_MAX__)), clk_event_t)) - -// execution model related definitions -#define CLK_ENQUEUE_FLAGS_NO_WAIT 0x0 -#define CLK_ENQUEUE_FLAGS_WAIT_KERNEL 0x1 -#define CLK_ENQUEUE_FLAGS_WAIT_WORK_GROUP 0x2 - -typedef int kernel_enqueue_flags_t; -typedef int clk_profiling_info; - -// Profiling info name (see capture_event_profiling_info) -#define CLK_PROFILING_COMMAND_EXEC_TIME 0x1 - -#define MAX_WORK_DIM 3 - -typedef struct { - unsigned int workDimension; - size_t globalWorkOffset[MAX_WORK_DIM]; - size_t globalWorkSize[MAX_WORK_DIM]; - size_t localWorkSize[MAX_WORK_DIM]; -} ndrange_t; - ndrange_t __ovld ndrange_1D(size_t); ndrange_t __ovld ndrange_1D(size_t, size_t); ndrange_t __ovld ndrange_1D(size_t, size_t, size_t); @@ -16216,138 +15780,6 @@ void __ovld __conv intel_sub_group_block_write_us8( __global ushort* p, u #ifdef cl_intel_device_side_avc_motion_estimation #pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation : begin -#define CLK_AVC_ME_MAJOR_16x16_INTEL 0x0 -#define CLK_AVC_ME_MAJOR_16x8_INTEL 0x1 -#define CLK_AVC_ME_MAJOR_8x16_INTEL 0x2 -#define CLK_AVC_ME_MAJOR_8x8_INTEL 0x3 - -#define CLK_AVC_ME_MINOR_8x8_INTEL 0x0 -#define CLK_AVC_ME_MINOR_8x4_INTEL 0x1 -#define CLK_AVC_ME_MINOR_4x8_INTEL 0x2 -#define CLK_AVC_ME_MINOR_4x4_INTEL 0x3 - -#define CLK_AVC_ME_MAJOR_FORWARD_INTEL 0x0 -#define CLK_AVC_ME_MAJOR_BACKWARD_INTEL 0x1 -#define CLK_AVC_ME_MAJOR_BIDIRECTIONAL_INTEL 0x2 - -#define CLK_AVC_ME_PARTITION_MASK_ALL_INTEL 0x0 
-#define CLK_AVC_ME_PARTITION_MASK_16x16_INTEL 0x7E -#define CLK_AVC_ME_PARTITION_MASK_16x8_INTEL 0x7D -#define CLK_AVC_ME_PARTITION_MASK_8x16_INTEL 0x7B -#define CLK_AVC_ME_PARTITION_MASK_8x8_INTEL 0x77 -#define CLK_AVC_ME_PARTITION_MASK_8x4_INTEL 0x6F -#define CLK_AVC_ME_PARTITION_MASK_4x8_INTEL 0x5F -#define CLK_AVC_ME_PARTITION_MASK_4x4_INTEL 0x3F - -#define CLK_AVC_ME_SLICE_TYPE_PRED_INTEL 0x0 -#define CLK_AVC_ME_SLICE_TYPE_BPRED_INTEL 0x1 -#define CLK_AVC_ME_SLICE_TYPE_INTRA_INTEL 0x2 - -#define CLK_AVC_ME_SEARCH_WINDOW_EXHAUSTIVE_INTEL 0x0 -#define CLK_AVC_ME_SEARCH_WINDOW_SMALL_INTEL 0x1 -#define CLK_AVC_ME_SEARCH_WINDOW_TINY_INTEL 0x2 -#define CLK_AVC_ME_SEARCH_WINDOW_EXTRA_TINY_INTEL 0x3 -#define CLK_AVC_ME_SEARCH_WINDOW_DIAMOND_INTEL 0x4 -#define CLK_AVC_ME_SEARCH_WINDOW_LARGE_DIAMOND_INTEL 0x5 -#define CLK_AVC_ME_SEARCH_WINDOW_RESERVED0_INTEL 0x6 -#define CLK_AVC_ME_SEARCH_WINDOW_RESERVED1_INTEL 0x7 -#define CLK_AVC_ME_SEARCH_WINDOW_CUSTOM_INTEL 0x8 - -#define CLK_AVC_ME_SAD_ADJUST_MODE_NONE_INTEL 0x0 -#define CLK_AVC_ME_SAD_ADJUST_MODE_HAAR_INTEL 0x2 - -#define CLK_AVC_ME_SUBPIXEL_MODE_INTEGER_INTEL 0x0 -#define CLK_AVC_ME_SUBPIXEL_MODE_HPEL_INTEL 0x1 -#define CLK_AVC_ME_SUBPIXEL_MODE_QPEL_INTEL 0x3 - -#define CLK_AVC_ME_COST_PRECISION_QPEL_INTEL 0x0 -#define CLK_AVC_ME_COST_PRECISION_HPEL_INTEL 0x1 -#define CLK_AVC_ME_COST_PRECISION_PEL_INTEL 0x2 -#define CLK_AVC_ME_COST_PRECISION_DPEL_INTEL 0x3 - -#define CLK_AVC_ME_BIDIR_WEIGHT_QUARTER_INTEL 0x10 -#define CLK_AVC_ME_BIDIR_WEIGHT_THIRD_INTEL 0x15 -#define CLK_AVC_ME_BIDIR_WEIGHT_HALF_INTEL 0x20 -#define CLK_AVC_ME_BIDIR_WEIGHT_TWO_THIRD_INTEL 0x2B -#define CLK_AVC_ME_BIDIR_WEIGHT_THREE_QUARTER_INTEL 0x30 - -#define CLK_AVC_ME_BORDER_REACHED_LEFT_INTEL 0x0 -#define CLK_AVC_ME_BORDER_REACHED_RIGHT_INTEL 0x2 -#define CLK_AVC_ME_BORDER_REACHED_TOP_INTEL 0x4 -#define CLK_AVC_ME_BORDER_REACHED_BOTTOM_INTEL 0x8 - -#define CLK_AVC_ME_INTRA_16x16_INTEL 0x0 -#define CLK_AVC_ME_INTRA_8x8_INTEL 0x1 -#define CLK_AVC_ME_INTRA_4x4_INTEL 0x2 - -#define CLK_AVC_ME_SKIP_BLOCK_PARTITION_16x16_INTEL 0x0 -#define CLK_AVC_ME_SKIP_BLOCK_PARTITION_8x8_INTEL 0x4000 - -#define CLK_AVC_ME_SKIP_BLOCK_16x16_FORWARD_ENABLE_INTEL (0x1 << 24) -#define CLK_AVC_ME_SKIP_BLOCK_16x16_BACKWARD_ENABLE_INTEL (0x2 << 24) -#define CLK_AVC_ME_SKIP_BLOCK_16x16_DUAL_ENABLE_INTEL (0x3 << 24) -#define CLK_AVC_ME_SKIP_BLOCK_8x8_FORWARD_ENABLE_INTEL (0x55 << 24) -#define CLK_AVC_ME_SKIP_BLOCK_8x8_BACKWARD_ENABLE_INTEL (0xAA << 24) -#define CLK_AVC_ME_SKIP_BLOCK_8x8_DUAL_ENABLE_INTEL (0xFF << 24) -#define CLK_AVC_ME_SKIP_BLOCK_8x8_0_FORWARD_ENABLE_INTEL (0x1 << 24) -#define CLK_AVC_ME_SKIP_BLOCK_8x8_0_BACKWARD_ENABLE_INTEL (0x2 << 24) -#define CLK_AVC_ME_SKIP_BLOCK_8x8_1_FORWARD_ENABLE_INTEL (0x1 << 26) -#define CLK_AVC_ME_SKIP_BLOCK_8x8_1_BACKWARD_ENABLE_INTEL (0x2 << 26) -#define CLK_AVC_ME_SKIP_BLOCK_8x8_2_FORWARD_ENABLE_INTEL (0x1 << 28) -#define CLK_AVC_ME_SKIP_BLOCK_8x8_2_BACKWARD_ENABLE_INTEL (0x2 << 28) -#define CLK_AVC_ME_SKIP_BLOCK_8x8_3_FORWARD_ENABLE_INTEL (0x1 << 30) -#define CLK_AVC_ME_SKIP_BLOCK_8x8_3_BACKWARD_ENABLE_INTEL (0x2 << 30) - -#define CLK_AVC_ME_BLOCK_BASED_SKIP_4x4_INTEL 0x00 -#define CLK_AVC_ME_BLOCK_BASED_SKIP_8x8_INTEL 0x80 - -#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_ALL_INTEL 0x0 -#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_16x16_INTEL 0x6 -#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_8x8_INTEL 0x5 -#define CLK_AVC_ME_INTRA_LUMA_PARTITION_MASK_4x4_INTEL 0x3 - -#define CLK_AVC_ME_INTRA_NEIGHBOR_LEFT_MASK_ENABLE_INTEL 0x60 -#define 
CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_MASK_ENABLE_INTEL 0x10 -#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_RIGHT_MASK_ENABLE_INTEL 0x8 -#define CLK_AVC_ME_INTRA_NEIGHBOR_UPPER_LEFT_MASK_ENABLE_INTEL 0x4 - -#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_INTEL 0x0 -#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 -#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DC_INTEL 0x2 -#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_LEFT_INTEL 0x3 -#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_DIAGONAL_DOWN_RIGHT_INTEL 0x4 -#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_PLANE_INTEL 0x4 -#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_RIGHT_INTEL 0x5 -#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_DOWN_INTEL 0x6 -#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_VERTICAL_LEFT_INTEL 0x7 -#define CLK_AVC_ME_LUMA_PREDICTOR_MODE_HORIZONTAL_UP_INTEL 0x8 -#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_DC_INTEL 0x0 -#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_HORIZONTAL_INTEL 0x1 -#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_VERTICAL_INTEL 0x2 -#define CLK_AVC_ME_CHROMA_PREDICTOR_MODE_PLANE_INTEL 0x3 - -#define CLK_AVC_ME_FRAME_FORWARD_INTEL 0x1 -#define CLK_AVC_ME_FRAME_BACKWARD_INTEL 0x2 -#define CLK_AVC_ME_FRAME_DUAL_INTEL 0x3 - -#define CLK_AVC_ME_INTERLACED_SCAN_TOP_FIELD_INTEL 0x0 -#define CLK_AVC_ME_INTERLACED_SCAN_BOTTOM_FIELD_INTEL 0x1 - -#define CLK_AVC_ME_INITIALIZE_INTEL 0x0 - -#define CLK_AVC_IME_PAYLOAD_INITIALIZE_INTEL 0x0 -#define CLK_AVC_REF_PAYLOAD_INITIALIZE_INTEL 0x0 -#define CLK_AVC_SIC_PAYLOAD_INITIALIZE_INTEL 0x0 - -#define CLK_AVC_IME_RESULT_INITIALIZE_INTEL 0x0 -#define CLK_AVC_REF_RESULT_INITIALIZE_INTEL 0x0 -#define CLK_AVC_SIC_RESULT_INITIALIZE_INTEL 0x0 - -#define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0 -#define CLK_AVC_IME_RESULT_SINGLE_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0 -#define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMOUT_INITIALIZE_INTEL 0x0 -#define CLK_AVC_IME_RESULT_DUAL_REFERENCE_STREAMIN_INITIALIZE_INTEL 0x0 - // MCE built-in functions uchar __ovld intel_sub_group_avc_mce_get_default_inter_base_multi_reference_penalty( @@ -17034,6 +16466,34 @@ uint8 __ovld amd_sadw(uint8 src0, uint8 src1, uint8 src2); uint16 __ovld amd_sadw(uint16 src0, uint16 src1, uint16 src2); #endif // cl_amd_media_ops2 +#if defined(cl_arm_integer_dot_product_int8) +#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : begin +uint __ovld arm_dot(uchar4 a, uchar4 b); +int __ovld arm_dot(char4 a, char4 b); +#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : end +#endif // defined(cl_arm_integer_dot_product_int8) + +#if defined(cl_arm_integer_dot_product_accumulate_int8) +#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : begin +uint __ovld arm_dot_acc(uchar4 a, uchar4 b, uint c); +int __ovld arm_dot_acc(char4 a, char4 b, int c); +#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : end +#endif // defined(cl_arm_integer_dot_product_accumulate_int8) + +#if defined(cl_arm_integer_dot_product_accumulate_int16) +#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int16 : begin +uint __ovld arm_dot_acc(ushort2 a, ushort2 b, uint c); +int __ovld arm_dot_acc(short2 a, short2 b, int c); +#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int16 : end +#endif // defined(cl_arm_integer_dot_product_accumulate_int16) + +#if defined(cl_arm_integer_dot_product_accumulate_saturate_int8) +#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_saturate_int8 : begin +uint __ovld arm_dot_acc_sat(uchar4 a, uchar4 b, uint c); +int __ovld 
arm_dot_acc_sat(char4 a, char4 b, int c); +#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_saturate_int8 : end +#endif // defined(cl_arm_integer_dot_product_accumulate_saturate_int8) + // Disable any extensions we may have enabled previously. #pragma OPENCL EXTENSION all : disable diff --git a/lib/include/openmp_wrappers/__clang_openmp_math.h b/lib/include/openmp_wrappers/__clang_openmp_math.h new file mode 100644 index 0000000000..5d7ce9a965 --- /dev/null +++ b/lib/include/openmp_wrappers/__clang_openmp_math.h @@ -0,0 +1,35 @@ +/*===---- __clang_openmp_math.h - OpenMP target math support ---------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#if defined(__NVPTX__) && defined(_OPENMP) +/// TODO: +/// We are currently reusing the functionality of the Clang-CUDA code path +/// as an alternative to the host declarations provided by math.h and cmath. +/// This is suboptimal. +/// +/// We should instead declare the device functions in a similar way, e.g., +/// through OpenMP 5.0 variants, and afterwards populate the module with the +/// host declarations by unconditionally including the host math.h or cmath, +/// respectively. This is actually what the Clang-CUDA code path does, using +/// __device__ instead of variants to avoid redeclarations and get the desired +/// overload resolution. + +#define __CUDA__ + +#if defined(__cplusplus) + #include <__clang_cuda_cmath.h> +#endif + +#undef __CUDA__ + +/// Magic macro for stopping the math.h/cmath host header from being included. +#define __CLANG_NO_HOST_MATH__ + +#endif + diff --git a/lib/include/openmp_wrappers/__clang_openmp_math_declares.h b/lib/include/openmp_wrappers/__clang_openmp_math_declares.h new file mode 100644 index 0000000000..a422c98bf9 --- /dev/null +++ b/lib/include/openmp_wrappers/__clang_openmp_math_declares.h @@ -0,0 +1,33 @@ +/*===---- __clang_openmp_math_declares.h - OpenMP math declares ------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#ifndef __CLANG_OPENMP_MATH_DECLARES_H__ +#define __CLANG_OPENMP_MATH_DECLARES_H__ + +#ifndef _OPENMP +#error "This file is for OpenMP compilation only." +#endif + +#if defined(__NVPTX__) && defined(_OPENMP) + +#define __CUDA__ + +#if defined(__cplusplus) + #include <__clang_cuda_math_forward_declares.h> +#endif + +/// Include declarations for libdevice functions. +#include <__clang_cuda_libdevice_declares.h> +/// Provide definitions for these functions. +#include <__clang_cuda_device_functions.h> + +#undef __CUDA__ + +#endif +#endif diff --git a/lib/include/openmp_wrappers/cmath b/lib/include/openmp_wrappers/cmath new file mode 100644 index 0000000000..a5183a1d8d --- /dev/null +++ b/lib/include/openmp_wrappers/cmath @@ -0,0 +1,16 @@ +/*===-------------- cmath - Alternative cmath header -----------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#include <__clang_openmp_math.h> + +#ifndef __CLANG_NO_HOST_MATH__ +#include_next +#else +#undef __CLANG_NO_HOST_MATH__ +#endif diff --git a/lib/include/openmp_wrappers/math.h b/lib/include/openmp_wrappers/math.h new file mode 100644 index 0000000000..d2786ecb24 --- /dev/null +++ b/lib/include/openmp_wrappers/math.h @@ -0,0 +1,17 @@ +/*===------------- math.h - Alternative math.h header ----------------------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +#include <__clang_openmp_math.h> + +#ifndef __CLANG_NO_HOST_MATH__ +#include_next +#else +#undef __CLANG_NO_HOST_MATH__ +#endif + diff --git a/lib/include/pconfigintrin.h b/lib/include/pconfigintrin.h index fee3cad388..d2014b026f 100644 --- a/lib/include/pconfigintrin.h +++ b/lib/include/pconfigintrin.h @@ -1,22 +1,8 @@ /*===---- pconfigintrin.h - X86 platform configuration ---------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -30,6 +16,8 @@ #define __PCONFIG_KEY_PROGRAM 0x00000001 +#if __has_extension(gnu_asm) + /* Define the default attributes for the functions in this file. 
*/ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("pconfig"))) @@ -47,4 +35,6 @@ _pconfig_u32(unsigned int __leaf, __SIZE_TYPE__ __d[]) #undef __DEFAULT_FN_ATTRS +#endif /* __has_extension(gnu_asm) */ + #endif diff --git a/lib/include/pkuintrin.h b/lib/include/pkuintrin.h index 6976924d82..c62080becb 100644 --- a/lib/include/pkuintrin.h +++ b/lib/include/pkuintrin.h @@ -1,23 +1,9 @@ /*===---- pkuintrin.h - PKU intrinsics -------------------------------------=== * * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/pmmintrin.h b/lib/include/pmmintrin.h index 7e1a9eae59..c376f298cc 100644 --- a/lib/include/pmmintrin.h +++ b/lib/include/pmmintrin.h @@ -1,22 +1,8 @@ /*===---- pmmintrin.h - SSE3 intrinsics ------------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/popcntintrin.h b/lib/include/popcntintrin.h index 75ceab9e15..3129010147 100644 --- a/lib/include/popcntintrin.h +++ b/lib/include/popcntintrin.h @@ -1,22 +1,8 @@ /*===---- popcntintrin.h - POPCNT intrinsics -------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -43,22 +29,6 @@ _mm_popcnt_u32(unsigned int __A) return __builtin_popcount(__A); } -/// Counts the number of bits in the source operand having a value of 1. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the POPCNT instruction. -/// -/// \param __A -/// A signed 32-bit integer operand. -/// \returns A 32-bit integer containing the number of bits with value 1 in the -/// source operand. -static __inline__ int __DEFAULT_FN_ATTRS -_popcnt32(int __A) -{ - return __builtin_popcount(__A); -} - #ifdef __x86_64__ /// Counts the number of bits in the source operand having a value of 1. /// @@ -75,22 +45,6 @@ _mm_popcnt_u64(unsigned long long __A) { return __builtin_popcountll(__A); } - -/// Counts the number of bits in the source operand having a value of 1. -/// -/// \headerfile -/// -/// This intrinsic corresponds to the POPCNT instruction. -/// -/// \param __A -/// A signed 64-bit integer operand. -/// \returns A 64-bit integer containing the number of bits with value 1 in the -/// source operand. -static __inline__ long long __DEFAULT_FN_ATTRS -_popcnt64(long long __A) -{ - return __builtin_popcountll(__A); -} #endif /* __x86_64__ */ #undef __DEFAULT_FN_ATTRS diff --git a/lib/include/ppc_wrappers/emmintrin.h b/lib/include/ppc_wrappers/emmintrin.h new file mode 100644 index 0000000000..617ce24acd --- /dev/null +++ b/lib/include/ppc_wrappers/emmintrin.h @@ -0,0 +1,2318 @@ +/*===---- emmintrin.h - Implementation of SSE2 intrinsics on PowerPC -------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +/* Implemented from the specification included in the Intel C++ Compiler + User Guide and Reference, version 9.0. */ + +#ifndef NO_WARN_X86_INTRINSICS +/* This header file is to help porting code using Intel intrinsics + explicitly from x86_64 to powerpc64/powerpc64le. + + Since X86 SSE2 intrinsics mainly handles __m128i and __m128d type, + PowerPC VMX/VSX ISA is a good match for vector float SIMD operations. + However scalar float operations in vector (XMM) registers require + the POWER8 VSX ISA (2.07) level. There are differences for data + format and placement of float scalars in the vector register, which + require extra steps to match SSE2 scalar float semantics on POWER. + + It should be noted that there's much difference between X86_64's + MXSCR and PowerISA's FPSCR/VSCR registers. It's recommended to use + portable instead of access MXSCR directly. + + Most SSE2 scalar float intrinsic operations can be performed more + efficiently as C language float scalar operations or optimized to + use vector SIMD operations. We recommend this for new applications. +*/ +#error "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error." +#endif + +#ifndef EMMINTRIN_H_ +#define EMMINTRIN_H_ + +#include + +/* We need definitions from the SSE header files. */ +#include + +/* SSE2 */ +typedef __vector double __v2df; +typedef __vector long long __v2di; +typedef __vector unsigned long long __v2du; +typedef __vector int __v4si; +typedef __vector unsigned int __v4su; +typedef __vector short __v8hi; +typedef __vector unsigned short __v8hu; +typedef __vector signed char __v16qi; +typedef __vector unsigned char __v16qu; + +/* The Intel API is flexible enough that we must allow aliasing with other + vector types, and their scalar components. */ +typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); +typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); + +/* Unaligned version of the same types. */ +typedef long long __m128i_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1))); +typedef double __m128d_u __attribute__ ((__vector_size__ (16), __may_alias__, __aligned__ (1))); + +/* Define two value permute mask. */ +#define _MM_SHUFFLE2(x,y) (((x) << 1) | (y)) + +/* Create a vector with element 0 as F and the rest zero. */ +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set_sd (double __F) +{ + return __extension__ (__m128d){ __F, 0.0 }; +} + +/* Create a vector with both elements equal to F. */ +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set1_pd (double __F) +{ + return __extension__ (__m128d){ __F, __F }; +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set_pd1 (double __F) +{ + return _mm_set1_pd (__F); +} + +/* Create a vector with the lower value X and upper value W. */ +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set_pd (double __W, double __X) +{ + return __extension__ (__m128d){ __X, __W }; +} + +/* Create a vector with the lower value W and upper value X. 
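   (Editorial note, not from the upstream header: the argument order is the
   easy thing to trip over here.  From the definitions above, _mm_setr_pd (1.0,
   2.0) produces the vector {1.0, 2.0}, while _mm_set_pd (1.0, 2.0) produces
   {2.0, 1.0}; in both cases element [0] is the low double.)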
*/ +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_setr_pd (double __W, double __X) +{ + return __extension__ (__m128d){ __W, __X }; +} + +/* Create an undefined vector. */ +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_undefined_pd (void) +{ + __m128d __Y = __Y; + return __Y; +} + +/* Create a vector of zeros. */ +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_setzero_pd (void) +{ + return (__m128d) vec_splats (0); +} + +/* Sets the low DPFP value of A from the low value of B. */ +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_move_sd (__m128d __A, __m128d __B) +{ + __v2df result = (__v2df) __A; + result [0] = ((__v2df) __B)[0]; + return (__m128d) result; +} + +/* Load two DPFP values from P. The address must be 16-byte aligned. */ +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_load_pd (double const *__P) +{ + return ((__m128d)vec_ld(0, (__v16qu*)__P)); +} + +/* Load two DPFP values from P. The address need not be 16-byte aligned. */ +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loadu_pd (double const *__P) +{ + return (vec_vsx_ld(0, __P)); +} + +/* Create a vector with all two elements equal to *P. */ +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_load1_pd (double const *__P) +{ + return (vec_splats (*__P)); +} + +/* Create a vector with element 0 as *P and the rest zero. */ +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_load_sd (double const *__P) +{ + return _mm_set_sd (*__P); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_load_pd1 (double const *__P) +{ + return _mm_load1_pd (__P); +} + +/* Load two DPFP values in reverse order. The address must be aligned. */ +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loadr_pd (double const *__P) +{ + __v2df __tmp = _mm_load_pd (__P); + return (__m128d)vec_xxpermdi (__tmp, __tmp, 2); +} + +/* Store two DPFP values. The address must be 16-byte aligned. */ +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_store_pd (double *__P, __m128d __A) +{ + vec_st((__v16qu)__A, 0, (__v16qu*)__P); +} + +/* Store two DPFP values. The address need not be 16-byte aligned. */ +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storeu_pd (double *__P, __m128d __A) +{ + *(__m128d_u *)__P = __A; +} + +/* Stores the lower DPFP value. */ +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_store_sd (double *__P, __m128d __A) +{ + *__P = ((__v2df)__A)[0]; +} + +extern __inline double __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsd_f64 (__m128d __A) +{ + return ((__v2df)__A)[0]; +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storel_pd (double *__P, __m128d __A) +{ + _mm_store_sd (__P, __A); +} + +/* Stores the upper DPFP value. */ +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storeh_pd (double *__P, __m128d __A) +{ + *__P = ((__v2df)__A)[1]; +} +/* Store the lower DPFP value across two words. 
+ The address must be 16-byte aligned. */ +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_store1_pd (double *__P, __m128d __A) +{ + _mm_store_pd (__P, vec_splat (__A, 0)); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_store_pd1 (double *__P, __m128d __A) +{ + _mm_store1_pd (__P, __A); +} + +/* Store two DPFP values in reverse order. The address must be aligned. */ +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storer_pd (double *__P, __m128d __A) +{ + _mm_store_pd (__P, vec_xxpermdi (__A, __A, 2)); +} + +/* Intel intrinsic. */ +extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsi128_si64 (__m128i __A) +{ + return ((__v2di)__A)[0]; +} + +/* Microsoft intrinsic. */ +extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsi128_si64x (__m128i __A) +{ + return ((__v2di)__A)[0]; +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_add_pd (__m128d __A, __m128d __B) +{ + return (__m128d) ((__v2df)__A + (__v2df)__B); +} + +/* Add the lower double-precision (64-bit) floating-point element in + a and b, store the result in the lower element of dst, and copy + the upper element from a to the upper element of dst. */ +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_add_sd (__m128d __A, __m128d __B) +{ + __A[0] = __A[0] + __B[0]; + return (__A); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sub_pd (__m128d __A, __m128d __B) +{ + return (__m128d) ((__v2df)__A - (__v2df)__B); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sub_sd (__m128d __A, __m128d __B) +{ + __A[0] = __A[0] - __B[0]; + return (__A); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mul_pd (__m128d __A, __m128d __B) +{ + return (__m128d) ((__v2df)__A * (__v2df)__B); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mul_sd (__m128d __A, __m128d __B) +{ + __A[0] = __A[0] * __B[0]; + return (__A); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_div_pd (__m128d __A, __m128d __B) +{ + return (__m128d) ((__v2df)__A / (__v2df)__B); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_div_sd (__m128d __A, __m128d __B) +{ + __A[0] = __A[0] / __B[0]; + return (__A); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sqrt_pd (__m128d __A) +{ + return (vec_sqrt (__A)); +} + +/* Return pair {sqrt (B[0]), A[1]}. 
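   (Editorial note, not from the upstream header: as with the x86 scalar
   square root, only the low element is computed and the high element is taken
   from the first operand.  For example, _mm_sqrt_sd (_mm_set_pd (5.0, 9.0),
   _mm_set_pd (7.0, 4.0)) returns {2.0, 5.0}, since B[0] is 4.0 and A[1] is
   5.0.)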
*/ +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sqrt_sd (__m128d __A, __m128d __B) +{ + __v2df c; + c = vec_sqrt ((__v2df) _mm_set1_pd (__B[0])); + return (__m128d) _mm_setr_pd (c[0], __A[1]); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_min_pd (__m128d __A, __m128d __B) +{ + return (vec_min (__A, __B)); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_min_sd (__m128d __A, __m128d __B) +{ + __v2df a, b, c; + a = vec_splats (__A[0]); + b = vec_splats (__B[0]); + c = vec_min (a, b); + return (__m128d) _mm_setr_pd (c[0], __A[1]); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_max_pd (__m128d __A, __m128d __B) +{ + return (vec_max (__A, __B)); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_max_sd (__m128d __A, __m128d __B) +{ + __v2df a, b, c; + a = vec_splats (__A[0]); + b = vec_splats (__B[0]); + c = vec_max (a, b); + return (__m128d) _mm_setr_pd (c[0], __A[1]); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpeq_pd (__m128d __A, __m128d __B) +{ + return ((__m128d)vec_cmpeq ((__v2df) __A, (__v2df) __B)); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmplt_pd (__m128d __A, __m128d __B) +{ + return ((__m128d)vec_cmplt ((__v2df) __A, (__v2df) __B)); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmple_pd (__m128d __A, __m128d __B) +{ + return ((__m128d)vec_cmple ((__v2df) __A, (__v2df) __B)); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpgt_pd (__m128d __A, __m128d __B) +{ + return ((__m128d)vec_cmpgt ((__v2df) __A, (__v2df) __B)); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpge_pd (__m128d __A, __m128d __B) +{ + return ((__m128d)vec_cmpge ((__v2df) __A,(__v2df) __B)); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpneq_pd (__m128d __A, __m128d __B) +{ + __v2df temp = (__v2df) vec_cmpeq ((__v2df) __A, (__v2df)__B); + return ((__m128d)vec_nor (temp, temp)); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpnlt_pd (__m128d __A, __m128d __B) +{ + return ((__m128d)vec_cmpge ((__v2df) __A, (__v2df) __B)); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpnle_pd (__m128d __A, __m128d __B) +{ + return ((__m128d)vec_cmpgt ((__v2df) __A, (__v2df) __B)); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpngt_pd (__m128d __A, __m128d __B) +{ + return ((__m128d)vec_cmple ((__v2df) __A, (__v2df) __B)); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpnge_pd (__m128d __A, __m128d __B) +{ + return ((__m128d)vec_cmplt ((__v2df) __A, (__v2df) __B)); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpord_pd (__m128d __A, __m128d __B) +{ +#if _ARCH_PWR8 + __v2du c, d; + /* Compare against self will return false (0's) if NAN. 
*/ + c = (__v2du)vec_cmpeq (__A, __A); + d = (__v2du)vec_cmpeq (__B, __B); +#else + __v2du a, b; + __v2du c, d; + const __v2du double_exp_mask = {0x7ff0000000000000, 0x7ff0000000000000}; + a = (__v2du)vec_abs ((__v2df)__A); + b = (__v2du)vec_abs ((__v2df)__B); + c = (__v2du)vec_cmpgt (double_exp_mask, a); + d = (__v2du)vec_cmpgt (double_exp_mask, b); +#endif + /* A != NAN and B != NAN. */ + return ((__m128d)vec_and(c, d)); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpunord_pd (__m128d __A, __m128d __B) +{ +#if _ARCH_PWR8 + __v2du c, d; + /* Compare against self will return false (0's) if NAN. */ + c = (__v2du)vec_cmpeq ((__v2df)__A, (__v2df)__A); + d = (__v2du)vec_cmpeq ((__v2df)__B, (__v2df)__B); + /* A == NAN OR B == NAN converts too: + NOT(A != NAN) OR NOT(B != NAN). */ + c = vec_nor (c, c); + return ((__m128d)vec_orc(c, d)); +#else + __v2du c, d; + /* Compare against self will return false (0's) if NAN. */ + c = (__v2du)vec_cmpeq ((__v2df)__A, (__v2df)__A); + d = (__v2du)vec_cmpeq ((__v2df)__B, (__v2df)__B); + /* Convert the true ('1's) is NAN. */ + c = vec_nor (c, c); + d = vec_nor (d, d); + return ((__m128d)vec_or(c, d)); +#endif +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpeq_sd(__m128d __A, __m128d __B) +{ + __v2df a, b, c; + /* PowerISA VSX does not allow partial (for just lower double) + results. So to insure we don't generate spurious exceptions + (from the upper double values) we splat the lower double + before we do the operation. */ + a = vec_splats (__A[0]); + b = vec_splats (__B[0]); + c = (__v2df) vec_cmpeq(a, b); + /* Then we merge the lower double result with the original upper + double from __A. */ + return (__m128d) _mm_setr_pd (c[0], __A[1]); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmplt_sd (__m128d __A, __m128d __B) +{ + __v2df a, b, c; + a = vec_splats (__A[0]); + b = vec_splats (__B[0]); + c = (__v2df) vec_cmplt(a, b); + return (__m128d) _mm_setr_pd (c[0], __A[1]); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmple_sd (__m128d __A, __m128d __B) +{ + __v2df a, b, c; + a = vec_splats (__A[0]); + b = vec_splats (__B[0]); + c = (__v2df) vec_cmple(a, b); + return (__m128d) _mm_setr_pd (c[0], __A[1]); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpgt_sd (__m128d __A, __m128d __B) +{ + __v2df a, b, c; + a = vec_splats (__A[0]); + b = vec_splats (__B[0]); + c = (__v2df) vec_cmpgt(a, b); + return (__m128d) _mm_setr_pd (c[0], __A[1]); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpge_sd (__m128d __A, __m128d __B) +{ + __v2df a, b, c; + a = vec_splats (__A[0]); + b = vec_splats (__B[0]); + c = (__v2df) vec_cmpge(a, b); + return (__m128d) _mm_setr_pd (c[0], __A[1]); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpneq_sd (__m128d __A, __m128d __B) +{ + __v2df a, b, c; + a = vec_splats (__A[0]); + b = vec_splats (__B[0]); + c = (__v2df) vec_cmpeq(a, b); + c = vec_nor (c, c); + return (__m128d) _mm_setr_pd (c[0], __A[1]); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpnlt_sd (__m128d __A, __m128d __B) +{ + __v2df a, b, c; + a = vec_splats (__A[0]); + b = vec_splats (__B[0]); + /* Not less than is just greater than or 
equal. */ + c = (__v2df) vec_cmpge(a, b); + return (__m128d) _mm_setr_pd (c[0], __A[1]); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpnle_sd (__m128d __A, __m128d __B) +{ + __v2df a, b, c; + a = vec_splats (__A[0]); + b = vec_splats (__B[0]); + /* Not less than or equal is just greater than. */ + c = (__v2df) vec_cmpge(a, b); + return (__m128d) _mm_setr_pd (c[0], __A[1]); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpngt_sd (__m128d __A, __m128d __B) +{ + __v2df a, b, c; + a = vec_splats (__A[0]); + b = vec_splats (__B[0]); + /* Not greater than is just less than or equal. */ + c = (__v2df) vec_cmple(a, b); + return (__m128d) _mm_setr_pd (c[0], __A[1]); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpnge_sd (__m128d __A, __m128d __B) +{ + __v2df a, b, c; + a = vec_splats (__A[0]); + b = vec_splats (__B[0]); + /* Not greater than or equal is just less than. */ + c = (__v2df) vec_cmplt(a, b); + return (__m128d) _mm_setr_pd (c[0], __A[1]); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpord_sd (__m128d __A, __m128d __B) +{ + __v2df r; + r = (__v2df)_mm_cmpord_pd (vec_splats (__A[0]), vec_splats (__B[0])); + return (__m128d) _mm_setr_pd (r[0], ((__v2df)__A)[1]); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpunord_sd (__m128d __A, __m128d __B) +{ + __v2df r; + r = _mm_cmpunord_pd (vec_splats (__A[0]), vec_splats (__B[0])); + return (__m128d) _mm_setr_pd (r[0], __A[1]); +} + +/* FIXME + The __mm_comi??_sd and __mm_ucomi??_sd implementations below are + exactly the same because GCC for PowerPC only generates unordered + compares (scalar and vector). + Technically __mm_comieq_sp et all should be using the ordered + compare and signal for QNaNs. The __mm_ucomieq_sd et all should + be OK. 
*/ +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comieq_sd (__m128d __A, __m128d __B) +{ + return (__A[0] == __B[0]); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comilt_sd (__m128d __A, __m128d __B) +{ + return (__A[0] < __B[0]); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comile_sd (__m128d __A, __m128d __B) +{ + return (__A[0] <= __B[0]); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comigt_sd (__m128d __A, __m128d __B) +{ + return (__A[0] > __B[0]); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comige_sd (__m128d __A, __m128d __B) +{ + return (__A[0] >= __B[0]); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comineq_sd (__m128d __A, __m128d __B) +{ + return (__A[0] != __B[0]); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ucomieq_sd (__m128d __A, __m128d __B) +{ + return (__A[0] == __B[0]); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ucomilt_sd (__m128d __A, __m128d __B) +{ + return (__A[0] < __B[0]); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ucomile_sd (__m128d __A, __m128d __B) +{ + return (__A[0] <= __B[0]); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ucomigt_sd (__m128d __A, __m128d __B) +{ + return (__A[0] > __B[0]); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ucomige_sd (__m128d __A, __m128d __B) +{ + return (__A[0] >= __B[0]); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ucomineq_sd (__m128d __A, __m128d __B) +{ + return (__A[0] != __B[0]); +} + +/* Create a vector of Qi, where i is the element number. */ +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set_epi64x (long long __q1, long long __q0) +{ + return __extension__ (__m128i)(__v2di){ __q0, __q1 }; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set_epi64 (__m64 __q1, __m64 __q0) +{ + return _mm_set_epi64x ((long long)__q1, (long long)__q0); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set_epi32 (int __q3, int __q2, int __q1, int __q0) +{ + return __extension__ (__m128i)(__v4si){ __q0, __q1, __q2, __q3 }; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set_epi16 (short __q7, short __q6, short __q5, short __q4, + short __q3, short __q2, short __q1, short __q0) +{ + return __extension__ (__m128i)(__v8hi){ + __q0, __q1, __q2, __q3, __q4, __q5, __q6, __q7 }; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set_epi8 (char __q15, char __q14, char __q13, char __q12, + char __q11, char __q10, char __q09, char __q08, + char __q07, char __q06, char __q05, char __q04, + char __q03, char __q02, char __q01, char __q00) +{ + return __extension__ (__m128i)(__v16qi){ + __q00, __q01, __q02, __q03, __q04, __q05, __q06, __q07, + __q08, __q09, __q10, __q11, __q12, __q13, __q14, __q15 + }; +} + +/* Set all of the elements of the vector to A. 
*/ +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set1_epi64x (long long __A) +{ + return _mm_set_epi64x (__A, __A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set1_epi64 (__m64 __A) +{ + return _mm_set_epi64 (__A, __A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set1_epi32 (int __A) +{ + return _mm_set_epi32 (__A, __A, __A, __A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set1_epi16 (short __A) +{ + return _mm_set_epi16 (__A, __A, __A, __A, __A, __A, __A, __A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set1_epi8 (char __A) +{ + return _mm_set_epi8 (__A, __A, __A, __A, __A, __A, __A, __A, + __A, __A, __A, __A, __A, __A, __A, __A); +} + +/* Create a vector of Qi, where i is the element number. + The parameter order is reversed from the _mm_set_epi* functions. */ +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_setr_epi64 (__m64 __q0, __m64 __q1) +{ + return _mm_set_epi64 (__q1, __q0); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_setr_epi32 (int __q0, int __q1, int __q2, int __q3) +{ + return _mm_set_epi32 (__q3, __q2, __q1, __q0); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_setr_epi16 (short __q0, short __q1, short __q2, short __q3, + short __q4, short __q5, short __q6, short __q7) +{ + return _mm_set_epi16 (__q7, __q6, __q5, __q4, __q3, __q2, __q1, __q0); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_setr_epi8 (char __q00, char __q01, char __q02, char __q03, + char __q04, char __q05, char __q06, char __q07, + char __q08, char __q09, char __q10, char __q11, + char __q12, char __q13, char __q14, char __q15) +{ + return _mm_set_epi8 (__q15, __q14, __q13, __q12, __q11, __q10, __q09, __q08, + __q07, __q06, __q05, __q04, __q03, __q02, __q01, __q00); +} + +/* Create a vector with element 0 as *P and the rest zero. 
*/ +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_load_si128 (__m128i const *__P) +{ + return *__P; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loadu_si128 (__m128i_u const *__P) +{ + return (__m128i) (vec_vsx_ld(0, (signed int const *)__P)); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loadl_epi64 (__m128i_u const *__P) +{ + return _mm_set_epi64 ((__m64)0LL, *(__m64 *)__P); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_store_si128 (__m128i *__P, __m128i __B) +{ + vec_st ((__v16qu) __B, 0, (__v16qu*)__P); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storeu_si128 (__m128i_u *__P, __m128i __B) +{ + *__P = __B; +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storel_epi64 (__m128i_u *__P, __m128i __B) +{ + *(long long *)__P = ((__v2di)__B)[0]; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_movepi64_pi64 (__m128i_u __B) +{ + return (__m64) ((__v2di)__B)[0]; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_movpi64_epi64 (__m64 __A) +{ + return _mm_set_epi64 ((__m64)0LL, __A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_move_epi64 (__m128i __A) +{ + return _mm_set_epi64 ((__m64)0LL, (__m64)__A[0]); +} + +/* Create an undefined vector. */ +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_undefined_si128 (void) +{ + __m128i __Y = __Y; + return __Y; +} + +/* Create a vector of zeros. */ +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_setzero_si128 (void) +{ + return __extension__ (__m128i)(__v4si){ 0, 0, 0, 0 }; +} + +#ifdef _ARCH_PWR8 +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepi32_pd (__m128i __A) +{ + __v2di val; + /* For LE need to generate Vector Unpack Low Signed Word. + Which is generated from unpackh. */ + val = (__v2di)vec_unpackh ((__v4si)__A); + + return (__m128d)vec_ctf (val, 0); +} +#endif + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtepi32_ps (__m128i __A) +{ + return ((__m128)vec_ctf((__v4si)__A, 0)); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtpd_epi32 (__m128d __A) +{ + __v2df rounded = vec_rint (__A); + __v4si result, temp; + const __v4si vzero = + { 0, 0, 0, 0 }; + + /* VSX Vector truncate Double-Precision to integer and Convert to + Signed Integer Word format with Saturate. 
*/ + __asm__( + "xvcvdpsxws %x0,%x1" + : "=wa" (temp) + : "wa" (rounded) + : ); + +#ifdef _ARCH_PWR8 + temp = vec_mergeo (temp, temp); + result = (__v4si) vec_vpkudum ((__vector long long) temp, + (__vector long long) vzero); +#else + { + const __v16qu pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, + 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f }; + result = (__v4si) vec_perm ((__v16qu) temp, (__v16qu) vzero, pkperm); + } +#endif + return (__m128i) result; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtpd_pi32 (__m128d __A) +{ + __m128i result = _mm_cvtpd_epi32(__A); + + return (__m64) result[0]; +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtpd_ps (__m128d __A) +{ + __v4sf result; + __v4si temp; + const __v4si vzero = { 0, 0, 0, 0 }; + + __asm__( + "xvcvdpsp %x0,%x1" + : "=wa" (temp) + : "wa" (__A) + : ); + +#ifdef _ARCH_PWR8 + temp = vec_mergeo (temp, temp); + result = (__v4sf) vec_vpkudum ((__vector long long) temp, + (__vector long long) vzero); +#else + { + const __v16qu pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, + 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f }; + result = (__v4sf) vec_perm ((__v16qu) temp, (__v16qu) vzero, pkperm); + } +#endif + return ((__m128)result); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttpd_epi32 (__m128d __A) +{ + __v4si result; + __v4si temp; + const __v4si vzero = { 0, 0, 0, 0 }; + + /* VSX Vector truncate Double-Precision to integer and Convert to + Signed Integer Word format with Saturate. */ + __asm__( + "xvcvdpsxws %x0,%x1" + : "=wa" (temp) + : "wa" (__A) + : ); + +#ifdef _ARCH_PWR8 + temp = vec_mergeo (temp, temp); + result = (__v4si) vec_vpkudum ((__vector long long) temp, + (__vector long long) vzero); +#else + { + const __v16qu pkperm = {0x00, 0x01, 0x02, 0x03, 0x08, 0x09, 0x0a, 0x0b, + 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f }; + result = (__v4si) vec_perm ((__v16qu) temp, (__v16qu) vzero, pkperm); + } +#endif + + return ((__m128i) result); +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttpd_pi32 (__m128d __A) +{ + __m128i result = _mm_cvttpd_epi32 (__A); + + return (__m64) result[0]; +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsi128_si32 (__m128i __A) +{ + return ((__v4si)__A)[0]; +} + +#ifdef _ARCH_PWR8 +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtpi32_pd (__m64 __A) +{ + __v4si temp; + __v2di tmp2; + __v2df result; + + temp = (__v4si)vec_splats (__A); + tmp2 = (__v2di)vec_unpackl (temp); + result = vec_ctf ((__vector signed long long) tmp2, 0); + return (__m128d)result; +} +#endif + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtps_epi32 (__m128 __A) +{ + __v4sf rounded; + __v4si result; + + rounded = vec_rint((__v4sf) __A); + result = vec_cts (rounded, 0); + return (__m128i) result; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttps_epi32 (__m128 __A) +{ + __v4si result; + + result = vec_cts ((__v4sf) __A, 0); + return (__m128i) result; +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtps_pd (__m128 __A) +{ + /* Check if vec_doubleh is defined by . If so use that. 
*/ +#ifdef vec_doubleh + return (__m128d) vec_doubleh ((__v4sf)__A); +#else + /* Otherwise the compiler is not current and so need to generate the + equivalent code. */ + __v4sf a = (__v4sf)__A; + __v4sf temp; + __v2df result; +#ifdef __LITTLE_ENDIAN__ + /* The input float values are in elements {[0], [1]} but the convert + instruction needs them in elements {[1], [3]}, So we use two + shift left double vector word immediates to get the elements + lined up. */ + temp = __builtin_vsx_xxsldwi (a, a, 3); + temp = __builtin_vsx_xxsldwi (a, temp, 2); +#else + /* The input float values are in elements {[0], [1]} but the convert + instruction needs them in elements {[0], [2]}, So we use two + shift left double vector word immediates to get the elements + lined up. */ + temp = vec_vmrghw (a, a); +#endif + __asm__( + " xvcvspdp %x0,%x1" + : "=wa" (result) + : "wa" (temp) + : ); + return (__m128d) result; +#endif +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsd_si32 (__m128d __A) +{ + __v2df rounded = vec_rint((__v2df) __A); + int result = ((__v2df)rounded)[0]; + + return result; +} +/* Intel intrinsic. */ +extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsd_si64 (__m128d __A) +{ + __v2df rounded = vec_rint ((__v2df) __A ); + long long result = ((__v2df) rounded)[0]; + + return result; +} + +/* Microsoft intrinsic. */ +extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsd_si64x (__m128d __A) +{ + return _mm_cvtsd_si64 ((__v2df)__A); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttsd_si32 (__m128d __A) +{ + int result = ((__v2df)__A)[0]; + + return result; +} + +/* Intel intrinsic. */ +extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttsd_si64 (__m128d __A) +{ + long long result = ((__v2df)__A)[0]; + + return result; +} + +/* Microsoft intrinsic. */ +extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttsd_si64x (__m128d __A) +{ + return _mm_cvttsd_si64 (__A); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsd_ss (__m128 __A, __m128d __B) +{ + __v4sf result = (__v4sf)__A; + +#ifdef __LITTLE_ENDIAN__ + __v4sf temp_s; + /* Copy double element[0] to element [1] for conversion. */ + __v2df temp_b = vec_splat((__v2df)__B, 0); + + /* Pre-rotate __A left 3 (logically right 1) elements. */ + result = __builtin_vsx_xxsldwi (result, result, 3); + /* Convert double to single float scalar in a vector. */ + __asm__( + "xscvdpsp %x0,%x1" + : "=wa" (temp_s) + : "wa" (temp_b) + : ); + /* Shift the resulting scalar into vector element [0]. */ + result = __builtin_vsx_xxsldwi (result, temp_s, 1); +#else + result [0] = ((__v2df)__B)[0]; +#endif + return (__m128) result; +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsi32_sd (__m128d __A, int __B) +{ + __v2df result = (__v2df)__A; + double db = __B; + result [0] = db; + return (__m128d)result; +} + +/* Intel intrinsic. */ +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsi64_sd (__m128d __A, long long __B) +{ + __v2df result = (__v2df)__A; + double db = __B; + result [0] = db; + return (__m128d)result; +} + +/* Microsoft intrinsic. 
*/ +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsi64x_sd (__m128d __A, long long __B) +{ + return _mm_cvtsi64_sd (__A, __B); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtss_sd (__m128d __A, __m128 __B) +{ +#ifdef __LITTLE_ENDIAN__ + /* Use splat to move element [0] into position for the convert. */ + __v4sf temp = vec_splat ((__v4sf)__B, 0); + __v2df res; + /* Convert single float scalar to double in a vector. */ + __asm__( + "xscvspdp %x0,%x1" + : "=wa" (res) + : "wa" (temp) + : ); + return (__m128d) vec_mergel (res, (__v2df)__A); +#else + __v2df res = (__v2df)__A; + res [0] = ((__v4sf)__B) [0]; + return (__m128d) res; +#endif +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shuffle_pd(__m128d __A, __m128d __B, const int __mask) +{ + __vector double result; + const int litmsk = __mask & 0x3; + + if (litmsk == 0) + result = vec_mergeh (__A, __B); +#if __GNUC__ < 6 + else if (litmsk == 1) + result = vec_xxpermdi (__B, __A, 2); + else if (litmsk == 2) + result = vec_xxpermdi (__B, __A, 1); +#else + else if (litmsk == 1) + result = vec_xxpermdi (__A, __B, 2); + else if (litmsk == 2) + result = vec_xxpermdi (__A, __B, 1); +#endif + else + result = vec_mergel (__A, __B); + + return result; +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_unpackhi_pd (__m128d __A, __m128d __B) +{ + return (__m128d) vec_mergel ((__v2df)__A, (__v2df)__B); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_unpacklo_pd (__m128d __A, __m128d __B) +{ + return (__m128d) vec_mergeh ((__v2df)__A, (__v2df)__B); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loadh_pd (__m128d __A, double const *__B) +{ + __v2df result = (__v2df)__A; + result [1] = *__B; + return (__m128d)result; +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loadl_pd (__m128d __A, double const *__B) +{ + __v2df result = (__v2df)__A; + result [0] = *__B; + return (__m128d)result; +} + +#ifdef _ARCH_PWR8 +/* Intrinsic functions that require PowerISA 2.07 minimum. */ + +/* Creates a 2-bit mask from the most significant bits of the DPFP values. 
*/ +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_movemask_pd (__m128d __A) +{ + __vector unsigned long long result; + static const __vector unsigned int perm_mask = + { +#ifdef __LITTLE_ENDIAN__ + 0x80800040, 0x80808080, 0x80808080, 0x80808080 +#else + 0x80808080, 0x80808080, 0x80808080, 0x80804000 +#endif + }; + + result = ((__vector unsigned long long) + vec_vbpermq ((__vector unsigned char) __A, + (__vector unsigned char) perm_mask)); + +#ifdef __LITTLE_ENDIAN__ + return result[1]; +#else + return result[0]; +#endif +} +#endif /* _ARCH_PWR8 */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_packs_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_packs ((__v8hi) __A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_packs_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_packs ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_packus_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_packsu ((__v8hi) __A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_unpackhi_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_mergel ((__v16qu)__A, (__v16qu)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_unpackhi_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_mergel ((__v8hu)__A, (__v8hu)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_unpackhi_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_mergel ((__v4su)__A, (__v4su)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_unpackhi_epi64 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_mergel ((__vector long long) __A, + (__vector long long) __B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_unpacklo_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_mergeh ((__v16qu)__A, (__v16qu)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_unpacklo_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_mergeh ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_unpacklo_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_mergeh ((__v4si)__A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_unpacklo_epi64 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_mergeh ((__vector long long) __A, + (__vector long long) __B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_add_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i) ((__v16qu)__A + (__v16qu)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_add_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i) ((__v8hu)__A + (__v8hu)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_add_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i) ((__v4su)__A + (__v4su)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) +_mm_add_epi64 (__m128i __A, __m128i __B) +{ + return (__m128i) ((__v2du)__A + (__v2du)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_adds_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_adds ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_adds_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_adds ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_adds_epu8 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_adds ((__v16qu)__A, (__v16qu)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_adds_epu16 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_adds ((__v8hu)__A, (__v8hu)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sub_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i) ((__v16qu)__A - (__v16qu)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sub_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i) ((__v8hu)__A - (__v8hu)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sub_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i) ((__v4su)__A - (__v4su)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sub_epi64 (__m128i __A, __m128i __B) +{ + return (__m128i) ((__v2du)__A - (__v2du)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_subs_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_subs ((__v16qi)__A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_subs_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_subs ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_subs_epu8 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_subs ((__v16qu)__A, (__v16qu)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_subs_epu16 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_subs ((__v8hu)__A, (__v8hu)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_madd_epi16 (__m128i __A, __m128i __B) +{ + __vector signed int zero = {0, 0, 0, 0}; + + return (__m128i) vec_vmsumshm ((__v8hi)__A, (__v8hi)__B, zero); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mulhi_epi16 (__m128i __A, __m128i __B) +{ + __vector signed int w0, w1; + + __vector unsigned char xform1 = { +#ifdef __LITTLE_ENDIAN__ + 0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17, + 0x0A, 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F +#else + 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15, + 0x08, 0x09, 0x18, 0x19, 0x0C, 0x0D, 0x1C, 0x1D +#endif + }; + + w0 = vec_vmulesh ((__v8hi)__A, (__v8hi)__B); + w1 = vec_vmulosh ((__v8hi)__A, (__v8hi)__B); + return (__m128i) vec_perm (w0, w1, xform1); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mullo_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i) ((__v8hi)__A * (__v8hi)__B); +} + +extern __inline __m64 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mul_su32 (__m64 __A, __m64 __B) +{ + unsigned int a = __A; + unsigned int b = __B; + + return ((__m64)a * (__m64)b); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mul_epu32 (__m128i __A, __m128i __B) +{ +#if __GNUC__ < 8 + __v2du result; + +#ifdef __LITTLE_ENDIAN__ + /* VMX Vector Multiply Odd Unsigned Word. */ + __asm__( + "vmulouw %0,%1,%2" + : "=v" (result) + : "v" (__A), "v" (__B) + : ); +#else + /* VMX Vector Multiply Even Unsigned Word. */ + __asm__( + "vmuleuw %0,%1,%2" + : "=v" (result) + : "v" (__A), "v" (__B) + : ); +#endif + return (__m128i) result; +#else + return (__m128i) vec_mule ((__v4su)__A, (__v4su)__B); +#endif +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_slli_epi16 (__m128i __A, int __B) +{ + __v8hu lshift; + __v8hi result = { 0, 0, 0, 0, 0, 0, 0, 0 }; + + if (__B >= 0 && __B < 16) + { + if (__builtin_constant_p(__B)) + lshift = (__v8hu) vec_splat_s16(__B); + else + lshift = vec_splats ((unsigned short) __B); + + result = vec_sl ((__v8hi) __A, lshift); + } + + return (__m128i) result; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_slli_epi32 (__m128i __A, int __B) +{ + __v4su lshift; + __v4si result = { 0, 0, 0, 0 }; + + if (__B >= 0 && __B < 32) + { + if (__builtin_constant_p(__B) && __B < 16) + lshift = (__v4su) vec_splat_s32(__B); + else + lshift = vec_splats ((unsigned int) __B); + + result = vec_sl ((__v4si) __A, lshift); + } + + return (__m128i) result; +} + +#ifdef _ARCH_PWR8 +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_slli_epi64 (__m128i __A, int __B) +{ + __v2du lshift; + __v2di result = { 0, 0 }; + + if (__B >= 0 && __B < 64) + { + if (__builtin_constant_p(__B) && __B < 16) + lshift = (__v2du) vec_splat_s32(__B); + else + lshift = (__v2du) vec_splats ((unsigned int) __B); + + result = vec_sl ((__v2di) __A, lshift); + } + + return (__m128i) result; +} +#endif + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_srai_epi16 (__m128i __A, int __B) +{ + __v8hu rshift = { 15, 15, 15, 15, 15, 15, 15, 15 }; + __v8hi result; + + if (__B < 16) + { + if (__builtin_constant_p(__B)) + rshift = (__v8hu) vec_splat_s16(__B); + else + rshift = vec_splats ((unsigned short) __B); + } + result = vec_sra ((__v8hi) __A, rshift); + + return (__m128i) result; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_srai_epi32 (__m128i __A, int __B) +{ + __v4su rshift = { 31, 31, 31, 31 }; + __v4si result; + + if (__B < 32) + { + if (__builtin_constant_p(__B)) + { + if (__B < 16) + rshift = (__v4su) vec_splat_s32(__B); + else + rshift = (__v4su) vec_splats((unsigned int)__B); + } + else + rshift = vec_splats ((unsigned int) __B); + } + result = vec_sra ((__v4si) __A, rshift); + + return (__m128i) result; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_bslli_si128 (__m128i __A, const int __N) +{ + __v16qu result; + const __v16qu zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + + if (__N < 16) + result = vec_sld ((__v16qu) __A, zeros, __N); + else + result = zeros; + + return (__m128i) result; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_bsrli_si128 (__m128i __A, const int __N) +{ + 
__v16qu result; + const __v16qu zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + + if (__N < 16) +#ifdef __LITTLE_ENDIAN__ + if (__builtin_constant_p(__N)) + /* Would like to use Vector Shift Left Double by Octet + Immediate here to use the immediate form and avoid + load of __N * 8 value into a separate VR. */ + result = vec_sld (zeros, (__v16qu) __A, (16 - __N)); + else +#endif + { + __v16qu shift = vec_splats((unsigned char)(__N*8)); +#ifdef __LITTLE_ENDIAN__ + result = vec_sro ((__v16qu)__A, shift); +#else + result = vec_slo ((__v16qu)__A, shift); +#endif + } + else + result = zeros; + + return (__m128i) result; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_srli_si128 (__m128i __A, const int __N) +{ + return _mm_bsrli_si128 (__A, __N); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_slli_si128 (__m128i __A, const int _imm5) +{ + __v16qu result; + const __v16qu zeros = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + + if (_imm5 < 16) +#ifdef __LITTLE_ENDIAN__ + result = vec_sld ((__v16qu) __A, zeros, _imm5); +#else + result = vec_sld (zeros, (__v16qu) __A, (16 - _imm5)); +#endif + else + result = zeros; + + return (__m128i) result; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + +_mm_srli_epi16 (__m128i __A, int __B) +{ + __v8hu rshift; + __v8hi result = { 0, 0, 0, 0, 0, 0, 0, 0 }; + + if (__B < 16) + { + if (__builtin_constant_p(__B)) + rshift = (__v8hu) vec_splat_s16(__B); + else + rshift = vec_splats ((unsigned short) __B); + + result = vec_sr ((__v8hi) __A, rshift); + } + + return (__m128i) result; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_srli_epi32 (__m128i __A, int __B) +{ + __v4su rshift; + __v4si result = { 0, 0, 0, 0 }; + + if (__B < 32) + { + if (__builtin_constant_p(__B)) + { + if (__B < 16) + rshift = (__v4su) vec_splat_s32(__B); + else + rshift = (__v4su) vec_splats((unsigned int)__B); + } + else + rshift = vec_splats ((unsigned int) __B); + + result = vec_sr ((__v4si) __A, rshift); + } + + return (__m128i) result; +} + +#ifdef _ARCH_PWR8 +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_srli_epi64 (__m128i __A, int __B) +{ + __v2du rshift; + __v2di result = { 0, 0 }; + + if (__B < 64) + { + if (__builtin_constant_p(__B)) + { + if (__B < 16) + rshift = (__v2du) vec_splat_s32(__B); + else + rshift = (__v2du) vec_splats((unsigned long long)__B); + } + else + rshift = (__v2du) vec_splats ((unsigned int) __B); + + result = vec_sr ((__v2di) __A, rshift); + } + + return (__m128i) result; +} +#endif + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sll_epi16 (__m128i __A, __m128i __B) +{ + __v8hu lshift; + __vector __bool short shmask; + const __v8hu shmax = { 15, 15, 15, 15, 15, 15, 15, 15 }; + __v8hu result; + +#ifdef __LITTLE_ENDIAN__ + lshift = vec_splat ((__v8hu) __B, 0); +#else + lshift = vec_splat ((__v8hu) __B, 3); +#endif + shmask = vec_cmple (lshift, shmax); + result = vec_sl ((__v8hu) __A, lshift); + result = vec_sel ((__v8hu) shmask, result, shmask); + + return (__m128i) result; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sll_epi32 (__m128i __A, __m128i __B) +{ + __v4su lshift; + __vector __bool int shmask; + const __v4su shmax = { 32, 32, 32, 32 }; + __v4su result; +#ifdef 
__LITTLE_ENDIAN__ + lshift = vec_splat ((__v4su) __B, 0); +#else + lshift = vec_splat ((__v4su) __B, 1); +#endif + shmask = vec_cmplt (lshift, shmax); + result = vec_sl ((__v4su) __A, lshift); + result = vec_sel ((__v4su) shmask, result, shmask); + + return (__m128i) result; +} + +#ifdef _ARCH_PWR8 +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sll_epi64 (__m128i __A, __m128i __B) +{ + __v2du lshift; + __vector __bool long long shmask; + const __v2du shmax = { 64, 64 }; + __v2du result; + + lshift = vec_splat ((__v2du) __B, 0); + shmask = vec_cmplt (lshift, shmax); + result = vec_sl ((__v2du) __A, lshift); + result = vec_sel ((__v2du) shmask, result, shmask); + + return (__m128i) result; +} +#endif + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sra_epi16 (__m128i __A, __m128i __B) +{ + const __v8hu rshmax = { 15, 15, 15, 15, 15, 15, 15, 15 }; + __v8hu rshift; + __v8hi result; + +#ifdef __LITTLE_ENDIAN__ + rshift = vec_splat ((__v8hu)__B, 0); +#else + rshift = vec_splat ((__v8hu)__B, 3); +#endif + rshift = vec_min (rshift, rshmax); + result = vec_sra ((__v8hi) __A, rshift); + + return (__m128i) result; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sra_epi32 (__m128i __A, __m128i __B) +{ + const __v4su rshmax = { 31, 31, 31, 31 }; + __v4su rshift; + __v4si result; + +#ifdef __LITTLE_ENDIAN__ + rshift = vec_splat ((__v4su)__B, 0); +#else + rshift = vec_splat ((__v4su)__B, 1); +#endif + rshift = vec_min (rshift, rshmax); + result = vec_sra ((__v4si) __A, rshift); + + return (__m128i) result; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_srl_epi16 (__m128i __A, __m128i __B) +{ + __v8hu rshift; + __vector __bool short shmask; + const __v8hu shmax = { 15, 15, 15, 15, 15, 15, 15, 15 }; + __v8hu result; + +#ifdef __LITTLE_ENDIAN__ + rshift = vec_splat ((__v8hu) __B, 0); +#else + rshift = vec_splat ((__v8hu) __B, 3); +#endif + shmask = vec_cmple (rshift, shmax); + result = vec_sr ((__v8hu) __A, rshift); + result = vec_sel ((__v8hu) shmask, result, shmask); + + return (__m128i) result; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_srl_epi32 (__m128i __A, __m128i __B) +{ + __v4su rshift; + __vector __bool int shmask; + const __v4su shmax = { 32, 32, 32, 32 }; + __v4su result; + +#ifdef __LITTLE_ENDIAN__ + rshift = vec_splat ((__v4su) __B, 0); +#else + rshift = vec_splat ((__v4su) __B, 1); +#endif + shmask = vec_cmplt (rshift, shmax); + result = vec_sr ((__v4su) __A, rshift); + result = vec_sel ((__v4su) shmask, result, shmask); + + return (__m128i) result; +} + +#ifdef _ARCH_PWR8 +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_srl_epi64 (__m128i __A, __m128i __B) +{ + __v2du rshift; + __vector __bool long long shmask; + const __v2du shmax = { 64, 64 }; + __v2du result; + + rshift = vec_splat ((__v2du) __B, 0); + shmask = vec_cmplt (rshift, shmax); + result = vec_sr ((__v2du) __A, rshift); + result = vec_sel ((__v2du) shmask, result, shmask); + + return (__m128i) result; +} +#endif + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_and_pd (__m128d __A, __m128d __B) +{ + return (vec_and ((__v2df) __A, (__v2df) __B)); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_andnot_pd (__m128d __A, 
__m128d __B) +{ + return (vec_andc ((__v2df) __B, (__v2df) __A)); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_or_pd (__m128d __A, __m128d __B) +{ + return (vec_or ((__v2df) __A, (__v2df) __B)); +} + +extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_xor_pd (__m128d __A, __m128d __B) +{ + return (vec_xor ((__v2df) __A, (__v2df) __B)); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_and_si128 (__m128i __A, __m128i __B) +{ + return (__m128i)vec_and ((__v2di) __A, (__v2di) __B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_andnot_si128 (__m128i __A, __m128i __B) +{ + return (__m128i)vec_andc ((__v2di) __B, (__v2di) __A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_or_si128 (__m128i __A, __m128i __B) +{ + return (__m128i)vec_or ((__v2di) __A, (__v2di) __B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_xor_si128 (__m128i __A, __m128i __B) +{ + return (__m128i)vec_xor ((__v2di) __A, (__v2di) __B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpeq_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_cmpeq ((__v16qi) __A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpeq_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_cmpeq ((__v8hi) __A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpeq_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_cmpeq ((__v4si) __A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmplt_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_cmplt ((__v16qi) __A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmplt_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_cmplt ((__v8hi) __A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmplt_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_cmplt ((__v4si) __A, (__v4si)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpgt_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_cmpgt ((__v16qi) __A, (__v16qi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpgt_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_cmpgt ((__v8hi) __A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpgt_epi32 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_cmpgt ((__v4si) __A, (__v4si)__B); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_extract_epi16 (__m128i const __A, int const __N) +{ + return (unsigned short) ((__v8hi)__A)[__N & 7]; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_insert_epi16 (__m128i const __A, int const __D, int const __N) +{ + __v8hi result = (__v8hi)__A; + + result [(__N & 7)] = __D; + + return (__m128i) result; +} + +extern __inline __m128i 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_max_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_max ((__v8hi)__A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_max_epu8 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_max ((__v16qu) __A, (__v16qu)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_min_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_min ((__v8hi) __A, (__v8hi)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_min_epu8 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_min ((__v16qu) __A, (__v16qu)__B); +} + + +#ifdef _ARCH_PWR8 +/* Intrinsic functions that require PowerISA 2.07 minimum. */ + +/* Creates a 4-bit mask from the most significant bits of the SPFP values. */ +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_movemask_epi8 (__m128i __A) +{ + __vector unsigned long long result; + static const __vector unsigned char perm_mask = + { + 0x78, 0x70, 0x68, 0x60, 0x58, 0x50, 0x48, 0x40, + 0x38, 0x30, 0x28, 0x20, 0x18, 0x10, 0x08, 0x00 + }; + + result = ((__vector unsigned long long) + vec_vbpermq ((__vector unsigned char) __A, + (__vector unsigned char) perm_mask)); + +#ifdef __LITTLE_ENDIAN__ + return result[1]; +#else + return result[0]; +#endif +} +#endif /* _ARCH_PWR8 */ + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mulhi_epu16 (__m128i __A, __m128i __B) +{ + __v4su w0, w1; + __v16qu xform1 = { +#ifdef __LITTLE_ENDIAN__ + 0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17, + 0x0A, 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F +#else + 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15, + 0x08, 0x09, 0x18, 0x19, 0x0C, 0x0D, 0x1C, 0x1D +#endif + }; + + w0 = vec_vmuleuh ((__v8hu)__A, (__v8hu)__B); + w1 = vec_vmulouh ((__v8hu)__A, (__v8hu)__B); + return (__m128i) vec_perm (w0, w1, xform1); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shufflehi_epi16 (__m128i __A, const int __mask) +{ + unsigned long element_selector_98 = __mask & 0x03; + unsigned long element_selector_BA = (__mask >> 2) & 0x03; + unsigned long element_selector_DC = (__mask >> 4) & 0x03; + unsigned long element_selector_FE = (__mask >> 6) & 0x03; + static const unsigned short permute_selectors[4] = + { +#ifdef __LITTLE_ENDIAN__ + 0x0908, 0x0B0A, 0x0D0C, 0x0F0E +#else + 0x0809, 0x0A0B, 0x0C0D, 0x0E0F +#endif + }; + __v2du pmask = +#ifdef __LITTLE_ENDIAN__ + { 0x1716151413121110UL, 0UL}; +#else + { 0x1011121314151617UL, 0UL}; +#endif + __m64_union t; + __v2du a, r; + + t.as_short[0] = permute_selectors[element_selector_98]; + t.as_short[1] = permute_selectors[element_selector_BA]; + t.as_short[2] = permute_selectors[element_selector_DC]; + t.as_short[3] = permute_selectors[element_selector_FE]; + pmask[1] = t.as_m64; + a = (__v2du)__A; + r = vec_perm (a, a, (__vector unsigned char)pmask); + return (__m128i) r; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shufflelo_epi16 (__m128i __A, const int __mask) +{ + unsigned long element_selector_10 = __mask & 0x03; + unsigned long element_selector_32 = (__mask >> 2) & 0x03; + unsigned long element_selector_54 = (__mask >> 4) & 0x03; + unsigned long element_selector_76 = (__mask >> 6) & 0x03; + static const unsigned short 
permute_selectors[4] = + { +#ifdef __LITTLE_ENDIAN__ + 0x0100, 0x0302, 0x0504, 0x0706 +#else + 0x0001, 0x0203, 0x0405, 0x0607 +#endif + }; + __v2du pmask = +#ifdef __LITTLE_ENDIAN__ + { 0UL, 0x1f1e1d1c1b1a1918UL}; +#else + { 0UL, 0x18191a1b1c1d1e1fUL}; +#endif + __m64_union t; + __v2du a, r; + t.as_short[0] = permute_selectors[element_selector_10]; + t.as_short[1] = permute_selectors[element_selector_32]; + t.as_short[2] = permute_selectors[element_selector_54]; + t.as_short[3] = permute_selectors[element_selector_76]; + pmask[0] = t.as_m64; + a = (__v2du)__A; + r = vec_perm (a, a, (__vector unsigned char)pmask); + return (__m128i) r; +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shuffle_epi32 (__m128i __A, const int __mask) +{ + unsigned long element_selector_10 = __mask & 0x03; + unsigned long element_selector_32 = (__mask >> 2) & 0x03; + unsigned long element_selector_54 = (__mask >> 4) & 0x03; + unsigned long element_selector_76 = (__mask >> 6) & 0x03; + static const unsigned int permute_selectors[4] = + { +#ifdef __LITTLE_ENDIAN__ + 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C +#else + 0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F +#endif + }; + __v4su t; + + t[0] = permute_selectors[element_selector_10]; + t[1] = permute_selectors[element_selector_32]; + t[2] = permute_selectors[element_selector_54] + 0x10101010; + t[3] = permute_selectors[element_selector_76] + 0x10101010; + return (__m128i)vec_perm ((__v4si) __A, (__v4si)__A, (__vector unsigned char)t); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskmoveu_si128 (__m128i __A, __m128i __B, char *__C) +{ + __v2du hibit = { 0x7f7f7f7f7f7f7f7fUL, 0x7f7f7f7f7f7f7f7fUL}; + __v16qu mask, tmp; + __m128i_u *p = (__m128i_u*)__C; + + tmp = (__v16qu)_mm_loadu_si128(p); + mask = (__v16qu)vec_cmpgt ((__v16qu)__B, (__v16qu)hibit); + tmp = vec_sel (tmp, (__v16qu)__A, mask); + _mm_storeu_si128 (p, (__m128i)tmp); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_avg_epu8 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_avg ((__v16qu)__A, (__v16qu)__B); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_avg_epu16 (__m128i __A, __m128i __B) +{ + return (__m128i) vec_avg ((__v8hu)__A, (__v8hu)__B); +} + + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sad_epu8 (__m128i __A, __m128i __B) +{ + __v16qu a, b; + __v16qu vmin, vmax, vabsdiff; + __v4si vsum; + const __v4su zero = { 0, 0, 0, 0 }; + __v4si result; + + a = (__v16qu) __A; + b = (__v16qu) __B; + vmin = vec_min (a, b); + vmax = vec_max (a, b); + vabsdiff = vec_sub (vmax, vmin); + /* Sum four groups of bytes into integers. */ + vsum = (__vector signed int) vec_sum4s (vabsdiff, zero); + /* Sum across four integers with two integer results. */ + result = vec_sum2s (vsum, (__vector signed int) zero); + /* Rotate the sums into the correct position. */ +#ifdef __LITTLE_ENDIAN__ + result = vec_sld (result, result, 4); +#else + result = vec_sld (result, result, 6); +#endif + /* Rotate the sums into the correct position. */ + return (__m128i) result; +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_stream_si32 (int *__A, int __B) +{ + /* Use the data cache block touch for store transient. 
*/ + __asm__ ( + "dcbtstt 0,%0" + : + : "b" (__A) + : "memory" + ); + *__A = __B; +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_stream_si64 (long long int *__A, long long int __B) +{ + /* Use the data cache block touch for store transient. */ + __asm__ ( + " dcbtstt 0,%0" + : + : "b" (__A) + : "memory" + ); + *__A = __B; +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_stream_si128 (__m128i *__A, __m128i __B) +{ + /* Use the data cache block touch for store transient. */ + __asm__ ( + "dcbtstt 0,%0" + : + : "b" (__A) + : "memory" + ); + *__A = __B; +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_stream_pd (double *__A, __m128d __B) +{ + /* Use the data cache block touch for store transient. */ + __asm__ ( + "dcbtstt 0,%0" + : + : "b" (__A) + : "memory" + ); + *(__m128d*)__A = __B; +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_clflush (void const *__A) +{ + /* Use the data cache block flush. */ + __asm__ ( + "dcbf 0,%0" + : + : "b" (__A) + : "memory" + ); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_lfence (void) +{ + /* Use light weight sync for load to load ordering. */ + __atomic_thread_fence (__ATOMIC_RELEASE); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mfence (void) +{ + /* Use heavy weight sync for any to any ordering. */ + __atomic_thread_fence (__ATOMIC_SEQ_CST); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsi32_si128 (int __A) +{ + return _mm_set_epi32 (0, 0, 0, __A); +} + +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsi64_si128 (long long __A) +{ + return __extension__ (__m128i)(__v2di){ __A, 0LL }; +} + +/* Microsoft intrinsic. */ +extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsi64x_si128 (long long __A) +{ + return __extension__ (__m128i)(__v2di){ __A, 0LL }; +} + +/* Casts between various SP, DP, INT vector types. Note that these do no + conversion of values, they just change the type. 
*/
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castpd_ps(__m128d __A)
+{
+  return (__m128) __A;
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castpd_si128(__m128d __A)
+{
+  return (__m128i) __A;
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castps_pd(__m128 __A)
+{
+  return (__m128d) __A;
+}
+
+extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castps_si128(__m128 __A)
+{
+  return (__m128i) __A;
+}
+
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castsi128_ps(__m128i __A)
+{
+  return (__m128) __A;
+}
+
+extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm_castsi128_pd(__m128i __A)
+{
+  return (__m128d) __A;
+}
+
+#endif /* EMMINTRIN_H_ */
diff --git a/lib/include/ppc_wrappers/mm_malloc.h b/lib/include/ppc_wrappers/mm_malloc.h
new file mode 100644
index 0000000000..d91d7865c8
--- /dev/null
+++ b/lib/include/ppc_wrappers/mm_malloc.h
@@ -0,0 +1,44 @@
+/*===---- mm_malloc.h - Implementation of _mm_malloc and _mm_free ----------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef _MM_MALLOC_H_INCLUDED
+#define _MM_MALLOC_H_INCLUDED
+
+#include <stdlib.h>
+
+/* We can't depend on <stdlib.h> since the prototype of posix_memalign
+   may not be visible.  */
+#ifndef __cplusplus
+extern int posix_memalign (void **, size_t, size_t);
+#else
+extern "C" int posix_memalign (void **, size_t, size_t) throw ();
+#endif
+
+static __inline void *
+_mm_malloc (size_t size, size_t alignment)
+{
+  /* PowerPC64 ELF V2 ABI requires quadword alignment.  */
+  size_t vec_align = sizeof (__vector float);
+  void *ptr;
+
+  if (alignment < vec_align)
+    alignment = vec_align;
+  if (posix_memalign (&ptr, alignment, size) == 0)
+    return ptr;
+  else
+    return NULL;
+}
+
+static __inline void
+_mm_free (void * ptr)
+{
+  free (ptr);
+}
+
+#endif /* _MM_MALLOC_H_INCLUDED */
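For reference, a minimal caller of the two helpers above could look like the following sketch; this is a hypothetical test program, not part of the patch, and it assumes the ppc_wrappers directory is reachable on the include path of a powerpc64/powerpc64le build.

#include <stdint.h>
#include <stdio.h>
#include <mm_malloc.h>

int main (void)
{
  /* Request 8-byte alignment; _mm_malloc raises it to the 16-byte
     quadword minimum required by the PowerPC64 ELF V2 ABI.  */
  float *buf = _mm_malloc (64 * sizeof (float), 8);
  if (buf == NULL)
    return 1;
  printf ("16-byte aligned: %s\n", ((uintptr_t) buf % 16 == 0) ? "yes" : "no");
  _mm_free (buf);
  return 0;
}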
diff --git a/lib/include/ppc_wrappers/mmintrin.h b/lib/include/ppc_wrappers/mmintrin.h
new file mode 100644
index 0000000000..b949653adf
--- /dev/null
+++ b/lib/include/ppc_wrappers/mmintrin.h
@@ -0,0 +1,1443 @@
+/*===---- mmintrin.h - Implementation of MMX intrinsics on PowerPC ---------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+/* Implemented from the specification included in the Intel C++ Compiler
+   User Guide and Reference, version 9.0.  */
+
+#ifndef NO_WARN_X86_INTRINSICS
+/* This header file is to help porting code using Intel intrinsics
+   explicitly from x86_64 to powerpc64/powerpc64le.
+
+   Since PowerPC target doesn't support native 64-bit vector type, we
+   typedef __m64 to 64-bit unsigned long long in MMX intrinsics, which
+   works well for _si64 and some _pi32 operations.
+
+   For _pi16 and _pi8 operations, it's better to transfer __m64 into
+   128-bit PowerPC vector first.  Power8 introduced direct register
+   move instructions which helps for more efficient implementation.
+
+   It's user's responsibility to determine if the results of such port
+   are acceptable or further changes are needed.  Please note that much
+   code using Intel intrinsics CAN BE REWRITTEN in more portable and
+   efficient standard C or GNU C extensions with 64-bit scalar
+   operations, or 128-bit SSE/Altivec operations, which are more
+   recommended. */
+#error \
+    "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error."
+#endif
+
+#ifndef _MMINTRIN_H_INCLUDED
+#define _MMINTRIN_H_INCLUDED
+
+#include <altivec.h>
+/* The Intel API is flexible enough that we must allow aliasing with other
+   vector types, and their scalar components. */
+typedef __attribute__((__aligned__(8))) unsigned long long __m64;
+
+typedef __attribute__((__aligned__(8))) union {
+  __m64 as_m64;
+  char as_char[8];
+  signed char as_signed_char[8];
+  short as_short[4];
+  int as_int[2];
+  long long as_long_long;
+  float as_float[2];
+  double as_double;
+} __m64_union;
+
+/* Empty the multimedia state.  */
+extern __inline void
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+    _mm_empty(void) {
+  /* nothing to do on PowerPC.  */
+}
+
+extern __inline void
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+    _m_empty(void) {
+  /* nothing to do on PowerPC.  */
+}
+
+/* Convert I to a __m64 object.  The integer is zero-extended to 64-bits.  */
+extern __inline __m64
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+    _mm_cvtsi32_si64(int __i) {
+  return (__m64)(unsigned int)__i;
+}
+
+extern __inline __m64
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+    _m_from_int(int __i) {
+  return _mm_cvtsi32_si64(__i);
+}
+
+/* Convert the lower 32 bits of the __m64 object into an integer.  */
+extern __inline int
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+    _mm_cvtsi64_si32(__m64 __i) {
+  return ((int)__i);
+}
+
+extern __inline int
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+    _m_to_int(__m64 __i) {
+  return _mm_cvtsi64_si32(__i);
+}
+
+/* Convert I to a __m64 object.  */
+
+/* Intel intrinsic.  */
+extern __inline __m64
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+    _m_from_int64(long long __i) {
+  return (__m64)__i;
+}
+
+extern __inline __m64
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+    _mm_cvtsi64_m64(long long __i) {
+  return (__m64)__i;
+}
+
+/* Microsoft intrinsic.  */
+extern __inline __m64
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+    _mm_cvtsi64x_si64(long long __i) {
+  return (__m64)__i;
+}
+
+extern __inline __m64
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+    _mm_set_pi64x(long long __i) {
+  return (__m64)__i;
+}
+
+/* Convert the __m64 object to a 64bit integer.  */
+
+/* Intel intrinsic.  */
+extern __inline long long
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+    _m_to_int64(__m64 __i) {
+  return (long long)__i;
+}
+
+extern __inline long long
+    __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+    _mm_cvtm64_si64(__m64 __i) {
+  return (long long)__i;
+}
+
+/* Microsoft intrinsic.
*/ +extern __inline long long + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_cvtsi64_si64x(__m64 __i) { + return (long long)__i; +} + +#ifdef _ARCH_PWR8 +/* Pack the four 16-bit values from M1 into the lower four 8-bit values of + the result, and the four 16-bit values from M2 into the upper four 8-bit + values of the result, all with signed saturation. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_packs_pi16(__m64 __m1, __m64 __m2) { + __vector signed short vm1; + __vector signed char vresult; + + vm1 = (__vector signed short)(__vector unsigned long long) +#ifdef __LITTLE_ENDIAN__ + {__m1, __m2}; +#else + {__m2, __m1}; +#endif + vresult = vec_packs(vm1, vm1); + return (__m64)((__vector long long)vresult)[0]; +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_packsswb(__m64 __m1, __m64 __m2) { + return _mm_packs_pi16(__m1, __m2); +} + +/* Pack the two 32-bit values from M1 in to the lower two 16-bit values of + the result, and the two 32-bit values from M2 into the upper two 16-bit + values of the result, all with signed saturation. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_packs_pi32(__m64 __m1, __m64 __m2) { + __vector signed int vm1; + __vector signed short vresult; + + vm1 = (__vector signed int)(__vector unsigned long long) +#ifdef __LITTLE_ENDIAN__ + {__m1, __m2}; +#else + {__m2, __m1}; +#endif + vresult = vec_packs(vm1, vm1); + return (__m64)((__vector long long)vresult)[0]; +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_packssdw(__m64 __m1, __m64 __m2) { + return _mm_packs_pi32(__m1, __m2); +} + +/* Pack the four 16-bit values from M1 into the lower four 8-bit values of + the result, and the four 16-bit values from M2 into the upper four 8-bit + values of the result, all with unsigned saturation. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_packs_pu16(__m64 __m1, __m64 __m2) { + __vector unsigned char r; + __vector signed short vm1 = (__vector signed short)(__vector long long) +#ifdef __LITTLE_ENDIAN__ + {__m1, __m2}; +#else + {__m2, __m1}; +#endif + const __vector signed short __zero = {0}; + __vector __bool short __select = vec_cmplt(vm1, __zero); + r = vec_packs((__vector unsigned short)vm1, (__vector unsigned short)vm1); + __vector __bool char packsel = vec_pack(__select, __select); + r = vec_sel(r, (const __vector unsigned char)__zero, packsel); + return (__m64)((__vector long long)r)[0]; +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_packuswb(__m64 __m1, __m64 __m2) { + return _mm_packs_pu16(__m1, __m2); +} +#endif /* end ARCH_PWR8 */ + +/* Interleave the four 8-bit values from the high half of M1 with the four + 8-bit values from the high half of M2. 
*/ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_unpackhi_pi8(__m64 __m1, __m64 __m2) { +#if _ARCH_PWR8 + __vector unsigned char a, b, c; + + a = (__vector unsigned char)vec_splats(__m1); + b = (__vector unsigned char)vec_splats(__m2); + c = vec_mergel(a, b); + return (__m64)((__vector long long)c)[1]; +#else + __m64_union m1, m2, res; + + m1.as_m64 = __m1; + m2.as_m64 = __m2; + + res.as_char[0] = m1.as_char[4]; + res.as_char[1] = m2.as_char[4]; + res.as_char[2] = m1.as_char[5]; + res.as_char[3] = m2.as_char[5]; + res.as_char[4] = m1.as_char[6]; + res.as_char[5] = m2.as_char[6]; + res.as_char[6] = m1.as_char[7]; + res.as_char[7] = m2.as_char[7]; + + return (__m64)res.as_m64; +#endif +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_punpckhbw(__m64 __m1, __m64 __m2) { + return _mm_unpackhi_pi8(__m1, __m2); +} + +/* Interleave the two 16-bit values from the high half of M1 with the two + 16-bit values from the high half of M2. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_unpackhi_pi16(__m64 __m1, __m64 __m2) { + __m64_union m1, m2, res; + + m1.as_m64 = __m1; + m2.as_m64 = __m2; + + res.as_short[0] = m1.as_short[2]; + res.as_short[1] = m2.as_short[2]; + res.as_short[2] = m1.as_short[3]; + res.as_short[3] = m2.as_short[3]; + + return (__m64)res.as_m64; +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_punpckhwd(__m64 __m1, __m64 __m2) { + return _mm_unpackhi_pi16(__m1, __m2); +} +/* Interleave the 32-bit value from the high half of M1 with the 32-bit + value from the high half of M2. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_unpackhi_pi32(__m64 __m1, __m64 __m2) { + __m64_union m1, m2, res; + + m1.as_m64 = __m1; + m2.as_m64 = __m2; + + res.as_int[0] = m1.as_int[1]; + res.as_int[1] = m2.as_int[1]; + + return (__m64)res.as_m64; +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_punpckhdq(__m64 __m1, __m64 __m2) { + return _mm_unpackhi_pi32(__m1, __m2); +} +/* Interleave the four 8-bit values from the low half of M1 with the four + 8-bit values from the low half of M2. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_unpacklo_pi8(__m64 __m1, __m64 __m2) { +#if _ARCH_PWR8 + __vector unsigned char a, b, c; + + a = (__vector unsigned char)vec_splats(__m1); + b = (__vector unsigned char)vec_splats(__m2); + c = vec_mergel(a, b); + return (__m64)((__vector long long)c)[0]; +#else + __m64_union m1, m2, res; + + m1.as_m64 = __m1; + m2.as_m64 = __m2; + + res.as_char[0] = m1.as_char[0]; + res.as_char[1] = m2.as_char[0]; + res.as_char[2] = m1.as_char[1]; + res.as_char[3] = m2.as_char[1]; + res.as_char[4] = m1.as_char[2]; + res.as_char[5] = m2.as_char[2]; + res.as_char[6] = m1.as_char[3]; + res.as_char[7] = m2.as_char[3]; + + return (__m64)res.as_m64; +#endif +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_punpcklbw(__m64 __m1, __m64 __m2) { + return _mm_unpacklo_pi8(__m1, __m2); +} +/* Interleave the two 16-bit values from the low half of M1 with the two + 16-bit values from the low half of M2. 
*/ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_unpacklo_pi16(__m64 __m1, __m64 __m2) { + __m64_union m1, m2, res; + + m1.as_m64 = __m1; + m2.as_m64 = __m2; + + res.as_short[0] = m1.as_short[0]; + res.as_short[1] = m2.as_short[0]; + res.as_short[2] = m1.as_short[1]; + res.as_short[3] = m2.as_short[1]; + + return (__m64)res.as_m64; +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_punpcklwd(__m64 __m1, __m64 __m2) { + return _mm_unpacklo_pi16(__m1, __m2); +} + +/* Interleave the 32-bit value from the low half of M1 with the 32-bit + value from the low half of M2. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_unpacklo_pi32(__m64 __m1, __m64 __m2) { + __m64_union m1, m2, res; + + m1.as_m64 = __m1; + m2.as_m64 = __m2; + + res.as_int[0] = m1.as_int[0]; + res.as_int[1] = m2.as_int[0]; + + return (__m64)res.as_m64; +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_punpckldq(__m64 __m1, __m64 __m2) { + return _mm_unpacklo_pi32(__m1, __m2); +} + +/* Add the 8-bit values in M1 to the 8-bit values in M2. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_add_pi8(__m64 __m1, __m64 __m2) { +#if _ARCH_PWR8 + __vector signed char a, b, c; + + a = (__vector signed char)vec_splats(__m1); + b = (__vector signed char)vec_splats(__m2); + c = vec_add(a, b); + return (__m64)((__vector long long)c)[0]; +#else + __m64_union m1, m2, res; + + m1.as_m64 = __m1; + m2.as_m64 = __m2; + + res.as_char[0] = m1.as_char[0] + m2.as_char[0]; + res.as_char[1] = m1.as_char[1] + m2.as_char[1]; + res.as_char[2] = m1.as_char[2] + m2.as_char[2]; + res.as_char[3] = m1.as_char[3] + m2.as_char[3]; + res.as_char[4] = m1.as_char[4] + m2.as_char[4]; + res.as_char[5] = m1.as_char[5] + m2.as_char[5]; + res.as_char[6] = m1.as_char[6] + m2.as_char[6]; + res.as_char[7] = m1.as_char[7] + m2.as_char[7]; + + return (__m64)res.as_m64; +#endif +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_paddb(__m64 __m1, __m64 __m2) { + return _mm_add_pi8(__m1, __m2); +} + +/* Add the 16-bit values in M1 to the 16-bit values in M2. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_add_pi16(__m64 __m1, __m64 __m2) { +#if _ARCH_PWR8 + __vector signed short a, b, c; + + a = (__vector signed short)vec_splats(__m1); + b = (__vector signed short)vec_splats(__m2); + c = vec_add(a, b); + return (__m64)((__vector long long)c)[0]; +#else + __m64_union m1, m2, res; + + m1.as_m64 = __m1; + m2.as_m64 = __m2; + + res.as_short[0] = m1.as_short[0] + m2.as_short[0]; + res.as_short[1] = m1.as_short[1] + m2.as_short[1]; + res.as_short[2] = m1.as_short[2] + m2.as_short[2]; + res.as_short[3] = m1.as_short[3] + m2.as_short[3]; + + return (__m64)res.as_m64; +#endif +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_paddw(__m64 __m1, __m64 __m2) { + return _mm_add_pi16(__m1, __m2); +} + +/* Add the 32-bit values in M1 to the 32-bit values in M2. 
*/ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_add_pi32(__m64 __m1, __m64 __m2) { +#if _ARCH_PWR9 + __vector signed int a, b, c; + + a = (__vector signed int)vec_splats(__m1); + b = (__vector signed int)vec_splats(__m2); + c = vec_add(a, b); + return (__m64)((__vector long long)c)[0]; +#else + __m64_union m1, m2, res; + + m1.as_m64 = __m1; + m2.as_m64 = __m2; + + res.as_int[0] = m1.as_int[0] + m2.as_int[0]; + res.as_int[1] = m1.as_int[1] + m2.as_int[1]; + + return (__m64)res.as_m64; +#endif +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_paddd(__m64 __m1, __m64 __m2) { + return _mm_add_pi32(__m1, __m2); +} + +/* Subtract the 8-bit values in M2 from the 8-bit values in M1. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_sub_pi8(__m64 __m1, __m64 __m2) { +#if _ARCH_PWR8 + __vector signed char a, b, c; + + a = (__vector signed char)vec_splats(__m1); + b = (__vector signed char)vec_splats(__m2); + c = vec_sub(a, b); + return (__m64)((__vector long long)c)[0]; +#else + __m64_union m1, m2, res; + + m1.as_m64 = __m1; + m2.as_m64 = __m2; + + res.as_char[0] = m1.as_char[0] - m2.as_char[0]; + res.as_char[1] = m1.as_char[1] - m2.as_char[1]; + res.as_char[2] = m1.as_char[2] - m2.as_char[2]; + res.as_char[3] = m1.as_char[3] - m2.as_char[3]; + res.as_char[4] = m1.as_char[4] - m2.as_char[4]; + res.as_char[5] = m1.as_char[5] - m2.as_char[5]; + res.as_char[6] = m1.as_char[6] - m2.as_char[6]; + res.as_char[7] = m1.as_char[7] - m2.as_char[7]; + + return (__m64)res.as_m64; +#endif +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_psubb(__m64 __m1, __m64 __m2) { + return _mm_sub_pi8(__m1, __m2); +} + +/* Subtract the 16-bit values in M2 from the 16-bit values in M1. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_sub_pi16(__m64 __m1, __m64 __m2) { +#if _ARCH_PWR8 + __vector signed short a, b, c; + + a = (__vector signed short)vec_splats(__m1); + b = (__vector signed short)vec_splats(__m2); + c = vec_sub(a, b); + return (__m64)((__vector long long)c)[0]; +#else + __m64_union m1, m2, res; + + m1.as_m64 = __m1; + m2.as_m64 = __m2; + + res.as_short[0] = m1.as_short[0] - m2.as_short[0]; + res.as_short[1] = m1.as_short[1] - m2.as_short[1]; + res.as_short[2] = m1.as_short[2] - m2.as_short[2]; + res.as_short[3] = m1.as_short[3] - m2.as_short[3]; + + return (__m64)res.as_m64; +#endif +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_psubw(__m64 __m1, __m64 __m2) { + return _mm_sub_pi16(__m1, __m2); +} + +/* Subtract the 32-bit values in M2 from the 32-bit values in M1. 
*/ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_sub_pi32(__m64 __m1, __m64 __m2) { +#if _ARCH_PWR9 + __vector signed int a, b, c; + + a = (__vector signed int)vec_splats(__m1); + b = (__vector signed int)vec_splats(__m2); + c = vec_sub(a, b); + return (__m64)((__vector long long)c)[0]; +#else + __m64_union m1, m2, res; + + m1.as_m64 = __m1; + m2.as_m64 = __m2; + + res.as_int[0] = m1.as_int[0] - m2.as_int[0]; + res.as_int[1] = m1.as_int[1] - m2.as_int[1]; + + return (__m64)res.as_m64; +#endif +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_psubd(__m64 __m1, __m64 __m2) { + return _mm_sub_pi32(__m1, __m2); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_add_si64(__m64 __m1, __m64 __m2) { + return (__m1 + __m2); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_sub_si64(__m64 __m1, __m64 __m2) { + return (__m1 - __m2); +} + +/* Shift the 64-bit value in M left by COUNT. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_sll_si64(__m64 __m, __m64 __count) { + return (__m << __count); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_psllq(__m64 __m, __m64 __count) { + return _mm_sll_si64(__m, __count); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_slli_si64(__m64 __m, const int __count) { + return (__m << __count); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_psllqi(__m64 __m, const int __count) { + return _mm_slli_si64(__m, __count); +} + +/* Shift the 64-bit value in M left by COUNT; shift in zeros. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_srl_si64(__m64 __m, __m64 __count) { + return (__m >> __count); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_psrlq(__m64 __m, __m64 __count) { + return _mm_srl_si64(__m, __count); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_srli_si64(__m64 __m, const int __count) { + return (__m >> __count); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_psrlqi(__m64 __m, const int __count) { + return _mm_srli_si64(__m, __count); +} + +/* Bit-wise AND the 64-bit values in M1 and M2. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_and_si64(__m64 __m1, __m64 __m2) { + return (__m1 & __m2); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_pand(__m64 __m1, __m64 __m2) { + return _mm_and_si64(__m1, __m2); +} + +/* Bit-wise complement the 64-bit value in M1 and bit-wise AND it with the + 64-bit value in M2. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_andnot_si64(__m64 __m1, __m64 __m2) { + return (~__m1 & __m2); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_pandn(__m64 __m1, __m64 __m2) { + return _mm_andnot_si64(__m1, __m2); +} + +/* Bit-wise inclusive OR the 64-bit values in M1 and M2. 
*/ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_or_si64(__m64 __m1, __m64 __m2) { + return (__m1 | __m2); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_por(__m64 __m1, __m64 __m2) { + return _mm_or_si64(__m1, __m2); +} + +/* Bit-wise exclusive OR the 64-bit values in M1 and M2. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_xor_si64(__m64 __m1, __m64 __m2) { + return (__m1 ^ __m2); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_pxor(__m64 __m1, __m64 __m2) { + return _mm_xor_si64(__m1, __m2); +} + +/* Creates a 64-bit zero. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_setzero_si64(void) { + return (__m64)0; +} + +/* Compare eight 8-bit values. The result of the comparison is 0xFF if the + test is true and zero if false. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_cmpeq_pi8(__m64 __m1, __m64 __m2) { +#if defined(_ARCH_PWR6) && defined(__powerpc64__) + __m64 res; + __asm__("cmpb %0,%1,%2;\n" : "=r"(res) : "r"(__m1), "r"(__m2) :); + return (res); +#else + __m64_union m1, m2, res; + + m1.as_m64 = __m1; + m2.as_m64 = __m2; + + res.as_char[0] = (m1.as_char[0] == m2.as_char[0]) ? -1 : 0; + res.as_char[1] = (m1.as_char[1] == m2.as_char[1]) ? -1 : 0; + res.as_char[2] = (m1.as_char[2] == m2.as_char[2]) ? -1 : 0; + res.as_char[3] = (m1.as_char[3] == m2.as_char[3]) ? -1 : 0; + res.as_char[4] = (m1.as_char[4] == m2.as_char[4]) ? -1 : 0; + res.as_char[5] = (m1.as_char[5] == m2.as_char[5]) ? -1 : 0; + res.as_char[6] = (m1.as_char[6] == m2.as_char[6]) ? -1 : 0; + res.as_char[7] = (m1.as_char[7] == m2.as_char[7]) ? -1 : 0; + + return (__m64)res.as_m64; +#endif +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_pcmpeqb(__m64 __m1, __m64 __m2) { + return _mm_cmpeq_pi8(__m1, __m2); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_cmpgt_pi8(__m64 __m1, __m64 __m2) { +#if _ARCH_PWR8 + __vector signed char a, b, c; + + a = (__vector signed char)vec_splats(__m1); + b = (__vector signed char)vec_splats(__m2); + c = (__vector signed char)vec_cmpgt(a, b); + return (__m64)((__vector long long)c)[0]; +#else + __m64_union m1, m2, res; + + m1.as_m64 = __m1; + m2.as_m64 = __m2; + + res.as_char[0] = (m1.as_char[0] > m2.as_char[0]) ? -1 : 0; + res.as_char[1] = (m1.as_char[1] > m2.as_char[1]) ? -1 : 0; + res.as_char[2] = (m1.as_char[2] > m2.as_char[2]) ? -1 : 0; + res.as_char[3] = (m1.as_char[3] > m2.as_char[3]) ? -1 : 0; + res.as_char[4] = (m1.as_char[4] > m2.as_char[4]) ? -1 : 0; + res.as_char[5] = (m1.as_char[5] > m2.as_char[5]) ? -1 : 0; + res.as_char[6] = (m1.as_char[6] > m2.as_char[6]) ? -1 : 0; + res.as_char[7] = (m1.as_char[7] > m2.as_char[7]) ? -1 : 0; + + return (__m64)res.as_m64; +#endif +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_pcmpgtb(__m64 __m1, __m64 __m2) { + return _mm_cmpgt_pi8(__m1, __m2); +} + +/* Compare four 16-bit values. The result of the comparison is 0xFFFF if + the test is true and zero if false. 
*/ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_cmpeq_pi16(__m64 __m1, __m64 __m2) { +#if _ARCH_PWR8 + __vector signed short a, b, c; + + a = (__vector signed short)vec_splats(__m1); + b = (__vector signed short)vec_splats(__m2); + c = (__vector signed short)vec_cmpeq(a, b); + return (__m64)((__vector long long)c)[0]; +#else + __m64_union m1, m2, res; + + m1.as_m64 = __m1; + m2.as_m64 = __m2; + + res.as_short[0] = (m1.as_short[0] == m2.as_short[0]) ? -1 : 0; + res.as_short[1] = (m1.as_short[1] == m2.as_short[1]) ? -1 : 0; + res.as_short[2] = (m1.as_short[2] == m2.as_short[2]) ? -1 : 0; + res.as_short[3] = (m1.as_short[3] == m2.as_short[3]) ? -1 : 0; + + return (__m64)res.as_m64; +#endif +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_pcmpeqw(__m64 __m1, __m64 __m2) { + return _mm_cmpeq_pi16(__m1, __m2); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_cmpgt_pi16(__m64 __m1, __m64 __m2) { +#if _ARCH_PWR8 + __vector signed short a, b, c; + + a = (__vector signed short)vec_splats(__m1); + b = (__vector signed short)vec_splats(__m2); + c = (__vector signed short)vec_cmpgt(a, b); + return (__m64)((__vector long long)c)[0]; +#else + __m64_union m1, m2, res; + + m1.as_m64 = __m1; + m2.as_m64 = __m2; + + res.as_short[0] = (m1.as_short[0] > m2.as_short[0]) ? -1 : 0; + res.as_short[1] = (m1.as_short[1] > m2.as_short[1]) ? -1 : 0; + res.as_short[2] = (m1.as_short[2] > m2.as_short[2]) ? -1 : 0; + res.as_short[3] = (m1.as_short[3] > m2.as_short[3]) ? -1 : 0; + + return (__m64)res.as_m64; +#endif +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_pcmpgtw(__m64 __m1, __m64 __m2) { + return _mm_cmpgt_pi16(__m1, __m2); +} + +/* Compare two 32-bit values. The result of the comparison is 0xFFFFFFFF if + the test is true and zero if false. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_cmpeq_pi32(__m64 __m1, __m64 __m2) { +#if _ARCH_PWR9 + __vector signed int a, b, c; + + a = (__vector signed int)vec_splats(__m1); + b = (__vector signed int)vec_splats(__m2); + c = (__vector signed int)vec_cmpeq(a, b); + return (__m64)((__vector long long)c)[0]; +#else + __m64_union m1, m2, res; + + m1.as_m64 = __m1; + m2.as_m64 = __m2; + + res.as_int[0] = (m1.as_int[0] == m2.as_int[0]) ? -1 : 0; + res.as_int[1] = (m1.as_int[1] == m2.as_int[1]) ? -1 : 0; + + return (__m64)res.as_m64; +#endif +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_pcmpeqd(__m64 __m1, __m64 __m2) { + return _mm_cmpeq_pi32(__m1, __m2); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_cmpgt_pi32(__m64 __m1, __m64 __m2) { +#if _ARCH_PWR9 + __vector signed int a, b, c; + + a = (__vector signed int)vec_splats(__m1); + b = (__vector signed int)vec_splats(__m2); + c = (__vector signed int)vec_cmpgt(a, b); + return (__m64)((__vector long long)c)[0]; +#else + __m64_union m1, m2, res; + + m1.as_m64 = __m1; + m2.as_m64 = __m2; + + res.as_int[0] = (m1.as_int[0] > m2.as_int[0]) ? -1 : 0; + res.as_int[1] = (m1.as_int[1] > m2.as_int[1]) ? 
-1 : 0; + + return (__m64)res.as_m64; +#endif +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_pcmpgtd(__m64 __m1, __m64 __m2) { + return _mm_cmpgt_pi32(__m1, __m2); +} + +#if _ARCH_PWR8 +/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed + saturated arithmetic. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_adds_pi8(__m64 __m1, __m64 __m2) { + __vector signed char a, b, c; + + a = (__vector signed char)vec_splats(__m1); + b = (__vector signed char)vec_splats(__m2); + c = vec_adds(a, b); + return (__m64)((__vector long long)c)[0]; +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_paddsb(__m64 __m1, __m64 __m2) { + return _mm_adds_pi8(__m1, __m2); +} +/* Add the 16-bit values in M1 to the 16-bit values in M2 using signed + saturated arithmetic. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_adds_pi16(__m64 __m1, __m64 __m2) { + __vector signed short a, b, c; + + a = (__vector signed short)vec_splats(__m1); + b = (__vector signed short)vec_splats(__m2); + c = vec_adds(a, b); + return (__m64)((__vector long long)c)[0]; +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_paddsw(__m64 __m1, __m64 __m2) { + return _mm_adds_pi16(__m1, __m2); +} +/* Add the 8-bit values in M1 to the 8-bit values in M2 using unsigned + saturated arithmetic. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_adds_pu8(__m64 __m1, __m64 __m2) { + __vector unsigned char a, b, c; + + a = (__vector unsigned char)vec_splats(__m1); + b = (__vector unsigned char)vec_splats(__m2); + c = vec_adds(a, b); + return (__m64)((__vector long long)c)[0]; +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_paddusb(__m64 __m1, __m64 __m2) { + return _mm_adds_pu8(__m1, __m2); +} + +/* Add the 16-bit values in M1 to the 16-bit values in M2 using unsigned + saturated arithmetic. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_adds_pu16(__m64 __m1, __m64 __m2) { + __vector unsigned short a, b, c; + + a = (__vector unsigned short)vec_splats(__m1); + b = (__vector unsigned short)vec_splats(__m2); + c = vec_adds(a, b); + return (__m64)((__vector long long)c)[0]; +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_paddusw(__m64 __m1, __m64 __m2) { + return _mm_adds_pu16(__m1, __m2); +} + +/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed + saturating arithmetic. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_subs_pi8(__m64 __m1, __m64 __m2) { + __vector signed char a, b, c; + + a = (__vector signed char)vec_splats(__m1); + b = (__vector signed char)vec_splats(__m2); + c = vec_subs(a, b); + return (__m64)((__vector long long)c)[0]; +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_psubsb(__m64 __m1, __m64 __m2) { + return _mm_subs_pi8(__m1, __m2); +} + +/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using + signed saturating arithmetic. 
*/ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_subs_pi16(__m64 __m1, __m64 __m2) { + __vector signed short a, b, c; + + a = (__vector signed short)vec_splats(__m1); + b = (__vector signed short)vec_splats(__m2); + c = vec_subs(a, b); + return (__m64)((__vector long long)c)[0]; +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_psubsw(__m64 __m1, __m64 __m2) { + return _mm_subs_pi16(__m1, __m2); +} + +/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using + unsigned saturating arithmetic. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_subs_pu8(__m64 __m1, __m64 __m2) { + __vector unsigned char a, b, c; + + a = (__vector unsigned char)vec_splats(__m1); + b = (__vector unsigned char)vec_splats(__m2); + c = vec_subs(a, b); + return (__m64)((__vector long long)c)[0]; +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_psubusb(__m64 __m1, __m64 __m2) { + return _mm_subs_pu8(__m1, __m2); +} + +/* Subtract the 16-bit values in M2 from the 16-bit values in M1 using + unsigned saturating arithmetic. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_subs_pu16(__m64 __m1, __m64 __m2) { + __vector unsigned short a, b, c; + + a = (__vector unsigned short)vec_splats(__m1); + b = (__vector unsigned short)vec_splats(__m2); + c = vec_subs(a, b); + return (__m64)((__vector long long)c)[0]; +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_psubusw(__m64 __m1, __m64 __m2) { + return _mm_subs_pu16(__m1, __m2); +} + +/* Multiply four 16-bit values in M1 by four 16-bit values in M2 producing + four 32-bit intermediate results, which are then summed by pairs to + produce two 32-bit results. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_madd_pi16(__m64 __m1, __m64 __m2) { + __vector signed short a, b; + __vector signed int c; + __vector signed int zero = {0, 0, 0, 0}; + + a = (__vector signed short)vec_splats(__m1); + b = (__vector signed short)vec_splats(__m2); + c = vec_vmsumshm(a, b, zero); + return (__m64)((__vector long long)c)[0]; +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_pmaddwd(__m64 __m1, __m64 __m2) { + return _mm_madd_pi16(__m1, __m2); +} +/* Multiply four signed 16-bit values in M1 by four signed 16-bit values in + M2 and produce the high 16 bits of the 32-bit results. 
*/ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_mulhi_pi16(__m64 __m1, __m64 __m2) { + __vector signed short a, b; + __vector signed short c; + __vector signed int w0, w1; + __vector unsigned char xform1 = { +#ifdef __LITTLE_ENDIAN__ + 0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17, 0x0A, + 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F +#else + 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15, 0x00, + 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15 +#endif + }; + + a = (__vector signed short)vec_splats(__m1); + b = (__vector signed short)vec_splats(__m2); + + w0 = vec_vmulesh(a, b); + w1 = vec_vmulosh(a, b); + c = (__vector signed short)vec_perm(w0, w1, xform1); + + return (__m64)((__vector long long)c)[0]; +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_pmulhw(__m64 __m1, __m64 __m2) { + return _mm_mulhi_pi16(__m1, __m2); +} + +/* Multiply four 16-bit values in M1 by four 16-bit values in M2 and produce + the low 16 bits of the results. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_mullo_pi16(__m64 __m1, __m64 __m2) { + __vector signed short a, b, c; + + a = (__vector signed short)vec_splats(__m1); + b = (__vector signed short)vec_splats(__m2); + c = a * b; + return (__m64)((__vector long long)c)[0]; +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_pmullw(__m64 __m1, __m64 __m2) { + return _mm_mullo_pi16(__m1, __m2); +} + +/* Shift four 16-bit values in M left by COUNT. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_sll_pi16(__m64 __m, __m64 __count) { + __vector signed short m, r; + __vector unsigned short c; + + if (__count <= 15) { + m = (__vector signed short)vec_splats(__m); + c = (__vector unsigned short)vec_splats((unsigned short)__count); + r = vec_sl(m, (__vector unsigned short)c); + return (__m64)((__vector long long)r)[0]; + } else + return (0); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_psllw(__m64 __m, __m64 __count) { + return _mm_sll_pi16(__m, __count); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_slli_pi16(__m64 __m, int __count) { + /* Promote int to long then invoke mm_sll_pi16. */ + return _mm_sll_pi16(__m, __count); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_psllwi(__m64 __m, int __count) { + return _mm_slli_pi16(__m, __count); +} + +/* Shift two 32-bit values in M left by COUNT. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_sll_pi32(__m64 __m, __m64 __count) { + __m64_union m, res; + + m.as_m64 = __m; + + res.as_int[0] = m.as_int[0] << __count; + res.as_int[1] = m.as_int[1] << __count; + return (res.as_m64); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_pslld(__m64 __m, __m64 __count) { + return _mm_sll_pi32(__m, __count); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_slli_pi32(__m64 __m, int __count) { + /* Promote int to long then invoke mm_sll_pi32. 
*/ + return _mm_sll_pi32(__m, __count); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_pslldi(__m64 __m, int __count) { + return _mm_slli_pi32(__m, __count); +} + +/* Shift four 16-bit values in M right by COUNT; shift in the sign bit. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_sra_pi16(__m64 __m, __m64 __count) { + __vector signed short m, r; + __vector unsigned short c; + + if (__count <= 15) { + m = (__vector signed short)vec_splats(__m); + c = (__vector unsigned short)vec_splats((unsigned short)__count); + r = vec_sra(m, (__vector unsigned short)c); + return (__m64)((__vector long long)r)[0]; + } else + return (0); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_psraw(__m64 __m, __m64 __count) { + return _mm_sra_pi16(__m, __count); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_srai_pi16(__m64 __m, int __count) { + /* Promote int to long then invoke mm_sra_pi32. */ + return _mm_sra_pi16(__m, __count); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_psrawi(__m64 __m, int __count) { + return _mm_srai_pi16(__m, __count); +} + +/* Shift two 32-bit values in M right by COUNT; shift in the sign bit. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_sra_pi32(__m64 __m, __m64 __count) { + __m64_union m, res; + + m.as_m64 = __m; + + res.as_int[0] = m.as_int[0] >> __count; + res.as_int[1] = m.as_int[1] >> __count; + return (res.as_m64); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_psrad(__m64 __m, __m64 __count) { + return _mm_sra_pi32(__m, __count); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_srai_pi32(__m64 __m, int __count) { + /* Promote int to long then invoke mm_sra_pi32. */ + return _mm_sra_pi32(__m, __count); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_psradi(__m64 __m, int __count) { + return _mm_srai_pi32(__m, __count); +} + +/* Shift four 16-bit values in M right by COUNT; shift in zeros. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_srl_pi16(__m64 __m, __m64 __count) { + __vector unsigned short m, r; + __vector unsigned short c; + + if (__count <= 15) { + m = (__vector unsigned short)vec_splats(__m); + c = (__vector unsigned short)vec_splats((unsigned short)__count); + r = vec_sr(m, (__vector unsigned short)c); + return (__m64)((__vector long long)r)[0]; + } else + return (0); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_psrlw(__m64 __m, __m64 __count) { + return _mm_srl_pi16(__m, __count); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_srli_pi16(__m64 __m, int __count) { + /* Promote int to long then invoke mm_sra_pi32. */ + return _mm_srl_pi16(__m, __count); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_psrlwi(__m64 __m, int __count) { + return _mm_srli_pi16(__m, __count); +} + +/* Shift two 32-bit values in M right by COUNT; shift in zeros. 
*/ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_srl_pi32(__m64 __m, __m64 __count) { + __m64_union m, res; + + m.as_m64 = __m; + + res.as_int[0] = (unsigned int)m.as_int[0] >> __count; + res.as_int[1] = (unsigned int)m.as_int[1] >> __count; + return (res.as_m64); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_psrld(__m64 __m, __m64 __count) { + return _mm_srl_pi32(__m, __count); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_srli_pi32(__m64 __m, int __count) { + /* Promote int to long then invoke mm_srl_pi32. */ + return _mm_srl_pi32(__m, __count); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _m_psrldi(__m64 __m, int __count) { + return _mm_srli_pi32(__m, __count); +} +#endif /* _ARCH_PWR8 */ + +/* Creates a vector of two 32-bit values; I0 is least significant. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_set_pi32(int __i1, int __i0) { + __m64_union res; + + res.as_int[0] = __i0; + res.as_int[1] = __i1; + return (res.as_m64); +} + +/* Creates a vector of four 16-bit values; W0 is least significant. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_set_pi16(short __w3, short __w2, short __w1, short __w0) { + __m64_union res; + + res.as_short[0] = __w0; + res.as_short[1] = __w1; + res.as_short[2] = __w2; + res.as_short[3] = __w3; + return (res.as_m64); +} + +/* Creates a vector of eight 8-bit values; B0 is least significant. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_set_pi8(char __b7, char __b6, char __b5, char __b4, char __b3, + char __b2, char __b1, char __b0) { + __m64_union res; + + res.as_char[0] = __b0; + res.as_char[1] = __b1; + res.as_char[2] = __b2; + res.as_char[3] = __b3; + res.as_char[4] = __b4; + res.as_char[5] = __b5; + res.as_char[6] = __b6; + res.as_char[7] = __b7; + return (res.as_m64); +} + +/* Similar, but with the arguments in reverse order. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_setr_pi32(int __i0, int __i1) { + __m64_union res; + + res.as_int[0] = __i0; + res.as_int[1] = __i1; + return (res.as_m64); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_setr_pi16(short __w0, short __w1, short __w2, short __w3) { + return _mm_set_pi16(__w3, __w2, __w1, __w0); +} + +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_setr_pi8(char __b0, char __b1, char __b2, char __b3, char __b4, + char __b5, char __b6, char __b7) { + return _mm_set_pi8(__b7, __b6, __b5, __b4, __b3, __b2, __b1, __b0); +} + +/* Creates a vector of two 32-bit values, both elements containing I. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_set1_pi32(int __i) { + __m64_union res; + + res.as_int[0] = __i; + res.as_int[1] = __i; + return (res.as_m64); +} + +/* Creates a vector of four 16-bit values, all elements containing W. 
*/ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_set1_pi16(short __w) { +#if _ARCH_PWR9 + __vector signed short w; + + w = (__vector signed short)vec_splats(__w); + return (__m64)((__vector long long)w)[0]; +#else + __m64_union res; + + res.as_short[0] = __w; + res.as_short[1] = __w; + res.as_short[2] = __w; + res.as_short[3] = __w; + return (res.as_m64); +#endif +} + +/* Creates a vector of eight 8-bit values, all elements containing B. */ +extern __inline __m64 + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + _mm_set1_pi8(signed char __b) { +#if _ARCH_PWR8 + __vector signed char b; + + b = (__vector signed char)vec_splats(__b); + return (__m64)((__vector long long)b)[0]; +#else + __m64_union res; + + res.as_char[0] = __b; + res.as_char[1] = __b; + res.as_char[2] = __b; + res.as_char[3] = __b; + res.as_char[4] = __b; + res.as_char[5] = __b; + res.as_char[6] = __b; + res.as_char[7] = __b; + return (res.as_m64); +#endif +} +#endif /* _MMINTRIN_H_INCLUDED */ diff --git a/lib/include/ppc_wrappers/xmmintrin.h b/lib/include/ppc_wrappers/xmmintrin.h new file mode 100644 index 0000000000..1b322b6651 --- /dev/null +++ b/lib/include/ppc_wrappers/xmmintrin.h @@ -0,0 +1,1838 @@ +/*===---- xmmintrin.h - Implementation of SSE intrinsics on PowerPC --------=== + * + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + *===-----------------------------------------------------------------------=== + */ + +/* Implemented from the specification included in the Intel C++ Compiler + User Guide and Reference, version 9.0. */ + +#ifndef NO_WARN_X86_INTRINSICS +/* This header file is intended to help port code using Intel intrinsics + explicitly from x86_64 to powerpc64/powerpc64le. + + Since X86 SSE intrinsics mainly handle the __m128 type, PowerPC + VMX/VSX ISA is a good match for vector float SIMD operations. + However scalar float operations in vector (XMM) registers require + the POWER8 VSX ISA (2.07) level. There are differences for data + format and placement of float scalars in the vector register, which + require extra steps to match SSE scalar float semantics on POWER. + + It should be noted that there's much difference between X86_64's + MXCSR and PowerISA's FPSCR/VSCR registers. It's recommended to use + portable <fenv.h> instead of accessing MXCSR directly. + + Most SSE scalar float intrinsic operations can be performed more + efficiently as C language float scalar operations or optimized to + use vector SIMD operations. We recommend this for new applications. */ +#error "Please read comment above. Use -DNO_WARN_X86_INTRINSICS to disable this error." +#endif + +#ifndef _XMMINTRIN_H_INCLUDED +#define _XMMINTRIN_H_INCLUDED + +/* Define four value permute mask */ +#define _MM_SHUFFLE(w,x,y,z) (((w) << 6) | ((x) << 4) | ((y) << 2) | (z)) + +#include <altivec.h> + +/* Avoid collisions between altivec.h and strict adherence to C++ and + C11 standards. This should eventually be done inside altivec.h itself, + but only after testing a full distro build. */ +#if defined(__STRICT_ANSI__) && (defined(__cplusplus) || \ + (defined(__STDC_VERSION__) && \ + __STDC_VERSION__ >= 201112L)) +#undef vector +#undef pixel +#undef bool +#endif + +/* We need type definitions from the MMX header file. */ +#include <mmintrin.h> +
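+/* A minimal porting sketch, assuming this wrapper is reached as <xmmintrin.h>
+   on a POWER8/VSX target (e.g. built with -mcpu=power8): existing SSE code
+   typically only needs to acknowledge the warning above.  The function below
+   is a hypothetical illustration of that usage, not part of the API:
+
+     #define NO_WARN_X86_INTRINSICS   // or pass -DNO_WARN_X86_INTRINSICS
+     #include <xmmintrin.h>
+
+     // Add two float[4] arrays in place; both pointers assumed 16-byte aligned.
+     void add4 (float *p, const float *q)
+     {
+       __m128 a = _mm_load_ps (p);
+       __m128 b = _mm_load_ps (q);
+       _mm_store_ps (p, _mm_add_ps (a, b));
+     }
+ */
+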
+/* Get _mm_malloc () and _mm_free (). */ +#if __STDC_HOSTED__ +#include <mm_malloc.h> +#endif + +/* The Intel API is flexible enough that we must allow aliasing with other + vector types, and their scalar components. */ +typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); + +/* Unaligned version of the same type. */ +typedef float __m128_u __attribute__ ((__vector_size__ (16), __may_alias__, + __aligned__ (1))); + +/* Internal data types for implementing the intrinsics. */ +typedef float __v4sf __attribute__ ((__vector_size__ (16))); + +/* Create an undefined vector. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_undefined_ps (void) +{ + __m128 __Y = __Y; + return __Y; +} + +/* Create a vector of zeros. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_setzero_ps (void) +{ + return __extension__ (__m128){ 0.0f, 0.0f, 0.0f, 0.0f }; +} + +/* Load four SPFP values from P. The address must be 16-byte aligned. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_load_ps (float const *__P) +{ + return ((__m128)vec_ld(0, (__v4sf*)__P)); +} + +/* Load four SPFP values from P. The address need not be 16-byte aligned. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loadu_ps (float const *__P) +{ + return (vec_vsx_ld(0, __P)); +} + +/* Load four SPFP values in reverse order. The address must be aligned. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loadr_ps (float const *__P) +{ + __v4sf __tmp; + __m128 result; + static const __vector unsigned char permute_vector = + { 0x1C, 0x1D, 0x1E, 0x1F, 0x18, 0x19, 0x1A, 0x1B, 0x14, 0x15, 0x16, + 0x17, 0x10, 0x11, 0x12, 0x13 }; + + __tmp = vec_ld (0, (__v4sf *) __P); + result = (__m128) vec_perm (__tmp, __tmp, permute_vector); + return result; +} + +/* Create a vector with all four elements equal to F. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set1_ps (float __F) +{ + return __extension__ (__m128)(__v4sf){ __F, __F, __F, __F }; +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set_ps1 (float __F) +{ + return _mm_set1_ps (__F); +} + +/* Create the vector [Z Y X W]. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set_ps (const float __Z, const float __Y, const float __X, const float __W) +{ + return __extension__ (__m128)(__v4sf){ __W, __X, __Y, __Z }; +} + +/* Create the vector [W X Y Z]. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_setr_ps (float __Z, float __Y, float __X, float __W) +{ + return __extension__ (__m128)(__v4sf){ __Z, __Y, __X, __W }; +} + +/* Store four SPFP values. The address must be 16-byte aligned. */ +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_store_ps (float *__P, __m128 __A) +{ + vec_st((__v4sf)__A, 0, (__v4sf*)__P); +} + +/* Store four SPFP values. The address need not be 16-byte aligned. */ +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storeu_ps (float *__P, __m128 __A) +{ + *(__m128_u *)__P = __A; +} + +/* Store four SPFP values in reverse order. The address must be aligned. 
*/ +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storer_ps (float *__P, __m128 __A) +{ + __v4sf __tmp; + static const __vector unsigned char permute_vector = + { 0x1C, 0x1D, 0x1E, 0x1F, 0x18, 0x19, 0x1A, 0x1B, 0x14, 0x15, 0x16, + 0x17, 0x10, 0x11, 0x12, 0x13 }; + + __tmp = (__m128) vec_perm (__A, __A, permute_vector); + + _mm_store_ps (__P, __tmp); +} + +/* Store the lower SPFP value across four words. */ +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_store1_ps (float *__P, __m128 __A) +{ + __v4sf __va = vec_splat((__v4sf)__A, 0); + _mm_store_ps (__P, __va); +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_store_ps1 (float *__P, __m128 __A) +{ + _mm_store1_ps (__P, __A); +} + +/* Create a vector with element 0 as F and the rest zero. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_set_ss (float __F) +{ + return __extension__ (__m128)(__v4sf){ __F, 0.0f, 0.0f, 0.0f }; +} + +/* Sets the low SPFP value of A from the low value of B. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_move_ss (__m128 __A, __m128 __B) +{ + static const __vector unsigned int mask = {0xffffffff, 0, 0, 0}; + + return (vec_sel ((__v4sf)__A, (__v4sf)__B, mask)); +} + +/* Create a vector with element 0 as *P and the rest zero. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_load_ss (float const *__P) +{ + return _mm_set_ss (*__P); +} + +/* Stores the lower SPFP value. */ +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_store_ss (float *__P, __m128 __A) +{ + *__P = ((__v4sf)__A)[0]; +} + +/* Perform the respective operation on the lower SPFP (single-precision + floating-point) values of A and B; the upper three SPFP values are + passed through from A. */ + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_add_ss (__m128 __A, __m128 __B) +{ +#ifdef _ARCH_PWR7 + __m128 a, b, c; + static const __vector unsigned int mask = {0xffffffff, 0, 0, 0}; + /* PowerISA VSX does not allow partial (for just lower double) + results. So to insure we don't generate spurious exceptions + (from the upper double values) we splat the lower double + before we to the operation. */ + a = vec_splat (__A, 0); + b = vec_splat (__B, 0); + c = a + b; + /* Then we merge the lower float result with the original upper + float elements from __A. */ + return (vec_sel (__A, c, mask)); +#else + __A[0] = __A[0] + __B[0]; + return (__A); +#endif +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sub_ss (__m128 __A, __m128 __B) +{ +#ifdef _ARCH_PWR7 + __m128 a, b, c; + static const __vector unsigned int mask = {0xffffffff, 0, 0, 0}; + /* PowerISA VSX does not allow partial (for just lower double) + results. So to insure we don't generate spurious exceptions + (from the upper double values) we splat the lower double + before we to the operation. */ + a = vec_splat (__A, 0); + b = vec_splat (__B, 0); + c = a - b; + /* Then we merge the lower float result with the original upper + float elements from __A. 
*/ + return (vec_sel (__A, c, mask)); +#else + __A[0] = __A[0] - __B[0]; + return (__A); +#endif +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mul_ss (__m128 __A, __m128 __B) +{ +#ifdef _ARCH_PWR7 + __m128 a, b, c; + static const __vector unsigned int mask = {0xffffffff, 0, 0, 0}; + /* PowerISA VSX does not allow partial (for just lower double) + results. So to insure we don't generate spurious exceptions + (from the upper double values) we splat the lower double + before we to the operation. */ + a = vec_splat (__A, 0); + b = vec_splat (__B, 0); + c = a * b; + /* Then we merge the lower float result with the original upper + float elements from __A. */ + return (vec_sel (__A, c, mask)); +#else + __A[0] = __A[0] * __B[0]; + return (__A); +#endif +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_div_ss (__m128 __A, __m128 __B) +{ +#ifdef _ARCH_PWR7 + __m128 a, b, c; + static const __vector unsigned int mask = {0xffffffff, 0, 0, 0}; + /* PowerISA VSX does not allow partial (for just lower double) + results. So to insure we don't generate spurious exceptions + (from the upper double values) we splat the lower double + before we to the operation. */ + a = vec_splat (__A, 0); + b = vec_splat (__B, 0); + c = a / b; + /* Then we merge the lower float result with the original upper + float elements from __A. */ + return (vec_sel (__A, c, mask)); +#else + __A[0] = __A[0] / __B[0]; + return (__A); +#endif +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sqrt_ss (__m128 __A) +{ + __m128 a, c; + static const __vector unsigned int mask = {0xffffffff, 0, 0, 0}; + /* PowerISA VSX does not allow partial (for just lower double) + * results. So to insure we don't generate spurious exceptions + * (from the upper double values) we splat the lower double + * before we to the operation. */ + a = vec_splat (__A, 0); + c = vec_sqrt (a); + /* Then we merge the lower float result with the original upper + * float elements from __A. */ + return (vec_sel (__A, c, mask)); +} + +/* Perform the respective operation on the four SPFP values in A and B. 
*/ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_add_ps (__m128 __A, __m128 __B) +{ + return (__m128) ((__v4sf)__A + (__v4sf)__B); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sub_ps (__m128 __A, __m128 __B) +{ + return (__m128) ((__v4sf)__A - (__v4sf)__B); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mul_ps (__m128 __A, __m128 __B) +{ + return (__m128) ((__v4sf)__A * (__v4sf)__B); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_div_ps (__m128 __A, __m128 __B) +{ + return (__m128) ((__v4sf)__A / (__v4sf)__B); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sqrt_ps (__m128 __A) +{ + return (vec_sqrt ((__v4sf)__A)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_rcp_ps (__m128 __A) +{ + return (vec_re ((__v4sf)__A)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_rsqrt_ps (__m128 __A) +{ + return (vec_rsqrte (__A)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_rcp_ss (__m128 __A) +{ + __m128 a, c; + static const __vector unsigned int mask = {0xffffffff, 0, 0, 0}; + /* PowerISA VSX does not allow partial (for just lower double) + * results. So to insure we don't generate spurious exceptions + * (from the upper double values) we splat the lower double + * before we to the operation. */ + a = vec_splat (__A, 0); + c = _mm_rcp_ps (a); + /* Then we merge the lower float result with the original upper + * float elements from __A. */ + return (vec_sel (__A, c, mask)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_rsqrt_ss (__m128 __A) +{ + __m128 a, c; + static const __vector unsigned int mask = {0xffffffff, 0, 0, 0}; + /* PowerISA VSX does not allow partial (for just lower double) + * results. So to insure we don't generate spurious exceptions + * (from the upper double values) we splat the lower double + * before we to the operation. */ + a = vec_splat (__A, 0); + c = vec_rsqrte (a); + /* Then we merge the lower float result with the original upper + * float elements from __A. */ + return (vec_sel (__A, c, mask)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_min_ss (__m128 __A, __m128 __B) +{ + __v4sf a, b, c; + static const __vector unsigned int mask = {0xffffffff, 0, 0, 0}; + /* PowerISA VSX does not allow partial (for just lower float) + * results. So to insure we don't generate spurious exceptions + * (from the upper float values) we splat the lower float + * before we to the operation. */ + a = vec_splat ((__v4sf)__A, 0); + b = vec_splat ((__v4sf)__B, 0); + c = vec_min (a, b); + /* Then we merge the lower float result with the original upper + * float elements from __A. */ + return (vec_sel ((__v4sf)__A, c, mask)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_max_ss (__m128 __A, __m128 __B) +{ + __v4sf a, b, c; + static const __vector unsigned int mask = {0xffffffff, 0, 0, 0}; + /* PowerISA VSX does not allow partial (for just lower float) + * results. So to insure we don't generate spurious exceptions + * (from the upper float values) we splat the lower float + * before we to the operation. 
*/ + a = vec_splat (__A, 0); + b = vec_splat (__B, 0); + c = vec_max (a, b); + /* Then we merge the lower float result with the original upper + * float elements from __A. */ + return (vec_sel ((__v4sf)__A, c, mask)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_min_ps (__m128 __A, __m128 __B) +{ + __vector __bool int m = vec_cmpgt ((__v4sf) __B, (__v4sf) __A); + return vec_sel (__B, __A, m); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_max_ps (__m128 __A, __m128 __B) +{ + __vector __bool int m = vec_cmpgt ((__v4sf) __A, (__v4sf) __B); + return vec_sel (__B, __A, m); +} + +/* Perform logical bit-wise operations on 128-bit values. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_and_ps (__m128 __A, __m128 __B) +{ + return ((__m128)vec_and ((__v4sf)__A, (__v4sf)__B)); +// return __builtin_ia32_andps (__A, __B); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_andnot_ps (__m128 __A, __m128 __B) +{ + return ((__m128)vec_andc ((__v4sf)__B, (__v4sf)__A)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_or_ps (__m128 __A, __m128 __B) +{ + return ((__m128)vec_or ((__v4sf)__A, (__v4sf)__B)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_xor_ps (__m128 __A, __m128 __B) +{ + return ((__m128)vec_xor ((__v4sf)__A, (__v4sf)__B)); +} + +/* Perform a comparison on the four SPFP values of A and B. For each + element, if the comparison is true, place a mask of all ones in the + result, otherwise a mask of zeros. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpeq_ps (__m128 __A, __m128 __B) +{ + return ((__m128)vec_cmpeq ((__v4sf)__A,(__v4sf) __B)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmplt_ps (__m128 __A, __m128 __B) +{ + return ((__m128)vec_cmplt ((__v4sf)__A, (__v4sf)__B)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmple_ps (__m128 __A, __m128 __B) +{ + return ((__m128)vec_cmple ((__v4sf)__A, (__v4sf)__B)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpgt_ps (__m128 __A, __m128 __B) +{ + return ((__m128)vec_cmpgt ((__v4sf)__A, (__v4sf)__B)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpge_ps (__m128 __A, __m128 __B) +{ + return ((__m128)vec_cmpge ((__v4sf)__A, (__v4sf)__B)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpneq_ps (__m128 __A, __m128 __B) +{ + __v4sf temp = (__v4sf ) vec_cmpeq ((__v4sf) __A, (__v4sf)__B); + return ((__m128)vec_nor (temp, temp)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpnlt_ps (__m128 __A, __m128 __B) +{ + return ((__m128)vec_cmpge ((__v4sf)__A, (__v4sf)__B)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpnle_ps (__m128 __A, __m128 __B) +{ + return ((__m128)vec_cmpgt ((__v4sf)__A, (__v4sf)__B)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpngt_ps (__m128 __A, __m128 __B) +{ + return ((__m128)vec_cmple ((__v4sf)__A, (__v4sf)__B)); +} + 
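+/* A minimal usage sketch: the all-ones/all-zeros masks produced by the
+   comparison intrinsics above are typically combined with the bit-wise
+   operations to select between two vectors without branching.  The helper
+   name below is hypothetical and only shows the intended pattern:
+
+     static inline __m128 select_greater (__m128 a, __m128 b)
+     {
+       __m128 m = _mm_cmpgt_ps (a, b);          // all ones where a > b
+       return _mm_or_ps (_mm_and_ps (m, a),     // keep a where the mask is set
+                         _mm_andnot_ps (m, b)); // keep b elsewhere
+     }
+ */
+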
+extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpnge_ps (__m128 __A, __m128 __B) +{ + return ((__m128)vec_cmplt ((__v4sf)__A, (__v4sf)__B)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpord_ps (__m128 __A, __m128 __B) +{ + __vector unsigned int a, b; + __vector unsigned int c, d; + static const __vector unsigned int float_exp_mask = + { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 }; + + a = (__vector unsigned int) vec_abs ((__v4sf)__A); + b = (__vector unsigned int) vec_abs ((__v4sf)__B); + c = (__vector unsigned int) vec_cmpgt (float_exp_mask, a); + d = (__vector unsigned int) vec_cmpgt (float_exp_mask, b); + return ((__m128 ) vec_and (c, d)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpunord_ps (__m128 __A, __m128 __B) +{ + __vector unsigned int a, b; + __vector unsigned int c, d; + static const __vector unsigned int float_exp_mask = + { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 }; + + a = (__vector unsigned int) vec_abs ((__v4sf)__A); + b = (__vector unsigned int) vec_abs ((__v4sf)__B); + c = (__vector unsigned int) vec_cmpgt (a, float_exp_mask); + d = (__vector unsigned int) vec_cmpgt (b, float_exp_mask); + return ((__m128 ) vec_or (c, d)); +} + +/* Perform a comparison on the lower SPFP values of A and B. If the + comparison is true, place a mask of all ones in the result, otherwise a + mask of zeros. The upper three SPFP values are passed through from A. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpeq_ss (__m128 __A, __m128 __B) +{ + static const __vector unsigned int mask = + { 0xffffffff, 0, 0, 0 }; + __v4sf a, b, c; + /* PowerISA VMX does not allow partial (for just element 0) + * results. So to insure we don't generate spurious exceptions + * (from the upper elements) we splat the lower float + * before we to the operation. */ + a = vec_splat ((__v4sf) __A, 0); + b = vec_splat ((__v4sf) __B, 0); + c = (__v4sf) vec_cmpeq(a, b); + /* Then we merge the lower float result with the original upper + * float elements from __A. */ + return ((__m128)vec_sel ((__v4sf)__A, c, mask)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmplt_ss (__m128 __A, __m128 __B) +{ + static const __vector unsigned int mask = + { 0xffffffff, 0, 0, 0 }; + __v4sf a, b, c; + /* PowerISA VMX does not allow partial (for just element 0) + * results. So to insure we don't generate spurious exceptions + * (from the upper elements) we splat the lower float + * before we to the operation. */ + a = vec_splat ((__v4sf) __A, 0); + b = vec_splat ((__v4sf) __B, 0); + c = (__v4sf) vec_cmplt(a, b); + /* Then we merge the lower float result with the original upper + * float elements from __A. */ + return ((__m128)vec_sel ((__v4sf)__A, c, mask)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmple_ss (__m128 __A, __m128 __B) +{ + static const __vector unsigned int mask = + { 0xffffffff, 0, 0, 0 }; + __v4sf a, b, c; + /* PowerISA VMX does not allow partial (for just element 0) + * results. So to insure we don't generate spurious exceptions + * (from the upper elements) we splat the lower float + * before we to the operation. 
*/ + a = vec_splat ((__v4sf) __A, 0); + b = vec_splat ((__v4sf) __B, 0); + c = (__v4sf) vec_cmple(a, b); + /* Then we merge the lower float result with the original upper + * float elements from __A. */ + return ((__m128)vec_sel ((__v4sf)__A, c, mask)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpgt_ss (__m128 __A, __m128 __B) +{ + static const __vector unsigned int mask = + { 0xffffffff, 0, 0, 0 }; + __v4sf a, b, c; + /* PowerISA VMX does not allow partial (for just element 0) + * results. So to insure we don't generate spurious exceptions + * (from the upper elements) we splat the lower float + * before we to the operation. */ + a = vec_splat ((__v4sf) __A, 0); + b = vec_splat ((__v4sf) __B, 0); + c = (__v4sf) vec_cmpgt(a, b); + /* Then we merge the lower float result with the original upper + * float elements from __A. */ + return ((__m128)vec_sel ((__v4sf)__A, c, mask)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpge_ss (__m128 __A, __m128 __B) +{ + static const __vector unsigned int mask = + { 0xffffffff, 0, 0, 0 }; + __v4sf a, b, c; + /* PowerISA VMX does not allow partial (for just element 0) + * results. So to insure we don't generate spurious exceptions + * (from the upper elements) we splat the lower float + * before we to the operation. */ + a = vec_splat ((__v4sf) __A, 0); + b = vec_splat ((__v4sf) __B, 0); + c = (__v4sf) vec_cmpge(a, b); + /* Then we merge the lower float result with the original upper + * float elements from __A. */ + return ((__m128)vec_sel ((__v4sf)__A, c, mask)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpneq_ss (__m128 __A, __m128 __B) +{ + static const __vector unsigned int mask = + { 0xffffffff, 0, 0, 0 }; + __v4sf a, b, c; + /* PowerISA VMX does not allow partial (for just element 0) + * results. So to insure we don't generate spurious exceptions + * (from the upper elements) we splat the lower float + * before we to the operation. */ + a = vec_splat ((__v4sf) __A, 0); + b = vec_splat ((__v4sf) __B, 0); + c = (__v4sf) vec_cmpeq(a, b); + c = vec_nor (c, c); + /* Then we merge the lower float result with the original upper + * float elements from __A. */ + return ((__m128)vec_sel ((__v4sf)__A, c, mask)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpnlt_ss (__m128 __A, __m128 __B) +{ + static const __vector unsigned int mask = + { 0xffffffff, 0, 0, 0 }; + __v4sf a, b, c; + /* PowerISA VMX does not allow partial (for just element 0) + * results. So to insure we don't generate spurious exceptions + * (from the upper elements) we splat the lower float + * before we to the operation. */ + a = vec_splat ((__v4sf) __A, 0); + b = vec_splat ((__v4sf) __B, 0); + c = (__v4sf) vec_cmpge(a, b); + /* Then we merge the lower float result with the original upper + * float elements from __A. */ + return ((__m128)vec_sel ((__v4sf)__A, c, mask)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpnle_ss (__m128 __A, __m128 __B) +{ + static const __vector unsigned int mask = + { 0xffffffff, 0, 0, 0 }; + __v4sf a, b, c; + /* PowerISA VMX does not allow partial (for just element 0) + * results. So to insure we don't generate spurious exceptions + * (from the upper elements) we splat the lower float + * before we to the operation. 
*/ + a = vec_splat ((__v4sf) __A, 0); + b = vec_splat ((__v4sf) __B, 0); + c = (__v4sf) vec_cmpgt(a, b); + /* Then we merge the lower float result with the original upper + * float elements from __A. */ + return ((__m128)vec_sel ((__v4sf)__A, c, mask)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpngt_ss (__m128 __A, __m128 __B) +{ + static const __vector unsigned int mask = + { 0xffffffff, 0, 0, 0 }; + __v4sf a, b, c; + /* PowerISA VMX does not allow partial (for just element 0) + * results. So to insure we don't generate spurious exceptions + * (from the upper elements) we splat the lower float + * before we to the operation. */ + a = vec_splat ((__v4sf) __A, 0); + b = vec_splat ((__v4sf) __B, 0); + c = (__v4sf) vec_cmple(a, b); + /* Then we merge the lower float result with the original upper + * float elements from __A. */ + return ((__m128)vec_sel ((__v4sf)__A, c, mask)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpnge_ss (__m128 __A, __m128 __B) +{ + static const __vector unsigned int mask = + { 0xffffffff, 0, 0, 0 }; + __v4sf a, b, c; + /* PowerISA VMX does not allow partial (for just element 0) + * results. So to insure we don't generate spurious exceptions + * (from the upper elements) we splat the lower float + * before we do the operation. */ + a = vec_splat ((__v4sf) __A, 0); + b = vec_splat ((__v4sf) __B, 0); + c = (__v4sf) vec_cmplt(a, b); + /* Then we merge the lower float result with the original upper + * float elements from __A. */ + return ((__m128)vec_sel ((__v4sf)__A, c, mask)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpord_ss (__m128 __A, __m128 __B) +{ + __vector unsigned int a, b; + __vector unsigned int c, d; + static const __vector unsigned int float_exp_mask = + { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 }; + static const __vector unsigned int mask = + { 0xffffffff, 0, 0, 0 }; + + a = (__vector unsigned int) vec_abs ((__v4sf)__A); + b = (__vector unsigned int) vec_abs ((__v4sf)__B); + c = (__vector unsigned int) vec_cmpgt (float_exp_mask, a); + d = (__vector unsigned int) vec_cmpgt (float_exp_mask, b); + c = vec_and (c, d); + /* Then we merge the lower float result with the original upper + * float elements from __A. */ + return ((__m128)vec_sel ((__v4sf)__A, (__v4sf)c, mask)); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cmpunord_ss (__m128 __A, __m128 __B) +{ + __vector unsigned int a, b; + __vector unsigned int c, d; + static const __vector unsigned int float_exp_mask = + { 0x7f800000, 0x7f800000, 0x7f800000, 0x7f800000 }; + static const __vector unsigned int mask = + { 0xffffffff, 0, 0, 0 }; + + a = (__vector unsigned int) vec_abs ((__v4sf)__A); + b = (__vector unsigned int) vec_abs ((__v4sf)__B); + c = (__vector unsigned int) vec_cmpgt (a, float_exp_mask); + d = (__vector unsigned int) vec_cmpgt (b, float_exp_mask); + c = vec_or (c, d); + /* Then we merge the lower float result with the original upper + * float elements from __A. */ + return ((__m128)vec_sel ((__v4sf)__A, (__v4sf)c, mask)); +} + +/* Compare the lower SPFP values of A and B and return 1 if true + and 0 if false. 
*/ +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comieq_ss (__m128 __A, __m128 __B) +{ + return (__A[0] == __B[0]); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comilt_ss (__m128 __A, __m128 __B) +{ + return (__A[0] < __B[0]); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comile_ss (__m128 __A, __m128 __B) +{ + return (__A[0] <= __B[0]); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comigt_ss (__m128 __A, __m128 __B) +{ + return (__A[0] > __B[0]); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comige_ss (__m128 __A, __m128 __B) +{ + return (__A[0] >= __B[0]); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_comineq_ss (__m128 __A, __m128 __B) +{ + return (__A[0] != __B[0]); +} + +/* FIXME + * The __mm_ucomi??_ss implementations below are exactly the same as + * __mm_comi??_ss because GCC for PowerPC only generates unordered + * compares (scalar and vector). + * Technically __mm_comieq_ss et al should be using the ordered + * compare and signal for QNaNs. + * The __mm_ucomieq_sd et all should be OK, as is. + */ +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ucomieq_ss (__m128 __A, __m128 __B) +{ + return (__A[0] == __B[0]); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ucomilt_ss (__m128 __A, __m128 __B) +{ + return (__A[0] < __B[0]); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ucomile_ss (__m128 __A, __m128 __B) +{ + return (__A[0] <= __B[0]); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ucomigt_ss (__m128 __A, __m128 __B) +{ + return (__A[0] > __B[0]); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ucomige_ss (__m128 __A, __m128 __B) +{ + return (__A[0] >= __B[0]); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_ucomineq_ss (__m128 __A, __m128 __B) +{ + return (__A[0] != __B[0]); +} + +extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtss_f32 (__m128 __A) +{ + return ((__v4sf)__A)[0]; +} + +/* Convert the lower SPFP value to a 32-bit integer according to the current + rounding mode. */ +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtss_si32 (__m128 __A) +{ + __m64 res = 0; +#ifdef _ARCH_PWR8 + double dtmp; + __asm__( +#ifdef __LITTLE_ENDIAN__ + "xxsldwi %x0,%x0,%x0,3;\n" +#endif + "xscvspdp %x2,%x0;\n" + "fctiw %2,%2;\n" + "mfvsrd %1,%x2;\n" + : "+wa" (__A), + "=r" (res), + "=f" (dtmp) + : ); +#else + res = __builtin_rint(__A[0]); +#endif + return (res); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_ss2si (__m128 __A) +{ + return _mm_cvtss_si32 (__A); +} + +/* Convert the lower SPFP value to a 32-bit integer according to the + current rounding mode. */ + +/* Intel intrinsic. 
*/ +extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtss_si64 (__m128 __A) +{ + __m64 res = 0; +#ifdef _ARCH_PWR8 + double dtmp; + __asm__( +#ifdef __LITTLE_ENDIAN__ + "xxsldwi %x0,%x0,%x0,3;\n" +#endif + "xscvspdp %x2,%x0;\n" + "fctid %2,%2;\n" + "mfvsrd %1,%x2;\n" + : "+wa" (__A), + "=r" (res), + "=f" (dtmp) + : ); +#else + res = __builtin_llrint(__A[0]); +#endif + return (res); +} + +/* Microsoft intrinsic. */ +extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtss_si64x (__m128 __A) +{ + return _mm_cvtss_si64 ((__v4sf) __A); +} + +/* Constants for use with _mm_prefetch. */ +enum _mm_hint +{ + /* _MM_HINT_ET is _MM_HINT_T with set 3rd bit. */ + _MM_HINT_ET0 = 7, + _MM_HINT_ET1 = 6, + _MM_HINT_T0 = 3, + _MM_HINT_T1 = 2, + _MM_HINT_T2 = 1, + _MM_HINT_NTA = 0 +}; + +/* Loads one cache line from address P to a location "closer" to the + processor. The selector I specifies the type of prefetch operation. */ +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_prefetch (const void *__P, enum _mm_hint __I) +{ + /* Current PowerPC will ignores the hint parameters. */ + __builtin_prefetch (__P); +} + +/* Convert the two lower SPFP values to 32-bit integers according to the + current rounding mode. Return the integers in packed form. */ +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtps_pi32 (__m128 __A) +{ + /* Splat two lower SPFP values to both halves. */ + __v4sf temp, rounded; + __vector unsigned long long result; + + /* Splat two lower SPFP values to both halves. */ + temp = (__v4sf) vec_splat ((__vector long long)__A, 0); + rounded = vec_rint(temp); + result = (__vector unsigned long long) vec_cts (rounded, 0); + + return (__m64) ((__vector long long) result)[0]; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_ps2pi (__m128 __A) +{ + return _mm_cvtps_pi32 (__A); +} + +/* Truncate the lower SPFP value to a 32-bit integer. */ +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttss_si32 (__m128 __A) +{ + /* Extract the lower float element. */ + float temp = __A[0]; + /* truncate to 32-bit integer and return. */ + return temp; +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtt_ss2si (__m128 __A) +{ + return _mm_cvttss_si32 (__A); +} + +/* Intel intrinsic. */ +extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttss_si64 (__m128 __A) +{ + /* Extract the lower float element. */ + float temp = __A[0]; + /* truncate to 32-bit integer and return. */ + return temp; +} + +/* Microsoft intrinsic. */ +extern __inline long long __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttss_si64x (__m128 __A) +{ + /* Extract the lower float element. */ + float temp = __A[0]; + /* truncate to 32-bit integer and return. */ + return temp; +} + +/* Truncate the two lower SPFP values to 32-bit integers. Return the + integers in packed form. */ +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvttps_pi32 (__m128 __A) +{ + __v4sf temp; + __vector unsigned long long result; + + /* Splat two lower SPFP values to both halves. 
*/ + temp = (__v4sf) vec_splat ((__vector long long)__A, 0); + result = (__vector unsigned long long) vec_cts (temp, 0); + + return (__m64) ((__vector long long) result)[0]; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtt_ps2pi (__m128 __A) +{ + return _mm_cvttps_pi32 (__A); +} + +/* Convert B to a SPFP value and insert it as element zero in A. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsi32_ss (__m128 __A, int __B) +{ + float temp = __B; + __A[0] = temp; + + return __A; +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_si2ss (__m128 __A, int __B) +{ + return _mm_cvtsi32_ss (__A, __B); +} + +/* Convert B to a SPFP value and insert it as element zero in A. */ +/* Intel intrinsic. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsi64_ss (__m128 __A, long long __B) +{ + float temp = __B; + __A[0] = temp; + + return __A; +} + +/* Microsoft intrinsic. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtsi64x_ss (__m128 __A, long long __B) +{ + return _mm_cvtsi64_ss (__A, __B); +} + +/* Convert the two 32-bit values in B to SPFP form and insert them + as the two lower elements in A. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtpi32_ps (__m128 __A, __m64 __B) +{ + __vector signed int vm1; + __vector float vf1; + + vm1 = (__vector signed int) (__vector unsigned long long) {__B, __B}; + vf1 = (__vector float) vec_ctf (vm1, 0); + + return ((__m128) (__vector unsigned long long) + { ((__vector unsigned long long)vf1) [0], + ((__vector unsigned long long)__A) [1]}); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvt_pi2ps (__m128 __A, __m64 __B) +{ + return _mm_cvtpi32_ps (__A, __B); +} + +/* Convert the four signed 16-bit values in A to SPFP form. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtpi16_ps (__m64 __A) +{ + __vector signed short vs8; + __vector signed int vi4; + __vector float vf1; + + vs8 = (__vector signed short) (__vector unsigned long long) { __A, __A }; + vi4 = vec_vupklsh (vs8); + vf1 = (__vector float) vec_ctf (vi4, 0); + + return (__m128) vf1; +} + +/* Convert the four unsigned 16-bit values in A to SPFP form. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtpu16_ps (__m64 __A) +{ + const __vector unsigned short zero = + { 0, 0, 0, 0, 0, 0, 0, 0 }; + __vector unsigned short vs8; + __vector unsigned int vi4; + __vector float vf1; + + vs8 = (__vector unsigned short) (__vector unsigned long long) { __A, __A }; + vi4 = (__vector unsigned int) vec_mergel +#ifdef __LITTLE_ENDIAN__ + (vs8, zero); +#else + (zero, vs8); +#endif + vf1 = (__vector float) vec_ctf (vi4, 0); + + return (__m128) vf1; +} + +/* Convert the low four signed 8-bit values in A to SPFP form. 
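+   The bytes are sign-extended to halfwords and then to words (vec_vupkhsb,
+   vec_vupkhsh) before the integer-to-float conversion.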
*/ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtpi8_ps (__m64 __A) +{ + __vector signed char vc16; + __vector signed short vs8; + __vector signed int vi4; + __vector float vf1; + + vc16 = (__vector signed char) (__vector unsigned long long) { __A, __A }; + vs8 = vec_vupkhsb (vc16); + vi4 = vec_vupkhsh (vs8); + vf1 = (__vector float) vec_ctf (vi4, 0); + + return (__m128) vf1; +} + +/* Convert the low four unsigned 8-bit values in A to SPFP form. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + +_mm_cvtpu8_ps (__m64 __A) +{ + const __vector unsigned char zero = + { 0, 0, 0, 0, 0, 0, 0, 0 }; + __vector unsigned char vc16; + __vector unsigned short vs8; + __vector unsigned int vi4; + __vector float vf1; + + vc16 = (__vector unsigned char) (__vector unsigned long long) { __A, __A }; +#ifdef __LITTLE_ENDIAN__ + vs8 = (__vector unsigned short) vec_mergel (vc16, zero); + vi4 = (__vector unsigned int) vec_mergeh (vs8, + (__vector unsigned short) zero); +#else + vs8 = (__vector unsigned short) vec_mergel (zero, vc16); + vi4 = (__vector unsigned int) vec_mergeh ((__vector unsigned short) zero, + vs8); +#endif + vf1 = (__vector float) vec_ctf (vi4, 0); + + return (__m128) vf1; +} + +/* Convert the four signed 32-bit values in A and B to SPFP form. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtpi32x2_ps (__m64 __A, __m64 __B) +{ + __vector signed int vi4; + __vector float vf4; + + vi4 = (__vector signed int) (__vector unsigned long long) { __A, __B }; + vf4 = (__vector float) vec_ctf (vi4, 0); + return (__m128) vf4; +} + +/* Convert the four SPFP values in A to four signed 16-bit integers. */ +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtps_pi16 (__m128 __A) +{ + __v4sf rounded; + __vector signed int temp; + __vector unsigned long long result; + + rounded = vec_rint(__A); + temp = vec_cts (rounded, 0); + result = (__vector unsigned long long) vec_pack (temp, temp); + + return (__m64) ((__vector long long) result)[0]; +} + +/* Convert the four SPFP values in A to four signed 8-bit integers. */ +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_cvtps_pi8 (__m128 __A) +{ + __v4sf rounded; + __vector signed int tmp_i; + static const __vector signed int zero = {0, 0, 0, 0}; + __vector signed short tmp_s; + __vector signed char res_v; + + rounded = vec_rint(__A); + tmp_i = vec_cts (rounded, 0); + tmp_s = vec_pack (tmp_i, zero); + res_v = vec_pack (tmp_s, tmp_s); + return (__m64) ((__vector long long) res_v)[0]; +} + +/* Selects four specific SPFP values from A and B based on MASK. 
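+   Each two-bit field of MASK selects one element: the low two fields index A
+   for the low half of the result, the high two fields index B for the high
+   half (the 0x10101010 offsets steer vec_perm into the second operand).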
*/ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + +_mm_shuffle_ps (__m128 __A, __m128 __B, int const __mask) +{ + unsigned long element_selector_10 = __mask & 0x03; + unsigned long element_selector_32 = (__mask >> 2) & 0x03; + unsigned long element_selector_54 = (__mask >> 4) & 0x03; + unsigned long element_selector_76 = (__mask >> 6) & 0x03; + static const unsigned int permute_selectors[4] = + { +#ifdef __LITTLE_ENDIAN__ + 0x03020100, 0x07060504, 0x0B0A0908, 0x0F0E0D0C +#else + 0x00010203, 0x04050607, 0x08090A0B, 0x0C0D0E0F +#endif + }; + __vector unsigned int t; + + t[0] = permute_selectors[element_selector_10]; + t[1] = permute_selectors[element_selector_32]; + t[2] = permute_selectors[element_selector_54] + 0x10101010; + t[3] = permute_selectors[element_selector_76] + 0x10101010; + return vec_perm ((__v4sf) __A, (__v4sf)__B, (__vector unsigned char)t); +} + +/* Selects and interleaves the upper two SPFP values from A and B. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_unpackhi_ps (__m128 __A, __m128 __B) +{ + return (__m128) vec_vmrglw ((__v4sf) __A, (__v4sf)__B); +} + +/* Selects and interleaves the lower two SPFP values from A and B. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_unpacklo_ps (__m128 __A, __m128 __B) +{ + return (__m128) vec_vmrghw ((__v4sf) __A, (__v4sf)__B); +} + +/* Sets the upper two SPFP values with 64-bits of data loaded from P; + the lower two values are passed through from A. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loadh_pi (__m128 __A, __m64 const *__P) +{ + __vector unsigned long long __a = (__vector unsigned long long)__A; + __vector unsigned long long __p = vec_splats(*__P); + __a [1] = __p [1]; + + return (__m128)__a; +} + +/* Stores the upper two SPFP values of A into P. */ +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storeh_pi (__m64 *__P, __m128 __A) +{ + __vector unsigned long long __a = (__vector unsigned long long) __A; + + *__P = __a[1]; +} + +/* Moves the upper two values of B into the lower two values of A. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_movehl_ps (__m128 __A, __m128 __B) +{ + return (__m128) vec_mergel ((__vector unsigned long long)__B, + (__vector unsigned long long)__A); +} + +/* Moves the lower two values of B into the upper two values of A. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_movelh_ps (__m128 __A, __m128 __B) +{ + return (__m128) vec_mergeh ((__vector unsigned long long)__A, + (__vector unsigned long long)__B); +} + +/* Sets the lower two SPFP values with 64-bits of data loaded from P; + the upper two values are passed through from A. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_loadl_pi (__m128 __A, __m64 const *__P) +{ + __vector unsigned long long __a = (__vector unsigned long long)__A; + __vector unsigned long long __p = vec_splats(*__P); + __a [0] = __p [0]; + + return (__m128)__a; +} + +/* Stores the lower two SPFP values of A into P. 
*/ +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_storel_pi (__m64 *__P, __m128 __A) +{ + __vector unsigned long long __a = (__vector unsigned long long) __A; + + *__P = __a[0]; +} + +#ifdef _ARCH_PWR8 +/* Intrinsic functions that require PowerISA 2.07 minimum. */ + +/* Creates a 4-bit mask from the most significant bits of the SPFP values. */ +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_movemask_ps (__m128 __A) +{ + __vector unsigned long long result; + static const __vector unsigned int perm_mask = + { +#ifdef __LITTLE_ENDIAN__ + 0x00204060, 0x80808080, 0x80808080, 0x80808080 +#else + 0x80808080, 0x80808080, 0x80808080, 0x00204060 +#endif + }; + + result = ((__vector unsigned long long) + vec_vbpermq ((__vector unsigned char) __A, + (__vector unsigned char) perm_mask)); + +#ifdef __LITTLE_ENDIAN__ + return result[1]; +#else + return result[0]; +#endif +} +#endif /* _ARCH_PWR8 */ + +/* Create a vector with all four elements equal to *P. */ +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_load1_ps (float const *__P) +{ + return _mm_set1_ps (*__P); +} + +extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_load_ps1 (float const *__P) +{ + return _mm_load1_ps (__P); +} + +/* Extracts one of the four words of A. The selector N must be immediate. */ +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_extract_pi16 (__m64 const __A, int const __N) +{ + unsigned int shiftr = __N & 3; +#ifdef __BIG_ENDIAN__ + shiftr = 3 - shiftr; +#endif + + return ((__A >> (shiftr * 16)) & 0xffff); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_m_pextrw (__m64 const __A, int const __N) +{ + return _mm_extract_pi16 (__A, __N); +} + +/* Inserts word D into one of four words of A. The selector N must be + immediate. */ +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_insert_pi16 (__m64 const __A, int const __D, int const __N) +{ + const int shiftl = (__N & 3) * 16; + const __m64 shiftD = (const __m64) __D << shiftl; + const __m64 mask = 0xffffUL << shiftl; + __m64 result = (__A & (~mask)) | (shiftD & mask); + + return (result); +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_m_pinsrw (__m64 const __A, int const __D, int const __N) +{ + return _mm_insert_pi16 (__A, __D, __N); +} + +/* Compute the element-wise maximum of signed 16-bit values. */ +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + +_mm_max_pi16 (__m64 __A, __m64 __B) +{ +#if _ARCH_PWR8 + __vector signed short a, b, r; + __vector __bool short c; + + a = (__vector signed short)vec_splats (__A); + b = (__vector signed short)vec_splats (__B); + c = (__vector __bool short)vec_cmpgt (a, b); + r = vec_sel (b, a, c); + return (__m64) ((__vector long long) r)[0]; +#else + __m64_union m1, m2, res; + + m1.as_m64 = __A; + m2.as_m64 = __B; + + res.as_short[0] = + (m1.as_short[0] > m2.as_short[0]) ? m1.as_short[0] : m2.as_short[0]; + res.as_short[1] = + (m1.as_short[1] > m2.as_short[1]) ? m1.as_short[1] : m2.as_short[1]; + res.as_short[2] = + (m1.as_short[2] > m2.as_short[2]) ? m1.as_short[2] : m2.as_short[2]; + res.as_short[3] = + (m1.as_short[3] > m2.as_short[3]) ? 
m1.as_short[3] : m2.as_short[3]; + + return (__m64) res.as_m64; +#endif +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_m_pmaxsw (__m64 __A, __m64 __B) +{ + return _mm_max_pi16 (__A, __B); +} + +/* Compute the element-wise maximum of unsigned 8-bit values. */ +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_max_pu8 (__m64 __A, __m64 __B) +{ +#if _ARCH_PWR8 + __vector unsigned char a, b, r; + __vector __bool char c; + + a = (__vector unsigned char)vec_splats (__A); + b = (__vector unsigned char)vec_splats (__B); + c = (__vector __bool char)vec_cmpgt (a, b); + r = vec_sel (b, a, c); + return (__m64) ((__vector long long) r)[0]; +#else + __m64_union m1, m2, res; + long i; + + m1.as_m64 = __A; + m2.as_m64 = __B; + + + for (i = 0; i < 8; i++) + res.as_char[i] = + ((unsigned char) m1.as_char[i] > (unsigned char) m2.as_char[i]) ? + m1.as_char[i] : m2.as_char[i]; + + return (__m64) res.as_m64; +#endif +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_m_pmaxub (__m64 __A, __m64 __B) +{ + return _mm_max_pu8 (__A, __B); +} + +/* Compute the element-wise minimum of signed 16-bit values. */ +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_min_pi16 (__m64 __A, __m64 __B) +{ +#if _ARCH_PWR8 + __vector signed short a, b, r; + __vector __bool short c; + + a = (__vector signed short)vec_splats (__A); + b = (__vector signed short)vec_splats (__B); + c = (__vector __bool short)vec_cmplt (a, b); + r = vec_sel (b, a, c); + return (__m64) ((__vector long long) r)[0]; +#else + __m64_union m1, m2, res; + + m1.as_m64 = __A; + m2.as_m64 = __B; + + res.as_short[0] = + (m1.as_short[0] < m2.as_short[0]) ? m1.as_short[0] : m2.as_short[0]; + res.as_short[1] = + (m1.as_short[1] < m2.as_short[1]) ? m1.as_short[1] : m2.as_short[1]; + res.as_short[2] = + (m1.as_short[2] < m2.as_short[2]) ? m1.as_short[2] : m2.as_short[2]; + res.as_short[3] = + (m1.as_short[3] < m2.as_short[3]) ? m1.as_short[3] : m2.as_short[3]; + + return (__m64) res.as_m64; +#endif +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_m_pminsw (__m64 __A, __m64 __B) +{ + return _mm_min_pi16 (__A, __B); +} + +/* Compute the element-wise minimum of unsigned 8-bit values. */ +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_min_pu8 (__m64 __A, __m64 __B) +{ +#if _ARCH_PWR8 + __vector unsigned char a, b, r; + __vector __bool char c; + + a = (__vector unsigned char)vec_splats (__A); + b = (__vector unsigned char)vec_splats (__B); + c = (__vector __bool char)vec_cmplt (a, b); + r = vec_sel (b, a, c); + return (__m64) ((__vector long long) r)[0]; +#else + __m64_union m1, m2, res; + long i; + + m1.as_m64 = __A; + m2.as_m64 = __B; + + + for (i = 0; i < 8; i++) + res.as_char[i] = + ((unsigned char) m1.as_char[i] < (unsigned char) m2.as_char[i]) ? + m1.as_char[i] : m2.as_char[i]; + + return (__m64) res.as_m64; +#endif +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_m_pminub (__m64 __A, __m64 __B) +{ + return _mm_min_pu8 (__A, __B); +} + +/* Create an 8-bit mask of the signs of 8-bit values. 
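+   Each byte of the permute control below holds the bit index of one byte's
+   sign bit, so __builtin_bpermd gathers the eight sign bits into the low
+   byte of the result.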
*/ +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_movemask_pi8 (__m64 __A) +{ + unsigned long long p = +#ifdef __LITTLE_ENDIAN__ + 0x0008101820283038UL; // permute control for sign bits +#else + 0x3830282018100800UL; // permute control for sign bits +#endif + return __builtin_bpermd (p, __A); +} + +extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_m_pmovmskb (__m64 __A) +{ + return _mm_movemask_pi8 (__A); +} + +/* Multiply four unsigned 16-bit values in A by four unsigned 16-bit values + in B and produce the high 16 bits of the 32-bit results. */ +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_mulhi_pu16 (__m64 __A, __m64 __B) +{ + __vector unsigned short a, b; + __vector unsigned short c; + __vector unsigned int w0, w1; + __vector unsigned char xform1 = { +#ifdef __LITTLE_ENDIAN__ + 0x02, 0x03, 0x12, 0x13, 0x06, 0x07, 0x16, 0x17, + 0x0A, 0x0B, 0x1A, 0x1B, 0x0E, 0x0F, 0x1E, 0x1F +#else + 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15, + 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15 +#endif + }; + + a = (__vector unsigned short)vec_splats (__A); + b = (__vector unsigned short)vec_splats (__B); + + w0 = vec_vmuleuh (a, b); + w1 = vec_vmulouh (a, b); + c = (__vector unsigned short)vec_perm (w0, w1, xform1); + + return (__m64) ((__vector long long) c)[0]; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_m_pmulhuw (__m64 __A, __m64 __B) +{ + return _mm_mulhi_pu16 (__A, __B); +} + +/* Return a combination of the four 16-bit values in A. The selector + must be an immediate. */ +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_shuffle_pi16 (__m64 __A, int const __N) +{ + unsigned long element_selector_10 = __N & 0x03; + unsigned long element_selector_32 = (__N >> 2) & 0x03; + unsigned long element_selector_54 = (__N >> 4) & 0x03; + unsigned long element_selector_76 = (__N >> 6) & 0x03; + static const unsigned short permute_selectors[4] = + { +#ifdef __LITTLE_ENDIAN__ + 0x0908, 0x0B0A, 0x0D0C, 0x0F0E +#else + 0x0607, 0x0405, 0x0203, 0x0001 +#endif + }; + __m64_union t; + __vector unsigned long long a, p, r; + +#ifdef __LITTLE_ENDIAN__ + t.as_short[0] = permute_selectors[element_selector_10]; + t.as_short[1] = permute_selectors[element_selector_32]; + t.as_short[2] = permute_selectors[element_selector_54]; + t.as_short[3] = permute_selectors[element_selector_76]; +#else + t.as_short[3] = permute_selectors[element_selector_10]; + t.as_short[2] = permute_selectors[element_selector_32]; + t.as_short[1] = permute_selectors[element_selector_54]; + t.as_short[0] = permute_selectors[element_selector_76]; +#endif + p = vec_splats (t.as_m64); + a = vec_splats (__A); + r = vec_perm (a, a, (__vector unsigned char)p); + return (__m64) ((__vector long long) r)[0]; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_m_pshufw (__m64 __A, int const __N) +{ + return _mm_shuffle_pi16 (__A, __N); +} + +/* Conditionally store byte elements of A into P. The high bit of each + byte in the selector N determines whether the corresponding byte from + A is stored. 
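+   Implemented as a read-modify-write of the destination: selected bytes are
+   taken from A and the remaining bytes keep their current memory contents.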
*/ +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_maskmove_si64 (__m64 __A, __m64 __N, char *__P) +{ + __m64 hibit = 0x8080808080808080UL; + __m64 mask, tmp; + __m64 *p = (__m64*)__P; + + tmp = *p; + mask = _mm_cmpeq_pi8 ((__N & hibit), hibit); + tmp = (tmp & (~mask)) | (__A & mask); + *p = tmp; +} + +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_m_maskmovq (__m64 __A, __m64 __N, char *__P) +{ + _mm_maskmove_si64 (__A, __N, __P); +} + +/* Compute the rounded averages of the unsigned 8-bit values in A and B. */ +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_avg_pu8 (__m64 __A, __m64 __B) +{ + __vector unsigned char a, b, c; + + a = (__vector unsigned char)vec_splats (__A); + b = (__vector unsigned char)vec_splats (__B); + c = vec_avg (a, b); + return (__m64) ((__vector long long) c)[0]; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_m_pavgb (__m64 __A, __m64 __B) +{ + return _mm_avg_pu8 (__A, __B); +} + +/* Compute the rounded averages of the unsigned 16-bit values in A and B. */ +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_avg_pu16 (__m64 __A, __m64 __B) +{ + __vector unsigned short a, b, c; + + a = (__vector unsigned short)vec_splats (__A); + b = (__vector unsigned short)vec_splats (__B); + c = vec_avg (a, b); + return (__m64) ((__vector long long) c)[0]; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_m_pavgw (__m64 __A, __m64 __B) +{ + return _mm_avg_pu16 (__A, __B); +} + +/* Compute the sum of the absolute differences of the unsigned 8-bit + values in A and B. Return the value in the lower 16-bit word; the + upper words are cleared. */ +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sad_pu8 (__m64 __A, __m64 __B) +{ + __vector unsigned char a, b; + __vector unsigned char vmin, vmax, vabsdiff; + __vector signed int vsum; + const __vector unsigned int zero = + { 0, 0, 0, 0 }; + __m64_union result = {0}; + + a = (__vector unsigned char) (__vector unsigned long long) { 0UL, __A }; + b = (__vector unsigned char) (__vector unsigned long long) { 0UL, __B }; + vmin = vec_min (a, b); + vmax = vec_max (a, b); + vabsdiff = vec_sub (vmax, vmin); + /* Sum four groups of bytes into integers. */ + vsum = (__vector signed int) vec_sum4s (vabsdiff, zero); + /* Sum across four integers with integer result. */ + vsum = vec_sums (vsum, (__vector signed int) zero); + /* The sum is in the right most 32-bits of the vector result. + Transfer to a GPR and truncate to 16 bits. */ + result.as_short[0] = vsum[3]; + return result.as_m64; +} + +extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_m_psadbw (__m64 __A, __m64 __B) +{ + return _mm_sad_pu8 (__A, __B); +} + +/* Stores the data in A to the address P without polluting the caches. */ +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_stream_pi (__m64 *__P, __m64 __A) +{ + /* Use the data cache block touch for store transient. */ + __asm__ ( + " dcbtstt 0,%0" + : + : "b" (__P) + : "memory" + ); + *__P = __A; +} + +/* Likewise. The address must be 16-byte aligned. 
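+   A dcbtstt hint marks the destination cache block as a transient store
+   target before the ordinary aligned vector store.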
*/ +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_stream_ps (float *__P, __m128 __A) +{ + /* Use the data cache block touch for store transient. */ + __asm__ ( + " dcbtstt 0,%0" + : + : "b" (__P) + : "memory" + ); + _mm_store_ps (__P, __A); +} + +/* Guarantees that every preceding store is globally visible before + any subsequent store. */ +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_sfence (void) +{ + /* Generate a light weight sync. */ + __atomic_thread_fence (__ATOMIC_RELEASE); +} + +/* The execution of the next instruction is delayed by an implementation + specific amount of time. The instruction does not modify the + architectural state. This is after the pop_options pragma because + it does not require SSE support in the processor--the encoding is a + nop on processors that do not support it. */ +extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) +_mm_pause (void) +{ + /* There is no exact match with this construct, but the following is + close to the desired effect. */ +#if _ARCH_PWR8 + /* On power8 and later processors we can depend on Program Priority + (PRI) and associated "very low" PPI setting. Since we don't know + what PPI this thread is running at we: 1) save the current PRI + from the PPR SPR into a local GRP, 2) set the PRI to "very low* + via the special or 31,31,31 encoding. 3) issue an "isync" to + insure the PRI change takes effect before we execute any more + instructions. + Now we can execute a lwsync (release barrier) while we execute + this thread at "very low" PRI. Finally we restore the original + PRI and continue execution. */ + unsigned long __PPR; + + __asm__ volatile ( + " mfppr %0;" + " or 31,31,31;" + " isync;" + " lwsync;" + " isync;" + " mtppr %0;" + : "=r" (__PPR) + : + : "memory" + ); +#else + /* For older processor where we may not even have Program Priority + controls we can only depend on Heavy Weight Sync. */ + __atomic_thread_fence (__ATOMIC_SEQ_CST); +#endif +} + +/* Transpose the 4x4 matrix composed of row[0-3]. */ +#define _MM_TRANSPOSE4_PS(row0, row1, row2, row3) \ +do { \ + __v4sf __r0 = (row0), __r1 = (row1), __r2 = (row2), __r3 = (row3); \ + __v4sf __t0 = vec_vmrghw (__r0, __r1); \ + __v4sf __t1 = vec_vmrghw (__r2, __r3); \ + __v4sf __t2 = vec_vmrglw (__r0, __r1); \ + __v4sf __t3 = vec_vmrglw (__r2, __r3); \ + (row0) = (__v4sf)vec_mergeh ((__vector long long)__t0, \ + (__vector long long)__t1); \ + (row1) = (__v4sf)vec_mergel ((__vector long long)__t0, \ + (__vector long long)__t1); \ + (row2) = (__v4sf)vec_mergeh ((__vector long long)__t2, \ + (__vector long long)__t3); \ + (row3) = (__v4sf)vec_mergel ((__vector long long)__t2, \ + (__vector long long)__t3); \ +} while (0) + +/* For backward source compatibility. 
*/ +//# include + +#endif /* _XMMINTRIN_H_INCLUDED */ diff --git a/lib/include/prfchwintrin.h b/lib/include/prfchwintrin.h index 70851396f4..6e8a4ef2ec 100644 --- a/lib/include/prfchwintrin.h +++ b/lib/include/prfchwintrin.h @@ -1,22 +1,8 @@ /*===---- prfchwintrin.h - PREFETCHW intrinsic -----------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/ptwriteintrin.h b/lib/include/ptwriteintrin.h index 1bb1df0a2e..0a04f7c1df 100644 --- a/lib/include/ptwriteintrin.h +++ b/lib/include/ptwriteintrin.h @@ -1,22 +1,8 @@ /*===------------ ptwriteintrin.h - PTWRITE intrinsic --------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/rdseedintrin.h b/lib/include/rdseedintrin.h index 419466932c..ccb3d2dd22 100644 --- a/lib/include/rdseedintrin.h +++ b/lib/include/rdseedintrin.h @@ -1,22 +1,8 @@ /*===---- rdseedintrin.h - RDSEED intrinsics -------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/rtmintrin.h b/lib/include/rtmintrin.h index e6a58d743b..36ff583517 100644 --- a/lib/include/rtmintrin.h +++ b/lib/include/rtmintrin.h @@ -1,22 +1,8 @@ /*===---- rtmintrin.h - RTM intrinsics -------------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/s390intrin.h b/lib/include/s390intrin.h index d51274c07d..73a915c233 100644 --- a/lib/include/s390intrin.h +++ b/lib/include/s390intrin.h @@ -1,22 +1,8 @@ /*===---- s390intrin.h - SystemZ intrinsics --------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/sgxintrin.h b/lib/include/sgxintrin.h index 20aee76610..303a21f6b2 100644 --- a/lib/include/sgxintrin.h +++ b/lib/include/sgxintrin.h @@ -1,22 +1,8 @@ /*===---- sgxintrin.h - X86 SGX intrinsics configuration -------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -28,6 +14,8 @@ #ifndef __SGXINTRIN_H #define __SGXINTRIN_H +#if __has_extension(gnu_asm) + /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("sgx"))) @@ -67,4 +55,6 @@ _enclv_u32(unsigned int __leaf, __SIZE_TYPE__ __d[]) #undef __DEFAULT_FN_ATTRS +#endif /* __has_extension(gnu_asm) */ + #endif diff --git a/lib/include/shaintrin.h b/lib/include/shaintrin.h index 3df4718ced..08b1fb1dc1 100644 --- a/lib/include/shaintrin.h +++ b/lib/include/shaintrin.h @@ -1,22 +1,8 @@ /*===---- shaintrin.h - SHA intrinsics -------------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/smmintrin.h b/lib/include/smmintrin.h index 4806b3e4e1..025830a742 100644 --- a/lib/include/smmintrin.h +++ b/lib/include/smmintrin.h @@ -1,22 +1,8 @@ /*===---- smmintrin.h - SSE4 intrinsics ------------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/stdalign.h b/lib/include/stdalign.h index 3738d1284f..6ad25db453 100644 --- a/lib/include/stdalign.h +++ b/lib/include/stdalign.h @@ -1,22 +1,8 @@ /*===---- stdalign.h - Standard header for alignment ------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/stdarg.h b/lib/include/stdarg.h index 101426fff1..0bc39408c1 100644 --- a/lib/include/stdarg.h +++ b/lib/include/stdarg.h @@ -1,24 +1,8 @@ /*===---- stdarg.h - Variable argument handling ----------------------------=== * - * Copyright (c) 2008 Eli Friedman - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/stdatomic.h b/lib/include/stdatomic.h index b4845a74e4..665551ea69 100644 --- a/lib/include/stdatomic.h +++ b/lib/include/stdatomic.h @@ -1,22 +1,8 @@ /*===---- stdatomic.h - Standard header for atomic types and operations -----=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/stdbool.h b/lib/include/stdbool.h index 5cb66b55d0..2525363dd0 100644 --- a/lib/include/stdbool.h +++ b/lib/include/stdbool.h @@ -1,24 +1,8 @@ /*===---- stdbool.h - Standard header for booleans -------------------------=== * - * Copyright (c) 2008 Eli Friedman - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/stddef.h b/lib/include/stddef.h index 7354996711..15acd4427c 100644 --- a/lib/include/stddef.h +++ b/lib/include/stddef.h @@ -1,24 +1,8 @@ /*===---- stddef.h - Basic type definitions --------------------------------=== * - * Copyright (c) 2008 Eli Friedman - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/stdint.h b/lib/include/stdint.h index 0afcca3a9d..192f653e95 100644 --- a/lib/include/stdint.h +++ b/lib/include/stdint.h @@ -1,29 +1,18 @@ /*===---- stdint.h - Standard header for sized integer types --------------===*\ * - * Copyright (c) 2009 Chris Lattner - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * \*===----------------------------------------------------------------------===*/ #ifndef __CLANG_STDINT_H +// AIX system headers need stdint.h to be re-enterable while _STD_TYPES_T +// is defined until an inclusion of it without _STD_TYPES_T occurs, in which +// case the header guard macro is defined. +#if !defined(_AIX) || !defined(_STD_TYPES_T) || !defined(__STDC_HOSTED__) #define __CLANG_STDINT_H +#endif /* If we're hosted, fall back to the system's stdint.h, which might have * additional definitions. diff --git a/lib/include/stdnoreturn.h b/lib/include/stdnoreturn.h index a7a301d7e0..e83cd81537 100644 --- a/lib/include/stdnoreturn.h +++ b/lib/include/stdnoreturn.h @@ -1,22 +1,8 @@ /*===---- stdnoreturn.h - Standard header for noreturn macro ---------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/tbmintrin.h b/lib/include/tbmintrin.h index 1d0d746a82..f4e848a1c0 100644 --- a/lib/include/tbmintrin.h +++ b/lib/include/tbmintrin.h @@ -1,22 +1,8 @@ /*===---- tbmintrin.h - TBM intrinsics -------------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/tgmath.h b/lib/include/tgmath.h index 34e26dcc05..7acf18b9dd 100644 --- a/lib/include/tgmath.h +++ b/lib/include/tgmath.h @@ -1,24 +1,8 @@ /*===---- tgmath.h - Standard header for type generic math ----------------===*\ * - * Copyright (c) 2009 Howard Hinnant - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * \*===----------------------------------------------------------------------===*/ diff --git a/lib/include/tmmintrin.h b/lib/include/tmmintrin.h index 734cd391be..35533e115c 100644 --- a/lib/include/tmmintrin.h +++ b/lib/include/tmmintrin.h @@ -1,22 +1,8 @@ /*===---- tmmintrin.h - SSSE3 intrinsics -----------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/unwind.h b/lib/include/unwind.h index 0e8317e5b9..029524b7bc 100644 --- a/lib/include/unwind.h +++ b/lib/include/unwind.h @@ -1,22 +1,8 @@ /*===---- unwind.h - Stack unwinding ----------------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -66,8 +52,8 @@ extern "C" { #pragma GCC visibility push(default) #endif -typedef uintptr_t _Unwind_Word; -typedef intptr_t _Unwind_Sword; +typedef uintptr_t _Unwind_Word __attribute__((__mode__(__unwind_word__))); +typedef intptr_t _Unwind_Sword __attribute__((__mode__(__unwind_word__))); typedef uintptr_t _Unwind_Ptr; typedef uintptr_t _Unwind_Internal_Ptr; typedef uint64_t _Unwind_Exception_Class; diff --git a/lib/include/vadefs.h b/lib/include/vadefs.h index 7fe9a74e3f..b617568446 100644 --- a/lib/include/vadefs.h +++ b/lib/include/vadefs.h @@ -1,22 +1,8 @@ /* ===-------- vadefs.h ---------------------------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/vaesintrin.h b/lib/include/vaesintrin.h index e4174bb82f..c4d5c3e751 100644 --- a/lib/include/vaesintrin.h +++ b/lib/include/vaesintrin.h @@ -1,23 +1,9 @@ /*===------------------ vaesintrin.h - VAES intrinsics ---------------------=== * * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/varargs.h b/lib/include/varargs.h index b5477d0a6a..d241b7de3c 100644 --- a/lib/include/varargs.h +++ b/lib/include/varargs.h @@ -1,22 +1,8 @@ /*===---- varargs.h - Variable argument handling -------------------------------------=== * -* Permission is hereby granted, free of charge, to any person obtaining a copy -* of this software and associated documentation files (the "Software"), to deal -* in the Software without restriction, including without limitation the rights -* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -* copies of the Software, and to permit persons to whom the Software is -* furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in -* all copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -* THE SOFTWARE. +* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +* See https://llvm.org/LICENSE.txt for license information. +* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/vecintrin.h b/lib/include/vecintrin.h index e627389838..c71b76a3ee 100644 --- a/lib/include/vecintrin.h +++ b/lib/include/vecintrin.h @@ -1,22 +1,8 @@ /*===---- vecintrin.h - Vector intrinsics ----------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -531,6 +517,141 @@ vec_bperm_u128(vector unsigned char __a, vector unsigned char __b) { } #endif +/*-- vec_revb ---------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed short +vec_revb(vector signed short __vec) { + return (vector signed short) + __builtin_s390_vlbrh((vector unsigned short)__vec); +} + +static inline __ATTRS_o_ai vector unsigned short +vec_revb(vector unsigned short __vec) { + return __builtin_s390_vlbrh(__vec); +} + +static inline __ATTRS_o_ai vector signed int +vec_revb(vector signed int __vec) { + return (vector signed int) + __builtin_s390_vlbrf((vector unsigned int)__vec); +} + +static inline __ATTRS_o_ai vector unsigned int +vec_revb(vector unsigned int __vec) { + return __builtin_s390_vlbrf(__vec); +} + +static inline __ATTRS_o_ai vector signed long long +vec_revb(vector signed long long __vec) { + return (vector signed long long) + __builtin_s390_vlbrg((vector unsigned long long)__vec); +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_revb(vector unsigned long long __vec) { + return __builtin_s390_vlbrg(__vec); +} + +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_revb(vector float __vec) { + return (vector float) + __builtin_s390_vlbrf((vector unsigned int)__vec); +} +#endif + +static inline __ATTRS_o_ai vector double +vec_revb(vector double __vec) { + return (vector double) + __builtin_s390_vlbrg((vector unsigned long long)__vec); +} + +/*-- vec_reve ---------------------------------------------------------------*/ + +static inline __ATTRS_o_ai vector signed char +vec_reve(vector signed char __vec) { + return (vector signed char) { __vec[15], __vec[14], __vec[13], __vec[12], + __vec[11], __vec[10], __vec[9], __vec[8], + __vec[7], __vec[6], __vec[5], __vec[4], + __vec[3], __vec[2], __vec[1], __vec[0] }; +} + +static inline __ATTRS_o_ai vector unsigned char +vec_reve(vector unsigned char __vec) { + return (vector unsigned char) { __vec[15], __vec[14], __vec[13], __vec[12], + __vec[11], __vec[10], __vec[9], __vec[8], + __vec[7], __vec[6], __vec[5], __vec[4], + __vec[3], __vec[2], __vec[1], __vec[0] }; +} + +static inline __ATTRS_o_ai vector bool char +vec_reve(vector bool char __vec) { + return (vector bool char) { __vec[15], __vec[14], __vec[13], __vec[12], + __vec[11], __vec[10], __vec[9], __vec[8], + __vec[7], __vec[6], __vec[5], __vec[4], + __vec[3], __vec[2], __vec[1], __vec[0] }; +} + +static inline __ATTRS_o_ai vector signed short +vec_reve(vector signed short __vec) { + return (vector signed short) { __vec[7], __vec[6], __vec[5], __vec[4], + __vec[3], __vec[2], __vec[1], __vec[0] }; +} + +static inline __ATTRS_o_ai vector unsigned short +vec_reve(vector unsigned short __vec) { + return (vector unsigned short) { __vec[7], __vec[6], __vec[5], __vec[4], + __vec[3], __vec[2], __vec[1], __vec[0] }; +} + +static inline __ATTRS_o_ai vector bool short +vec_reve(vector bool short __vec) { + return (vector bool short) { __vec[7], __vec[6], __vec[5], __vec[4], + __vec[3], __vec[2], __vec[1], __vec[0] }; +} + +static inline __ATTRS_o_ai vector signed int +vec_reve(vector signed int __vec) { + return (vector signed int) { __vec[3], __vec[2], __vec[1], __vec[0] }; +} + +static inline __ATTRS_o_ai vector unsigned int +vec_reve(vector unsigned int __vec) { + return (vector unsigned int) { __vec[3], __vec[2], __vec[1], __vec[0] }; +} + +static 
inline __ATTRS_o_ai vector bool int +vec_reve(vector bool int __vec) { + return (vector bool int) { __vec[3], __vec[2], __vec[1], __vec[0] }; +} + +static inline __ATTRS_o_ai vector signed long long +vec_reve(vector signed long long __vec) { + return (vector signed long long) { __vec[1], __vec[0] }; +} + +static inline __ATTRS_o_ai vector unsigned long long +vec_reve(vector unsigned long long __vec) { + return (vector unsigned long long) { __vec[1], __vec[0] }; +} + +static inline __ATTRS_o_ai vector bool long long +vec_reve(vector bool long long __vec) { + return (vector bool long long) { __vec[1], __vec[0] }; +} + +#if __ARCH__ >= 12 +static inline __ATTRS_o_ai vector float +vec_reve(vector float __vec) { + return (vector float) { __vec[3], __vec[2], __vec[1], __vec[0] }; +} +#endif + +static inline __ATTRS_o_ai vector double +vec_reve(vector double __vec) { + return (vector double) { __vec[1], __vec[0] }; +} + /*-- vec_sel ----------------------------------------------------------------*/ static inline __ATTRS_o_ai vector signed char @@ -6849,6 +6970,56 @@ vec_sldw(vector double __a, vector double __b, int __c) __builtin_s390_vsldb((vector unsigned char)(X), \ (vector unsigned char)(Y), (Z) * 4)) +/*-- vec_sldb ---------------------------------------------------------------*/ + +#if __ARCH__ >= 13 + +extern __ATTRS_o vector signed char +vec_sldb(vector signed char __a, vector signed char __b, int __c) + __constant_range(__c, 0, 7); + +extern __ATTRS_o vector unsigned char +vec_sldb(vector unsigned char __a, vector unsigned char __b, int __c) + __constant_range(__c, 0, 7); + +extern __ATTRS_o vector signed short +vec_sldb(vector signed short __a, vector signed short __b, int __c) + __constant_range(__c, 0, 7); + +extern __ATTRS_o vector unsigned short +vec_sldb(vector unsigned short __a, vector unsigned short __b, int __c) + __constant_range(__c, 0, 7); + +extern __ATTRS_o vector signed int +vec_sldb(vector signed int __a, vector signed int __b, int __c) + __constant_range(__c, 0, 7); + +extern __ATTRS_o vector unsigned int +vec_sldb(vector unsigned int __a, vector unsigned int __b, int __c) + __constant_range(__c, 0, 7); + +extern __ATTRS_o vector signed long long +vec_sldb(vector signed long long __a, vector signed long long __b, int __c) + __constant_range(__c, 0, 7); + +extern __ATTRS_o vector unsigned long long +vec_sldb(vector unsigned long long __a, vector unsigned long long __b, int __c) + __constant_range(__c, 0, 7); + +extern __ATTRS_o vector float +vec_sldb(vector float __a, vector float __b, int __c) + __constant_range(__c, 0, 7); + +extern __ATTRS_o vector double +vec_sldb(vector double __a, vector double __b, int __c) + __constant_range(__c, 0, 7); + +#define vec_sldb(X, Y, Z) ((__typeof__((vec_sldb)((X), (Y), (Z)))) \ + __builtin_s390_vsld((vector unsigned char)(X), \ + (vector unsigned char)(Y), (Z))) + +#endif + /*-- vec_sral ---------------------------------------------------------------*/ static inline __ATTRS_o_ai vector signed char @@ -7579,6 +7750,56 @@ vec_srb(vector double __a, vector unsigned long long __b) { (vector unsigned char)__a, (vector unsigned char)__b); } +/*-- vec_srdb ---------------------------------------------------------------*/ + +#if __ARCH__ >= 13 + +extern __ATTRS_o vector signed char +vec_srdb(vector signed char __a, vector signed char __b, int __c) + __constant_range(__c, 0, 7); + +extern __ATTRS_o vector unsigned char +vec_srdb(vector unsigned char __a, vector unsigned char __b, int __c) + __constant_range(__c, 0, 7); + +extern __ATTRS_o 
vector signed short +vec_srdb(vector signed short __a, vector signed short __b, int __c) + __constant_range(__c, 0, 7); + +extern __ATTRS_o vector unsigned short +vec_srdb(vector unsigned short __a, vector unsigned short __b, int __c) + __constant_range(__c, 0, 7); + +extern __ATTRS_o vector signed int +vec_srdb(vector signed int __a, vector signed int __b, int __c) + __constant_range(__c, 0, 7); + +extern __ATTRS_o vector unsigned int +vec_srdb(vector unsigned int __a, vector unsigned int __b, int __c) + __constant_range(__c, 0, 7); + +extern __ATTRS_o vector signed long long +vec_srdb(vector signed long long __a, vector signed long long __b, int __c) + __constant_range(__c, 0, 7); + +extern __ATTRS_o vector unsigned long long +vec_srdb(vector unsigned long long __a, vector unsigned long long __b, int __c) + __constant_range(__c, 0, 7); + +extern __ATTRS_o vector float +vec_srdb(vector float __a, vector float __b, int __c) + __constant_range(__c, 0, 7); + +extern __ATTRS_o vector double +vec_srdb(vector double __a, vector double __b, int __c) + __constant_range(__c, 0, 7); + +#define vec_srdb(X, Y, Z) ((__typeof__((vec_srdb)((X), (Y), (Z)))) \ + __builtin_s390_vsrd((vector unsigned char)(X), \ + (vector unsigned char)(Y), (Z))) + +#endif + /*-- vec_abs ----------------------------------------------------------------*/ static inline __ATTRS_o_ai vector signed char @@ -8725,6 +8946,22 @@ vec_double(vector unsigned long long __a) { return __builtin_convertvector(__a, vector double); } +/*-- vec_float --------------------------------------------------------------*/ + +#if __ARCH__ >= 13 + +static inline __ATTRS_o_ai vector float +vec_float(vector signed int __a) { + return __builtin_convertvector(__a, vector float); +} + +static inline __ATTRS_o_ai vector float +vec_float(vector unsigned int __a) { + return __builtin_convertvector(__a, vector float); +} + +#endif + /*-- vec_signed -------------------------------------------------------------*/ static inline __ATTRS_o_ai vector signed long long @@ -8732,6 +8969,13 @@ vec_signed(vector double __a) { return __builtin_convertvector(__a, vector signed long long); } +#if __ARCH__ >= 13 +static inline __ATTRS_o_ai vector signed int +vec_signed(vector float __a) { + return __builtin_convertvector(__a, vector signed int); +} +#endif + /*-- vec_unsigned -----------------------------------------------------------*/ static inline __ATTRS_o_ai vector unsigned long long @@ -8739,6 +8983,13 @@ vec_unsigned(vector double __a) { return __builtin_convertvector(__a, vector unsigned long long); } +#if __ARCH__ >= 13 +static inline __ATTRS_o_ai vector unsigned int +vec_unsigned(vector float __a) { + return __builtin_convertvector(__a, vector unsigned int); +} +#endif + /*-- vec_roundp -------------------------------------------------------------*/ #if __ARCH__ >= 12 @@ -10456,6 +10707,147 @@ vec_find_any_ne_or_0_idx_cc(vector unsigned int __a, vector unsigned int __b, return __builtin_s390_vfaezfs(__a, __b, 8, __cc); } +/*-- vec_search_string_cc ---------------------------------------------------*/ + +#if __ARCH__ >= 13 + +static inline __ATTRS_o_ai vector unsigned char +vec_search_string_cc(vector signed char __a, vector signed char __b, + vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrsb((vector unsigned char)__a, + (vector unsigned char)__b, __c, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_search_string_cc(vector bool char __a, vector bool char __b, + vector unsigned char __c, int *__cc) { + return 
__builtin_s390_vstrsb((vector unsigned char)__a, + (vector unsigned char)__b, __c, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_search_string_cc(vector unsigned char __a, vector unsigned char __b, + vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrsb(__a, __b, __c, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_search_string_cc(vector signed short __a, vector signed short __b, + vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrsh((vector unsigned short)__a, + (vector unsigned short)__b, __c, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_search_string_cc(vector bool short __a, vector bool short __b, + vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrsh((vector unsigned short)__a, + (vector unsigned short)__b, __c, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_search_string_cc(vector unsigned short __a, vector unsigned short __b, + vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrsh(__a, __b, __c, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_search_string_cc(vector signed int __a, vector signed int __b, + vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrsf((vector unsigned int)__a, + (vector unsigned int)__b, __c, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_search_string_cc(vector bool int __a, vector bool int __b, + vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrsf((vector unsigned int)__a, + (vector unsigned int)__b, __c, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_search_string_cc(vector unsigned int __a, vector unsigned int __b, + vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrsf(__a, __b, __c, __cc); +} + +#endif + +/*-- vec_search_string_until_zero_cc ----------------------------------------*/ + +#if __ARCH__ >= 13 + +static inline __ATTRS_o_ai vector unsigned char +vec_search_string_until_zero_cc(vector signed char __a, + vector signed char __b, + vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrszb((vector unsigned char)__a, + (vector unsigned char)__b, __c, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_search_string_until_zero_cc(vector bool char __a, + vector bool char __b, + vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrszb((vector unsigned char)__a, + (vector unsigned char)__b, __c, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_search_string_until_zero_cc(vector unsigned char __a, + vector unsigned char __b, + vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrszb(__a, __b, __c, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_search_string_until_zero_cc(vector signed short __a, + vector signed short __b, + vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrszh((vector unsigned short)__a, + (vector unsigned short)__b, __c, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_search_string_until_zero_cc(vector bool short __a, + vector bool short __b, + vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrszh((vector unsigned short)__a, + (vector unsigned short)__b, __c, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_search_string_until_zero_cc(vector unsigned short __a, + vector unsigned short __b, + vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrszh(__a, __b, __c, __cc); +} + +static inline __ATTRS_o_ai vector unsigned 
char +vec_search_string_until_zero_cc(vector signed int __a, + vector signed int __b, + vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrszf((vector unsigned int)__a, + (vector unsigned int)__b, __c, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_search_string_until_zero_cc(vector bool int __a, + vector bool int __b, + vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrszf((vector unsigned int)__a, + (vector unsigned int)__b, __c, __cc); +} + +static inline __ATTRS_o_ai vector unsigned char +vec_search_string_until_zero_cc(vector unsigned int __a, + vector unsigned int __b, + vector unsigned char __c, int *__cc) { + return __builtin_s390_vstrszf(__a, __b, __c, __cc); +} + +#endif + #undef __constant_pow2_range #undef __constant_range #undef __constant diff --git a/lib/include/vpclmulqdqintrin.h b/lib/include/vpclmulqdqintrin.h index 86174a457e..470d832549 100644 --- a/lib/include/vpclmulqdqintrin.h +++ b/lib/include/vpclmulqdqintrin.h @@ -1,23 +1,9 @@ /*===------------ vpclmulqdqintrin.h - VPCLMULQDQ intrinsics ---------------=== * * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/waitpkgintrin.h b/lib/include/waitpkgintrin.h index e29d6cfa5a..7ecada4cf7 100644 --- a/lib/include/waitpkgintrin.h +++ b/lib/include/waitpkgintrin.h @@ -1,22 +1,8 @@ /*===----------------------- waitpkgintrin.h - WAITPKG --------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/wbnoinvdintrin.h b/lib/include/wbnoinvdintrin.h index cad83368db..cac0347efc 100644 --- a/lib/include/wbnoinvdintrin.h +++ b/lib/include/wbnoinvdintrin.h @@ -1,22 +1,8 @@ /*===-------------- wbnoinvdintrin.h - wbnoinvd intrinsic-------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/wmmintrin.h b/lib/include/wmmintrin.h index 569a8d838d..f932ca8108 100644 --- a/lib/include/wmmintrin.h +++ b/lib/include/wmmintrin.h @@ -1,22 +1,8 @@ /*===---- wmmintrin.h - AES intrinsics ------------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/x86intrin.h b/lib/include/x86intrin.h index 728c58c3eb..a8b36622d4 100644 --- a/lib/include/x86intrin.h +++ b/lib/include/x86intrin.h @@ -1,22 +1,8 @@ /*===---- x86intrin.h - X86 intrinsics -------------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/xmmintrin.h b/lib/include/xmmintrin.h index 17af17267c..75ff37655b 100644 --- a/lib/include/xmmintrin.h +++ b/lib/include/xmmintrin.h @@ -1,22 +1,8 @@ /*===---- xmmintrin.h - SSE intrinsics -------------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. 
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -28,7 +14,9 @@ typedef int __v4si __attribute__((__vector_size__(16))); typedef float __v4sf __attribute__((__vector_size__(16))); -typedef float __m128 __attribute__((__vector_size__(16))); +typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16))); + +typedef float __m128_u __attribute__((__vector_size__(16), __aligned__(1))); /* Unsigned types */ typedef unsigned int __v4su __attribute__((__vector_size__(16))); @@ -1752,7 +1740,7 @@ static __inline__ __m128 __DEFAULT_FN_ATTRS _mm_loadu_ps(const float *__p) { struct __loadu_ps { - __m128 __v; + __m128_u __v; } __attribute__((__packed__, __may_alias__)); return ((struct __loadu_ps*)__p)->__v; } @@ -1931,7 +1919,11 @@ _mm_setzero_ps(void) static __inline__ void __DEFAULT_FN_ATTRS _mm_storeh_pi(__m64 *__p, __m128 __a) { - __builtin_ia32_storehps((__v2si *)__p, (__v4sf)__a); + typedef float __mm_storeh_pi_v2f32 __attribute__((__vector_size__(8))); + struct __mm_storeh_pi_struct { + __mm_storeh_pi_v2f32 __u; + } __attribute__((__packed__, __may_alias__)); + ((struct __mm_storeh_pi_struct*)__p)->__u = __builtin_shufflevector(__a, __a, 2, 3); } /// Stores the lower 64 bits of a 128-bit vector of [4 x float] to a @@ -1948,7 +1940,11 @@ _mm_storeh_pi(__m64 *__p, __m128 __a) static __inline__ void __DEFAULT_FN_ATTRS _mm_storel_pi(__m64 *__p, __m128 __a) { - __builtin_ia32_storelps((__v2si *)__p, (__v4sf)__a); + typedef float __mm_storeh_pi_v2f32 __attribute__((__vector_size__(8))); + struct __mm_storeh_pi_struct { + __mm_storeh_pi_v2f32 __u; + } __attribute__((__packed__, __may_alias__)); + ((struct __mm_storeh_pi_struct*)__p)->__u = __builtin_shufflevector(__a, __a, 0, 1); } /// Stores the lower 32 bits of a 128-bit vector of [4 x float] to a @@ -1987,7 +1983,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _mm_storeu_ps(float *__p, __m128 __a) { struct __storeu_ps { - __m128 __v; + __m128_u __v; } __attribute__((__packed__, __may_alias__)); ((struct __storeu_ps*)__p)->__v = __a; } diff --git a/lib/include/xopintrin.h b/lib/include/xopintrin.h index 9d540a2abd..5cedde41b6 100644 --- a/lib/include/xopintrin.h +++ b/lib/include/xopintrin.h @@ -1,22 +1,8 @@ /*===---- xopintrin.h - XOP intrinsics -------------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/xsavecintrin.h b/lib/include/xsavecintrin.h index 25577a95fc..5524947fa9 100644 --- a/lib/include/xsavecintrin.h +++ b/lib/include/xsavecintrin.h @@ -1,22 +1,8 @@ /*===---- xsavecintrin.h - XSAVEC intrinsic --------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/xsaveintrin.h b/lib/include/xsaveintrin.h index 16f3a78d3f..9429db6dde 100644 --- a/lib/include/xsaveintrin.h +++ b/lib/include/xsaveintrin.h @@ -1,22 +1,8 @@ /*===---- xsaveintrin.h - XSAVE intrinsic ----------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ @@ -28,6 +14,10 @@ #ifndef __XSAVEINTRIN_H #define __XSAVEINTRIN_H +#ifdef _MSC_VER +#define _XCR_XFEATURE_ENABLED_MASK 0 +#endif + /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xsave"))) @@ -41,6 +31,20 @@ _xrstor(void *__p, unsigned long long __m) { __builtin_ia32_xrstor(__p, __m); } +#ifndef _MSC_VER +#define _xgetbv(A) __builtin_ia32_xgetbv((long long)(A)) +#define _xsetbv(A, B) __builtin_ia32_xsetbv((unsigned int)(A), (unsigned long long)(B)) +#else +#ifdef __cplusplus +extern "C" { +#endif +unsigned __int64 __cdecl _xgetbv(unsigned int); +void __cdecl _xsetbv(unsigned int, unsigned __int64); +#ifdef __cplusplus +} +#endif +#endif /* _MSC_VER */ + #ifdef __x86_64__ static __inline__ void __DEFAULT_FN_ATTRS _xsave64(void *__p, unsigned long long __m) { @@ -51,6 +55,7 @@ static __inline__ void __DEFAULT_FN_ATTRS _xrstor64(void *__p, unsigned long long __m) { __builtin_ia32_xrstor64(__p, __m); } + #endif #undef __DEFAULT_FN_ATTRS diff --git a/lib/include/xsaveoptintrin.h b/lib/include/xsaveoptintrin.h index 792cf92d46..89a4c44db5 100644 --- a/lib/include/xsaveoptintrin.h +++ b/lib/include/xsaveoptintrin.h @@ -1,22 +1,8 @@ /*===---- xsaveoptintrin.h - XSAVEOPT intrinsic ----------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/xsavesintrin.h b/lib/include/xsavesintrin.h index fe2bc4b93b..3f99219a29 100644 --- a/lib/include/xsavesintrin.h +++ b/lib/include/xsavesintrin.h @@ -1,22 +1,8 @@ /*===---- xsavesintrin.h - XSAVES intrinsic --------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ diff --git a/lib/include/xtestintrin.h b/lib/include/xtestintrin.h index 924424386b..7d19e3733d 100644 --- a/lib/include/xtestintrin.h +++ b/lib/include/xtestintrin.h @@ -1,22 +1,8 @@ /*===---- xtestintrin.h - XTEST intrinsic ----------------------------------=== * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN - * THE SOFTWARE. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * *===-----------------------------------------------------------------------=== */ From dab0cf6428d96af540399563f2e2d5a79c3a7d5e Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 19 Jul 2019 16:54:00 -0400 Subject: [PATCH 12/31] update libunwind to llvm9 upstream commit 1931d3cb20a00da732c5210b123656632982fde0 --- lib/libunwind/LICENSE.TXT | 311 +++++++++++++++++ lib/libunwind/include/__libunwind_config.h | 7 +- lib/libunwind/include/libunwind.h | 35 +- .../include/mach-o/compact_unwind_encoding.h | 7 +- lib/libunwind/include/unwind.h | 9 +- lib/libunwind/src/AddressSpace.hpp | 149 +------- lib/libunwind/src/CompactUnwinder.hpp | 7 +- lib/libunwind/src/DwarfInstructions.hpp | 39 ++- lib/libunwind/src/DwarfParser.hpp | 7 +- lib/libunwind/src/EHHeaderParser.hpp | 7 +- lib/libunwind/src/RWMutex.hpp | 49 ++- lib/libunwind/src/Registers.hpp | 27 +- lib/libunwind/src/Unwind-EHABI.cpp | 109 +++--- lib/libunwind/src/Unwind-EHABI.h | 7 +- lib/libunwind/src/Unwind-seh.cpp | 100 +++--- lib/libunwind/src/Unwind-sjlj.c | 120 ++++--- lib/libunwind/src/UnwindCursor.hpp | 53 ++- lib/libunwind/src/UnwindLevel1-gcc-ext.c | 41 ++- lib/libunwind/src/UnwindLevel1.c | 122 +++---- lib/libunwind/src/UnwindRegistersRestore.S | 283 +++++++-------- lib/libunwind/src/UnwindRegistersSave.S | 323 +++++++++--------- lib/libunwind/src/Unwind_AppleExtras.cpp | 7 +- lib/libunwind/src/assembly.h | 74 ++-- lib/libunwind/src/config.h | 36 +- lib/libunwind/src/dwarf2.h | 7 +- lib/libunwind/src/libunwind.cpp | 214 ++++-------- lib/libunwind/src/libunwind_ext.h | 38 ++- 27 files changed, 1233 insertions(+), 955 deletions(-) create mode 100644 lib/libunwind/LICENSE.TXT diff --git a/lib/libunwind/LICENSE.TXT b/lib/libunwind/LICENSE.TXT new file mode 100644 index 0000000000..1e3120621c --- /dev/null +++ b/lib/libunwind/LICENSE.TXT @@ -0,0 +1,311 @@ +============================================================================== +The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: +============================================================================== + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +---- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. 
+ +============================================================================== +Software from third parties included in the LLVM Project: +============================================================================== +The LLVM Project contains third party software which is under different license +terms. All such code will be identified clearly using at least one of two +mechanisms: +1) It will be in a separate directory tree with its own `LICENSE.txt` or + `LICENSE` file at the top containing the specific license and restrictions + which apply to that software, or +2) It will contain specific license and restriction terms at the top of every + file. + +============================================================================== +Legacy LLVM License (https://llvm.org/docs/DeveloperPolicy.html#legacy): +============================================================================== + +The libunwind library is dual licensed under both the University of Illinois +"BSD-Like" license and the MIT license. As a user of this code you may choose +to use it under either license. As a contributor, you agree to allow your code +to be used under both. + +Full text of the relevant licenses is included below. + +============================================================================== + +University of Illinois/NCSA +Open Source License + +Copyright (c) 2009-2019 by the contributors listed in CREDITS.TXT + +All rights reserved. + +Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal with +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at + Urbana-Champaign, nor the names of its contributors may be used to + endorse or promote products derived from this Software without specific + prior written permission. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE +SOFTWARE. 
+ +============================================================================== + +Copyright (c) 2009-2014 by the contributors listed in CREDITS.TXT + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/lib/libunwind/include/__libunwind_config.h b/lib/libunwind/include/__libunwind_config.h index a8e30de13a..6e7e5e6f7f 100644 --- a/lib/libunwind/include/__libunwind_config.h +++ b/lib/libunwind/include/__libunwind_config.h @@ -1,9 +1,8 @@ //===------------------------- __libunwind_config.h -----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/libunwind/include/libunwind.h b/lib/libunwind/include/libunwind.h index 29c4aebde5..d06724d3c3 100644 --- a/lib/libunwind/include/libunwind.h +++ b/lib/libunwind/include/libunwind.h @@ -1,9 +1,8 @@ //===---------------------------- libunwind.h -----------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // // // Compatible with libunwind API documented at: @@ -76,7 +75,7 @@ typedef struct unw_addr_space *unw_addr_space_t; typedef int unw_regnum_t; typedef uintptr_t unw_word_t; -#if defined(__arm__) +#if defined(__arm__) && !defined(__ARM_DWARF_EH__) typedef uint64_t unw_fpreg_t; #else typedef double unw_fpreg_t; @@ -125,32 +124,6 @@ extern int unw_get_proc_name(unw_cursor_t *, char *, size_t, unw_word_t *) LIBUN extern unw_addr_space_t unw_local_addr_space; -#ifdef UNW_REMOTE -/* - * Mac OS X "remote" API for unwinding other processes on same machine - * - */ -extern unw_addr_space_t unw_create_addr_space_for_task(task_t); -extern void unw_destroy_addr_space(unw_addr_space_t); -extern int unw_init_remote_thread(unw_cursor_t *, unw_addr_space_t, thread_t *); -#endif /* UNW_REMOTE */ - -/* - * traditional libunwind "remote" API - * NOT IMPLEMENTED on Mac OS X - * - * extern int unw_init_remote(unw_cursor_t*, unw_addr_space_t, - * thread_t*); - * extern unw_accessors_t unw_get_accessors(unw_addr_space_t); - * extern unw_addr_space_t unw_create_addr_space(unw_accessors_t, int); - * extern void unw_flush_cache(unw_addr_space_t, unw_word_t, - * unw_word_t); - * extern int unw_set_caching_policy(unw_addr_space_t, - * unw_caching_policy_t); - * extern void _U_dyn_register(unw_dyn_info_t*); - * extern void _U_dyn_cancel(unw_dyn_info_t*); - */ - #ifdef __cplusplus } #endif diff --git a/lib/libunwind/include/mach-o/compact_unwind_encoding.h b/lib/libunwind/include/mach-o/compact_unwind_encoding.h index de14fd51e5..5301b1055e 100644 --- a/lib/libunwind/include/mach-o/compact_unwind_encoding.h +++ b/lib/libunwind/include/mach-o/compact_unwind_encoding.h @@ -1,9 +1,8 @@ //===------------------ mach-o/compact_unwind_encoding.h ------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // // // Darwin's alternative to DWARF based unwind encodings. diff --git a/lib/libunwind/include/unwind.h b/lib/libunwind/include/unwind.h index ae8ae5d24e..b6cc70498b 100644 --- a/lib/libunwind/include/unwind.h +++ b/lib/libunwind/include/unwind.h @@ -1,13 +1,12 @@ //===------------------------------- unwind.h -----------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // // // C++ ABI Level 1 ABI documented at: -// http://mentorembedded.github.io/cxx-abi/abi-eh.html +// https://itanium-cxx-abi.github.io/cxx-abi/abi-eh.html // //===----------------------------------------------------------------------===// diff --git a/lib/libunwind/src/AddressSpace.hpp b/lib/libunwind/src/AddressSpace.hpp index 49bb360d79..fb07c807db 100644 --- a/lib/libunwind/src/AddressSpace.hpp +++ b/lib/libunwind/src/AddressSpace.hpp @@ -1,9 +1,8 @@ //===------------------------- AddressSpace.hpp ---------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // // // Abstracts accessing local vs remote address spaces. @@ -28,6 +27,9 @@ #if _LIBUNWIND_USE_DLADDR #include +#if defined(__unix__) && defined(__ELF__) && defined(_LIBUNWIND_HAS_COMMENT_LIB_PRAGMA) +#pragma comment(lib, "dl") +#endif #endif #ifdef __APPLE__ @@ -457,6 +459,8 @@ inline bool LocalAddressSpace::findUnwindSections(pint_t targetAddr, #elif defined(_LIBUNWIND_SUPPORT_SEH_UNWIND) && defined(_WIN32) // Don't even bother, since Windows has functions that do all this stuff // for us. + (void)targetAddr; + (void)info; return true; #elif defined(_LIBUNWIND_ARM_EHABI) && defined(__BIONIC__) && \ (__ANDROID_API__ < 21) @@ -597,142 +601,15 @@ inline bool LocalAddressSpace::findFunctionName(pint_t addr, char *buf, return true; } } +#else + (void)addr; + (void)buf; + (void)bufLen; + (void)offset; #endif return false; } - - -#ifdef UNW_REMOTE - -/// RemoteAddressSpace is used as a template parameter to UnwindCursor when -/// unwinding a thread in the another process. The other process can be a -/// different endianness and a different pointer size which is handled by -/// the P template parameter. -template -class RemoteAddressSpace { -public: - RemoteAddressSpace(task_t task) : fTask(task) {} - - typedef typename P::uint_t pint_t; - - uint8_t get8(pint_t addr); - uint16_t get16(pint_t addr); - uint32_t get32(pint_t addr); - uint64_t get64(pint_t addr); - pint_t getP(pint_t addr); - uint64_t getRegister(pint_t addr); - uint64_t getULEB128(pint_t &addr, pint_t end); - int64_t getSLEB128(pint_t &addr, pint_t end); - pint_t getEncodedP(pint_t &addr, pint_t end, uint8_t encoding, - pint_t datarelBase = 0); - bool findFunctionName(pint_t addr, char *buf, size_t bufLen, - unw_word_t *offset); - bool findUnwindSections(pint_t targetAddr, UnwindInfoSections &info); - bool findOtherFDE(pint_t targetAddr, pint_t &fde); -private: - void *localCopy(pint_t addr); - - task_t fTask; -}; - -template uint8_t RemoteAddressSpace

<P>::get8(pint_t addr) {
-  return *((uint8_t *)localCopy(addr));
-}
-
-template <typename P> uint16_t RemoteAddressSpace<P>::get16(pint_t addr) {
-  return P::E::get16(*(uint16_t *)localCopy(addr));
-}
-
-template <typename P> uint32_t RemoteAddressSpace<P>::get32(pint_t addr) {
-  return P::E::get32(*(uint32_t *)localCopy(addr));
-}
-
-template <typename P> uint64_t RemoteAddressSpace<P>::get64(pint_t addr) {
-  return P::E::get64(*(uint64_t *)localCopy(addr));
-}
-
-template <typename P>
-typename P::uint_t RemoteAddressSpace<P>::getP(pint_t addr) {
-  return P::getP(*(uint64_t *)localCopy(addr));
-}
-
-template <typename P>
-typename P::uint_t OtherAddressSpace<P>::getRegister(pint_t addr) {
-  return P::getRegister(*(uint64_t *)localCopy(addr));
-}
-
-template <typename P>
-uint64_t OtherAddressSpace<P>::getULEB128(pint_t &addr, pint_t end) {
-  uintptr_t size = (end - addr);
-  LocalAddressSpace::pint_t laddr = (LocalAddressSpace::pint_t) localCopy(addr);
-  LocalAddressSpace::pint_t sladdr = laddr;
-  uint64_t result = LocalAddressSpace::getULEB128(laddr, laddr + size);
-  addr += (laddr - sladdr);
-  return result;
-}
-
-template <typename P>
-int64_t RemoteAddressSpace<P>::getSLEB128(pint_t &addr, pint_t end) {
-  uintptr_t size = (end - addr);
-  LocalAddressSpace::pint_t laddr = (LocalAddressSpace::pint_t) localCopy(addr);
-  LocalAddressSpace::pint_t sladdr = laddr;
-  uint64_t result = LocalAddressSpace::getSLEB128(laddr, laddr + size);
-  addr += (laddr - sladdr);
-  return result;
-}
-
-template <typename P> void *RemoteAddressSpace<P>::localCopy(pint_t addr) {
-  // FIX ME
-}
-
-template <typename P>
-bool RemoteAddressSpace<P>
::findFunctionName(pint_t addr, char *buf, - size_t bufLen, - unw_word_t *offset) { - // FIX ME -} - -/// unw_addr_space is the base class that abstract unw_addr_space_t type in -/// libunwind.h points to. -struct unw_addr_space { - cpu_type_t cpuType; - task_t taskPort; -}; - -/// unw_addr_space_i386 is the concrete instance that a unw_addr_space_t points -/// to when examining -/// a 32-bit intel process. -struct unw_addr_space_i386 : public unw_addr_space { - unw_addr_space_i386(task_t task) : oas(task) {} - RemoteAddressSpace> oas; -}; - -/// unw_addr_space_x86_64 is the concrete instance that a unw_addr_space_t -/// points to when examining -/// a 64-bit intel process. -struct unw_addr_space_x86_64 : public unw_addr_space { - unw_addr_space_x86_64(task_t task) : oas(task) {} - RemoteAddressSpace> oas; -}; - -/// unw_addr_space_ppc is the concrete instance that a unw_addr_space_t points -/// to when examining -/// a 32-bit PowerPC process. -struct unw_addr_space_ppc : public unw_addr_space { - unw_addr_space_ppc(task_t task) : oas(task) {} - RemoteAddressSpace> oas; -}; - -/// unw_addr_space_ppc is the concrete instance that a unw_addr_space_t points -/// to when examining a 64-bit PowerPC process. -struct unw_addr_space_ppc64 : public unw_addr_space { - unw_addr_space_ppc64(task_t task) : oas(task) {} - RemoteAddressSpace> oas; -}; - -#endif // UNW_REMOTE - } // namespace libunwind #endif // __ADDRESSSPACE_HPP__ diff --git a/lib/libunwind/src/CompactUnwinder.hpp b/lib/libunwind/src/CompactUnwinder.hpp index 7b97bf84ce..1c3175dff5 100644 --- a/lib/libunwind/src/CompactUnwinder.hpp +++ b/lib/libunwind/src/CompactUnwinder.hpp @@ -1,9 +1,8 @@ //===-------------------------- CompactUnwinder.hpp -----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // // // Does runtime stack unwinding using compact unwind encodings. diff --git a/lib/libunwind/src/DwarfInstructions.hpp b/lib/libunwind/src/DwarfInstructions.hpp index ec70c0a11f..29a070fa3e 100644 --- a/lib/libunwind/src/DwarfInstructions.hpp +++ b/lib/libunwind/src/DwarfInstructions.hpp @@ -1,9 +1,8 @@ //===-------------------------- DwarfInstructions.hpp ---------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // // // Processor specific interpretation of DWARF unwind info. 
@@ -82,12 +81,11 @@ typename A::pint_t DwarfInstructions::getSavedRegister( const RegisterLocation &savedReg) { switch (savedReg.location) { case CFI_Parser::kRegisterInCFA: - return addressSpace.getRegister(cfa + (pint_t)savedReg.value); + return (pint_t)addressSpace.getRegister(cfa + (pint_t)savedReg.value); case CFI_Parser::kRegisterAtExpression: - return addressSpace.getRegister( - evaluateExpression((pint_t)savedReg.value, addressSpace, - registers, cfa)); + return (pint_t)addressSpace.getRegister(evaluateExpression( + (pint_t)savedReg.value, addressSpace, registers, cfa)); case CFI_Parser::kRegisterIsExpression: return evaluateExpression((pint_t)savedReg.value, addressSpace, @@ -234,6 +232,31 @@ int DwarfInstructions::stepWithDwarf(A &addressSpace, pint_t pc, } #endif +#if defined(_LIBUNWIND_TARGET_PPC64) +#define PPC64_ELFV1_R2_LOAD_INST_ENCODING 0xe8410028u // ld r2,40(r1) +#define PPC64_ELFV1_R2_OFFSET 40 +#define PPC64_ELFV2_R2_LOAD_INST_ENCODING 0xe8410018u // ld r2,24(r1) +#define PPC64_ELFV2_R2_OFFSET 24 + // If the instruction at return address is a TOC (r2) restore, + // then r2 was saved and needs to be restored. + // ELFv2 ABI specifies that the TOC Pointer must be saved at SP + 24, + // while in ELFv1 ABI it is saved at SP + 40. + if (R::getArch() == REGISTERS_PPC64 && returnAddress != 0) { + pint_t sp = newRegisters.getRegister(UNW_REG_SP); + pint_t r2 = 0; + switch (addressSpace.get32(returnAddress)) { + case PPC64_ELFV1_R2_LOAD_INST_ENCODING: + r2 = addressSpace.get64(sp + PPC64_ELFV1_R2_OFFSET); + break; + case PPC64_ELFV2_R2_LOAD_INST_ENCODING: + r2 = addressSpace.get64(sp + PPC64_ELFV2_R2_OFFSET); + break; + } + if (r2) + newRegisters.setRegister(UNW_PPC64_R2, r2); + } +#endif + // Return address is address after call site instruction, so setting IP to // that does simualates a return. newRegisters.setIP(returnAddress); diff --git a/lib/libunwind/src/DwarfParser.hpp b/lib/libunwind/src/DwarfParser.hpp index a2c6d7f8db..df69c2a4bd 100644 --- a/lib/libunwind/src/DwarfParser.hpp +++ b/lib/libunwind/src/DwarfParser.hpp @@ -1,9 +1,8 @@ //===--------------------------- DwarfParser.hpp --------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // // // Parses DWARF CFIs (FDEs and CIEs). diff --git a/lib/libunwind/src/EHHeaderParser.hpp b/lib/libunwind/src/EHHeaderParser.hpp index 6b3e7dead8..0101835b8e 100644 --- a/lib/libunwind/src/EHHeaderParser.hpp +++ b/lib/libunwind/src/EHHeaderParser.hpp @@ -1,9 +1,8 @@ //===------------------------- EHHeaderParser.hpp -------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // // // Parses ELF .eh_frame_hdr sections. 
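For illustration of the PPC64 hunk above (the helper below is hypothetical, not part of the patch): r2 is only restored when the instruction found at the return address is one of the two TOC-reload encodings, and the stack offset read back follows the ABI that encoding implies.

```
#include <cstdint>

// Hypothetical helper: classify the 32-bit instruction word at a return
// address. If it is a TOC (r2) reload, report the offset from the stack
// pointer where the caller saved r2.
//   ELFv1: ld r2,40(r1)  -> 0xe8410028, TOC saved at SP + 40
//   ELFv2: ld r2,24(r1)  -> 0xe8410018, TOC saved at SP + 24
inline bool ppc64TocSaveOffset(uint32_t insn, unsigned &spOffset) {
  switch (insn) {
  case 0xe8410028u:
    spOffset = 40;
    return true;
  case 0xe8410018u:
    spOffset = 24;
    return true;
  default:
    return false; // not a TOC restore; this call site did not save r2
  }
}
```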
diff --git a/lib/libunwind/src/RWMutex.hpp b/lib/libunwind/src/RWMutex.hpp index 50a78a57b0..a37ac77144 100644 --- a/lib/libunwind/src/RWMutex.hpp +++ b/lib/libunwind/src/RWMutex.hpp @@ -1,9 +1,8 @@ //===----------------------------- Registers.hpp --------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // // // Abstract interface to shared reader/writer log, hiding platform and @@ -18,6 +17,9 @@ #include #elif !defined(_LIBUNWIND_HAS_NO_THREADS) #include +#if defined(__unix__) && defined(__ELF__) && defined(_LIBUNWIND_HAS_COMMENT_LIB_PRAGMA) +#pragma comment(lib, "pthread") +#endif #endif namespace libunwind { @@ -57,11 +59,11 @@ private: SRWLOCK _lock = SRWLOCK_INIT; }; -#else +#elif !defined(LIBUNWIND_USE_WEAK_PTHREAD) class _LIBUNWIND_HIDDEN RWMutex { public: - bool lock_shared() { return pthread_rwlock_rdlock(&_lock) == 0; } + bool lock_shared() { return pthread_rwlock_rdlock(&_lock) == 0; } bool unlock_shared() { return pthread_rwlock_unlock(&_lock) == 0; } bool lock() { return pthread_rwlock_wrlock(&_lock) == 0; } bool unlock() { return pthread_rwlock_unlock(&_lock) == 0; } @@ -70,6 +72,41 @@ private: pthread_rwlock_t _lock = PTHREAD_RWLOCK_INITIALIZER; }; +#else + +extern "C" int __attribute__((weak)) +pthread_create(pthread_t *thread, const pthread_attr_t *attr, + void *(*start_routine)(void *), void *arg); +extern "C" int __attribute__((weak)) +pthread_rwlock_rdlock(pthread_rwlock_t *lock); +extern "C" int __attribute__((weak)) +pthread_rwlock_wrlock(pthread_rwlock_t *lock); +extern "C" int __attribute__((weak)) +pthread_rwlock_unlock(pthread_rwlock_t *lock); + +// Calls to the locking functions are gated on pthread_create, and not the +// functions themselves, because the data structure should only be locked if +// another thread has been created. This is what similar libraries do. + +class _LIBUNWIND_HIDDEN RWMutex { +public: + bool lock_shared() { + return !pthread_create || (pthread_rwlock_rdlock(&_lock) == 0); + } + bool unlock_shared() { + return !pthread_create || (pthread_rwlock_unlock(&_lock) == 0); + } + bool lock() { + return !pthread_create || (pthread_rwlock_wrlock(&_lock) == 0); + } + bool unlock() { + return !pthread_create || (pthread_rwlock_unlock(&_lock) == 0); + } + +private: + pthread_rwlock_t _lock = PTHREAD_RWLOCK_INITIALIZER; +}; + #endif } // namespace libunwind diff --git a/lib/libunwind/src/Registers.hpp b/lib/libunwind/src/Registers.hpp index a4f5a85f2a..a36c6cf90d 100644 --- a/lib/libunwind/src/Registers.hpp +++ b/lib/libunwind/src/Registers.hpp @@ -1,9 +1,8 @@ //===----------------------------- Registers.hpp --------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // // // Models register sets for supported processors. 
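The RWMutex fallback above gates every lock on the weakly referenced pthread_create, so a program that never links libpthread (and therefore can never create a second thread) skips the locking entirely. A minimal standalone sketch of the same weak-symbol gating pattern, with a made-up hook name instead of the pthread functions (GCC/Clang on ELF assumed):

```
// Weak reference to a function that may or may not be present at link time.
// my_trace_hook is an illustrative name, not a real API.
extern "C" int __attribute__((weak)) my_trace_hook(const char *msg);

// Call the hook only when something actually provided it, the same way the
// RWMutex fallback above calls the pthread locking functions only when
// pthread_create resolved to a real definition.
int traceIfAvailable(const char *msg) {
  if (!my_trace_hook)
    return 0; // symbol absent: behave as a no-op
  return my_trace_hook(msg);
}
```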
@@ -2104,16 +2103,16 @@ private: uint32_t __pc; // Program counter r15 }; - static void saveVFPWithFSTMD(unw_fpreg_t*); - static void saveVFPWithFSTMX(unw_fpreg_t*); - static void saveVFPv3(unw_fpreg_t*); - static void restoreVFPWithFLDMD(unw_fpreg_t*); - static void restoreVFPWithFLDMX(unw_fpreg_t*); - static void restoreVFPv3(unw_fpreg_t*); + static void saveVFPWithFSTMD(void*); + static void saveVFPWithFSTMX(void*); + static void saveVFPv3(void*); + static void restoreVFPWithFLDMD(void*); + static void restoreVFPWithFLDMX(void*); + static void restoreVFPv3(void*); #if defined(__ARM_WMMX) - static void saveiWMMX(unw_fpreg_t*); + static void saveiWMMX(void*); static void saveiWMMXControl(uint32_t*); - static void restoreiWMMX(unw_fpreg_t*); + static void restoreiWMMX(void*); static void restoreiWMMXControl(uint32_t*); #endif void restoreCoreAndJumpTo(); @@ -2155,7 +2154,7 @@ inline Registers_arm::Registers_arm(const void *registers) _saved_vfp_d16_d31(false) { static_assert((check_fit::does_fit), "arm registers do not fit into unw_context_t"); - // See unw_getcontext() note about data. + // See __unw_getcontext() note about data. memcpy(&_registers, registers, sizeof(_registers)); memset(&_vfp_d0_d15_pad, 0, sizeof(_vfp_d0_d15_pad)); memset(&_vfp_d16_d31, 0, sizeof(_vfp_d16_d31)); @@ -2434,7 +2433,7 @@ inline const char *Registers_arm::getRegisterName(int regNum) { inline bool Registers_arm::validFloatRegister(int regNum) const { // NOTE: Consider the intel MMX registers floating points so the - // unw_get_fpreg can be used to transmit the 64-bit data back. + // __unw_get_fpreg can be used to transmit the 64-bit data back. return ((regNum >= UNW_ARM_D0) && (regNum <= UNW_ARM_D31)) #if defined(__ARM_WMMX) || ((regNum >= UNW_ARM_WR0) && (regNum <= UNW_ARM_WR15)) diff --git a/lib/libunwind/src/Unwind-EHABI.cpp b/lib/libunwind/src/Unwind-EHABI.cpp index f37732c6ac..4ff5e318b5 100644 --- a/lib/libunwind/src/Unwind-EHABI.cpp +++ b/lib/libunwind/src/Unwind-EHABI.cpp @@ -1,9 +1,8 @@ //===--------------------------- Unwind-EHABI.cpp -------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // // // Implements ARM zero-cost C++ exceptions @@ -21,8 +20,6 @@ #include #include -#include - #include "config.h" #include "libunwind.h" #include "libunwind_ext.h" @@ -34,7 +31,11 @@ namespace { // signinficant byte. uint8_t getByte(const uint32_t* data, size_t offset) { const uint8_t* byteData = reinterpret_cast(data); +#ifdef __LITTLE_ENDIAN__ return byteData[(offset & ~(size_t)0x03) + (3 - (offset & (size_t)0x03))]; +#else + return byteData[offset]; +#endif } const char* getNextWord(const char* data, uint32_t* out) { @@ -182,7 +183,7 @@ static _Unwind_Reason_Code unwindOneFrame(_Unwind_State state, if (result != _URC_CONTINUE_UNWIND) return result; - if (unw_step(reinterpret_cast(context)) != UNW_STEP_SUCCESS) + if (__unw_step(reinterpret_cast(context)) != UNW_STEP_SUCCESS) return _URC_FAILURE; return _URC_CONTINUE_UNWIND; } @@ -443,17 +444,18 @@ unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except // In this implementation, the phases don't share the VRS backing store. 
// Instead, they are passed the original |uc| and they create a new VRS // from scratch thus achieving the same effect. - unw_init_local(cursor, uc); + __unw_init_local(cursor, uc); // Walk each frame looking for a place to stop. for (bool handlerNotFound = true; handlerNotFound;) { // See if frame has code to run (has personality routine). unw_proc_info_t frameInfo; - if (unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { - _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): unw_get_proc_info " - "failed => _URC_FATAL_PHASE1_ERROR", - static_cast(exception_object)); + if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): __unw_get_proc_info " + "failed => _URC_FATAL_PHASE1_ERROR", + static_cast(exception_object)); return _URC_FATAL_PHASE1_ERROR; } @@ -462,12 +464,12 @@ unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except char functionBuf[512]; const char *functionName = functionBuf; unw_word_t offset; - if ((unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), - &offset) != UNW_ESUCCESS) || + if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), + &offset) != UNW_ESUCCESS) || (frameInfo.start_ip + offset > frameInfo.end_ip)) functionName = ".anonymous."; unw_word_t pc; - unw_get_reg(cursor, UNW_REG_IP, &pc); + __unw_get_reg(cursor, UNW_REG_IP, &pc); _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): pc=0x%" PRIxPTR ", start_ip=0x%" PRIxPTR ", func=%s, " "lsda=0x%" PRIxPTR ", personality=0x%" PRIxPTR, @@ -537,7 +539,7 @@ static _Unwind_Reason_Code unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor _Unwind_Exception *exception_object, bool resume) { // See comment at the start of unwind_phase1 regarding VRS integrity. - unw_init_local(cursor, uc); + __unw_init_local(cursor, uc); _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p)", static_cast(exception_object)); @@ -559,19 +561,20 @@ static _Unwind_Reason_Code unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor // for. After this, continue unwinding as if normal. // // See #7.4.6 for details. - unw_set_reg(cursor, UNW_REG_IP, - exception_object->unwinder_cache.reserved2); + __unw_set_reg(cursor, UNW_REG_IP, + exception_object->unwinder_cache.reserved2); resume = false; } // Get info about this frame. 
unw_word_t sp; unw_proc_info_t frameInfo; - unw_get_reg(cursor, UNW_REG_SP, &sp); - if (unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { - _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): unw_get_proc_info " - "failed => _URC_FATAL_PHASE2_ERROR", - static_cast(exception_object)); + __unw_get_reg(cursor, UNW_REG_SP, &sp); + if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): __unw_get_proc_info " + "failed => _URC_FATAL_PHASE2_ERROR", + static_cast(exception_object)); return _URC_FATAL_PHASE2_ERROR; } @@ -580,8 +583,8 @@ static _Unwind_Reason_Code unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor char functionBuf[512]; const char *functionName = functionBuf; unw_word_t offset; - if ((unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), - &offset) != UNW_ESUCCESS) || + if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), + &offset) != UNW_ESUCCESS) || (frameInfo.start_ip + offset > frameInfo.end_ip)) functionName = ".anonymous."; _LIBUNWIND_TRACE_UNWINDING( @@ -625,8 +628,8 @@ static _Unwind_Reason_Code unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor // We may get control back if landing pad calls _Unwind_Resume(). if (_LIBUNWIND_TRACING_UNWINDING) { unw_word_t pc; - unw_get_reg(cursor, UNW_REG_IP, &pc); - unw_get_reg(cursor, UNW_REG_SP, &sp); + __unw_get_reg(cursor, UNW_REG_IP, &pc); + __unw_get_reg(cursor, UNW_REG_SP, &sp); _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): re-entering " "user code with ip=0x%" PRIxPTR ", sp=0x%" PRIxPTR, static_cast(exception_object), @@ -637,11 +640,11 @@ static _Unwind_Reason_Code unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor // EHABI #7.4.1 says we need to preserve pc for when _Unwind_Resume // is called back, to find this same frame. unw_word_t pc; - unw_get_reg(cursor, UNW_REG_IP, &pc); + __unw_get_reg(cursor, UNW_REG_IP, &pc); exception_object->unwinder_cache.reserved2 = (uint32_t)pc; } - unw_resume(cursor); - // unw_resume() only returns if there was an error. + __unw_resume(cursor); + // __unw_resume() only returns if there was an error. return _URC_FATAL_PHASE2_ERROR; // # EHABI #7.4.3 @@ -670,7 +673,7 @@ _Unwind_RaiseException(_Unwind_Exception *exception_object) { static_cast(exception_object)); unw_context_t uc; unw_cursor_t cursor; - unw_getcontext(&uc); + __unw_getcontext(&uc); // This field for is for compatibility with GCC to say this isn't a forced // unwind. EHABI #7.2 @@ -708,7 +711,7 @@ _Unwind_Resume(_Unwind_Exception *exception_object) { static_cast(exception_object)); unw_context_t uc; unw_cursor_t cursor; - unw_getcontext(&uc); + __unw_getcontext(&uc); // _Unwind_RaiseException on EHABI will always set the reserved1 field to 0, // which is in the same position as private_1 below. 
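For orientation only (background, not code from the patch): unwind_phase1 and unwind_phase2 above implement the usual two-pass model, a search pass that merely asks each frame's personality routine whether it will handle the exception, then a cleanup pass that re-walks the same frames, runs landing pads, and stops at the frame found in phase 1. A toy, self-contained sketch of that control flow:

```
#include <cstddef>
#include <cstdio>
#include <vector>

// Toy model: what a frame's personality routine would answer in the search
// pass, and whether it has cleanup work to run in the cleanup pass.
struct Frame {
  const char *name;
  bool claimsException; // phase 1: would answer _URC_HANDLER_FOUND?
  bool hasCleanup;      // phase 2: landing pad / destructors to run?
};

// Returns false when no frame claims the exception (_URC_END_OF_STACK).
bool raiseException(const std::vector<Frame> &stack) {
  // Phase 1: search only; nothing is modified and no cleanups run.
  std::size_t handler = stack.size();
  for (std::size_t i = 0; i < stack.size(); ++i) {
    if (stack[i].claimsException) {
      handler = i;
      break;
    }
  }
  if (handler == stack.size())
    return false;

  // Phase 2: re-walk the same frames, running cleanups, up to the handler.
  for (std::size_t i = 0; i <= handler; ++i) {
    if (stack[i].hasCleanup || i == handler)
      std::printf("phase 2: entering landing pad of %s\n", stack[i].name);
  }
  return true;
}

int main() {
  raiseException({{"leaf", false, true},
                  {"middle", false, false},
                  {"catcher", true, true}});
}
```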
@@ -725,7 +728,7 @@ _Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) { unw_cursor_t *cursor = (unw_cursor_t *)context; unw_proc_info_t frameInfo; uintptr_t result = 0; - if (unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS) + if (__unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS) result = (uintptr_t)frameInfo.lsda; _LIBUNWIND_TRACE_API( "_Unwind_GetLanguageSpecificData(context=%p) => 0x%llx", @@ -765,8 +768,8 @@ _Unwind_VRS_Set(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, case _UVRSC_CORE: if (representation != _UVRSD_UINT32 || regno > 15) return _UVRSR_FAILED; - return unw_set_reg(cursor, (unw_regnum_t)(UNW_ARM_R0 + regno), - *(unw_word_t *)valuep) == UNW_ESUCCESS + return __unw_set_reg(cursor, (unw_regnum_t)(UNW_ARM_R0 + regno), + *(unw_word_t *)valuep) == UNW_ESUCCESS ? _UVRSR_OK : _UVRSR_FAILED; case _UVRSC_VFP: @@ -776,28 +779,28 @@ _Unwind_VRS_Set(_Unwind_Context *context, _Unwind_VRS_RegClass regclass, // Can only touch d0-15 with FSTMFDX. if (regno > 15) return _UVRSR_FAILED; - unw_save_vfp_as_X(cursor); + __unw_save_vfp_as_X(cursor); } else { if (regno > 31) return _UVRSR_FAILED; } - return unw_set_fpreg(cursor, (unw_regnum_t)(UNW_ARM_D0 + regno), - *(unw_fpreg_t *)valuep) == UNW_ESUCCESS + return __unw_set_fpreg(cursor, (unw_regnum_t)(UNW_ARM_D0 + regno), + *(unw_fpreg_t *)valuep) == UNW_ESUCCESS ? _UVRSR_OK : _UVRSR_FAILED; #if defined(__ARM_WMMX) case _UVRSC_WMMXC: if (representation != _UVRSD_UINT32 || regno > 3) return _UVRSR_FAILED; - return unw_set_reg(cursor, (unw_regnum_t)(UNW_ARM_WC0 + regno), - *(unw_word_t *)valuep) == UNW_ESUCCESS + return __unw_set_reg(cursor, (unw_regnum_t)(UNW_ARM_WC0 + regno), + *(unw_word_t *)valuep) == UNW_ESUCCESS ? _UVRSR_OK : _UVRSR_FAILED; case _UVRSC_WMMXD: if (representation != _UVRSD_DOUBLE || regno > 31) return _UVRSR_FAILED; - return unw_set_fpreg(cursor, (unw_regnum_t)(UNW_ARM_WR0 + regno), - *(unw_fpreg_t *)valuep) == UNW_ESUCCESS + return __unw_set_fpreg(cursor, (unw_regnum_t)(UNW_ARM_WR0 + regno), + *(unw_fpreg_t *)valuep) == UNW_ESUCCESS ? _UVRSR_OK : _UVRSR_FAILED; #else @@ -819,8 +822,8 @@ _Unwind_VRS_Get_Internal(_Unwind_Context *context, case _UVRSC_CORE: if (representation != _UVRSD_UINT32 || regno > 15) return _UVRSR_FAILED; - return unw_get_reg(cursor, (unw_regnum_t)(UNW_ARM_R0 + regno), - (unw_word_t *)valuep) == UNW_ESUCCESS + return __unw_get_reg(cursor, (unw_regnum_t)(UNW_ARM_R0 + regno), + (unw_word_t *)valuep) == UNW_ESUCCESS ? _UVRSR_OK : _UVRSR_FAILED; case _UVRSC_VFP: @@ -830,28 +833,28 @@ _Unwind_VRS_Get_Internal(_Unwind_Context *context, // Can only touch d0-15 with FSTMFDX. if (regno > 15) return _UVRSR_FAILED; - unw_save_vfp_as_X(cursor); + __unw_save_vfp_as_X(cursor); } else { if (regno > 31) return _UVRSR_FAILED; } - return unw_get_fpreg(cursor, (unw_regnum_t)(UNW_ARM_D0 + regno), - (unw_fpreg_t *)valuep) == UNW_ESUCCESS + return __unw_get_fpreg(cursor, (unw_regnum_t)(UNW_ARM_D0 + regno), + (unw_fpreg_t *)valuep) == UNW_ESUCCESS ? _UVRSR_OK : _UVRSR_FAILED; #if defined(__ARM_WMMX) case _UVRSC_WMMXC: if (representation != _UVRSD_UINT32 || regno > 3) return _UVRSR_FAILED; - return unw_get_reg(cursor, (unw_regnum_t)(UNW_ARM_WC0 + regno), - (unw_word_t *)valuep) == UNW_ESUCCESS + return __unw_get_reg(cursor, (unw_regnum_t)(UNW_ARM_WC0 + regno), + (unw_word_t *)valuep) == UNW_ESUCCESS ? 
_UVRSR_OK : _UVRSR_FAILED; case _UVRSC_WMMXD: if (representation != _UVRSD_DOUBLE || regno > 31) return _UVRSR_FAILED; - return unw_get_fpreg(cursor, (unw_regnum_t)(UNW_ARM_WR0 + regno), - (unw_fpreg_t *)valuep) == UNW_ESUCCESS + return __unw_get_fpreg(cursor, (unw_regnum_t)(UNW_ARM_WR0 + regno), + (unw_fpreg_t *)valuep) == UNW_ESUCCESS ? _UVRSR_OK : _UVRSR_FAILED; #else @@ -960,7 +963,7 @@ _Unwind_GetRegionStart(struct _Unwind_Context *context) { unw_cursor_t *cursor = (unw_cursor_t *)context; unw_proc_info_t frameInfo; uintptr_t result = 0; - if (unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS) + if (__unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS) result = (uintptr_t)frameInfo.start_ip; _LIBUNWIND_TRACE_API("_Unwind_GetRegionStart(context=%p) => 0x%llX", static_cast(context), (long long)result); @@ -983,7 +986,7 @@ extern "C" _LIBUNWIND_EXPORT _Unwind_Reason_Code __gnu_unwind_frame(_Unwind_Exception *exception_object, struct _Unwind_Context *context) { unw_cursor_t *cursor = (unw_cursor_t *)context; - if (unw_step(cursor) != UNW_STEP_SUCCESS) + if (__unw_step(cursor) != UNW_STEP_SUCCESS) return _URC_FAILURE; return _URC_OK; } diff --git a/lib/libunwind/src/Unwind-EHABI.h b/lib/libunwind/src/Unwind-EHABI.h index fe164ff7be..6897082a33 100644 --- a/lib/libunwind/src/Unwind-EHABI.h +++ b/lib/libunwind/src/Unwind-EHABI.h @@ -1,9 +1,8 @@ //===------------------------- Unwind-EHABI.hpp ---------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // // //===----------------------------------------------------------------------===// diff --git a/lib/libunwind/src/Unwind-seh.cpp b/lib/libunwind/src/Unwind-seh.cpp index c5cf7c47d4..7647f2e0db 100644 --- a/lib/libunwind/src/Unwind-seh.cpp +++ b/lib/libunwind/src/Unwind-seh.cpp @@ -1,9 +1,8 @@ //===--------------------------- Unwind-seh.cpp ---------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // @@ -53,14 +52,16 @@ static const uint64_t kSEHExceptionClass = 0x434C4E4753454800; // CLNGSEH\0 /// Exception cleanup routine used by \c _GCC_specific_handler to /// free foreign exceptions. 
static void seh_exc_cleanup(_Unwind_Reason_Code urc, _Unwind_Exception *exc) { + (void)urc; if (exc->exception_class != kSEHExceptionClass) _LIBUNWIND_ABORT("SEH cleanup called on non-SEH exception"); free(exc); } -static int _unw_init_seh(unw_cursor_t *cursor, CONTEXT *ctx); -static DISPATCHER_CONTEXT *_unw_seh_get_disp_ctx(unw_cursor_t *cursor); -static void _unw_seh_set_disp_ctx(unw_cursor_t *cursor, DISPATCHER_CONTEXT *disp); +static int __unw_init_seh(unw_cursor_t *cursor, CONTEXT *ctx); +static DISPATCHER_CONTEXT *__unw_seh_get_disp_ctx(unw_cursor_t *cursor); +static void __unw_seh_set_disp_ctx(unw_cursor_t *cursor, + DISPATCHER_CONTEXT *disp); /// Common implementation of SEH-style handler functions used by Itanium- /// style frames. Depending on how and why it was called, it may do one of: @@ -69,7 +70,6 @@ static void _unw_seh_set_disp_ctx(unw_cursor_t *cursor, DISPATCHER_CONTEXT *disp _LIBUNWIND_EXPORT EXCEPTION_DISPOSITION _GCC_specific_handler(PEXCEPTION_RECORD ms_exc, PVOID frame, PCONTEXT ms_ctx, DISPATCHER_CONTEXT *disp, __personality_routine pers) { - unw_context_t uc; unw_cursor_t cursor; _Unwind_Exception *exc; _Unwind_Action action; @@ -78,7 +78,9 @@ _GCC_specific_handler(PEXCEPTION_RECORD ms_exc, PVOID frame, PCONTEXT ms_ctx, uintptr_t retval, target; bool ours = false; - _LIBUNWIND_TRACE_UNWINDING("_GCC_specific_handler(%#010x(%x), %p)", ms_exc->ExceptionCode, ms_exc->ExceptionFlags, frame); + _LIBUNWIND_TRACE_UNWINDING("_GCC_specific_handler(%#010lx(%lx), %p)", + ms_exc->ExceptionCode, ms_exc->ExceptionFlags, + (void *)frame); if (ms_exc->ExceptionCode == STATUS_GCC_UNWIND) { if (IS_TARGET_UNWIND(ms_exc->ExceptionFlags)) { // Set up the upper return value (the lower one and the target PC @@ -112,9 +114,9 @@ _GCC_specific_handler(PEXCEPTION_RECORD ms_exc, PVOID frame, PCONTEXT ms_ctx, memset(exc->private_, 0, sizeof(exc->private_)); } if (!ctx) { - _unw_init_seh(&cursor, disp->ContextRecord); - _unw_seh_set_disp_ctx(&cursor, disp); - unw_set_reg(&cursor, UNW_REG_IP, disp->ControlPc-1); + __unw_init_seh(&cursor, disp->ContextRecord); + __unw_seh_set_disp_ctx(&cursor, disp); + __unw_set_reg(&cursor, UNW_REG_IP, disp->ControlPc - 1); ctx = (struct _Unwind_Context *)&cursor; if (!IS_UNWINDING(ms_exc->ExceptionFlags)) { @@ -130,7 +132,10 @@ _GCC_specific_handler(PEXCEPTION_RECORD ms_exc, PVOID frame, PCONTEXT ms_ctx, } } - _LIBUNWIND_TRACE_UNWINDING("_GCC_specific_handler() calling personality function %p(1, %d, %llx, %p, %p)", pers, action, exc->exception_class, exc, ctx); + _LIBUNWIND_TRACE_UNWINDING("_GCC_specific_handler() calling personality " + "function %p(1, %d, %llx, %p, %p)", + (void *)pers, action, exc->exception_class, + (void *)exc, (void *)ctx); urc = pers(1, action, exc->exception_class, exc, ctx); _LIBUNWIND_TRACE_UNWINDING("_GCC_specific_handler() personality returned %d", urc); switch (urc) { @@ -168,18 +173,18 @@ _GCC_specific_handler(PEXCEPTION_RECORD ms_exc, PVOID frame, PCONTEXT ms_ctx, _LIBUNWIND_ABORT("Personality installed context during phase 1!"); #ifdef __x86_64__ exc->private_[2] = disp->TargetIp; - unw_get_reg(&cursor, UNW_X86_64_RAX, &retval); - unw_get_reg(&cursor, UNW_X86_64_RDX, &exc->private_[3]); + __unw_get_reg(&cursor, UNW_X86_64_RAX, &retval); + __unw_get_reg(&cursor, UNW_X86_64_RDX, &exc->private_[3]); #elif defined(__arm__) exc->private_[2] = disp->TargetPc; - unw_get_reg(&cursor, UNW_ARM_R0, &retval); - unw_get_reg(&cursor, UNW_ARM_R1, &exc->private_[3]); + __unw_get_reg(&cursor, UNW_ARM_R0, &retval); + __unw_get_reg(&cursor, 
UNW_ARM_R1, &exc->private_[3]); #elif defined(__aarch64__) exc->private_[2] = disp->TargetPc; - unw_get_reg(&cursor, UNW_ARM64_X0, &retval); - unw_get_reg(&cursor, UNW_ARM64_X1, &exc->private_[3]); + __unw_get_reg(&cursor, UNW_ARM64_X0, &retval); + __unw_get_reg(&cursor, UNW_ARM64_X1, &exc->private_[3]); #endif - unw_get_reg(&cursor, UNW_REG_IP, &target); + __unw_get_reg(&cursor, UNW_REG_IP, &target); ms_exc->ExceptionCode = STATUS_GCC_UNWIND; #ifdef __x86_64__ ms_exc->ExceptionInformation[2] = disp->TargetIp; @@ -199,7 +204,7 @@ _GCC_specific_handler(PEXCEPTION_RECORD ms_exc, PVOID frame, PCONTEXT ms_ctx, } } -/// Personality function returned by \c unw_get_proc_info() in SEH contexts. +/// Personality function returned by \c __unw_get_proc_info() in SEH contexts. /// This is a wrapper that calls the real SEH handler function, which in /// turn (at least, for Itanium-style frames) calls the real Itanium /// personality function (see \c _GCC_specific_handler()). @@ -207,6 +212,8 @@ extern "C" _Unwind_Reason_Code __libunwind_seh_personality(int version, _Unwind_Action state, uint64_t klass, _Unwind_Exception *exc, struct _Unwind_Context *context) { + (void)version; + (void)klass; EXCEPTION_RECORD ms_exc; bool phase2 = (state & (_UA_SEARCH_PHASE|_UA_CLEANUP_PHASE)) == _UA_CLEANUP_PHASE; ms_exc.ExceptionCode = STATUS_GCC_THROW; @@ -215,7 +222,8 @@ __libunwind_seh_personality(int version, _Unwind_Action state, ms_exc.ExceptionInformation[0] = (ULONG_PTR)exc; ms_exc.ExceptionInformation[1] = (ULONG_PTR)context; ms_exc.ExceptionInformation[2] = state; - DISPATCHER_CONTEXT *disp_ctx = _unw_seh_get_disp_ctx((unw_cursor_t *)context); + DISPATCHER_CONTEXT *disp_ctx = + __unw_seh_get_disp_ctx((unw_cursor_t *)context); EXCEPTION_DISPOSITION ms_act = disp_ctx->LanguageHandler(&ms_exc, (PVOID)disp_ctx->EstablisherFrame, disp_ctx->ContextRecord, @@ -234,15 +242,15 @@ unwind_phase2_forced(unw_context_t *uc, _Unwind_Exception *exception_object, _Unwind_Stop_Fn stop, void *stop_parameter) { unw_cursor_t cursor2; - unw_init_local(&cursor2, uc); + __unw_init_local(&cursor2, uc); // Walk each frame until we reach where search phase said to stop - while (unw_step(&cursor2) > 0) { + while (__unw_step(&cursor2) > 0) { // Update info about this frame. unw_proc_info_t frameInfo; - if (unw_get_proc_info(&cursor2, &frameInfo) != UNW_ESUCCESS) { - _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): unw_step " + if (__unw_get_proc_info(&cursor2, &frameInfo) != UNW_ESUCCESS) { + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): __unw_step " "failed => _URC_END_OF_STACK", (void *)exception_object); return _URC_FATAL_PHASE2_ERROR; @@ -253,8 +261,8 @@ unwind_phase2_forced(unw_context_t *uc, char functionBuf[512]; const char *functionName = functionBuf; unw_word_t offset; - if ((unw_get_proc_name(&cursor2, functionBuf, sizeof(functionBuf), - &offset) != UNW_ESUCCESS) || + if ((__unw_get_proc_name(&cursor2, functionBuf, sizeof(functionBuf), + &offset) != UNW_ESUCCESS) || (frameInfo.start_ip + offset > frameInfo.end_ip)) functionName = ".anonymous."; _LIBUNWIND_TRACE_UNWINDING( @@ -304,7 +312,7 @@ unwind_phase2_forced(unw_context_t *uc, "_URC_INSTALL_CONTEXT", (void *)exception_object); // We may get control back if landing pad calls _Unwind_Resume(). - unw_resume(&cursor2); + __unw_resume(&cursor2); break; default: // Personality routine returned an unknown result code. 
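The Unwind-seh.cpp changes continuing just below rename _unw_init_seh to __unw_init_seh and placement-construct the concrete UnwindCursor directly inside the opaque unw_cursor_t buffer, using the class-scoped placement operator new that the UnwindCursor.hpp hunk adds later in this patch (libunwind avoids depending on the C++ library, so it cannot pull in the standard placement new). A minimal self-contained sketch of that pattern, with hypothetical names:

```
#include <cstddef>

// Hypothetical stand-ins for the opaque unw_cursor_t buffer and the
// concrete cursor class; not libunwind's real types.
struct opaque_cursor {
  alignas(16) unsigned char space[128];
};

class Cursor {
public:
  // Class-scoped placement new: construct into caller-provided storage
  // without needing <new> (and thus without needing the C++ library).
  static void *operator new(std::size_t, Cursor *p) { return p; }
  explicit Cursor(int initialFrame) : frame_(initialFrame) {}
  int frame() const { return frame_; }

private:
  int frame_;
};

static_assert(sizeof(Cursor) <= sizeof(opaque_cursor),
              "cursor must fit in the opaque buffer");

int initCursor(opaque_cursor *oc) {
  // Construct the real cursor object directly inside the opaque buffer.
  Cursor *c = new (reinterpret_cast<Cursor *>(oc)) Cursor(0);
  return c->frame();
}
```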
@@ -369,7 +377,7 @@ _Unwind_Resume(_Unwind_Exception *exception_object) { if (exception_object->private_[0] != 0) { unw_context_t uc; - unw_getcontext(&uc); + __unw_getcontext(&uc); unwind_phase2_forced(&uc, exception_object, (_Unwind_Stop_Fn) exception_object->private_[0], (void *)exception_object->private_[4]); @@ -407,7 +415,7 @@ _Unwind_ForcedUnwind(_Unwind_Exception *exception_object, _LIBUNWIND_TRACE_API("_Unwind_ForcedUnwind(ex_obj=%p, stop=%p)", (void *)exception_object, (void *)(uintptr_t)stop); unw_context_t uc; - unw_getcontext(&uc); + __unw_getcontext(&uc); // Mark that this is a forced unwind, so _Unwind_Resume() can do // the right thing. @@ -421,7 +429,8 @@ _Unwind_ForcedUnwind(_Unwind_Exception *exception_object, /// Called by personality handler during phase 2 to get LSDA for current frame. _LIBUNWIND_EXPORT uintptr_t _Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) { - uintptr_t result = (uintptr_t)_unw_seh_get_disp_ctx((unw_cursor_t *)context)->HandlerData; + uintptr_t result = + (uintptr_t)__unw_seh_get_disp_ctx((unw_cursor_t *)context)->HandlerData; _LIBUNWIND_TRACE_API( "_Unwind_GetLanguageSpecificData(context=%p) => 0x%" PRIxPTR, (void *)context, result); @@ -432,30 +441,32 @@ _Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) { /// function. _LIBUNWIND_EXPORT uintptr_t _Unwind_GetRegionStart(struct _Unwind_Context *context) { - DISPATCHER_CONTEXT *disp = _unw_seh_get_disp_ctx((unw_cursor_t *)context); + DISPATCHER_CONTEXT *disp = __unw_seh_get_disp_ctx((unw_cursor_t *)context); uintptr_t result = (uintptr_t)disp->FunctionEntry->BeginAddress + disp->ImageBase; _LIBUNWIND_TRACE_API("_Unwind_GetRegionStart(context=%p) => 0x%" PRIxPTR, (void *)context, result); return result; } -static int -_unw_init_seh(unw_cursor_t *cursor, CONTEXT *context) { +static int __unw_init_seh(unw_cursor_t *cursor, CONTEXT *context) { #ifdef _LIBUNWIND_TARGET_X86_64 - new ((void *)cursor) UnwindCursor( - context, LocalAddressSpace::sThisAddressSpace); + new (reinterpret_cast *>(cursor)) + UnwindCursor( + context, LocalAddressSpace::sThisAddressSpace); auto *co = reinterpret_cast(cursor); co->setInfoBasedOnIPRegister(); return UNW_ESUCCESS; #elif defined(_LIBUNWIND_TARGET_ARM) - new ((void *)cursor) UnwindCursor( - context, LocalAddressSpace::sThisAddressSpace); + new (reinterpret_cast *>(cursor)) + UnwindCursor( + context, LocalAddressSpace::sThisAddressSpace); auto *co = reinterpret_cast(cursor); co->setInfoBasedOnIPRegister(); return UNW_ESUCCESS; #elif defined(_LIBUNWIND_TARGET_AARCH64) - new ((void *)cursor) UnwindCursor( - context, LocalAddressSpace::sThisAddressSpace); + new (reinterpret_cast *>(cursor)) + UnwindCursor( + context, LocalAddressSpace::sThisAddressSpace); auto *co = reinterpret_cast(cursor); co->setInfoBasedOnIPRegister(); return UNW_ESUCCESS; @@ -464,8 +475,7 @@ _unw_init_seh(unw_cursor_t *cursor, CONTEXT *context) { #endif } -static DISPATCHER_CONTEXT * -_unw_seh_get_disp_ctx(unw_cursor_t *cursor) { +static DISPATCHER_CONTEXT *__unw_seh_get_disp_ctx(unw_cursor_t *cursor) { #ifdef _LIBUNWIND_TARGET_X86_64 return reinterpret_cast *>(cursor)->getDispatcherContext(); #elif defined(_LIBUNWIND_TARGET_ARM) @@ -477,8 +487,8 @@ _unw_seh_get_disp_ctx(unw_cursor_t *cursor) { #endif } -static void -_unw_seh_set_disp_ctx(unw_cursor_t *cursor, DISPATCHER_CONTEXT *disp) { +static void __unw_seh_set_disp_ctx(unw_cursor_t *cursor, + DISPATCHER_CONTEXT *disp) { #ifdef _LIBUNWIND_TARGET_X86_64 reinterpret_cast *>(cursor)->setDispatcherContext(disp); #elif 
defined(_LIBUNWIND_TARGET_ARM) diff --git a/lib/libunwind/src/Unwind-sjlj.c b/lib/libunwind/src/Unwind-sjlj.c index 90cac3f8c7..b8bb7c83bd 100644 --- a/lib/libunwind/src/Unwind-sjlj.c +++ b/lib/libunwind/src/Unwind-sjlj.c @@ -1,9 +1,8 @@ //===--------------------------- Unwind-sjlj.c ----------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // // // Implements setjump-longjump based C++ exceptions @@ -12,6 +11,7 @@ #include +#include #include #include #include @@ -52,7 +52,7 @@ struct _Unwind_FunctionContext { #else # if __STDC_VERSION__ >= 201112L # define _LIBUNWIND_THREAD_LOCAL _Thread_local -# elif defined(_WIN32) +# elif defined(_MSC_VER) # define _LIBUNWIND_THREAD_LOCAL __declspec(thread) # elif defined(__GNUC__) || defined(__clang__) # define _LIBUNWIND_THREAD_LOCAL __thread @@ -108,7 +108,8 @@ _Unwind_SjLj_Unregister(struct _Unwind_FunctionContext *fc) { static _Unwind_Reason_Code unwind_phase1(struct _Unwind_Exception *exception_object) { _Unwind_FunctionContext_t c = __Unwind_SjLj_GetTopOfFunctionStack(); - _LIBUNWIND_TRACE_UNWINDING("unwind_phase1: initial function-context=%p", c); + _LIBUNWIND_TRACE_UNWINDING("unwind_phase1: initial function-context=%p", + (void *)c); // walk each frame looking for a place to stop for (bool handlerNotFound = true; handlerNotFound; c = c->prev) { @@ -117,17 +118,18 @@ unwind_phase1(struct _Unwind_Exception *exception_object) { if (c == NULL) { _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): reached " "bottom => _URC_END_OF_STACK", - exception_object); + (void *)exception_object); return _URC_END_OF_STACK; } - _LIBUNWIND_TRACE_UNWINDING("unwind_phase1: function-context=%p", c); + _LIBUNWIND_TRACE_UNWINDING("unwind_phase1: function-context=%p", (void *)c); // if there is a personality routine, ask it if it will want to stop at this // frame if (c->personality != NULL) { _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): calling " - "personality function %p", - exception_object, c->personality); + "personality function %p", + (void *)exception_object, + (void *)c->personality); _Unwind_Reason_Code personalityResult = (*c->personality)( 1, _UA_SEARCH_PHASE, exception_object->exception_class, exception_object, (struct _Unwind_Context *)c); @@ -138,12 +140,14 @@ unwind_phase1(struct _Unwind_Exception *exception_object) { handlerNotFound = false; exception_object->private_2 = (uintptr_t) c; _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): " - "_URC_HANDLER_FOUND", exception_object); + "_URC_HANDLER_FOUND", + (void *)exception_object); return _URC_NO_REASON; case _URC_CONTINUE_UNWIND: _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): " - "_URC_CONTINUE_UNWIND", exception_object); + "_URC_CONTINUE_UNWIND", + (void *)exception_object); // continue unwinding break; @@ -151,7 +155,7 @@ unwind_phase1(struct _Unwind_Exception *exception_object) { // something went wrong _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): _URC_FATAL_PHASE1_ERROR", - exception_object); + (void *)exception_object); return _URC_FATAL_PHASE1_ERROR; } } @@ -162,19 +166,21 @@ unwind_phase1(struct _Unwind_Exception *exception_object) { static _Unwind_Reason_Code unwind_phase2(struct _Unwind_Exception 
*exception_object) { - _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p)", exception_object); + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p)", + (void *)exception_object); // walk each frame until we reach where search phase said to stop _Unwind_FunctionContext_t c = __Unwind_SjLj_GetTopOfFunctionStack(); while (true) { _LIBUNWIND_TRACE_UNWINDING("unwind_phase2s(ex_ojb=%p): context=%p", - exception_object, c); + (void *)exception_object, (void *)c); // check for no more frames if (c == NULL) { - _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): unw_step() reached " - "bottom => _URC_END_OF_STACK", - exception_object); + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): __unw_step() reached " + "bottom => _URC_END_OF_STACK", + (void *)exception_object); return _URC_END_OF_STACK; } @@ -194,7 +200,7 @@ unwind_phase2(struct _Unwind_Exception *exception_object) { // continue unwinding _LIBUNWIND_TRACE_UNWINDING( "unwind_phase2(ex_ojb=%p): _URC_CONTINUE_UNWIND", - exception_object); + (void *)exception_object); if ((uintptr_t) c == exception_object->private_2) { // phase 1 said we would stop at this frame, but we did not... _LIBUNWIND_ABORT("during phase1 personality function said it would " @@ -203,14 +209,14 @@ unwind_phase2(struct _Unwind_Exception *exception_object) { break; case _URC_INSTALL_CONTEXT: _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): " - "_URC_INSTALL_CONTEXT, will resume at " - "landing pad %p", - exception_object, c->jbuf[1]); + "_URC_INSTALL_CONTEXT, will resume at " + "landing pad %p", + (void *)exception_object, c->jbuf[1]); // personality routine says to transfer control to landing pad // we may get control back if landing pad calls _Unwind_Resume() __Unwind_SjLj_SetTopOfFunctionStack(c); __builtin_longjmp(c->jbuf, 1); - // unw_resume() only returns if there was an error + // __unw_resume() only returns if there was an error return _URC_FATAL_PHASE2_ERROR; default: // something went wrong @@ -237,9 +243,10 @@ unwind_phase2_forced(struct _Unwind_Exception *exception_object, // get next frame (skip over first which is _Unwind_RaiseException) if (c == NULL) { - _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): unw_step() reached " - "bottom => _URC_END_OF_STACK", - exception_object); + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): __unw_step() reached " + "bottom => _URC_END_OF_STACK", + (void *)exception_object); return _URC_END_OF_STACK; } @@ -251,11 +258,11 @@ unwind_phase2_forced(struct _Unwind_Exception *exception_object, (struct _Unwind_Context *)c, stop_parameter); _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " "stop function returned %d", - exception_object, stopResult); + (void *)exception_object, stopResult); if (stopResult != _URC_NO_REASON) { _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " "stopped by stop function", - exception_object); + (void *)exception_object); return _URC_FATAL_PHASE2_ERROR; } @@ -264,7 +271,7 @@ unwind_phase2_forced(struct _Unwind_Exception *exception_object, __personality_routine p = (__personality_routine) c->personality; _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " "calling personality function %p", - exception_object, p); + (void *)exception_object, (void *)p); _Unwind_Reason_Code personalityResult = (*p)(1, action, exception_object->exception_class, exception_object, (struct _Unwind_Context *)c); @@ -272,13 +279,13 @@ unwind_phase2_forced(struct _Unwind_Exception *exception_object, case _URC_CONTINUE_UNWIND: 
_LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " "personality returned _URC_CONTINUE_UNWIND", - exception_object); + (void *)exception_object); // destructors called, continue unwinding break; case _URC_INSTALL_CONTEXT: _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " "personality returned _URC_INSTALL_CONTEXT", - exception_object); + (void *)exception_object); // we may get control back if landing pad calls _Unwind_Resume() __Unwind_SjLj_SetTopOfFunctionStack(c); __builtin_longjmp(c->jbuf, 1); @@ -288,7 +295,7 @@ unwind_phase2_forced(struct _Unwind_Exception *exception_object, _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): " "personality returned %d, " "_URC_FATAL_PHASE2_ERROR", - exception_object, personalityResult); + (void *)exception_object, personalityResult); return _URC_FATAL_PHASE2_ERROR; } } @@ -298,8 +305,8 @@ unwind_phase2_forced(struct _Unwind_Exception *exception_object, // call stop function one last time and tell it we've reached the end of the // stack _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): calling stop " - "function with _UA_END_OF_STACK", - exception_object); + "function with _UA_END_OF_STACK", + (void *)exception_object); _Unwind_Action lastAction = (_Unwind_Action)(_UA_FORCE_UNWIND | _UA_CLEANUP_PHASE | _UA_END_OF_STACK); (*stop)(1, lastAction, exception_object->exception_class, exception_object, @@ -314,7 +321,8 @@ unwind_phase2_forced(struct _Unwind_Exception *exception_object, /// Called by __cxa_throw. Only returns if there is a fatal error _LIBUNWIND_EXPORT _Unwind_Reason_Code _Unwind_SjLj_RaiseException(struct _Unwind_Exception *exception_object) { - _LIBUNWIND_TRACE_API("_Unwind_SjLj_RaiseException(ex_obj=%p)", exception_object); + _LIBUNWIND_TRACE_API("_Unwind_SjLj_RaiseException(ex_obj=%p)", + (void *)exception_object); // mark that this is a non-forced unwind, so _Unwind_Resume() can do the right // thing @@ -344,7 +352,8 @@ _Unwind_SjLj_RaiseException(struct _Unwind_Exception *exception_object) { /// __cxa_rethrow() which in turn calls _Unwind_Resume_or_Rethrow() _LIBUNWIND_EXPORT void _Unwind_SjLj_Resume(struct _Unwind_Exception *exception_object) { - _LIBUNWIND_TRACE_API("_Unwind_SjLj_Resume(ex_obj=%p)", exception_object); + _LIBUNWIND_TRACE_API("_Unwind_SjLj_Resume(ex_obj=%p)", + (void *)exception_object); if (exception_object->private_1 != 0) unwind_phase2_forced(exception_object, @@ -362,8 +371,8 @@ _Unwind_SjLj_Resume(struct _Unwind_Exception *exception_object) { _LIBUNWIND_EXPORT _Unwind_Reason_Code _Unwind_SjLj_Resume_or_Rethrow(struct _Unwind_Exception *exception_object) { _LIBUNWIND_TRACE_API("__Unwind_SjLj_Resume_or_Rethrow(ex_obj=%p), " - "private_1=%ld", - exception_object, exception_object->private_1); + "private_1=%" PRIuPTR, + (void *)exception_object, exception_object->private_1); // If this is non-forced and a stopping place was found, then this is a // re-throw. // Call _Unwind_RaiseException() as if this was a new exception. @@ -386,7 +395,8 @@ _LIBUNWIND_EXPORT uintptr_t _Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) { _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; _LIBUNWIND_TRACE_API("_Unwind_GetLanguageSpecificData(context=%p) " - "=> 0x%0lX", context, ufc->lsda); + "=> 0x%" PRIuPTR, + (void *)context, ufc->lsda); return ufc->lsda; } @@ -394,8 +404,8 @@ _Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) { /// Called by personality handler during phase 2 to get register values. 
_LIBUNWIND_EXPORT uintptr_t _Unwind_GetGR(struct _Unwind_Context *context, int index) { - _LIBUNWIND_TRACE_API("_Unwind_GetGR(context=%p, reg=%d)", - context, index); + _LIBUNWIND_TRACE_API("_Unwind_GetGR(context=%p, reg=%d)", (void *)context, + index); _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; return ufc->resumeParameters[index]; } @@ -404,8 +414,9 @@ _LIBUNWIND_EXPORT uintptr_t _Unwind_GetGR(struct _Unwind_Context *context, /// Called by personality handler during phase 2 to alter register values. _LIBUNWIND_EXPORT void _Unwind_SetGR(struct _Unwind_Context *context, int index, uintptr_t new_value) { - _LIBUNWIND_TRACE_API("_Unwind_SetGR(context=%p, reg=%d, value=0x%0lX)" - , context, index, new_value); + _LIBUNWIND_TRACE_API("_Unwind_SetGR(context=%p, reg=%d, value=0x%" PRIuPTR + ")", + (void *)context, index, new_value); _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; ufc->resumeParameters[index] = new_value; } @@ -414,8 +425,8 @@ _LIBUNWIND_EXPORT void _Unwind_SetGR(struct _Unwind_Context *context, int index, /// Called by personality handler during phase 2 to get instruction pointer. _LIBUNWIND_EXPORT uintptr_t _Unwind_GetIP(struct _Unwind_Context *context) { _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; - _LIBUNWIND_TRACE_API("_Unwind_GetIP(context=%p) => 0x%lX", context, - ufc->resumeLocation + 1); + _LIBUNWIND_TRACE_API("_Unwind_GetIP(context=%p) => 0x%" PRIu32, + (void *)context, ufc->resumeLocation + 1); return ufc->resumeLocation + 1; } @@ -427,8 +438,9 @@ _LIBUNWIND_EXPORT uintptr_t _Unwind_GetIPInfo(struct _Unwind_Context *context, int *ipBefore) { _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; *ipBefore = 0; - _LIBUNWIND_TRACE_API("_Unwind_GetIPInfo(context=%p, %p) => 0x%lX", - context, ipBefore, ufc->resumeLocation + 1); + _LIBUNWIND_TRACE_API("_Unwind_GetIPInfo(context=%p, %p) => 0x%" PRIu32, + (void *)context, (void *)ipBefore, + ufc->resumeLocation + 1); return ufc->resumeLocation + 1; } @@ -436,8 +448,8 @@ _LIBUNWIND_EXPORT uintptr_t _Unwind_GetIPInfo(struct _Unwind_Context *context, /// Called by personality handler during phase 2 to alter instruction pointer. 
_LIBUNWIND_EXPORT void _Unwind_SetIP(struct _Unwind_Context *context, uintptr_t new_value) { - _LIBUNWIND_TRACE_API("_Unwind_SetIP(context=%p, value=0x%0lX)", - context, new_value); + _LIBUNWIND_TRACE_API("_Unwind_SetIP(context=%p, value=0x%" PRIuPTR ")", + (void *)context, new_value); _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; ufc->resumeLocation = new_value - 1; } @@ -449,7 +461,7 @@ _LIBUNWIND_EXPORT uintptr_t _Unwind_GetRegionStart(struct _Unwind_Context *context) { // Not supported or needed for sjlj based unwinding (void)context; - _LIBUNWIND_TRACE_API("_Unwind_GetRegionStart(context=%p)", context); + _LIBUNWIND_TRACE_API("_Unwind_GetRegionStart(context=%p)", (void *)context); return 0; } @@ -459,7 +471,7 @@ _Unwind_GetRegionStart(struct _Unwind_Context *context) { _LIBUNWIND_EXPORT void _Unwind_DeleteException(struct _Unwind_Exception *exception_object) { _LIBUNWIND_TRACE_API("_Unwind_DeleteException(ex_obj=%p)", - exception_object); + (void *)exception_object); if (exception_object->exception_cleanup != NULL) (*exception_object->exception_cleanup)(_URC_FOREIGN_EXCEPTION_CAUGHT, exception_object); @@ -473,7 +485,7 @@ _LIBUNWIND_EXPORT uintptr_t _Unwind_GetDataRelBase(struct _Unwind_Context *context) { // Not supported or needed for sjlj based unwinding (void)context; - _LIBUNWIND_TRACE_API("_Unwind_GetDataRelBase(context=%p)", context); + _LIBUNWIND_TRACE_API("_Unwind_GetDataRelBase(context=%p)", (void *)context); _LIBUNWIND_ABORT("_Unwind_GetDataRelBase() not implemented"); } @@ -484,14 +496,14 @@ _LIBUNWIND_EXPORT uintptr_t _Unwind_GetTextRelBase(struct _Unwind_Context *context) { // Not supported or needed for sjlj based unwinding (void)context; - _LIBUNWIND_TRACE_API("_Unwind_GetTextRelBase(context=%p)", context); + _LIBUNWIND_TRACE_API("_Unwind_GetTextRelBase(context=%p)", (void *)context); _LIBUNWIND_ABORT("_Unwind_GetTextRelBase() not implemented"); } /// Called by personality handler to get "Call Frame Area" for current frame. _LIBUNWIND_EXPORT uintptr_t _Unwind_GetCFA(struct _Unwind_Context *context) { - _LIBUNWIND_TRACE_API("_Unwind_GetCFA(context=%p)", context); + _LIBUNWIND_TRACE_API("_Unwind_GetCFA(context=%p)", (void *)context); if (context != NULL) { _Unwind_FunctionContext_t ufc = (_Unwind_FunctionContext_t) context; // Setjmp/longjmp based exceptions don't have a true CFA. diff --git a/lib/libunwind/src/UnwindCursor.hpp b/lib/libunwind/src/UnwindCursor.hpp index 52439f9b54..a96c9f3995 100644 --- a/lib/libunwind/src/UnwindCursor.hpp +++ b/lib/libunwind/src/UnwindCursor.hpp @@ -1,9 +1,8 @@ //===------------------------- UnwindCursor.hpp ---------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // // // C++ interface to lower levels of libunwind @@ -12,7 +11,6 @@ #ifndef __UNWINDCURSOR_HPP__ #define __UNWINDCURSOR_HPP__ -#include #include #include #include @@ -106,7 +104,6 @@ private: static void dyldUnloadHook(const struct mach_header *mh, intptr_t slide); static bool _registeredForDyldUnloads; #endif - // Can't use std::vector<> here because this code is below libc++. 
static entry *_buffer; static entry *_bufferUsed; static entry *_bufferEnd; @@ -485,6 +482,10 @@ public: DISPATCHER_CONTEXT *getDispatcherContext() { return &_dispContext; } void setDispatcherContext(DISPATCHER_CONTEXT *disp) { _dispContext = *disp; } + // libunwind does not and should not depend on C++ library which means that we + // need our own definition of inline placement new. + static void *operator new(size_t, UnwindCursor *p) { return p; } + private: pint_t getLastPC() const { return _dispContext.ControlPc; } @@ -789,6 +790,8 @@ bool UnwindCursor::validFloatReg(int regNum) { if (regNum >= UNW_ARM_D0 && regNum <= UNW_ARM_D31) return true; #elif defined(_LIBUNWIND_TARGET_AARCH64) if (regNum >= UNW_ARM64_D0 && regNum <= UNW_ARM64_D31) return true; +#else + (void)regNum; #endif return false; } @@ -816,6 +819,7 @@ unw_fpreg_t UnwindCursor::getFloatReg(int regNum) { #elif defined(_LIBUNWIND_TARGET_AARCH64) return _msContext.V[regNum - UNW_ARM64_D0].D[0]; #else + (void)regNum; _LIBUNWIND_ABORT("float registers unimplemented"); #endif } @@ -843,6 +847,8 @@ void UnwindCursor::setFloatReg(int regNum, unw_fpreg_t value) { #elif defined(_LIBUNWIND_TARGET_AARCH64) _msContext.V[regNum - UNW_ARM64_D0].D[0] = value; #else + (void)regNum; + (void)value; _LIBUNWIND_ABORT("float registers unimplemented"); #endif } @@ -892,6 +898,10 @@ public: virtual void saveVFPAsX(); #endif + // libunwind does not and should not depend on C++ library which means that we + // need our own definition of inline placement new. + static void *operator new(size_t, UnwindCursor *p) { return p; } + private: #if defined(_LIBUNWIND_ARM_EHABI) @@ -1221,7 +1231,6 @@ template struct EHABISectionIterator { typedef EHABISectionIterator _Self; - typedef std::random_access_iterator_tag iterator_category; typedef typename A::pint_t value_type; typedef typename A::pint_t* pointer; typedef typename A::pint_t& reference; @@ -1275,6 +1284,29 @@ struct EHABISectionIterator { const UnwindInfoSections* _sects; }; +namespace { + +template +EHABISectionIterator EHABISectionUpperBound( + EHABISectionIterator first, + EHABISectionIterator last, + typename A::pint_t value) { + size_t len = last - first; + while (len > 0) { + size_t l2 = len / 2; + EHABISectionIterator m = first + l2; + if (value < *m) { + len = l2; + } else { + first = ++m; + len -= l2 + 1; + } + } + return first; +} + +} + template bool UnwindCursor::getInfoFromEHABISection( pint_t pc, @@ -1286,7 +1318,7 @@ bool UnwindCursor::getInfoFromEHABISection( if (begin == end) return false; - EHABISectionIterator itNextPC = std::upper_bound(begin, end, pc); + EHABISectionIterator itNextPC = EHABISectionUpperBound(begin, end, pc); if (itNextPC == begin) return false; EHABISectionIterator itThisPC = itNextPC - 1; @@ -1296,8 +1328,7 @@ bool UnwindCursor::getInfoFromEHABISection( // in the table, we don't really know the function extent and have to choose a // value for nextPC. Choosing max() will allow the range check during trace to // succeed. - pint_t nextPC = (itNextPC == end) ? std::numeric_limits::max() - : itNextPC.functionAddress(); + pint_t nextPC = (itNextPC == end) ?
UINTPTR_MAX : itNextPC.functionAddress(); pint_t indexDataAddr = itThisPC.dataAddress(); if (indexDataAddr == 0) @@ -1709,7 +1740,7 @@ bool UnwindCursor::getInfoFromCompactEncodingSection(pint_t pc, --personalityIndex; // change 1-based to zero-based index if (personalityIndex > sectionHeader.personalityArrayCount()) { _LIBUNWIND_DEBUG_LOG("found encoding 0x%08X with personality index %d, " - "but personality table has only %d entires", + "but personality table has only %d entries", encoding, personalityIndex, sectionHeader.personalityArrayCount()); return false; diff --git a/lib/libunwind/src/UnwindLevel1-gcc-ext.c b/lib/libunwind/src/UnwindLevel1-gcc-ext.c index 1c66265854..63e4083a45 100644 --- a/lib/libunwind/src/UnwindLevel1-gcc-ext.c +++ b/lib/libunwind/src/UnwindLevel1-gcc-ext.c @@ -1,9 +1,8 @@ //===--------------------- UnwindLevel1-gcc-ext.c -------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // // // Implements gcc extensions to the C++ ABI Exception Handling Level 1. @@ -94,10 +93,10 @@ _LIBUNWIND_EXPORT void *_Unwind_FindEnclosingFunction(void *pc) { unw_cursor_t cursor; unw_context_t uc; unw_proc_info_t info; - unw_getcontext(&uc); - unw_init_local(&cursor, &uc); - unw_set_reg(&cursor, UNW_REG_IP, (unw_word_t)(intptr_t) pc); - if (unw_get_proc_info(&cursor, &info) == UNW_ESUCCESS) + __unw_getcontext(&uc); + __unw_init_local(&cursor, &uc); + __unw_set_reg(&cursor, UNW_REG_IP, (unw_word_t)(intptr_t)pc); + if (__unw_get_proc_info(&cursor, &info) == UNW_ESUCCESS) return (void *)(intptr_t) info.start_ip; else return NULL; @@ -109,8 +108,8 @@ _LIBUNWIND_EXPORT _Unwind_Reason_Code _Unwind_Backtrace(_Unwind_Trace_Fn callback, void *ref) { unw_cursor_t cursor; unw_context_t uc; - unw_getcontext(&uc); - unw_init_local(&cursor, &uc); + __unw_getcontext(&uc); + __unw_init_local(&cursor, &uc); _LIBUNWIND_TRACE_API("_Unwind_Backtrace(callback=%p)", (void *)(uintptr_t)callback); @@ -129,7 +128,7 @@ _Unwind_Backtrace(_Unwind_Trace_Fn callback, void *ref) { #if !defined(_LIBUNWIND_ARM_EHABI) // ask libunwind to get next frame (skip over first frame which is // _Unwind_Backtrace()) - if (unw_step(&cursor) <= 0) { + if (__unw_step(&cursor) <= 0) { _LIBUNWIND_TRACE_UNWINDING(" _backtrace: ended because cursor reached " "bottom of stack, returning %d", _URC_END_OF_STACK); @@ -138,7 +137,7 @@ _Unwind_Backtrace(_Unwind_Trace_Fn callback, void *ref) { #else // Get the information for this frame. 
unw_proc_info_t frameInfo; - if (unw_get_proc_info(&cursor, &frameInfo) != UNW_ESUCCESS) { + if (__unw_get_proc_info(&cursor, &frameInfo) != UNW_ESUCCESS) { return _URC_END_OF_STACK; } @@ -165,8 +164,8 @@ _Unwind_Backtrace(_Unwind_Trace_Fn callback, void *ref) { char functionName[512]; unw_proc_info_t frame; unw_word_t offset; - unw_get_proc_name(&cursor, functionName, 512, &offset); - unw_get_proc_info(&cursor, &frame); + __unw_get_proc_name(&cursor, functionName, 512, &offset); + __unw_get_proc_info(&cursor, &frame); _LIBUNWIND_TRACE_UNWINDING( " _backtrace: start_ip=0x%" PRIxPTR ", func=%s, lsda=0x%" PRIxPTR ", context=%p", frame.start_ip, functionName, frame.lsda, @@ -192,10 +191,10 @@ _LIBUNWIND_EXPORT const void *_Unwind_Find_FDE(const void *pc, unw_cursor_t cursor; unw_context_t uc; unw_proc_info_t info; - unw_getcontext(&uc); - unw_init_local(&cursor, &uc); - unw_set_reg(&cursor, UNW_REG_IP, (unw_word_t)(intptr_t) pc); - unw_get_proc_info(&cursor, &info); + __unw_getcontext(&uc); + __unw_init_local(&cursor, &uc); + __unw_set_reg(&cursor, UNW_REG_IP, (unw_word_t)(intptr_t)pc); + __unw_get_proc_info(&cursor, &info); bases->tbase = (uintptr_t)info.extra; bases->dbase = 0; // dbase not used on Mac OS X bases->func = (uintptr_t)info.start_ip; @@ -209,7 +208,7 @@ _LIBUNWIND_EXPORT const void *_Unwind_Find_FDE(const void *pc, _LIBUNWIND_EXPORT uintptr_t _Unwind_GetCFA(struct _Unwind_Context *context) { unw_cursor_t *cursor = (unw_cursor_t *)context; unw_word_t result; - unw_get_reg(cursor, UNW_REG_SP, &result); + __unw_get_reg(cursor, UNW_REG_SP, &result); _LIBUNWIND_TRACE_API("_Unwind_GetCFA(context=%p) => 0x%" PRIxPTR, (void *)context, result); return (uintptr_t)result; @@ -234,7 +233,7 @@ _LIBUNWIND_EXPORT uintptr_t _Unwind_GetIPInfo(struct _Unwind_Context *context, /// was broken until 10.6. _LIBUNWIND_EXPORT void __register_frame(const void *fde) { _LIBUNWIND_TRACE_API("__register_frame(%p)", fde); - _unw_add_dynamic_fde((unw_word_t)(uintptr_t) fde); + __unw_add_dynamic_fde((unw_word_t)(uintptr_t)fde); } @@ -244,7 +243,7 @@ _LIBUNWIND_EXPORT void __register_frame(const void *fde) { /// was broken until 10.6. _LIBUNWIND_EXPORT void __deregister_frame(const void *fde) { _LIBUNWIND_TRACE_API("__deregister_frame(%p)", fde); - _unw_remove_dynamic_fde((unw_word_t)(uintptr_t) fde); + __unw_remove_dynamic_fde((unw_word_t)(uintptr_t)fde); } diff --git a/lib/libunwind/src/UnwindLevel1.c b/lib/libunwind/src/UnwindLevel1.c index 0a04438bba..bcb1a7fbec 100644 --- a/lib/libunwind/src/UnwindLevel1.c +++ b/lib/libunwind/src/UnwindLevel1.c @@ -1,13 +1,12 @@ //===------------------------- UnwindLevel1.c -----------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // // // Implements C++ ABI Exception Handling Level 1 as documented at: -// http://mentorembedded.github.io/cxx-abi/abi-eh.html +// https://itanium-cxx-abi.github.io/cxx-abi/abi-eh.html // using libunwind // //===----------------------------------------------------------------------===// @@ -26,9 +25,10 @@ #include #include -#include "libunwind.h" -#include "unwind.h" #include "config.h" +#include "libunwind.h" +#include "libunwind_ext.h" +#include "unwind.h" #if !defined(_LIBUNWIND_ARM_EHABI) && !defined(__USING_SJLJ_EXCEPTIONS__) @@ -36,33 +36,36 @@ static _Unwind_Reason_Code unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object) { - unw_init_local(cursor, uc); + __unw_init_local(cursor, uc); // Walk each frame looking for a place to stop. bool handlerNotFound = true; while (handlerNotFound) { // Ask libunwind to get next frame (skip over first which is // _Unwind_RaiseException). - int stepResult = unw_step(cursor); + int stepResult = __unw_step(cursor); if (stepResult == 0) { - _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): unw_step() reached " - "bottom => _URC_END_OF_STACK", - (void *)exception_object); + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): __unw_step() reached " + "bottom => _URC_END_OF_STACK", + (void *)exception_object); return _URC_END_OF_STACK; } else if (stepResult < 0) { - _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): unw_step failed => " - "_URC_FATAL_PHASE1_ERROR", - (void *)exception_object); + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): __unw_step failed => " + "_URC_FATAL_PHASE1_ERROR", + (void *)exception_object); return _URC_FATAL_PHASE1_ERROR; } // See if frame has code to run (has personality routine). 
unw_proc_info_t frameInfo; unw_word_t sp; - if (unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { - _LIBUNWIND_TRACE_UNWINDING("unwind_phase1(ex_ojb=%p): unw_get_proc_info " - "failed => _URC_FATAL_PHASE1_ERROR", - (void *)exception_object); + if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase1(ex_ojb=%p): __unw_get_proc_info " + "failed => _URC_FATAL_PHASE1_ERROR", + (void *)exception_object); return _URC_FATAL_PHASE1_ERROR; } @@ -71,12 +74,12 @@ unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except char functionBuf[512]; const char *functionName = functionBuf; unw_word_t offset; - if ((unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), - &offset) != UNW_ESUCCESS) || + if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), + &offset) != UNW_ESUCCESS) || (frameInfo.start_ip + offset > frameInfo.end_ip)) functionName = ".anonymous."; unw_word_t pc; - unw_get_reg(cursor, UNW_REG_IP, &pc); + __unw_get_reg(cursor, UNW_REG_IP, &pc); _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): pc=0x%" PRIxPTR ", start_ip=0x%" PRIxPTR ", func=%s, lsda=0x%" PRIxPTR ", personality=0x%" PRIxPTR "", @@ -100,7 +103,7 @@ unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except // found a catch clause or locals that need destructing in this frame // stop search and remember stack pointer at the frame handlerNotFound = false; - unw_get_reg(cursor, UNW_REG_SP, &sp); + __unw_get_reg(cursor, UNW_REG_SP, &sp); exception_object->private_2 = (uintptr_t)sp; _LIBUNWIND_TRACE_UNWINDING( "unwind_phase1(ex_ojb=%p): _URC_HANDLER_FOUND", @@ -129,7 +132,7 @@ unwind_phase1(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except static _Unwind_Reason_Code unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object) { - unw_init_local(cursor, uc); + __unw_init_local(cursor, uc); _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p)", (void *)exception_object); @@ -139,27 +142,30 @@ unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except // Ask libunwind to get next frame (skip over first which is // _Unwind_RaiseException). - int stepResult = unw_step(cursor); + int stepResult = __unw_step(cursor); if (stepResult == 0) { - _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): unw_step() reached " - "bottom => _URC_END_OF_STACK", - (void *)exception_object); + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): __unw_step() reached " + "bottom => _URC_END_OF_STACK", + (void *)exception_object); return _URC_END_OF_STACK; } else if (stepResult < 0) { - _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): unw_step failed => " - "_URC_FATAL_PHASE1_ERROR", - (void *)exception_object); + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): __unw_step failed => " + "_URC_FATAL_PHASE1_ERROR", + (void *)exception_object); return _URC_FATAL_PHASE2_ERROR; } // Get info about this frame. 
unw_word_t sp; unw_proc_info_t frameInfo; - unw_get_reg(cursor, UNW_REG_SP, &sp); - if (unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { - _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): unw_get_proc_info " - "failed => _URC_FATAL_PHASE1_ERROR", - (void *)exception_object); + __unw_get_reg(cursor, UNW_REG_SP, &sp); + if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { + _LIBUNWIND_TRACE_UNWINDING( + "unwind_phase2(ex_ojb=%p): __unw_get_proc_info " + "failed => _URC_FATAL_PHASE1_ERROR", + (void *)exception_object); return _URC_FATAL_PHASE2_ERROR; } @@ -168,8 +174,8 @@ unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except char functionBuf[512]; const char *functionName = functionBuf; unw_word_t offset; - if ((unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), - &offset) != UNW_ESUCCESS) || + if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), + &offset) != UNW_ESUCCESS) || (frameInfo.start_ip + offset > frameInfo.end_ip)) functionName = ".anonymous."; _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): start_ip=0x%" PRIxPTR @@ -212,15 +218,15 @@ unwind_phase2(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *except // We may get control back if landing pad calls _Unwind_Resume(). if (_LIBUNWIND_TRACING_UNWINDING) { unw_word_t pc; - unw_get_reg(cursor, UNW_REG_IP, &pc); - unw_get_reg(cursor, UNW_REG_SP, &sp); + __unw_get_reg(cursor, UNW_REG_IP, &pc); + __unw_get_reg(cursor, UNW_REG_SP, &sp); _LIBUNWIND_TRACE_UNWINDING("unwind_phase2(ex_ojb=%p): re-entering " "user code with ip=0x%" PRIxPTR ", sp=0x%" PRIxPTR, (void *)exception_object, pc, sp); } - unw_resume(cursor); - // unw_resume() only returns if there was an error. + __unw_resume(cursor); + // __unw_resume() only returns if there was an error. return _URC_FATAL_PHASE2_ERROR; default: // Personality routine returned an unknown result code. @@ -240,15 +246,15 @@ static _Unwind_Reason_Code unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor, _Unwind_Exception *exception_object, _Unwind_Stop_Fn stop, void *stop_parameter) { - unw_init_local(cursor, uc); + __unw_init_local(cursor, uc); // Walk each frame until we reach where search phase said to stop - while (unw_step(cursor) > 0) { + while (__unw_step(cursor) > 0) { // Update info about this frame. unw_proc_info_t frameInfo; - if (unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { - _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): unw_step " + if (__unw_get_proc_info(cursor, &frameInfo) != UNW_ESUCCESS) { + _LIBUNWIND_TRACE_UNWINDING("unwind_phase2_forced(ex_ojb=%p): __unw_step " "failed => _URC_END_OF_STACK", (void *)exception_object); return _URC_FATAL_PHASE2_ERROR; @@ -259,8 +265,8 @@ unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor, char functionBuf[512]; const char *functionName = functionBuf; unw_word_t offset; - if ((unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), - &offset) != UNW_ESUCCESS) || + if ((__unw_get_proc_name(cursor, functionBuf, sizeof(functionBuf), + &offset) != UNW_ESUCCESS) || (frameInfo.start_ip + offset > frameInfo.end_ip)) functionName = ".anonymous."; _LIBUNWIND_TRACE_UNWINDING( @@ -310,7 +316,7 @@ unwind_phase2_forced(unw_context_t *uc, unw_cursor_t *cursor, "_URC_INSTALL_CONTEXT", (void *)exception_object); // We may get control back if landing pad calls _Unwind_Resume(). - unw_resume(cursor); + __unw_resume(cursor); break; default: // Personality routine returned an unknown result code. 
@@ -346,7 +352,7 @@ _Unwind_RaiseException(_Unwind_Exception *exception_object) { (void *)exception_object); unw_context_t uc; unw_cursor_t cursor; - unw_getcontext(&uc); + __unw_getcontext(&uc); // Mark that this is a non-forced unwind, so _Unwind_Resume() // can do the right thing. @@ -380,7 +386,7 @@ _Unwind_Resume(_Unwind_Exception *exception_object) { _LIBUNWIND_TRACE_API("_Unwind_Resume(ex_obj=%p)", (void *)exception_object); unw_context_t uc; unw_cursor_t cursor; - unw_getcontext(&uc); + __unw_getcontext(&uc); if (exception_object->private_1 != 0) unwind_phase2_forced(&uc, &cursor, exception_object, @@ -405,7 +411,7 @@ _Unwind_ForcedUnwind(_Unwind_Exception *exception_object, (void *)exception_object, (void *)(uintptr_t)stop); unw_context_t uc; unw_cursor_t cursor; - unw_getcontext(&uc); + __unw_getcontext(&uc); // Mark that this is a forced unwind, so _Unwind_Resume() can do // the right thing. @@ -423,7 +429,7 @@ _Unwind_GetLanguageSpecificData(struct _Unwind_Context *context) { unw_cursor_t *cursor = (unw_cursor_t *)context; unw_proc_info_t frameInfo; uintptr_t result = 0; - if (unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS) + if (__unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS) result = (uintptr_t)frameInfo.lsda; _LIBUNWIND_TRACE_API( "_Unwind_GetLanguageSpecificData(context=%p) => 0x%" PRIxPTR, @@ -444,7 +450,7 @@ _Unwind_GetRegionStart(struct _Unwind_Context *context) { unw_cursor_t *cursor = (unw_cursor_t *)context; unw_proc_info_t frameInfo; uintptr_t result = 0; - if (unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS) + if (__unw_get_proc_info(cursor, &frameInfo) == UNW_ESUCCESS) result = (uintptr_t)frameInfo.start_ip; _LIBUNWIND_TRACE_API("_Unwind_GetRegionStart(context=%p) => 0x%" PRIxPTR, (void *)context, result); @@ -469,7 +475,7 @@ _LIBUNWIND_EXPORT uintptr_t _Unwind_GetGR(struct _Unwind_Context *context, int index) { unw_cursor_t *cursor = (unw_cursor_t *)context; unw_word_t result; - unw_get_reg(cursor, index, &result); + __unw_get_reg(cursor, index, &result); _LIBUNWIND_TRACE_API("_Unwind_GetGR(context=%p, reg=%d) => 0x%" PRIxPTR, (void *)context, index, result); return (uintptr_t)result; @@ -482,14 +488,14 @@ _LIBUNWIND_EXPORT void _Unwind_SetGR(struct _Unwind_Context *context, int index, ")", (void *)context, index, value); unw_cursor_t *cursor = (unw_cursor_t *)context; - unw_set_reg(cursor, index, value); + __unw_set_reg(cursor, index, value); } /// Called by personality handler during phase 2 to get instruction pointer. 
_LIBUNWIND_EXPORT uintptr_t _Unwind_GetIP(struct _Unwind_Context *context) { unw_cursor_t *cursor = (unw_cursor_t *)context; unw_word_t result; - unw_get_reg(cursor, UNW_REG_IP, &result); + __unw_get_reg(cursor, UNW_REG_IP, &result); _LIBUNWIND_TRACE_API("_Unwind_GetIP(context=%p) => 0x%" PRIxPTR, (void *)context, result); return (uintptr_t)result; @@ -503,7 +509,7 @@ _LIBUNWIND_EXPORT void _Unwind_SetIP(struct _Unwind_Context *context, _LIBUNWIND_TRACE_API("_Unwind_SetIP(context=%p, value=0x%0" PRIxPTR ")", (void *)context, value); unw_cursor_t *cursor = (unw_cursor_t *)context; - unw_set_reg(cursor, UNW_REG_IP, value); + __unw_set_reg(cursor, UNW_REG_IP, value); } #endif // !defined(_LIBUNWIND_ARM_EHABI) && !defined(__USING_SJLJ_EXCEPTIONS__) diff --git a/lib/libunwind/src/UnwindRegistersRestore.S b/lib/libunwind/src/UnwindRegistersRestore.S index 389db67579..8122bf3e1b 100644 --- a/lib/libunwind/src/UnwindRegistersRestore.S +++ b/lib/libunwind/src/UnwindRegistersRestore.S @@ -1,9 +1,8 @@ //===-------------------- UnwindRegistersRestore.S ------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -14,7 +13,7 @@ #if !defined(__USING_SJLJ_EXCEPTIONS__) #if defined(__i386__) -DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_x866jumptoEv) +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_x866jumptoEv) # # void libunwind::Registers_x86::jumpto() # @@ -61,7 +60,7 @@ DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_x866jumptoEv) #elif defined(__x86_64__) -DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind16Registers_x86_646jumptoEv) +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind16Registers_x86_646jumptoEv) # # void libunwind::Registers_x86_64::jumpto() # @@ -130,7 +129,7 @@ DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind16Registers_x86_646jumptoEv) #elif defined(__powerpc64__) -DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind15Registers_ppc646jumptoEv) +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind15Registers_ppc646jumptoEv) // // void libunwind::Registers_ppc64::jumpto() // @@ -395,120 +394,120 @@ Lnovec: #elif defined(__ppc__) -DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_ppc6jumptoEv) -; -; void libunwind::Registers_ppc::jumpto() -; -; On entry: -; thread_state pointer is in r3 -; +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_ppc6jumptoEv) +// +// void libunwind::Registers_ppc::jumpto() +// +// On entry: +// thread_state pointer is in r3 +// - ; restore integral registerrs - ; skip r0 for now - ; skip r1 for now - lwz r2, 16(r3) - ; skip r3 for now - ; skip r4 for now - ; skip r5 for now - lwz r6, 32(r3) - lwz r7, 36(r3) - lwz r8, 40(r3) - lwz r9, 44(r3) - lwz r10, 48(r3) - lwz r11, 52(r3) - lwz r12, 56(r3) - lwz r13, 60(r3) - lwz r14, 64(r3) - lwz r15, 68(r3) - lwz r16, 72(r3) - lwz r17, 76(r3) - lwz r18, 80(r3) - lwz r19, 84(r3) - lwz r20, 88(r3) - lwz r21, 92(r3) - lwz r22, 96(r3) - lwz r23,100(r3) - lwz r24,104(r3) - lwz r25,108(r3) - lwz r26,112(r3) - lwz r27,116(r3) - lwz r28,120(r3) - lwz r29,124(r3) - lwz r30,128(r3) - lwz r31,132(r3) + // restore integral registerrs + // skip r0 for now + // skip r1 for now + lwz %r2, 16(%r3) + 
// skip r3 for now + // skip r4 for now + // skip r5 for now + lwz %r6, 32(%r3) + lwz %r7, 36(%r3) + lwz %r8, 40(%r3) + lwz %r9, 44(%r3) + lwz %r10, 48(%r3) + lwz %r11, 52(%r3) + lwz %r12, 56(%r3) + lwz %r13, 60(%r3) + lwz %r14, 64(%r3) + lwz %r15, 68(%r3) + lwz %r16, 72(%r3) + lwz %r17, 76(%r3) + lwz %r18, 80(%r3) + lwz %r19, 84(%r3) + lwz %r20, 88(%r3) + lwz %r21, 92(%r3) + lwz %r22, 96(%r3) + lwz %r23,100(%r3) + lwz %r24,104(%r3) + lwz %r25,108(%r3) + lwz %r26,112(%r3) + lwz %r27,116(%r3) + lwz %r28,120(%r3) + lwz %r29,124(%r3) + lwz %r30,128(%r3) + lwz %r31,132(%r3) - ; restore float registers - lfd f0, 160(r3) - lfd f1, 168(r3) - lfd f2, 176(r3) - lfd f3, 184(r3) - lfd f4, 192(r3) - lfd f5, 200(r3) - lfd f6, 208(r3) - lfd f7, 216(r3) - lfd f8, 224(r3) - lfd f9, 232(r3) - lfd f10,240(r3) - lfd f11,248(r3) - lfd f12,256(r3) - lfd f13,264(r3) - lfd f14,272(r3) - lfd f15,280(r3) - lfd f16,288(r3) - lfd f17,296(r3) - lfd f18,304(r3) - lfd f19,312(r3) - lfd f20,320(r3) - lfd f21,328(r3) - lfd f22,336(r3) - lfd f23,344(r3) - lfd f24,352(r3) - lfd f25,360(r3) - lfd f26,368(r3) - lfd f27,376(r3) - lfd f28,384(r3) - lfd f29,392(r3) - lfd f30,400(r3) - lfd f31,408(r3) + // restore float registers + lfd %f0, 160(%r3) + lfd %f1, 168(%r3) + lfd %f2, 176(%r3) + lfd %f3, 184(%r3) + lfd %f4, 192(%r3) + lfd %f5, 200(%r3) + lfd %f6, 208(%r3) + lfd %f7, 216(%r3) + lfd %f8, 224(%r3) + lfd %f9, 232(%r3) + lfd %f10,240(%r3) + lfd %f11,248(%r3) + lfd %f12,256(%r3) + lfd %f13,264(%r3) + lfd %f14,272(%r3) + lfd %f15,280(%r3) + lfd %f16,288(%r3) + lfd %f17,296(%r3) + lfd %f18,304(%r3) + lfd %f19,312(%r3) + lfd %f20,320(%r3) + lfd %f21,328(%r3) + lfd %f22,336(%r3) + lfd %f23,344(%r3) + lfd %f24,352(%r3) + lfd %f25,360(%r3) + lfd %f26,368(%r3) + lfd %f27,376(%r3) + lfd %f28,384(%r3) + lfd %f29,392(%r3) + lfd %f30,400(%r3) + lfd %f31,408(%r3) - ; restore vector registers if any are in use - lwz r5,156(r3) ; test VRsave - cmpwi r5,0 - beq Lnovec - - subi r4,r1,16 - rlwinm r4,r4,0,0,27 ; mask low 4-bits - ; r4 is now a 16-byte aligned pointer into the red zone - ; the _vectorRegisters may not be 16-byte aligned so copy via red zone temp buffer + // restore vector registers if any are in use + lwz %r5, 156(%r3) // test VRsave + cmpwi %r5, 0 + beq Lnovec + subi %r4, %r1, 16 + rlwinm %r4, %r4, 0, 0, 27 // mask low 4-bits + // r4 is now a 16-byte aligned pointer into the red zone + // the _vectorRegisters may not be 16-byte aligned so copy via red zone temp buffer + #define LOAD_VECTOR_UNALIGNEDl(_index) \ - andis. r0,r5,(1<<(15-_index)) @\ - beq Ldone ## _index @\ - lwz r0, 424+_index*16(r3) @\ - stw r0, 0(r4) @\ - lwz r0, 424+_index*16+4(r3) @\ - stw r0, 4(r4) @\ - lwz r0, 424+_index*16+8(r3) @\ - stw r0, 8(r4) @\ - lwz r0, 424+_index*16+12(r3)@\ - stw r0, 12(r4) @\ - lvx v ## _index,0,r4 @\ -Ldone ## _index: + andis. %r0, %r5, (1<<(15-_index)) SEPARATOR \ + beq Ldone ## _index SEPARATOR \ + lwz %r0, 424+_index*16(%r3) SEPARATOR \ + stw %r0, 0(%r4) SEPARATOR \ + lwz %r0, 424+_index*16+4(%r3) SEPARATOR \ + stw %r0, 4(%r4) SEPARATOR \ + lwz %r0, 424+_index*16+8(%r3) SEPARATOR \ + stw %r0, 8(%r4) SEPARATOR \ + lwz %r0, 424+_index*16+12(%r3) SEPARATOR \ + stw %r0, 12(%r4) SEPARATOR \ + lvx %v ## _index, 0, %r4 SEPARATOR \ + Ldone ## _index: #define LOAD_VECTOR_UNALIGNEDh(_index) \ - andi. 
r0,r5,(1<<(31-_index)) @\ - beq Ldone ## _index @\ - lwz r0, 424+_index*16(r3) @\ - stw r0, 0(r4) @\ - lwz r0, 424+_index*16+4(r3) @\ - stw r0, 4(r4) @\ - lwz r0, 424+_index*16+8(r3) @\ - stw r0, 8(r4) @\ - lwz r0, 424+_index*16+12(r3)@\ - stw r0, 12(r4) @\ - lvx v ## _index,0,r4 @\ - Ldone ## _index: + andi. %r0, %r5, (1<<(31-_index)) SEPARATOR \ + beq Ldone ## _index SEPARATOR \ + lwz %r0, 424+_index*16(%r3) SEPARATOR \ + stw %r0, 0(%r4) SEPARATOR \ + lwz %r0, 424+_index*16+4(%r3) SEPARATOR \ + stw %r0, 4(%r4) SEPARATOR \ + lwz %r0, 424+_index*16+8(%r3) SEPARATOR \ + stw %r0, 8(%r4) SEPARATOR \ + lwz %r0, 424+_index*16+12(%r3) SEPARATOR \ + stw %r0, 12(%r4) SEPARATOR \ + lvx %v ## _index, 0, %r4 SEPARATOR \ + Ldone ## _index: LOAD_VECTOR_UNALIGNEDl(0) @@ -545,17 +544,17 @@ Ldone ## _index: LOAD_VECTOR_UNALIGNEDh(31) Lnovec: - lwz r0, 136(r3) ; __cr - mtocrf 255,r0 - lwz r0, 148(r3) ; __ctr - mtctr r0 - lwz r0, 0(r3) ; __ssr0 - mtctr r0 - lwz r0, 8(r3) ; do r0 now - lwz r5,28(r3) ; do r5 now - lwz r4,24(r3) ; do r4 now - lwz r1,12(r3) ; do sp now - lwz r3,20(r3) ; do r3 last + lwz %r0, 136(%r3) // __cr + mtcr %r0 + lwz %r0, 148(%r3) // __ctr + mtctr %r0 + lwz %r0, 0(%r3) // __ssr0 + mtctr %r0 + lwz %r0, 8(%r3) // do r0 now + lwz %r5, 28(%r3) // do r5 now + lwz %r4, 24(%r3) // do r4 now + lwz %r1, 12(%r3) // do sp now + lwz %r3, 20(%r3) // do r3 last bctr #elif defined(__arm64__) || defined(__aarch64__) @@ -567,7 +566,7 @@ Lnovec: // thread_state pointer is in x0 // .p2align 2 -DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind15Registers_arm646jumptoEv) +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind15Registers_arm646jumptoEv) // skip restore of x0,x1 for now ldp x2, x3, [x0, #0x010] ldp x4, x5, [x0, #0x020] @@ -576,7 +575,8 @@ DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind15Registers_arm646jumptoEv) ldp x10,x11, [x0, #0x050] ldp x12,x13, [x0, #0x060] ldp x14,x15, [x0, #0x070] - ldp x16,x17, [x0, #0x080] + // x16 and x17 were clobbered by the call into the unwinder, so no point in + // restoring them. ldp x18,x19, [x0, #0x090] ldp x20,x21, [x0, #0x0A0] ldp x22,x23, [x0, #0x0B0] @@ -584,8 +584,6 @@ DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind15Registers_arm646jumptoEv) ldp x26,x27, [x0, #0x0D0] ldp x28,x29, [x0, #0x0E0] ldr x30, [x0, #0x100] // restore pc into lr - ldr x1, [x0, #0x0F8] - mov sp,x1 // restore sp ldp d0, d1, [x0, #0x110] ldp d2, d3, [x0, #0x120] @@ -605,12 +603,21 @@ DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind15Registers_arm646jumptoEv) ldr d30, [x0, #0x200] ldr d31, [x0, #0x208] + // Finally, restore sp. This must be done after the last read from the + // context struct, because it is allocated on the stack, and an exception + // could clobber the de-allocated portion of the stack after sp has been + // restored.
+ ldr x16, [x0, #0x0F8] ldp x0, x1, [x0, #0x000] // restore x0,x1 + mov sp,x16 // restore sp ret x30 // jump to pc #elif defined(__arm__) && !defined(__APPLE__) #if !defined(__ARM_ARCH_ISA_ARM) +#if (__ARM_ARCH_ISA_THUMB == 2) + .syntax unified +#endif .thumb #endif @@ -621,7 +628,7 @@ DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind15Registers_arm646jumptoEv) @ thread_state pointer is in r0 @ .p2align 2 -DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm20restoreCoreAndJumpToEv) +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm20restoreCoreAndJumpToEv) #if !defined(__ARM_ARCH_ISA_ARM) && __ARM_ARCH_ISA_THUMB == 1 @ r8-r11: ldm into r1-r4, then mov to r8-r11 adds r0, #0x20 @@ -659,7 +666,7 @@ DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm20restoreCoreAndJu #if defined(__ELF__) .fpu vfpv3-d16 #endif -DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm19restoreVFPWithFLDMDEPy) +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm19restoreVFPWithFLDMDEPv) @ VFP and iwMMX instructions are only available when compiling with the flags @ that enable them. We do not want to do that in the library (because we do not @ want the compiler to generate instructions that access those) but this is @@ -680,7 +687,7 @@ DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm19restoreVFPWithFL #if defined(__ELF__) .fpu vfpv3-d16 #endif -DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm19restoreVFPWithFLDMXEPy) +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm19restoreVFPWithFLDMXEPv) vldmia r0, {d0-d15} @ fldmiax is deprecated in ARMv7+ and now behaves like vldmia JMP(lr) @@ -694,7 +701,7 @@ DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm19restoreVFPWithFL #if defined(__ELF__) .fpu vfpv3 #endif -DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm12restoreVFPv3EPy) +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm12restoreVFPv3EPv) vldmia r0, {d16-d31} JMP(lr) @@ -710,7 +717,7 @@ DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm12restoreVFPv3EPy) #if defined(__ELF__) .arch armv5te #endif -DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm12restoreiWMMXEPy) +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm12restoreiWMMXEPv) ldcl p1, cr0, [r0], #8 @ wldrd wR0, [r0], #8 ldcl p1, cr1, [r0], #8 @ wldrd wR1, [r0], #8 ldcl p1, cr2, [r0], #8 @ wldrd wR2, [r0], #8 @@ -739,7 +746,7 @@ DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm12restoreiWMMXEPy) #if defined(__ELF__) .arch armv5te #endif -DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm19restoreiWMMXControlEPj) +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm19restoreiWMMXControlEPj) ldc2 p1, cr8, [r0], #4 @ wldrw wCGR0, [r0], #4 ldc2 p1, cr9, [r0], #4 @ wldrw wCGR1, [r0], #4 ldc2 p1, cr10, [r0], #4 @ wldrw wCGR2, [r0], #4 @@ -750,7 +757,7 @@ DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm19restoreiWMMXCont #elif defined(__or1k__) -DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind14Registers_or1k6jumptoEv) +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind14Registers_or1k6jumptoEv) # # void libunwind::Registers_or1k::jumpto() # @@ -809,7 +816,7 @@ DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind14Registers_or1k6jumptoEv) // On entry: // thread state pointer is in a0 ($4) // -DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind18Registers_mips_o326jumptoEv) +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind18Registers_mips_o326jumptoEv) .set push .set noat .set noreorder @@ -918,7 +925,7 @@ 
DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind18Registers_mips_o326jumptoEv) // On entry: // thread state pointer is in a0 ($4) // -DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind21Registers_mips_newabi6jumptoEv) +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind21Registers_mips_newabi6jumptoEv) .set push .set noat .set noreorder @@ -1008,7 +1015,7 @@ DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind21Registers_mips_newabi6jumptoEv) // On entry: // thread_state pointer is in o0 // -DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind15Registers_sparc6jumptoEv) +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind15Registers_sparc6jumptoEv) ta 3 ldd [%o0 + 64], %l0 ldd [%o0 + 72], %l2 diff --git a/lib/libunwind/src/UnwindRegistersSave.S b/lib/libunwind/src/UnwindRegistersSave.S index 48ecb0aec7..54505e53ba 100644 --- a/lib/libunwind/src/UnwindRegistersSave.S +++ b/lib/libunwind/src/UnwindRegistersSave.S @@ -1,9 +1,8 @@ //===------------------------ UnwindRegistersSave.S -----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -16,7 +15,7 @@ #if defined(__i386__) # -# extern int unw_getcontext(unw_context_t* thread_state) +# extern int __unw_getcontext(unw_context_t* thread_state) # # On entry: # + + @@ -27,7 +26,7 @@ # +-----------------------+ <-- SP # + + # -DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) push %eax movl 8(%esp), %eax movl %ebx, 4(%eax) @@ -57,12 +56,12 @@ DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) #elif defined(__x86_64__) # -# extern int unw_getcontext(unw_context_t* thread_state) +# extern int __unw_getcontext(unw_context_t* thread_state) # # On entry: # thread_state pointer is in rdi # -DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) #if defined(_WIN64) #define PTR %rcx #define TMP %rdx @@ -119,12 +118,12 @@ DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) #elif defined(__mips__) && defined(_ABIO32) && _MIPS_SIM == _ABIO32 # -# extern int unw_getcontext(unw_context_t* thread_state) +# extern int __unw_getcontext(unw_context_t* thread_state) # # On entry: # thread_state pointer is in a0 ($4) # -DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) .set push .set noat .set noreorder @@ -228,12 +227,12 @@ DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) #elif defined(__mips64) # -# extern int unw_getcontext(unw_context_t* thread_state) +# extern int __unw_getcontext(unw_context_t* thread_state) # # On entry: # thread_state pointer is in a0 ($4) # -DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) .set push .set noat .set noreorder @@ -318,21 +317,21 @@ DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) # elif defined(__mips__) # -# extern int unw_getcontext(unw_context_t* thread_state) +# extern int __unw_getcontext(unw_context_t* thread_state) # # Just trap for the time being. 
-DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) teq $0, $0 #elif defined(__powerpc64__) // -// extern int unw_getcontext(unw_context_t* thread_state) +// extern int __unw_getcontext(unw_context_t* thread_state) // // On entry: // thread_state pointer is in r3 // -DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) // store register (GPR) #define PPC64_STR(n) \ @@ -557,157 +556,157 @@ DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) #elif defined(__ppc__) -; -; extern int unw_getcontext(unw_context_t* thread_state) -; -; On entry: -; thread_state pointer is in r3 -; -DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) - stw r0, 8(r3) - mflr r0 - stw r0, 0(r3) ; store lr as ssr0 - stw r1, 12(r3) - stw r2, 16(r3) - stw r3, 20(r3) - stw r4, 24(r3) - stw r5, 28(r3) - stw r6, 32(r3) - stw r7, 36(r3) - stw r8, 40(r3) - stw r9, 44(r3) - stw r10, 48(r3) - stw r11, 52(r3) - stw r12, 56(r3) - stw r13, 60(r3) - stw r14, 64(r3) - stw r15, 68(r3) - stw r16, 72(r3) - stw r17, 76(r3) - stw r18, 80(r3) - stw r19, 84(r3) - stw r20, 88(r3) - stw r21, 92(r3) - stw r22, 96(r3) - stw r23,100(r3) - stw r24,104(r3) - stw r25,108(r3) - stw r26,112(r3) - stw r27,116(r3) - stw r28,120(r3) - stw r29,124(r3) - stw r30,128(r3) - stw r31,132(r3) +// +// extern int unw_getcontext(unw_context_t* thread_state) +// +// On entry: +// thread_state pointer is in r3 +// +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) + stw %r0, 8(%r3) + mflr %r0 + stw %r0, 0(%r3) // store lr as ssr0 + stw %r1, 12(%r3) + stw %r2, 16(%r3) + stw %r3, 20(%r3) + stw %r4, 24(%r3) + stw %r5, 28(%r3) + stw %r6, 32(%r3) + stw %r7, 36(%r3) + stw %r8, 40(%r3) + stw %r9, 44(%r3) + stw %r10, 48(%r3) + stw %r11, 52(%r3) + stw %r12, 56(%r3) + stw %r13, 60(%r3) + stw %r14, 64(%r3) + stw %r15, 68(%r3) + stw %r16, 72(%r3) + stw %r17, 76(%r3) + stw %r18, 80(%r3) + stw %r19, 84(%r3) + stw %r20, 88(%r3) + stw %r21, 92(%r3) + stw %r22, 96(%r3) + stw %r23,100(%r3) + stw %r24,104(%r3) + stw %r25,108(%r3) + stw %r26,112(%r3) + stw %r27,116(%r3) + stw %r28,120(%r3) + stw %r29,124(%r3) + stw %r30,128(%r3) + stw %r31,132(%r3) - ; save VRSave register - mfspr r0,256 - stw r0,156(r3) - ; save CR registers - mfcr r0 - stw r0,136(r3) - ; save CTR register - mfctr r0 - stw r0,148(r3) + // save VRSave register + mfspr %r0, 256 + stw %r0, 156(%r3) + // save CR registers + mfcr %r0 + stw %r0, 136(%r3) + // save CTR register + mfctr %r0 + stw %r0, 148(%r3) - ; save float registers - stfd f0, 160(r3) - stfd f1, 168(r3) - stfd f2, 176(r3) - stfd f3, 184(r3) - stfd f4, 192(r3) - stfd f5, 200(r3) - stfd f6, 208(r3) - stfd f7, 216(r3) - stfd f8, 224(r3) - stfd f9, 232(r3) - stfd f10,240(r3) - stfd f11,248(r3) - stfd f12,256(r3) - stfd f13,264(r3) - stfd f14,272(r3) - stfd f15,280(r3) - stfd f16,288(r3) - stfd f17,296(r3) - stfd f18,304(r3) - stfd f19,312(r3) - stfd f20,320(r3) - stfd f21,328(r3) - stfd f22,336(r3) - stfd f23,344(r3) - stfd f24,352(r3) - stfd f25,360(r3) - stfd f26,368(r3) - stfd f27,376(r3) - stfd f28,384(r3) - stfd f29,392(r3) - stfd f30,400(r3) - stfd f31,408(r3) + // save float registers + stfd %f0, 160(%r3) + stfd %f1, 168(%r3) + stfd %f2, 176(%r3) + stfd %f3, 184(%r3) + stfd %f4, 192(%r3) + stfd %f5, 200(%r3) + stfd %f6, 208(%r3) + stfd %f7, 216(%r3) + stfd %f8, 224(%r3) + stfd %f9, 232(%r3) + stfd %f10,240(%r3) + stfd %f11,248(%r3) + stfd %f12,256(%r3) + stfd %f13,264(%r3) + stfd %f14,272(%r3) + stfd %f15,280(%r3) + stfd %f16,288(%r3) + stfd %f17,296(%r3) + stfd %f18,304(%r3) + stfd %f19,312(%r3) + stfd 
%f20,320(%r3) + stfd %f21,328(%r3) + stfd %f22,336(%r3) + stfd %f23,344(%r3) + stfd %f24,352(%r3) + stfd %f25,360(%r3) + stfd %f26,368(%r3) + stfd %f27,376(%r3) + stfd %f28,384(%r3) + stfd %f29,392(%r3) + stfd %f30,400(%r3) + stfd %f31,408(%r3) - ; save vector registers + // save vector registers - subi r4,r1,16 - rlwinm r4,r4,0,0,27 ; mask low 4-bits - ; r4 is now a 16-byte aligned pointer into the red zone + subi %r4, %r1, 16 + rlwinm %r4, %r4, 0, 0, 27 // mask low 4-bits + // r4 is now a 16-byte aligned pointer into the red zone #define SAVE_VECTOR_UNALIGNED(_vec, _offset) \ - stvx _vec,0,r4 @\ - lwz r5, 0(r4) @\ - stw r5, _offset(r3) @\ - lwz r5, 4(r4) @\ - stw r5, _offset+4(r3) @\ - lwz r5, 8(r4) @\ - stw r5, _offset+8(r3) @\ - lwz r5, 12(r4) @\ - stw r5, _offset+12(r3) + stvx _vec, 0, %r4 SEPARATOR \ + lwz %r5, 0(%r4) SEPARATOR \ + stw %r5, _offset(%r3) SEPARATOR \ + lwz %r5, 4(%r4) SEPARATOR \ + stw %r5, _offset+4(%r3) SEPARATOR \ + lwz %r5, 8(%r4) SEPARATOR \ + stw %r5, _offset+8(%r3) SEPARATOR \ + lwz %r5, 12(%r4) SEPARATOR \ + stw %r5, _offset+12(%r3) - SAVE_VECTOR_UNALIGNED( v0, 424+0x000) - SAVE_VECTOR_UNALIGNED( v1, 424+0x010) - SAVE_VECTOR_UNALIGNED( v2, 424+0x020) - SAVE_VECTOR_UNALIGNED( v3, 424+0x030) - SAVE_VECTOR_UNALIGNED( v4, 424+0x040) - SAVE_VECTOR_UNALIGNED( v5, 424+0x050) - SAVE_VECTOR_UNALIGNED( v6, 424+0x060) - SAVE_VECTOR_UNALIGNED( v7, 424+0x070) - SAVE_VECTOR_UNALIGNED( v8, 424+0x080) - SAVE_VECTOR_UNALIGNED( v9, 424+0x090) - SAVE_VECTOR_UNALIGNED(v10, 424+0x0A0) - SAVE_VECTOR_UNALIGNED(v11, 424+0x0B0) - SAVE_VECTOR_UNALIGNED(v12, 424+0x0C0) - SAVE_VECTOR_UNALIGNED(v13, 424+0x0D0) - SAVE_VECTOR_UNALIGNED(v14, 424+0x0E0) - SAVE_VECTOR_UNALIGNED(v15, 424+0x0F0) - SAVE_VECTOR_UNALIGNED(v16, 424+0x100) - SAVE_VECTOR_UNALIGNED(v17, 424+0x110) - SAVE_VECTOR_UNALIGNED(v18, 424+0x120) - SAVE_VECTOR_UNALIGNED(v19, 424+0x130) - SAVE_VECTOR_UNALIGNED(v20, 424+0x140) - SAVE_VECTOR_UNALIGNED(v21, 424+0x150) - SAVE_VECTOR_UNALIGNED(v22, 424+0x160) - SAVE_VECTOR_UNALIGNED(v23, 424+0x170) - SAVE_VECTOR_UNALIGNED(v24, 424+0x180) - SAVE_VECTOR_UNALIGNED(v25, 424+0x190) - SAVE_VECTOR_UNALIGNED(v26, 424+0x1A0) - SAVE_VECTOR_UNALIGNED(v27, 424+0x1B0) - SAVE_VECTOR_UNALIGNED(v28, 424+0x1C0) - SAVE_VECTOR_UNALIGNED(v29, 424+0x1D0) - SAVE_VECTOR_UNALIGNED(v30, 424+0x1E0) - SAVE_VECTOR_UNALIGNED(v31, 424+0x1F0) + SAVE_VECTOR_UNALIGNED( %v0, 424+0x000) + SAVE_VECTOR_UNALIGNED( %v1, 424+0x010) + SAVE_VECTOR_UNALIGNED( %v2, 424+0x020) + SAVE_VECTOR_UNALIGNED( %v3, 424+0x030) + SAVE_VECTOR_UNALIGNED( %v4, 424+0x040) + SAVE_VECTOR_UNALIGNED( %v5, 424+0x050) + SAVE_VECTOR_UNALIGNED( %v6, 424+0x060) + SAVE_VECTOR_UNALIGNED( %v7, 424+0x070) + SAVE_VECTOR_UNALIGNED( %v8, 424+0x080) + SAVE_VECTOR_UNALIGNED( %v9, 424+0x090) + SAVE_VECTOR_UNALIGNED(%v10, 424+0x0A0) + SAVE_VECTOR_UNALIGNED(%v11, 424+0x0B0) + SAVE_VECTOR_UNALIGNED(%v12, 424+0x0C0) + SAVE_VECTOR_UNALIGNED(%v13, 424+0x0D0) + SAVE_VECTOR_UNALIGNED(%v14, 424+0x0E0) + SAVE_VECTOR_UNALIGNED(%v15, 424+0x0F0) + SAVE_VECTOR_UNALIGNED(%v16, 424+0x100) + SAVE_VECTOR_UNALIGNED(%v17, 424+0x110) + SAVE_VECTOR_UNALIGNED(%v18, 424+0x120) + SAVE_VECTOR_UNALIGNED(%v19, 424+0x130) + SAVE_VECTOR_UNALIGNED(%v20, 424+0x140) + SAVE_VECTOR_UNALIGNED(%v21, 424+0x150) + SAVE_VECTOR_UNALIGNED(%v22, 424+0x160) + SAVE_VECTOR_UNALIGNED(%v23, 424+0x170) + SAVE_VECTOR_UNALIGNED(%v24, 424+0x180) + SAVE_VECTOR_UNALIGNED(%v25, 424+0x190) + SAVE_VECTOR_UNALIGNED(%v26, 424+0x1A0) + SAVE_VECTOR_UNALIGNED(%v27, 424+0x1B0) + SAVE_VECTOR_UNALIGNED(%v28, 424+0x1C0) + 
SAVE_VECTOR_UNALIGNED(%v29, 424+0x1D0) + SAVE_VECTOR_UNALIGNED(%v30, 424+0x1E0) + SAVE_VECTOR_UNALIGNED(%v31, 424+0x1F0) - li r3, 0 ; return UNW_ESUCCESS + li %r3, 0 // return UNW_ESUCCESS blr #elif defined(__arm64__) || defined(__aarch64__) // -// extern int unw_getcontext(unw_context_t* thread_state) +// extern int __unw_getcontext(unw_context_t* thread_state) // // On entry: // thread_state pointer is in x0 // .p2align 2 -DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) stp x0, x1, [x0, #0x000] stp x2, x3, [x0, #0x010] stp x4, x5, [x0, #0x020] @@ -751,11 +750,14 @@ DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) #elif defined(__arm__) && !defined(__APPLE__) #if !defined(__ARM_ARCH_ISA_ARM) +#if (__ARM_ARCH_ISA_THUMB == 2) + .syntax unified +#endif .thumb #endif @ -@ extern int unw_getcontext(unw_context_t* thread_state) +@ extern int __unw_getcontext(unw_context_t* thread_state) @ @ On entry: @ thread_state pointer is in r0 @@ -764,10 +766,10 @@ DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) @ EHABI #7.4.5 notes that in general all VRS registers should be restored @ however this is very hard to do for VFP registers because it is unknown @ to the library how many registers are implemented by the architecture. -@ Instead, VFP registers are demand saved by logic external to unw_getcontext. +@ Instead, VFP registers are demand saved by logic external to __unw_getcontext. @ .p2align 2 -DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) #if !defined(__ARM_ARCH_ISA_ARM) && __ARM_ARCH_ISA_THUMB == 1 stm r0!, {r0-r7} mov r1, r8 @@ -808,7 +810,7 @@ DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) #if defined(__ELF__) .fpu vfpv3-d16 #endif -DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm16saveVFPWithFSTMDEPy) +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm16saveVFPWithFSTMDEPv) vstmia r0, {d0-d15} JMP(lr) @@ -822,7 +824,7 @@ DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm16saveVFPWithFSTMD #if defined(__ELF__) .fpu vfpv3-d16 #endif -DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm16saveVFPWithFSTMXEPy) +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm16saveVFPWithFSTMXEPv) vstmia r0, {d0-d15} @ fstmiax is deprecated in ARMv7+ and now behaves like vstmia JMP(lr) @@ -836,7 +838,7 @@ DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm16saveVFPWithFSTMX #if defined(__ELF__) .fpu vfpv3 #endif -DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm9saveVFPv3EPy) +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm9saveVFPv3EPv) @ VFP and iwMMX instructions are only available when compiling with the flags @ that enable them. 
We do not want to do that in the library (because we do not @ want the compiler to generate instructions that access those) but this is @@ -859,7 +861,7 @@ DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm9saveVFPv3EPy) #if defined(__ELF__) .arch armv5te #endif -DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm9saveiWMMXEPy) +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm9saveiWMMXEPv) stcl p1, cr0, [r0], #8 @ wstrd wR0, [r0], #8 stcl p1, cr1, [r0], #8 @ wstrd wR1, [r0], #8 stcl p1, cr2, [r0], #8 @ wstrd wR2, [r0], #8 @@ -888,7 +890,7 @@ DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm9saveiWMMXEPy) #if defined(__ELF__) .arch armv5te #endif -DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm16saveiWMMXControlEPj) +DEFINE_LIBUNWIND_FUNCTION(_ZN9libunwind13Registers_arm16saveiWMMXControlEPj) stc2 p1, cr8, [r0], #4 @ wstrw wCGR0, [r0], #4 stc2 p1, cr9, [r0], #4 @ wstrw wCGR1, [r0], #4 stc2 p1, cr10, [r0], #4 @ wstrw wCGR2, [r0], #4 @@ -900,12 +902,12 @@ DEFINE_LIBUNWIND_PRIVATE_FUNCTION(_ZN9libunwind13Registers_arm16saveiWMMXControl #elif defined(__or1k__) # -# extern int unw_getcontext(unw_context_t* thread_state) +# extern int __unw_getcontext(unw_context_t* thread_state) # # On entry: # thread_state pointer is in r3 # -DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) l.sw 0(r3), r0 l.sw 4(r3), r1 l.sw 8(r3), r2 @@ -946,12 +948,12 @@ DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) #elif defined(__sparc__) # -# extern int unw_getcontext(unw_context_t* thread_state) +# extern int __unw_getcontext(unw_context_t* thread_state) # # On entry: # thread_state pointer is in o0 # -DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) +DEFINE_LIBUNWIND_FUNCTION(__unw_getcontext) ta 3 add %o7, 8, %o7 std %g0, [%o0 + 0] @@ -973,6 +975,9 @@ DEFINE_LIBUNWIND_FUNCTION(unw_getcontext) jmp %o7 clr %o0 // return UNW_ESUCCESS #endif + + WEAK_ALIAS(__unw_getcontext, unw_getcontext) + #endif /* !defined(__USING_SJLJ_EXCEPTIONS__) */ NO_EXEC_STACK_DIRECTIVE diff --git a/lib/libunwind/src/Unwind_AppleExtras.cpp b/lib/libunwind/src/Unwind_AppleExtras.cpp index 39f379c529..248d99570e 100644 --- a/lib/libunwind/src/Unwind_AppleExtras.cpp +++ b/lib/libunwind/src/Unwind_AppleExtras.cpp @@ -1,9 +1,8 @@ //===--------------------- Unwind_AppleExtras.cpp -------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // // //===----------------------------------------------------------------------===// diff --git a/lib/libunwind/src/assembly.h b/lib/libunwind/src/assembly.h index 2df930214f..7132b6c561 100644 --- a/lib/libunwind/src/assembly.h +++ b/lib/libunwind/src/assembly.h @@ -1,9 +1,8 @@ /* ===-- assembly.h - libUnwind assembler support macros -------------------=== * - * The LLVM Compiler Infrastructure - * - * This file is dual licensed under the MIT and the University of Illinois Open - * Source Licenses. See LICENSE.TXT for details. + * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. + * See https://llvm.org/LICENSE.txt for license information. 
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception * * ===----------------------------------------------------------------------=== * @@ -29,14 +28,26 @@ #ifdef _ARCH_PWR8 #define PPC64_HAS_VMX #endif -#elif defined(__POWERPC__) || defined(__powerpc__) || defined(__ppc__) -#define SEPARATOR @ #elif defined(__arm64__) #define SEPARATOR %% #else #define SEPARATOR ; #endif +#if defined(__powerpc64__) && (!defined(_CALL_ELF) || _CALL_ELF == 1) +#define PPC64_OPD1 .section .opd,"aw",@progbits SEPARATOR +#define PPC64_OPD2 SEPARATOR \ + .p2align 3 SEPARATOR \ + .quad .Lfunc_begin0 SEPARATOR \ + .quad .TOC.@tocbase SEPARATOR \ + .quad 0 SEPARATOR \ + .text SEPARATOR \ +.Lfunc_begin0: +#else +#define PPC64_OPD1 +#define PPC64_OPD2 +#endif + #define GLUE2(a, b) a ## b #define GLUE(a, b) GLUE2(a, b) #define SYMBOL_NAME(name) GLUE(__USER_LABEL_PREFIX__, name) @@ -46,6 +57,12 @@ #define SYMBOL_IS_FUNC(name) #define EXPORT_SYMBOL(name) #define HIDDEN_SYMBOL(name) .private_extern name +#define WEAK_SYMBOL(name) .weak_reference name +#define WEAK_ALIAS(name, aliasname) \ + .globl SYMBOL_NAME(aliasname) SEPARATOR \ + WEAK_SYMBOL(aliasname) SEPARATOR \ + SYMBOL_NAME(aliasname) = SYMBOL_NAME(name) + #define NO_EXEC_STACK_DIRECTIVE #elif defined(__ELF__) @@ -57,6 +74,10 @@ #endif #define EXPORT_SYMBOL(name) #define HIDDEN_SYMBOL(name) .hidden name +#define WEAK_SYMBOL(name) .weak name +#define WEAK_ALIAS(name, aliasname) \ + WEAK_SYMBOL(aliasname) SEPARATOR \ + SYMBOL_NAME(aliasname) = SYMBOL_NAME(name) #if defined(__GNU__) || defined(__FreeBSD__) || defined(__Fuchsia__) || \ defined(__linux__) @@ -72,9 +93,9 @@ .scl 2 SEPARATOR \ .type 32 SEPARATOR \ .endef -#define EXPORT_SYMBOL2(name) \ - .section .drectve,"yn" SEPARATOR \ - .ascii "-export:", #name, "\0" SEPARATOR \ +#define EXPORT_SYMBOL2(name) \ + .section .drectve,"yn" SEPARATOR \ + .ascii "-export:", #name, "\0" SEPARATOR \ .text #if defined(_LIBUNWIND_DISABLE_VISIBILITY_ANNOTATIONS) #define EXPORT_SYMBOL(name) @@ -83,6 +104,23 @@ #endif #define HIDDEN_SYMBOL(name) +#if defined(__MINGW32__) +#define WEAK_ALIAS(name, aliasname) \ + .globl SYMBOL_NAME(aliasname) SEPARATOR \ + EXPORT_SYMBOL(aliasname) SEPARATOR \ + SYMBOL_NAME(aliasname) = SYMBOL_NAME(name) +#else +#define WEAK_ALIAS3(name, aliasname) \ + .section .drectve,"yn" SEPARATOR \ + .ascii "-alternatename:", #aliasname, "=", #name, "\0" SEPARATOR \ + .text +#define WEAK_ALIAS2(name, aliasname) \ + WEAK_ALIAS3(name, aliasname) +#define WEAK_ALIAS(name, aliasname) \ + EXPORT_SYMBOL(SYMBOL_NAME(aliasname)) SEPARATOR \ + WEAK_ALIAS2(SYMBOL_NAME(name), SYMBOL_NAME(aliasname)) +#endif + #define NO_EXEC_STACK_DIRECTIVE #elif defined(__sparc__) @@ -93,17 +131,13 @@ #endif -#define DEFINE_LIBUNWIND_FUNCTION(name) \ - .globl SYMBOL_NAME(name) SEPARATOR \ - EXPORT_SYMBOL(name) SEPARATOR \ - SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ - SYMBOL_NAME(name): - -#define DEFINE_LIBUNWIND_PRIVATE_FUNCTION(name) \ - .globl SYMBOL_NAME(name) SEPARATOR \ - HIDDEN_SYMBOL(SYMBOL_NAME(name)) SEPARATOR \ - SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ - SYMBOL_NAME(name): +#define DEFINE_LIBUNWIND_FUNCTION(name) \ + .globl SYMBOL_NAME(name) SEPARATOR \ + HIDDEN_SYMBOL(SYMBOL_NAME(name)) SEPARATOR \ + SYMBOL_IS_FUNC(SYMBOL_NAME(name)) SEPARATOR \ + PPC64_OPD1 \ + SYMBOL_NAME(name): \ + PPC64_OPD2 #if defined(__arm__) #if !defined(__ARM_ARCH) diff --git a/lib/libunwind/src/config.h b/lib/libunwind/src/config.h index fc7fd64670..09bb261647 100644 --- a/lib/libunwind/src/config.h +++ b/lib/libunwind/src/config.h 
@@ -1,9 +1,8 @@ //===----------------------------- config.h -------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // // // Defines macros used within libunwind project. @@ -63,6 +62,35 @@ #endif #endif +#define STR(a) #a +#define XSTR(a) STR(a) +#define SYMBOL_NAME(name) XSTR(__USER_LABEL_PREFIX__) #name + +#if defined(__APPLE__) +#define _LIBUNWIND_WEAK_ALIAS(name, aliasname) \ + __asm__(".globl " SYMBOL_NAME(aliasname)); \ + __asm__(SYMBOL_NAME(aliasname) " = " SYMBOL_NAME(name)); \ + extern "C" _LIBUNWIND_EXPORT __typeof(name) aliasname \ + __attribute__((weak_import)); +#elif defined(__ELF__) +#define _LIBUNWIND_WEAK_ALIAS(name, aliasname) \ + extern "C" _LIBUNWIND_EXPORT __typeof(name) aliasname \ + __attribute__((weak, alias(#name))); +#elif defined(_WIN32) +#if defined(__MINGW32__) +#define _LIBUNWIND_WEAK_ALIAS(name, aliasname) \ + extern "C" _LIBUNWIND_EXPORT __typeof(name) aliasname \ + __attribute__((alias(#name))); +#else +#define _LIBUNWIND_WEAK_ALIAS(name, aliasname) \ + __pragma(comment(linker, "/alternatename:" SYMBOL_NAME(aliasname) "=" \ + SYMBOL_NAME(name))) \ + extern "C" _LIBUNWIND_EXPORT __typeof(name) aliasname; +#endif +#else +#error Unsupported target +#endif + #if (defined(__APPLE__) && defined(__arm__)) || defined(__USING_SJLJ_EXCEPTIONS__) #define _LIBUNWIND_BUILD_SJLJ_APIS #endif diff --git a/lib/libunwind/src/dwarf2.h b/lib/libunwind/src/dwarf2.h index 58525eea04..40f0daf468 100644 --- a/lib/libunwind/src/dwarf2.h +++ b/lib/libunwind/src/dwarf2.h @@ -1,9 +1,8 @@ //===------------------------------- dwarf2.h -----------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/libunwind/src/libunwind.cpp b/lib/libunwind/src/libunwind.cpp index f4d71a1863..c90032bd66 100644 --- a/lib/libunwind/src/libunwind.cpp +++ b/lib/libunwind/src/libunwind.cpp @@ -1,9 +1,8 @@ //===--------------------------- libunwind.cpp ----------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // // // Implements unw_* functions from @@ -12,12 +11,6 @@ #include -#ifndef NDEBUG -#include // getenv -#endif -#include -#include - #include "libunwind_ext.h" #include "config.h" @@ -36,15 +29,11 @@ LocalAddressSpace LocalAddressSpace::sThisAddressSpace; _LIBUNWIND_EXPORT unw_addr_space_t unw_local_addr_space = (unw_addr_space_t)&LocalAddressSpace::sThisAddressSpace; -/// record the registers and stack position of the caller -extern int unw_getcontext(unw_context_t *); -// note: unw_getcontext() implemented in assembly - /// Create a cursor of a thread in this process given 'context' recorded by -/// unw_getcontext(). -_LIBUNWIND_EXPORT int unw_init_local(unw_cursor_t *cursor, - unw_context_t *context) { - _LIBUNWIND_TRACE_API("unw_init_local(cursor=%p, context=%p)", +/// __unw_getcontext(). +_LIBUNWIND_HIDDEN int __unw_init_local(unw_cursor_t *cursor, + unw_context_t *context) { + _LIBUNWIND_TRACE_API("__unw_init_local(cursor=%p, context=%p)", static_cast(cursor), static_cast(context)); #if defined(__i386__) @@ -73,101 +62,21 @@ _LIBUNWIND_EXPORT int unw_init_local(unw_cursor_t *cursor, # error Architecture not supported #endif // Use "placement new" to allocate UnwindCursor in the cursor buffer. - new ((void *)cursor) UnwindCursor( - context, LocalAddressSpace::sThisAddressSpace); + new (reinterpret_cast *>(cursor)) + UnwindCursor( + context, LocalAddressSpace::sThisAddressSpace); #undef REGISTER_KIND AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; co->setInfoBasedOnIPRegister(); return UNW_ESUCCESS; } - -#ifdef UNW_REMOTE -/// Create a cursor into a thread in another process. -_LIBUNWIND_EXPORT int unw_init_remote_thread(unw_cursor_t *cursor, - unw_addr_space_t as, - void *arg) { - // special case: unw_init_remote(xx, unw_local_addr_space, xx) - if (as == (unw_addr_space_t)&LocalAddressSpace::sThisAddressSpace) - return unw_init_local(cursor, NULL); //FIXME - - // use "placement new" to allocate UnwindCursor in the cursor buffer - switch (as->cpuType) { - case CPU_TYPE_I386: - new ((void *)cursor) - UnwindCursor>, - Registers_x86>(((unw_addr_space_i386 *)as)->oas, arg); - break; - case CPU_TYPE_X86_64: - new ((void *)cursor) - UnwindCursor>, - Registers_x86_64>(((unw_addr_space_x86_64 *)as)->oas, arg); - break; - case CPU_TYPE_POWERPC: - new ((void *)cursor) - UnwindCursor>, - Registers_ppc>(((unw_addr_space_ppc *)as)->oas, arg); - break; - default: - return UNW_EUNSPEC; - } - return UNW_ESUCCESS; -} - - -static bool is64bit(task_t task) { - return false; // FIXME -} - -/// Create an address_space object for use in examining another task. -_LIBUNWIND_EXPORT unw_addr_space_t unw_create_addr_space_for_task(task_t task) { -#if __i386__ - if (is64bit(task)) { - unw_addr_space_x86_64 *as = new unw_addr_space_x86_64(task); - as->taskPort = task; - as->cpuType = CPU_TYPE_X86_64; - //as->oas - } else { - unw_addr_space_i386 *as = new unw_addr_space_i386(task); - as->taskPort = task; - as->cpuType = CPU_TYPE_I386; - //as->oas - } -#else -// FIXME -#endif -} - - -/// Delete an address_space object. 
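// In the __unw_init_local hunk above, the cursor is not heap-allocated: a
// concrete UnwindCursor is placement-new'ed into the opaque unw_cursor_t
// buffer the caller provides, and later calls cast that same buffer back to
// the abstract base class to dispatch. A minimal sketch of the technique with
// invented types (not libunwind's real classes):
#include <new>

struct AbstractCursor { virtual int step() = 0; virtual ~AbstractCursor() {} };
struct ConcreteCursor : AbstractCursor { int step() override { return 0; } };

// Caller-owned, suitably sized and aligned opaque storage.
struct opaque_cursor { alignas(ConcreteCursor) unsigned char buf[sizeof(ConcreteCursor)]; };

void init_cursor(opaque_cursor *c) {
  // Construct the concrete object directly in the caller's storage.
  ::new (static_cast<void *>(c->buf)) ConcreteCursor();
}

int step_cursor(opaque_cursor *c) {
  // Recover the abstract interface from the same storage and dispatch.
  return reinterpret_cast<AbstractCursor *>(c->buf)->step();
}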
-_LIBUNWIND_EXPORT void unw_destroy_addr_space(unw_addr_space_t asp) { - switch (asp->cpuType) { -#if __i386__ || __x86_64__ - case CPU_TYPE_I386: { - unw_addr_space_i386 *as = (unw_addr_space_i386 *)asp; - delete as; - } - break; - case CPU_TYPE_X86_64: { - unw_addr_space_x86_64 *as = (unw_addr_space_x86_64 *)asp; - delete as; - } - break; -#endif - case CPU_TYPE_POWERPC: { - unw_addr_space_ppc *as = (unw_addr_space_ppc *)asp; - delete as; - } - break; - } -} -#endif // UNW_REMOTE - +_LIBUNWIND_WEAK_ALIAS(__unw_init_local, unw_init_local) /// Get value of specified register at cursor position in stack frame. -_LIBUNWIND_EXPORT int unw_get_reg(unw_cursor_t *cursor, unw_regnum_t regNum, - unw_word_t *value) { - _LIBUNWIND_TRACE_API("unw_get_reg(cursor=%p, regNum=%d, &value=%p)", +_LIBUNWIND_HIDDEN int __unw_get_reg(unw_cursor_t *cursor, unw_regnum_t regNum, + unw_word_t *value) { + _LIBUNWIND_TRACE_API("__unw_get_reg(cursor=%p, regNum=%d, &value=%p)", static_cast(cursor), regNum, static_cast(value)); AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; @@ -177,12 +86,13 @@ _LIBUNWIND_EXPORT int unw_get_reg(unw_cursor_t *cursor, unw_regnum_t regNum, } return UNW_EBADREG; } - +_LIBUNWIND_WEAK_ALIAS(__unw_get_reg, unw_get_reg) /// Set value of specified register at cursor position in stack frame. -_LIBUNWIND_EXPORT int unw_set_reg(unw_cursor_t *cursor, unw_regnum_t regNum, - unw_word_t value) { - _LIBUNWIND_TRACE_API("unw_set_reg(cursor=%p, regNum=%d, value=0x%" PRIxPTR ")", +_LIBUNWIND_HIDDEN int __unw_set_reg(unw_cursor_t *cursor, unw_regnum_t regNum, + unw_word_t value) { + _LIBUNWIND_TRACE_API("__unw_set_reg(cursor=%p, regNum=%d, value=0x%" PRIxPTR + ")", static_cast(cursor), regNum, value); typedef LocalAddressSpace::pint_t pint_t; AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; @@ -208,12 +118,12 @@ _LIBUNWIND_EXPORT int unw_set_reg(unw_cursor_t *cursor, unw_regnum_t regNum, } return UNW_EBADREG; } - +_LIBUNWIND_WEAK_ALIAS(__unw_set_reg, unw_set_reg) /// Get value of specified float register at cursor position in stack frame. -_LIBUNWIND_EXPORT int unw_get_fpreg(unw_cursor_t *cursor, unw_regnum_t regNum, - unw_fpreg_t *value) { - _LIBUNWIND_TRACE_API("unw_get_fpreg(cursor=%p, regNum=%d, &value=%p)", +_LIBUNWIND_HIDDEN int __unw_get_fpreg(unw_cursor_t *cursor, unw_regnum_t regNum, + unw_fpreg_t *value) { + _LIBUNWIND_TRACE_API("__unw_get_fpreg(cursor=%p, regNum=%d, &value=%p)", static_cast(cursor), regNum, static_cast(value)); AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; @@ -223,16 +133,16 @@ _LIBUNWIND_EXPORT int unw_get_fpreg(unw_cursor_t *cursor, unw_regnum_t regNum, } return UNW_EBADREG; } - +_LIBUNWIND_WEAK_ALIAS(__unw_get_fpreg, unw_get_fpreg) /// Set value of specified float register at cursor position in stack frame. 
-_LIBUNWIND_EXPORT int unw_set_fpreg(unw_cursor_t *cursor, unw_regnum_t regNum, - unw_fpreg_t value) { +_LIBUNWIND_HIDDEN int __unw_set_fpreg(unw_cursor_t *cursor, unw_regnum_t regNum, + unw_fpreg_t value) { #if defined(_LIBUNWIND_ARM_EHABI) - _LIBUNWIND_TRACE_API("unw_set_fpreg(cursor=%p, regNum=%d, value=%llX)", + _LIBUNWIND_TRACE_API("__unw_set_fpreg(cursor=%p, regNum=%d, value=%llX)", static_cast(cursor), regNum, value); #else - _LIBUNWIND_TRACE_API("unw_set_fpreg(cursor=%p, regNum=%d, value=%g)", + _LIBUNWIND_TRACE_API("__unw_set_fpreg(cursor=%p, regNum=%d, value=%g)", static_cast(cursor), regNum, value); #endif AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; @@ -242,20 +152,20 @@ _LIBUNWIND_EXPORT int unw_set_fpreg(unw_cursor_t *cursor, unw_regnum_t regNum, } return UNW_EBADREG; } - +_LIBUNWIND_WEAK_ALIAS(__unw_set_fpreg, unw_set_fpreg) /// Move cursor to next frame. -_LIBUNWIND_EXPORT int unw_step(unw_cursor_t *cursor) { - _LIBUNWIND_TRACE_API("unw_step(cursor=%p)", static_cast(cursor)); +_LIBUNWIND_HIDDEN int __unw_step(unw_cursor_t *cursor) { + _LIBUNWIND_TRACE_API("__unw_step(cursor=%p)", static_cast(cursor)); AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; return co->step(); } - +_LIBUNWIND_WEAK_ALIAS(__unw_step, unw_step) /// Get unwind info at cursor position in stack frame. -_LIBUNWIND_EXPORT int unw_get_proc_info(unw_cursor_t *cursor, - unw_proc_info_t *info) { - _LIBUNWIND_TRACE_API("unw_get_proc_info(cursor=%p, &info=%p)", +_LIBUNWIND_HIDDEN int __unw_get_proc_info(unw_cursor_t *cursor, + unw_proc_info_t *info) { + _LIBUNWIND_TRACE_API("__unw_get_proc_info(cursor=%p, &info=%p)", static_cast(cursor), static_cast(info)); AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; co->getInfo(info); @@ -264,21 +174,21 @@ _LIBUNWIND_EXPORT int unw_get_proc_info(unw_cursor_t *cursor, else return UNW_ESUCCESS; } - +_LIBUNWIND_WEAK_ALIAS(__unw_get_proc_info, unw_get_proc_info) /// Resume execution at cursor position (aka longjump). -_LIBUNWIND_EXPORT int unw_resume(unw_cursor_t *cursor) { - _LIBUNWIND_TRACE_API("unw_resume(cursor=%p)", static_cast(cursor)); +_LIBUNWIND_HIDDEN int __unw_resume(unw_cursor_t *cursor) { + _LIBUNWIND_TRACE_API("__unw_resume(cursor=%p)", static_cast(cursor)); AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; co->jumpto(); return UNW_EUNSPEC; } - +_LIBUNWIND_WEAK_ALIAS(__unw_resume, unw_resume) /// Get name of function at cursor position in stack frame. -_LIBUNWIND_EXPORT int unw_get_proc_name(unw_cursor_t *cursor, char *buf, - size_t bufLen, unw_word_t *offset) { - _LIBUNWIND_TRACE_API("unw_get_proc_name(cursor=%p, &buf=%p, bufLen=%lu)", +_LIBUNWIND_HIDDEN int __unw_get_proc_name(unw_cursor_t *cursor, char *buf, + size_t bufLen, unw_word_t *offset) { + _LIBUNWIND_TRACE_API("__unw_get_proc_name(cursor=%p, &buf=%p, bufLen=%lu)", static_cast(cursor), static_cast(buf), static_cast(bufLen)); AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; @@ -287,58 +197,62 @@ _LIBUNWIND_EXPORT int unw_get_proc_name(unw_cursor_t *cursor, char *buf, else return UNW_EUNSPEC; } - +_LIBUNWIND_WEAK_ALIAS(__unw_get_proc_name, unw_get_proc_name) /// Checks if a register is a floating-point register. 
-_LIBUNWIND_EXPORT int unw_is_fpreg(unw_cursor_t *cursor, unw_regnum_t regNum) { - _LIBUNWIND_TRACE_API("unw_is_fpreg(cursor=%p, regNum=%d)", +_LIBUNWIND_HIDDEN int __unw_is_fpreg(unw_cursor_t *cursor, + unw_regnum_t regNum) { + _LIBUNWIND_TRACE_API("__unw_is_fpreg(cursor=%p, regNum=%d)", static_cast(cursor), regNum); AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; return co->validFloatReg(regNum); } - +_LIBUNWIND_WEAK_ALIAS(__unw_is_fpreg, unw_is_fpreg) /// Checks if a register is a floating-point register. -_LIBUNWIND_EXPORT const char *unw_regname(unw_cursor_t *cursor, - unw_regnum_t regNum) { - _LIBUNWIND_TRACE_API("unw_regname(cursor=%p, regNum=%d)", +_LIBUNWIND_HIDDEN const char *__unw_regname(unw_cursor_t *cursor, + unw_regnum_t regNum) { + _LIBUNWIND_TRACE_API("__unw_regname(cursor=%p, regNum=%d)", static_cast(cursor), regNum); AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; return co->getRegisterName(regNum); } - +_LIBUNWIND_WEAK_ALIAS(__unw_regname, unw_regname) /// Checks if current frame is signal trampoline. -_LIBUNWIND_EXPORT int unw_is_signal_frame(unw_cursor_t *cursor) { - _LIBUNWIND_TRACE_API("unw_is_signal_frame(cursor=%p)", +_LIBUNWIND_HIDDEN int __unw_is_signal_frame(unw_cursor_t *cursor) { + _LIBUNWIND_TRACE_API("__unw_is_signal_frame(cursor=%p)", static_cast(cursor)); AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; return co->isSignalFrame(); } +_LIBUNWIND_WEAK_ALIAS(__unw_is_signal_frame, unw_is_signal_frame) #ifdef __arm__ // Save VFP registers d0-d15 using FSTMIADX instead of FSTMIADD -_LIBUNWIND_EXPORT void unw_save_vfp_as_X(unw_cursor_t *cursor) { - _LIBUNWIND_TRACE_API("unw_fpreg_save_vfp_as_X(cursor=%p)", +_LIBUNWIND_HIDDEN void __unw_save_vfp_as_X(unw_cursor_t *cursor) { + _LIBUNWIND_TRACE_API("__unw_get_fpreg_save_vfp_as_X(cursor=%p)", static_cast(cursor)); AbstractUnwindCursor *co = (AbstractUnwindCursor *)cursor; return co->saveVFPAsX(); } +_LIBUNWIND_WEAK_ALIAS(__unw_save_vfp_as_X, unw_save_vfp_as_X) #endif #if defined(_LIBUNWIND_SUPPORT_DWARF_UNWIND) /// SPI: walks cached DWARF entries -_LIBUNWIND_EXPORT void unw_iterate_dwarf_unwind_cache(void (*func)( +_LIBUNWIND_HIDDEN void __unw_iterate_dwarf_unwind_cache(void (*func)( unw_word_t ip_start, unw_word_t ip_end, unw_word_t fde, unw_word_t mh)) { - _LIBUNWIND_TRACE_API("unw_iterate_dwarf_unwind_cache(func=%p)", + _LIBUNWIND_TRACE_API("__unw_iterate_dwarf_unwind_cache(func=%p)", reinterpret_cast(func)); DwarfFDECache::iterateCacheEntries(func); } - +_LIBUNWIND_WEAK_ALIAS(__unw_iterate_dwarf_unwind_cache, + unw_iterate_dwarf_unwind_cache) /// IPI: for __register_frame() -void _unw_add_dynamic_fde(unw_word_t fde) { +void __unw_add_dynamic_fde(unw_word_t fde) { CFI_Parser::FDE_Info fdeInfo; CFI_Parser::CIE_Info cieInfo; const char *message = CFI_Parser::decodeFDE( @@ -352,12 +266,12 @@ void _unw_add_dynamic_fde(unw_word_t fde) { fdeInfo.pcStart, fdeInfo.pcEnd, fdeInfo.fdeStart); } else { - _LIBUNWIND_DEBUG_LOG("_unw_add_dynamic_fde: bad fde: %s", message); + _LIBUNWIND_DEBUG_LOG("__unw_add_dynamic_fde: bad fde: %s", message); } } /// IPI: for __deregister_frame() -void _unw_remove_dynamic_fde(unw_word_t fde) { +void __unw_remove_dynamic_fde(unw_word_t fde) { // fde is own mh_group DwarfFDECache::removeAllIn((LocalAddressSpace::pint_t)fde); } diff --git a/lib/libunwind/src/libunwind_ext.h b/lib/libunwind/src/libunwind_ext.h index ec645a8467..316dee2982 100644 --- a/lib/libunwind/src/libunwind_ext.h +++ b/lib/libunwind/src/libunwind_ext.h @@ -1,9 +1,8 @@ 
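// The renamed __unw_add_dynamic_fde/__unw_remove_dynamic_fde above are the
// internal half of __register_frame()/__deregister_frame(): they decode a
// single FDE with CFI_Parser and add it to (or drop it from) the dynamic FDE
// cache. A hedged sketch of how a JIT typically drives that path; note that
// in this libunwind, __register_frame takes a pointer to one FDE (the libgcc
// variant takes a whole .eh_frame section), and jit_fde below stands in for
// real, correctly encoded unwind info:
extern "C" void __register_frame(const void *fde);
extern "C" void __deregister_frame(const void *fde);

void install_jit_unwind_info(const void *jit_fde) {
  __register_frame(jit_fde);     // make the unwinder aware of the JITed frame
}

void remove_jit_unwind_info(const void *jit_fde) {
  __deregister_frame(jit_fde);   // must be balanced when the JITed code is freed
}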
//===------------------------ libunwind_ext.h -----------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // // // Extensions to libunwind API. @@ -23,15 +22,34 @@ #ifdef __cplusplus extern "C" { #endif + +extern int __unw_getcontext(unw_context_t *); +extern int __unw_init_local(unw_cursor_t *, unw_context_t *); +extern int __unw_step(unw_cursor_t *); +extern int __unw_get_reg(unw_cursor_t *, unw_regnum_t, unw_word_t *); +extern int __unw_get_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t *); +extern int __unw_set_reg(unw_cursor_t *, unw_regnum_t, unw_word_t); +extern int __unw_set_fpreg(unw_cursor_t *, unw_regnum_t, unw_fpreg_t); +extern int __unw_resume(unw_cursor_t *); + +#ifdef __arm__ +/* Save VFP registers in FSTMX format (instead of FSTMD). */ +extern void __unw_save_vfp_as_X(unw_cursor_t *); +#endif + +extern const char *__unw_regname(unw_cursor_t *, unw_regnum_t); +extern int __unw_get_proc_info(unw_cursor_t *, unw_proc_info_t *); +extern int __unw_is_fpreg(unw_cursor_t *, unw_regnum_t); +extern int __unw_is_signal_frame(unw_cursor_t *); +extern int __unw_get_proc_name(unw_cursor_t *, char *, size_t, unw_word_t *); + // SPI -extern void unw_iterate_dwarf_unwind_cache(void (*func)(unw_word_t ip_start, - unw_word_t ip_end, - unw_word_t fde, - unw_word_t mh)); +extern void __unw_iterate_dwarf_unwind_cache(void (*func)( + unw_word_t ip_start, unw_word_t ip_end, unw_word_t fde, unw_word_t mh)); // IPI -extern void _unw_add_dynamic_fde(unw_word_t fde); -extern void _unw_remove_dynamic_fde(unw_word_t fde); +extern void __unw_add_dynamic_fde(unw_word_t fde); +extern void __unw_remove_dynamic_fde(unw_word_t fde); #if defined(_LIBUNWIND_ARM_EHABI) extern const uint32_t* decode_eht_entry(const uint32_t*, size_t*, size_t*); From 70e05c67ce6f044b5b99e01a5d972bbe92c38344 Mon Sep 17 00:00:00 2001 From: Andrew Kelley Date: Fri, 19 Jul 2019 16:55:59 -0400 Subject: [PATCH 13/31] update libcxx to llvm9 upstream commit 1931d3cb20a00da732c5210b123656632982fde0 --- lib/libcxx/include/__bit_reference | 35 +- lib/libcxx/include/__bsd_locale_defaults.h | 7 +- lib/libcxx/include/__bsd_locale_fallbacks.h | 7 +- lib/libcxx/include/__config | 287 ++- lib/libcxx/include/__config_site.in | 8 +- lib/libcxx/include/__debug | 55 +- lib/libcxx/include/__errc | 7 +- lib/libcxx/include/__functional_03 | 7 +- lib/libcxx/include/__functional_base | 17 +- lib/libcxx/include/__functional_base_03 | 7 +- lib/libcxx/include/__hash_table | 30 +- lib/libcxx/include/__libcpp_version | 2 +- lib/libcxx/include/__locale | 80 +- lib/libcxx/include/__mutex_base | 49 +- lib/libcxx/include/__node_handle | 12 +- lib/libcxx/include/__nullptr | 7 +- lib/libcxx/include/__sso_allocator | 7 +- lib/libcxx/include/__std_stream | 7 +- lib/libcxx/include/__string | 7 +- lib/libcxx/include/__threading_support | 19 +- lib/libcxx/include/__tree | 218 +-- lib/libcxx/include/__tuple | 68 +- lib/libcxx/include/__undef_macros | 7 +- lib/libcxx/include/algorithm | 459 +++-- lib/libcxx/include/any | 9 +- lib/libcxx/include/array | 36 +- lib/libcxx/include/atomic | 1080 ++++++++--- lib/libcxx/include/bit | 371 +++- lib/libcxx/include/bitset | 11 +- lib/libcxx/include/cassert | 7 +- 
lib/libcxx/include/ccomplex | 7 +- lib/libcxx/include/cctype | 7 +- lib/libcxx/include/cerrno | 7 +- lib/libcxx/include/cfenv | 7 +- lib/libcxx/include/cfloat | 7 +- lib/libcxx/include/charconv | 43 +- lib/libcxx/include/chrono | 109 +- lib/libcxx/include/cinttypes | 7 +- lib/libcxx/include/ciso646 | 7 +- lib/libcxx/include/climits | 7 +- lib/libcxx/include/clocale | 7 +- lib/libcxx/include/cmath | 35 +- lib/libcxx/include/codecvt | 7 +- lib/libcxx/include/compare | 7 +- lib/libcxx/include/complex | 11 +- lib/libcxx/include/complex.h | 7 +- lib/libcxx/include/condition_variable | 7 +- lib/libcxx/include/csetjmp | 7 +- lib/libcxx/include/csignal | 7 +- lib/libcxx/include/cstdarg | 7 +- lib/libcxx/include/cstdbool | 7 +- lib/libcxx/include/cstddef | 7 +- lib/libcxx/include/cstdint | 7 +- lib/libcxx/include/cstdio | 7 +- lib/libcxx/include/cstdlib | 7 +- lib/libcxx/include/cstring | 7 +- lib/libcxx/include/ctgmath | 7 +- lib/libcxx/include/ctime | 7 +- lib/libcxx/include/ctype.h | 7 +- lib/libcxx/include/cwchar | 7 +- lib/libcxx/include/cwctype | 7 +- lib/libcxx/include/deque | 31 +- lib/libcxx/include/errno.h | 7 +- lib/libcxx/include/exception | 21 +- lib/libcxx/include/experimental/__config | 15 +- lib/libcxx/include/experimental/__memory | 7 +- lib/libcxx/include/experimental/algorithm | 7 +- lib/libcxx/include/experimental/any | 21 - lib/libcxx/include/experimental/chrono | 21 - lib/libcxx/include/experimental/coroutine | 13 +- lib/libcxx/include/experimental/deque | 7 +- lib/libcxx/include/experimental/filesystem | 7 +- lib/libcxx/include/experimental/forward_list | 7 +- lib/libcxx/include/experimental/functional | 16 +- lib/libcxx/include/experimental/iterator | 7 +- lib/libcxx/include/experimental/list | 7 +- lib/libcxx/include/experimental/map | 7 +- .../include/experimental/memory_resource | 7 +- lib/libcxx/include/experimental/numeric | 21 - lib/libcxx/include/experimental/optional | 21 - .../include/experimental/propagate_const | 7 +- lib/libcxx/include/experimental/ratio | 21 - lib/libcxx/include/experimental/regex | 7 +- lib/libcxx/include/experimental/set | 7 +- lib/libcxx/include/experimental/simd | 7 +- lib/libcxx/include/experimental/string | 7 +- lib/libcxx/include/experimental/string_view | 21 - lib/libcxx/include/experimental/system_error | 21 - lib/libcxx/include/experimental/tuple | 21 - lib/libcxx/include/experimental/type_traits | 17 +- lib/libcxx/include/experimental/unordered_map | 7 +- lib/libcxx/include/experimental/unordered_set | 7 +- lib/libcxx/include/experimental/utility | 7 +- lib/libcxx/include/experimental/vector | 7 +- lib/libcxx/include/ext/__hash | 34 +- lib/libcxx/include/ext/hash_map | 78 +- lib/libcxx/include/ext/hash_set | 34 +- lib/libcxx/include/fenv.h | 116 ++ lib/libcxx/include/filesystem | 32 +- lib/libcxx/include/float.h | 7 +- lib/libcxx/include/forward_list | 77 +- lib/libcxx/include/fstream | 23 +- lib/libcxx/include/functional | 140 +- lib/libcxx/include/future | 21 +- lib/libcxx/include/initializer_list | 9 +- lib/libcxx/include/inttypes.h | 12 +- lib/libcxx/include/iomanip | 17 +- lib/libcxx/include/ios | 56 +- lib/libcxx/include/iosfwd | 7 +- lib/libcxx/include/iostream | 7 +- lib/libcxx/include/istream | 937 +++++----- lib/libcxx/include/iterator | 247 +-- lib/libcxx/include/limits | 9 +- lib/libcxx/include/limits.h | 7 +- lib/libcxx/include/list | 158 +- lib/libcxx/include/locale | 21 +- lib/libcxx/include/locale.h | 7 +- lib/libcxx/include/map | 104 +- lib/libcxx/include/math.h | 143 +- lib/libcxx/include/memory | 403 ++--- 
lib/libcxx/include/module.modulemap | 5 +- lib/libcxx/include/mutex | 26 +- lib/libcxx/include/new | 42 +- lib/libcxx/include/numeric | 84 +- lib/libcxx/include/optional | 114 +- lib/libcxx/include/ostream | 12 +- lib/libcxx/include/queue | 27 +- lib/libcxx/include/random | 23 +- lib/libcxx/include/ratio | 7 +- lib/libcxx/include/regex | 135 +- lib/libcxx/include/scoped_allocator | 7 +- lib/libcxx/include/set | 98 +- lib/libcxx/include/setjmp.h | 7 +- lib/libcxx/include/shared_mutex | 7 +- lib/libcxx/include/span | 251 +-- lib/libcxx/include/sstream | 9 +- lib/libcxx/include/stack | 17 +- lib/libcxx/include/stdbool.h | 7 +- lib/libcxx/include/stddef.h | 7 +- lib/libcxx/include/stdexcept | 36 +- lib/libcxx/include/stdint.h | 12 +- lib/libcxx/include/stdio.h | 7 +- lib/libcxx/include/stdlib.h | 41 +- lib/libcxx/include/streambuf | 7 +- lib/libcxx/include/string | 77 +- lib/libcxx/include/string.h | 7 +- lib/libcxx/include/string_view | 41 +- lib/libcxx/include/strstream | 7 +- .../include/support/android/locale_bionic.h | 7 +- lib/libcxx/include/support/fuchsia/xlocale.h | 7 +- lib/libcxx/include/support/ibm/limits.h | 7 +- .../include/support/ibm/locale_mgmt_aix.h | 7 +- lib/libcxx/include/support/ibm/support.h | 7 +- lib/libcxx/include/support/ibm/xlocale.h | 7 +- lib/libcxx/include/support/musl/xlocale.h | 7 +- lib/libcxx/include/support/newlib/xlocale.h | 7 +- .../include/support/solaris/floatingpoint.h | 7 +- lib/libcxx/include/support/solaris/wchar.h | 7 +- lib/libcxx/include/support/solaris/xlocale.h | 7 +- .../include/support/win32/limits_msvc_win32.h | 7 +- .../include/support/win32/locale_win32.h | 7 +- .../support/xlocale/__nop_locale_mgmt.h | 7 +- .../support/xlocale/__posix_l_fallback.h | 7 +- .../support/xlocale/__strtonum_fallback.h | 7 +- lib/libcxx/include/support/xlocale/xlocale.h | 0 lib/libcxx/include/system_error | 7 +- lib/libcxx/include/tgmath.h | 7 +- lib/libcxx/include/thread | 12 +- lib/libcxx/include/tuple | 155 +- lib/libcxx/include/type_traits | 1599 ++++------------- lib/libcxx/include/typeindex | 7 +- lib/libcxx/include/typeinfo | 318 ++-- lib/libcxx/include/unordered_map | 201 ++- lib/libcxx/include/unordered_set | 165 +- lib/libcxx/include/utility | 57 +- lib/libcxx/include/valarray | 32 +- lib/libcxx/include/variant | 82 +- lib/libcxx/include/vector | 42 +- lib/libcxx/include/version | 17 +- lib/libcxx/include/wchar.h | 7 +- lib/libcxx/include/wctype.h | 7 +- 181 files changed, 5595 insertions(+), 4782 deletions(-) delete mode 100644 lib/libcxx/include/experimental/any delete mode 100644 lib/libcxx/include/experimental/chrono delete mode 100644 lib/libcxx/include/experimental/numeric delete mode 100644 lib/libcxx/include/experimental/optional delete mode 100644 lib/libcxx/include/experimental/ratio delete mode 100644 lib/libcxx/include/experimental/string_view delete mode 100644 lib/libcxx/include/experimental/system_error delete mode 100644 lib/libcxx/include/experimental/tuple create mode 100644 lib/libcxx/include/fenv.h delete mode 100644 lib/libcxx/include/support/xlocale/xlocale.h diff --git a/lib/libcxx/include/__bit_reference b/lib/libcxx/include/__bit_reference index c208af2b4d..cce74fb502 100644 --- a/lib/libcxx/include/__bit_reference +++ b/lib/libcxx/include/__bit_reference @@ -1,10 +1,9 @@ // -*- C++ -*- //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. 
See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -69,7 +68,7 @@ public: _LIBCPP_INLINE_VISIBILITY void flip() _NOEXCEPT {*__seg_ ^= __mask_;} _LIBCPP_INLINE_VISIBILITY __bit_iterator<_Cp, false> operator&() const _NOEXCEPT - {return __bit_iterator<_Cp, false>(__seg_, static_cast(__ctz(__mask_)));} + {return __bit_iterator<_Cp, false>(__seg_, static_cast(__libcpp_ctz(__mask_)));} private: _LIBCPP_INLINE_VISIBILITY __bit_reference(__storage_pointer __s, __storage_type __m) _NOEXCEPT @@ -141,7 +140,7 @@ public: {return static_cast(*__seg_ & __mask_);} _LIBCPP_INLINE_VISIBILITY __bit_iterator<_Cp, true> operator&() const _NOEXCEPT - {return __bit_iterator<_Cp, true>(__seg_, static_cast(__ctz(__mask_)));} + {return __bit_iterator<_Cp, true>(__seg_, static_cast(__libcpp_ctz(__mask_)));} private: _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR @@ -168,7 +167,7 @@ __find_bool_true(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); __storage_type __b = *__first.__seg_ & __m; if (__b) - return _It(__first.__seg_, static_cast(_VSTD::__ctz(__b))); + return _It(__first.__seg_, static_cast(_VSTD::__libcpp_ctz(__b))); if (__n == __dn) return __first + __n; __n -= __dn; @@ -177,14 +176,14 @@ __find_bool_true(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type // do middle whole words for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) if (*__first.__seg_) - return _It(__first.__seg_, static_cast(_VSTD::__ctz(*__first.__seg_))); + return _It(__first.__seg_, static_cast(_VSTD::__libcpp_ctz(*__first.__seg_))); // do last partial word if (__n > 0) { __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); __storage_type __b = *__first.__seg_ & __m; if (__b) - return _It(__first.__seg_, static_cast(_VSTD::__ctz(__b))); + return _It(__first.__seg_, static_cast(_VSTD::__libcpp_ctz(__b))); } return _It(__first.__seg_, static_cast(__n)); } @@ -204,7 +203,7 @@ __find_bool_false(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); __storage_type __b = ~*__first.__seg_ & __m; if (__b) - return _It(__first.__seg_, static_cast(_VSTD::__ctz(__b))); + return _It(__first.__seg_, static_cast(_VSTD::__libcpp_ctz(__b))); if (__n == __dn) return __first + __n; __n -= __dn; @@ -215,7 +214,7 @@ __find_bool_false(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type { __storage_type __b = ~*__first.__seg_; if (__b) - return _It(__first.__seg_, static_cast(_VSTD::__ctz(__b))); + return _It(__first.__seg_, static_cast(_VSTD::__libcpp_ctz(__b))); } // do last partial word if (__n > 0) @@ -223,7 +222,7 @@ __find_bool_false(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); __storage_type __b = ~*__first.__seg_ & __m; if (__b) - return _It(__first.__seg_, static_cast(_VSTD::__ctz(__b))); + return _It(__first.__seg_, static_cast(_VSTD::__libcpp_ctz(__b))); } return _It(__first.__seg_, static_cast(__n)); } @@ -255,18 +254,18 @@ __count_bool_true(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_type 
__storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); __storage_type __dn = _VSTD::min(__clz_f, __n); __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - __r = _VSTD::__popcount(*__first.__seg_ & __m); + __r = _VSTD::__libcpp_popcount(*__first.__seg_ & __m); __n -= __dn; ++__first.__seg_; } // do middle whole words for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) - __r += _VSTD::__popcount(*__first.__seg_); + __r += _VSTD::__libcpp_popcount(*__first.__seg_); // do last partial word if (__n > 0) { __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - __r += _VSTD::__popcount(*__first.__seg_ & __m); + __r += _VSTD::__libcpp_popcount(*__first.__seg_ & __m); } return __r; } @@ -286,18 +285,18 @@ __count_bool_false(__bit_iterator<_Cp, _IsConst> __first, typename _Cp::size_typ __storage_type __clz_f = static_cast<__storage_type>(__bits_per_word - __first.__ctz_); __storage_type __dn = _VSTD::min(__clz_f, __n); __storage_type __m = (~__storage_type(0) << __first.__ctz_) & (~__storage_type(0) >> (__clz_f - __dn)); - __r = _VSTD::__popcount(~*__first.__seg_ & __m); + __r = _VSTD::__libcpp_popcount(~*__first.__seg_ & __m); __n -= __dn; ++__first.__seg_; } // do middle whole words for (; __n >= __bits_per_word; ++__first.__seg_, __n -= __bits_per_word) - __r += _VSTD::__popcount(~*__first.__seg_); + __r += _VSTD::__libcpp_popcount(~*__first.__seg_); // do last partial word if (__n > 0) { __storage_type __m = ~__storage_type(0) >> (__bits_per_word - __n); - __r += _VSTD::__popcount(~*__first.__seg_ & __m); + __r += _VSTD::__libcpp_popcount(~*__first.__seg_ & __m); } return __r; } diff --git a/lib/libcxx/include/__bsd_locale_defaults.h b/lib/libcxx/include/__bsd_locale_defaults.h index cbc407d103..2ace2a21cb 100644 --- a/lib/libcxx/include/__bsd_locale_defaults.h +++ b/lib/libcxx/include/__bsd_locale_defaults.h @@ -1,10 +1,9 @@ // -*- C++ -*- //===---------------------- __bsd_locale_defaults.h -----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // The BSDs have lots of *_l functions. We don't want to define those symbols diff --git a/lib/libcxx/include/__bsd_locale_fallbacks.h b/lib/libcxx/include/__bsd_locale_fallbacks.h index 3097b01410..a807fe0395 100644 --- a/lib/libcxx/include/__bsd_locale_fallbacks.h +++ b/lib/libcxx/include/__bsd_locale_fallbacks.h @@ -1,10 +1,9 @@ // -*- C++ -*- //===---------------------- __bsd_locale_fallbacks.h ----------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // The BSDs have lots of *_l functions. 
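// The __bit_reference hunks above only rename __ctz/__popcount to
// __libcpp_ctz/__libcpp_popcount, but the underlying technique is worth
// spelling out: the vector<bool>-style loops walk whole storage words, let
// the bit intrinsics do the per-word work, and mask off the trailing partial
// word. A self-contained sketch of that counting loop over uint64_t words
// (illustration only, not libc++'s actual code):
#include <cstdint>
#include <cstddef>

std::size_t count_set_bits(const std::uint64_t *words, std::size_t nbits) {
  std::size_t count = 0, i = 0;
  // whole 64-bit words first
  for (; nbits >= 64; ++i, nbits -= 64)
    count += static_cast<std::size_t>(__builtin_popcountll(words[i]));
  // then the trailing partial word, with the unused high bits masked off
  if (nbits > 0) {
    std::uint64_t mask = ~std::uint64_t(0) >> (64 - nbits);
    count += static_cast<std::size_t>(__builtin_popcountll(words[i] & mask));
  }
  return count;
}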
This file provides reimplementations diff --git a/lib/libcxx/include/__config b/lib/libcxx/include/__config index f0bc3483fa..1ecced9f47 100644 --- a/lib/libcxx/include/__config +++ b/lib/libcxx/include/__config @@ -1,10 +1,9 @@ // -*- C++ -*- //===--------------------------- __config ---------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -33,12 +32,16 @@ # define _GNUC_VER_NEW 0 #endif -#define _LIBCPP_VERSION 8000 +#define _LIBCPP_VERSION 9000 #ifndef _LIBCPP_ABI_VERSION # define _LIBCPP_ABI_VERSION 1 #endif +#ifndef __STDC_HOSTED__ +# define _LIBCPP_FREESTANDING +#endif + #ifndef _LIBCPP_STD_VER # if __cplusplus <= 201103L # define _LIBCPP_STD_VER 11 @@ -93,10 +96,12 @@ // Enable optimized version of __do_get_(un)signed which avoids redundant copies. # define _LIBCPP_ABI_OPTIMIZED_LOCALE_NUM_GET // Use the smallest possible integer type to represent the index of the variant. -// Previously libc++ used "unsigned int" exclusivly. +// Previously libc++ used "unsigned int" exclusively. # define _LIBCPP_ABI_VARIANT_INDEX_TYPE_OPTIMIZATION // Unstable attempt to provide a more optimized std::function # define _LIBCPP_ABI_OPTIMIZED_FUNCTION +// All the regex constants must be distinct and nonzero. +# define _LIBCPP_ABI_REGEX_CONSTANTS_NONZERO #elif _LIBCPP_ABI_VERSION == 1 # if !defined(_LIBCPP_OBJECT_FORMAT_COFF) // Enable compiling copies of now inline methods into the dylib to support @@ -182,6 +187,10 @@ #define _LIBCPP_CLANG_VER 0 #endif +#if defined(_LIBCPP_COMPILER_GCC) && __cplusplus < 201103L +#error "libc++ does not support using GCC with C++03. Please enable C++11" +#endif + // FIXME: ABI detection should be done via compiler builtin macros. This // is just a placeholder until Clang implements such macros. For now assume // that Windows compilers pretending to be MSVC++ target the Microsoft ABI, @@ -201,6 +210,10 @@ # endif #endif +#if defined(_LIBCPP_ABI_MICROSOFT) && !defined(_LIBCPP_NO_VCRUNTIME) +# define _LIBCPP_ABI_VCRUNTIME +#endif + // Need to detect which libc we're using if we're on Linux. #if defined(__linux__) # include @@ -257,7 +270,7 @@ # define _LIBCPP_WIN32API # define _LIBCPP_LITTLE_ENDIAN # define _LIBCPP_SHORT_WCHAR 1 -// Both MinGW and native MSVC provide a "MSVC"-like enviroment +// Both MinGW and native MSVC provide a "MSVC"-like environment # define _LIBCPP_MSVCRT_LIKE // If mingw not explicitly detected, assume using MS C runtime only if // a MS compatibility version is specified. @@ -298,7 +311,7 @@ // random data even when using sandboxing mechanisms such as chroots, // Capsicum, etc. 
# define _LIBCPP_USING_ARC4_RANDOM -#elif defined(__Fuchsia__) +#elif defined(__Fuchsia__) || defined(__wasi__) # define _LIBCPP_USING_GETENTROPY #elif defined(__native_client__) // NaCl's sandbox (which PNaCl also runs in) doesn't allow filesystem access, @@ -332,7 +345,7 @@ # if defined(__FreeBSD__) # define _LIBCPP_HAS_QUICK_EXIT # define _LIBCPP_HAS_C11_FEATURES -# elif defined(__Fuchsia__) +# elif defined(__Fuchsia__) || defined(__wasi__) # define _LIBCPP_HAS_QUICK_EXIT # define _LIBCPP_HAS_TIMESPEC_GET # define _LIBCPP_HAS_C11_FEATURES @@ -400,10 +413,6 @@ typedef __char32_t char32_t; #define _LIBCPP_HAS_NO_STRONG_ENUMS #endif -#if !(__has_feature(cxx_decltype)) -#define _LIBCPP_HAS_NO_DECLTYPE -#endif - #if __has_feature(cxx_attributes) # define _LIBCPP_NORETURN [[noreturn]] #else @@ -434,18 +443,6 @@ typedef __char32_t char32_t; #define _LIBCPP_HAS_NO_VARIADICS #endif -#if !(__has_feature(cxx_generalized_initializers)) -#define _LIBCPP_HAS_NO_GENERALIZED_INITIALIZERS -#endif - -#if __has_feature(is_base_of) -#define _LIBCPP_HAS_IS_BASE_OF -#endif - -#if __has_feature(is_final) -#define _LIBCPP_HAS_IS_FINAL -#endif - // Objective-C++ features (opt-in) #if __has_feature(objc_arc) #define _LIBCPP_HAS_OBJC_ARC @@ -455,10 +452,6 @@ typedef __char32_t char32_t; #define _LIBCPP_HAS_OBJC_ARC_WEAK #endif -#if !(__has_feature(cxx_constexpr)) -#define _LIBCPP_HAS_NO_CONSTEXPR -#endif - #if !(__has_feature(cxx_relaxed_constexpr)) #define _LIBCPP_HAS_NO_CXX14_CONSTEXPR #endif @@ -471,14 +464,6 @@ typedef __char32_t char32_t; #define _LIBCPP_HAS_NO_NOEXCEPT #endif -#if __has_feature(underlying_type) -#define _LIBCPP_UNDERLYING_TYPE(T) __underlying_type(T) -#endif - -#if __has_feature(is_literal) -#define _LIBCPP_IS_LITERAL(T) __is_literal(T) -#endif - #if !defined(_LIBCPP_HAS_NO_ASAN) && !__has_feature(address_sanitizer) #define _LIBCPP_HAS_NO_ASAN #endif @@ -510,69 +495,20 @@ typedef __char32_t char32_t; #define _LIBCPP_NORETURN __attribute__((noreturn)) -#if _GNUC_VER >= 407 -#define _LIBCPP_UNDERLYING_TYPE(T) __underlying_type(T) -#define _LIBCPP_IS_LITERAL(T) __is_literal_type(T) -#define _LIBCPP_HAS_IS_FINAL -#endif - -#if defined(__GNUC__) && _GNUC_VER >= 403 -#define _LIBCPP_HAS_IS_BASE_OF -#endif - #if !__EXCEPTIONS && !defined(_LIBCPP_NO_EXCEPTIONS) #define _LIBCPP_NO_EXCEPTIONS #endif -// constexpr was added to GCC in 4.6. -#if _GNUC_VER < 406 -# define _LIBCPP_HAS_NO_CONSTEXPR -// Can only use constexpr in c++11 mode. 
-#elif !defined(__GXX_EXPERIMENTAL_CXX0X__) && __cplusplus < 201103L -# define _LIBCPP_HAS_NO_CONSTEXPR -#endif - // Determine if GCC supports relaxed constexpr #if !defined(__cpp_constexpr) || __cpp_constexpr < 201304L #define _LIBCPP_HAS_NO_CXX14_CONSTEXPR #endif -// GCC 5 will support variable templates +// GCC 5 supports variable templates #if !defined(__cpp_variable_templates) || __cpp_variable_templates < 201304L #define _LIBCPP_HAS_NO_VARIABLE_TEMPLATES #endif -#ifndef __GXX_EXPERIMENTAL_CXX0X__ - -#define _LIBCPP_HAS_NO_DECLTYPE -#define _LIBCPP_HAS_NO_NULLPTR -#define _LIBCPP_HAS_NO_UNICODE_CHARS -#define _LIBCPP_HAS_NO_VARIADICS -#define _LIBCPP_HAS_NO_RVALUE_REFERENCES -#define _LIBCPP_HAS_NO_STRONG_ENUMS -#define _LIBCPP_HAS_NO_NOEXCEPT - -#else // __GXX_EXPERIMENTAL_CXX0X__ - -#if _GNUC_VER < 403 -#define _LIBCPP_HAS_NO_RVALUE_REFERENCES -#endif - - -#if _GNUC_VER < 404 -#define _LIBCPP_HAS_NO_DECLTYPE -#define _LIBCPP_HAS_NO_UNICODE_CHARS -#define _LIBCPP_HAS_NO_VARIADICS -#define _LIBCPP_HAS_NO_GENERALIZED_INITIALIZERS -#endif // _GNUC_VER < 404 - -#if _GNUC_VER < 406 -#define _LIBCPP_HAS_NO_NOEXCEPT -#define _LIBCPP_HAS_NO_NULLPTR -#endif - -#endif // __GXX_EXPERIMENTAL_CXX0X__ - #if !defined(_LIBCPP_HAS_NO_ASAN) && !defined(__SANITIZE_ADDRESS__) #define _LIBCPP_HAS_NO_ASAN #endif @@ -597,16 +533,12 @@ typedef __char32_t char32_t; #error "MSVC versions prior to Visual Studio 2015 are not supported" #endif -#define _LIBCPP_HAS_IS_BASE_OF -#define _LIBCPP_HAS_NO_CONSTEXPR #define _LIBCPP_HAS_NO_CXX14_CONSTEXPR #define _LIBCPP_HAS_NO_VARIABLE_TEMPLATES -#define _LIBCPP_HAS_NO_NOEXCEPT #define __alignof__ __alignof #define _LIBCPP_NORETURN __declspec(noreturn) #define _ALIGNAS(x) __declspec(align(x)) #define _ALIGNAS_TYPE(x) alignas(x) -#define _LIBCPP_HAS_NO_VARIADICS #define _LIBCPP_WEAK @@ -623,12 +555,7 @@ typedef __char32_t char32_t; #define _ATTRIBUTE(x) __attribute__((x)) #define _LIBCPP_NORETURN __attribute__((noreturn)) -#define _LIBCPP_HAS_NO_GENERALIZED_INITIALIZERS -#define _LIBCPP_HAS_NO_NOEXCEPT -#define _LIBCPP_HAS_NO_NULLPTR #define _LIBCPP_HAS_NO_UNICODE_CHARS -#define _LIBCPP_HAS_IS_BASE_OF -#define _LIBCPP_HAS_IS_FINAL #define _LIBCPP_HAS_NO_VARIABLE_TEMPLATES #if defined(_AIX) @@ -659,8 +586,13 @@ typedef __char32_t char32_t; # define _LIBCPP_EXPORTED_FROM_ABI #elif defined(_LIBCPP_BUILDING_LIBRARY) # define _LIBCPP_DLL_VIS __declspec(dllexport) -# define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS -# define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS _LIBCPP_DLL_VIS +# if defined(__MINGW32__) +# define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS _LIBCPP_DLL_VIS +# define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS +# else +# define _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS +# define _LIBCPP_CLASS_TEMPLATE_INSTANTIATION_VIS _LIBCPP_DLL_VIS +# endif # define _LIBCPP_OVERRIDABLE_FUNC_VIS _LIBCPP_DLL_VIS # define _LIBCPP_EXPORTED_FROM_ABI __declspec(dllexport) #else @@ -777,7 +709,7 @@ typedef __char32_t char32_t; #else // Try to approximate the effect of exclude_from_explicit_instantiation // (which is that entities are not assumed to be provided by explicit - // template instantitations in the dylib) by always inlining those entities. + // template instantiations in the dylib) by always inlining those entities. # define _LIBCPP_EXCLUDE_FROM_EXPLICIT_INSTANTIATION _LIBCPP_ALWAYS_INLINE #endif @@ -789,6 +721,16 @@ typedef __char32_t char32_t; # endif #endif +#ifndef _LIBCPP_HAS_MERGED_TYPEINFO_NAMES_DEFAULT +# ifdef _LIBCPP_OBJECT_FORMAT_COFF // Windows binaries can't merge typeinfos. 
+# define _LIBCPP_HAS_MERGED_TYPEINFO_NAMES_DEFAULT 0 +#else +// TODO: This isn't strictly correct on ELF platforms due to llvm.org/PR37398 +// And we should consider defaulting to OFF. +# define _LIBCPP_HAS_MERGED_TYPEINFO_NAMES_DEFAULT 1 +#endif +#endif + #ifndef _LIBCPP_HIDE_FROM_ABI # if _LIBCPP_HIDE_FROM_ABI_PER_TU # define _LIBCPP_HIDE_FROM_ABI _LIBCPP_HIDDEN _LIBCPP_INTERNAL_LINKAGE @@ -843,22 +785,6 @@ _LIBCPP_BEGIN_NAMESPACE_STD _LIBCPP_END_NAMESPACE_STD # define _NOEXCEPT_(x) #endif -#if defined(_LIBCPP_DEBUG_USE_EXCEPTIONS) -# if !defined(_LIBCPP_DEBUG) -# error cannot use _LIBCPP_DEBUG_USE_EXCEPTIONS unless _LIBCPP_DEBUG is defined -# endif -# ifdef _LIBCPP_HAS_NO_NOEXCEPT -# define _NOEXCEPT_DEBUG -# define _NOEXCEPT_DEBUG_(x) -# else -# define _NOEXCEPT_DEBUG noexcept(false) -# define _NOEXCEPT_DEBUG_(x) noexcept(false) -# endif -#else -# define _NOEXCEPT_DEBUG _NOEXCEPT -# define _NOEXCEPT_DEBUG_(x) _NOEXCEPT_(x) -#endif - #ifdef _LIBCPP_HAS_NO_UNICODE_CHARS typedef unsigned short char16_t; typedef unsigned int char32_t; @@ -869,30 +795,11 @@ typedef unsigned int char32_t; #endif #ifdef _LIBCPP_CXX03_LANG -# if __has_extension(c_static_assert) -# define static_assert(__b, __m) _Static_assert(__b, __m) -# else -extern "C++" { -template struct __static_assert_test; -template <> struct __static_assert_test {}; -template struct __static_assert_check {}; -} -# define static_assert(__b, __m) \ - typedef __static_assert_check)> \ - _LIBCPP_CONCAT(__t, __LINE__) -# endif // __has_extension(c_static_assert) +# define static_assert(...) _Static_assert(__VA_ARGS__) +# define decltype(...) __decltype(__VA_ARGS__) #endif // _LIBCPP_CXX03_LANG -#ifdef _LIBCPP_HAS_NO_DECLTYPE -// GCC 4.6 provides __decltype in all standard modes. -# if __has_keyword(__decltype) || _LIBCPP_CLANG_VER >= 304 || _GNUC_VER >= 406 -# define decltype(__x) __decltype(__x) -# else -# define decltype(__x) __typeof__(__x) -# endif -#endif - -#ifdef _LIBCPP_HAS_NO_CONSTEXPR +#ifdef _LIBCPP_CXX03_LANG # define _LIBCPP_CONSTEXPR #else # define _LIBCPP_CONSTEXPR constexpr @@ -911,9 +818,9 @@ template struct __static_assert_check {}; #endif #ifdef __GNUC__ -# define _NOALIAS __attribute__((__malloc__)) +# define _LIBCPP_NOALIAS __attribute__((__malloc__)) #else -# define _NOALIAS +# define _LIBCPP_NOALIAS #endif #if __has_feature(cxx_explicit_conversions) || defined(__IBMCPP__) || \ @@ -966,10 +873,6 @@ template struct __static_assert_check {}; #define _LIBCPP_EXTERN_TEMPLATE2(...) extern template __VA_ARGS__; #endif -#if defined(__APPLE__) && defined(__LP64__) && !defined(__x86_64__) -#define _LIBCPP_NONUNIQUE_RTTI_BIT (1ULL << 63) -#endif - #if defined(__APPLE__) || defined(__FreeBSD__) || defined(_LIBCPP_MSVCRT_LIKE) || \ defined(__sun__) || defined(__NetBSD__) || defined(__CloudABI__) #define _LIBCPP_LOCALE__L_EXTENSIONS 1 @@ -990,13 +893,10 @@ template struct __static_assert_check {}; // for align_val_t were added in 19.12, aka VS 2017 version 15.3. #if defined(_LIBCPP_MSVCRT) && defined(_MSC_VER) && _MSC_VER < 1912 # define _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION -#elif defined(_LIBCPP_ABI_MICROSOFT) && !defined(_LIBCPP_NO_VCRUNTIME) -# define _LIBCPP_DEFER_NEW_TO_VCRUNTIME -# if !defined(__cpp_aligned_new) - // We're defering to Microsoft's STL to provide aligned new et al. We don't - // have it unless the language feature test macro is defined. 
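// Among the __config changes above, _NOALIAS becomes _LIBCPP_NOALIAS; on
// compilers that define __GNUC__ it expands to __attribute__((__malloc__)),
// which promises the optimizer that the returned pointer does not alias any
// other live pointer. A small sketch of the attribute on an ordinary
// allocation wrapper (hypothetical function, GCC/Clang only):
#include <cstddef>
#include <cstdlib>

__attribute__((__malloc__)) void *arena_alloc(std::size_t n) {
  // Each successful call returns storage nothing else points to yet, which is
  // exactly the guarantee the malloc attribute asserts.
  return std::malloc(n);
}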
-# define _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION -# endif +#elif defined(_LIBCPP_ABI_VCRUNTIME) && !defined(__cpp_aligned_new) + // We're deferring to Microsoft's STL to provide aligned new et al. We don't + // have it unless the language feature test macro is defined. +# define _LIBCPP_HAS_NO_LIBRARY_ALIGNED_ALLOCATION #endif #if defined(__APPLE__) @@ -1025,8 +925,10 @@ template struct __static_assert_check {}; #endif // Deprecation macros. -// Deprecations warnings are only enabled when _LIBCPP_ENABLE_DEPRECATION_WARNINGS is defined. -#if defined(_LIBCPP_ENABLE_DEPRECATION_WARNINGS) +// +// Deprecations warnings are always enabled, except when users explicitly opt-out +// by defining _LIBCPP_DISABLE_DEPRECATION_WARNINGS. +#if !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS) # if __has_attribute(deprecated) # define _LIBCPP_DEPRECATED __attribute__ ((deprecated)) # elif _LIBCPP_STD_VER > 11 @@ -1128,6 +1030,12 @@ template struct __static_assert_check {}; #endif #endif +#if __has_attribute(no_destroy) +# define _LIBCPP_NO_DESTROY __attribute__((__no_destroy__)) +#else +# define _LIBCPP_NO_DESTROY +#endif + #ifndef _LIBCPP_HAS_NO_ASAN _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( const void *, const void *, const void *, const void *); @@ -1158,6 +1066,7 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( !defined(_LIBCPP_HAS_THREAD_API_EXTERNAL) # if defined(__FreeBSD__) || \ defined(__Fuchsia__) || \ + defined(__wasi__) || \ defined(__NetBSD__) || \ defined(__linux__) || \ defined(__GNU__) || \ @@ -1188,6 +1097,23 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( _LIBCPP_HAS_NO_THREADS is defined. #endif +// The Apple, glibc, and Bionic implementation of pthreads implements +// pthread_mutex_destroy as nop for regular mutexes. Additionally, Win32 +// mutexes have no destroy mechanism. +// TODO(EricWF): Enable this optimization on Apple and Bionic platforms after +// speaking to their respective stakeholders. +#if (defined(_LIBCPP_HAS_THREAD_API_PTHREAD) && defined(__GLIBC__)) \ + || defined(_LIBCPP_HAS_THREAD_API_WIN32) +# define _LIBCPP_HAS_TRIVIAL_MUTEX_DESTRUCTION +#endif + +// Destroying a condvar is a nop on Windows. +// TODO(EricWF): This is potentially true for some pthread implementations +// as well. +#if defined(_LIBCPP_HAS_THREAD_API_WIN32) +# define _LIBCPP_HAS_TRIVIAL_CONDVAR_DESTRUCTION +#endif + // Systems that use capability-based security (FreeBSD with Capsicum, // Nuxi CloudABI) may only provide local filesystem access (using *at()). 
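// Two of the __config additions above are easy to miss: deprecation warnings
// are now on by default (opt out with _LIBCPP_DISABLE_DEPRECATION_WARNINGS),
// and _LIBCPP_NO_DESTROY wraps Clang's no_destroy attribute so selected
// statics never register an exit-time destructor. A hedged sketch of the
// attribute on user code (Clang-specific; the guard keeps other compilers
// building):
#ifndef __has_attribute
#define __has_attribute(x) 0
#endif

#if __has_attribute(__no_destroy__)
#define MY_NO_DESTROY __attribute__((__no_destroy__))
#else
#define MY_NO_DESTROY
#endif

#include <string>

// The registry lives for the whole program; skipping its destructor avoids
// shutdown-order problems and the atexit registration entirely.
MY_NO_DESTROY static std::string g_registry_name = "default";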
// Functions like open(), rename(), unlink() and stat() should not be @@ -1204,7 +1130,7 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( #endif #if defined(__BIONIC__) || defined(__CloudABI__) || \ - defined(__Fuchsia__) || defined(_LIBCPP_HAS_MUSL_LIBC) + defined(__Fuchsia__) || defined(__wasi__) || defined(_LIBCPP_HAS_MUSL_LIBC) #define _LIBCPP_PROVIDES_DEFAULT_RUNE_TABLE #endif @@ -1216,13 +1142,22 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( #if __has_feature(cxx_atomic) || __has_extension(c_atomic) || __has_keyword(_Atomic) # define _LIBCPP_HAS_C_ATOMIC_IMP -#elif _GNUC_VER > 407 +#elif defined(_LIBCPP_COMPILER_GCC) # define _LIBCPP_HAS_GCC_ATOMIC_IMP #endif -#if (!defined(_LIBCPP_HAS_C_ATOMIC_IMP) && !defined(_LIBCPP_HAS_GCC_ATOMIC_IMP)) \ +#if (!defined(_LIBCPP_HAS_C_ATOMIC_IMP) && \ + !defined(_LIBCPP_HAS_GCC_ATOMIC_IMP) && \ + !defined(_LIBCPP_HAS_EXTERNAL_ATOMIC_IMP)) \ || defined(_LIBCPP_HAS_NO_THREADS) -#define _LIBCPP_HAS_NO_ATOMIC_HEADER +# define _LIBCPP_HAS_NO_ATOMIC_HEADER +#else +# ifndef _LIBCPP_ATOMIC_FLAG_TYPE +# define _LIBCPP_ATOMIC_FLAG_TYPE bool +# endif +# ifdef _LIBCPP_FREESTANDING +# define _LIBCPP_ATOMIC_ONLY_USE_BUILTINS +# endif #endif #ifndef _LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK @@ -1250,6 +1185,10 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( #define _LIBCPP_HAS_NO_BUILTIN_ADDRESSOF #endif +#if !__has_builtin(__builtin_is_constant_evaluated) && _GNUC_VER < 900 +#define _LIBCPP_HAS_NO_BUILTIN_IS_CONSTANT_EVALUATED +#endif + #if !defined(_LIBCPP_HAS_NO_OFF_T_FUNCTIONS) # if defined(_LIBCPP_MSVCRT) || defined(_NEWLIB_VERSION) # define _LIBCPP_HAS_NO_OFF_T_FUNCTIONS @@ -1277,6 +1216,21 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( # define _LIBCPP_FALLTHROUGH() ((void)0) #endif +#if __has_attribute(__nodebug__) +#define _LIBCPP_NODEBUG __attribute__((__nodebug__)) +#else +#define _LIBCPP_NODEBUG +#endif + +#ifndef _LIBCPP_NODEBUG_TYPE +#if __has_attribute(__nodebug__) && \ + (defined(_LIBCPP_COMPILER_CLANG) && _LIBCPP_CLANG_VER >= 900) +#define _LIBCPP_NODEBUG_TYPE __attribute__((nodebug)) +#else +#define _LIBCPP_NODEBUG_TYPE +#endif +#endif // !defined(_LIBCPP_NODEBUG_TYPE) + #if defined(_LIBCPP_ABI_MICROSOFT) && \ (defined(_LIBCPP_COMPILER_MSVC) || __has_declspec_attribute(empty_bases)) # define _LIBCPP_DECLSPEC_EMPTY_BASES __declspec(empty_bases) @@ -1312,7 +1266,8 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( #if !defined(_LIBCPP_BUILDING_LIBRARY) && \ !defined(_LIBCPP_DISABLE_AVAILABILITY) && \ __has_feature(attribute_availability_with_strict) && \ - __has_feature(attribute_availability_in_templates) + __has_feature(attribute_availability_in_templates) && \ + __has_extension(pragma_clang_attribute_external_declaration) # ifdef __APPLE__ # define _LIBCPP_USE_AVAILABILITY_APPLE # endif @@ -1355,6 +1310,21 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( # define _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR \ __attribute__((availability(macosx,strict,introduced=10.9))) \ __attribute__((availability(ios,strict,introduced=7.0))) +# define _LIBCPP_AVAILABILITY_FILESYSTEM \ + __attribute__((availability(macosx,strict,introduced=10.15))) \ + __attribute__((availability(ios,strict,introduced=13.0))) \ + __attribute__((availability(tvos,strict,introduced=13.0))) \ + __attribute__((availability(watchos,strict,introduced=6.0))) +# define _LIBCPP_AVAILABILITY_FILESYSTEM_PUSH \ 
+ _Pragma("clang attribute push(__attribute__((availability(macosx,strict,introduced=10.15))), apply_to=any(function,record))") \ + _Pragma("clang attribute push(__attribute__((availability(ios,strict,introduced=13.0))), apply_to=any(function,record))") \ + _Pragma("clang attribute push(__attribute__((availability(tvos,strict,introduced=13.0))), apply_to=any(function,record))") \ + _Pragma("clang attribute push(__attribute__((availability(watchos,strict,introduced=6.0))), apply_to=any(function,record))") +# define _LIBCPP_AVAILABILITY_FILESYSTEM_POP \ + _Pragma("clang attribute pop") \ + _Pragma("clang attribute pop") \ + _Pragma("clang attribute pop") \ + _Pragma("clang attribute pop") #else # define _LIBCPP_AVAILABILITY_SHARED_MUTEX # define _LIBCPP_AVAILABILITY_BAD_VARIANT_ACCESS @@ -1366,6 +1336,9 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( # define _LIBCPP_AVAILABILITY_TYPEINFO_VTABLE # define _LIBCPP_AVAILABILITY_LOCALE_CATEGORY # define _LIBCPP_AVAILABILITY_ATOMIC_SHARED_PTR +# define _LIBCPP_AVAILABILITY_FILESYSTEM +# define _LIBCPP_AVAILABILITY_FILESYSTEM_PUSH +# define _LIBCPP_AVAILABILITY_FILESYSTEM_POP #endif // Define availability that depends on _LIBCPP_NO_EXCEPTIONS. diff --git a/lib/libcxx/include/__config_site.in b/lib/libcxx/include/__config_site.in index 580a6aa4c0..ffbd372edf 100644 --- a/lib/libcxx/include/__config_site.in +++ b/lib/libcxx/include/__config_site.in @@ -1,9 +1,8 @@ //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -28,6 +27,7 @@ #cmakedefine _LIBCPP_HAS_THREAD_LIBRARY_EXTERNAL #cmakedefine _LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS #cmakedefine _LIBCPP_NO_VCRUNTIME +#cmakedefine01 _LIBCPP_HAS_MERGED_TYPEINFO_NAMES_DEFAULT #cmakedefine _LIBCPP_ABI_NAMESPACE @_LIBCPP_ABI_NAMESPACE@ @_LIBCPP_ABI_DEFINES@ diff --git a/lib/libcxx/include/__debug b/lib/libcxx/include/__debug index a8788f68f8..524c5ff028 100644 --- a/lib/libcxx/include/__debug +++ b/lib/libcxx/include/__debug @@ -1,10 +1,9 @@ // -*- C++ -*- //===--------------------------- __debug ----------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -12,6 +11,7 @@ #define _LIBCPP_DEBUG_H #include <__config> +#include #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) #pragma GCC system_header @@ -25,7 +25,6 @@ # include # include # include -# include #endif #if _LIBCPP_DEBUG_LEVEL >= 1 && !defined(_LIBCPP_ASSERT) @@ -50,10 +49,6 @@ #define _LIBCPP_DEBUG_MODE(...) 
((void)0) #endif -#if _LIBCPP_DEBUG_LEVEL < 1 -class _LIBCPP_EXCEPTION_ABI __libcpp_debug_exception; -#endif - _LIBCPP_BEGIN_NAMESPACE_STD struct _LIBCPP_TEMPLATE_VIS __libcpp_debug_info { @@ -63,6 +58,9 @@ struct _LIBCPP_TEMPLATE_VIS __libcpp_debug_info { _LIBCPP_INLINE_VISIBILITY _LIBCPP_CONSTEXPR __libcpp_debug_info(const char* __f, int __l, const char* __p, const char* __m) : __file_(__f), __line_(__l), __pred_(__p), __msg_(__m) {} + + _LIBCPP_FUNC_VIS std::string what() const; + const char* __file_; int __line_; const char* __pred_; @@ -80,38 +78,11 @@ extern _LIBCPP_EXPORTED_FROM_ABI __libcpp_debug_function_type __libcpp_debug_fun _LIBCPP_NORETURN _LIBCPP_FUNC_VIS void __libcpp_abort_debug_function(__libcpp_debug_info const&); -/// __libcpp_throw_debug_function - A debug handler that throws -/// an instance of __libcpp_debug_exception when called. - _LIBCPP_NORETURN _LIBCPP_FUNC_VIS -void __libcpp_throw_debug_function(__libcpp_debug_info const&); - /// __libcpp_set_debug_function - Set the debug handler to the specified /// function. _LIBCPP_FUNC_VIS bool __libcpp_set_debug_function(__libcpp_debug_function_type __func); -// Setup the throwing debug handler during dynamic initialization. -#if _LIBCPP_DEBUG_LEVEL >= 1 && defined(_LIBCPP_DEBUG_USE_EXCEPTIONS) -# if defined(_LIBCPP_NO_EXCEPTIONS) -# error _LIBCPP_DEBUG_USE_EXCEPTIONS cannot be used when exceptions are disabled. -# endif -static bool __init_dummy = __libcpp_set_debug_function(__libcpp_throw_debug_function); -#endif - -#if _LIBCPP_DEBUG_LEVEL >= 1 || defined(_LIBCPP_BUILDING_LIBRARY) -class _LIBCPP_EXCEPTION_ABI __libcpp_debug_exception : public exception { -public: - __libcpp_debug_exception() _NOEXCEPT; - explicit __libcpp_debug_exception(__libcpp_debug_info const& __i); - __libcpp_debug_exception(__libcpp_debug_exception const&); - ~__libcpp_debug_exception() _NOEXCEPT; - const char* what() const _NOEXCEPT; -private: - struct __libcpp_debug_exception_imp; - __libcpp_debug_exception_imp *__imp_; -}; -#endif - #if _LIBCPP_DEBUG_LEVEL >= 2 || defined(_LIBCPP_BUILDING_LIBRARY) struct _LIBCPP_TYPE_VIS __c_node; @@ -251,16 +222,22 @@ public: __db_c_const_iterator __c_end() const; __db_i_const_iterator __i_end() const; + typedef __c_node*(_InsertConstruct)(void*, void*, __c_node*); + + template + _LIBCPP_INLINE_VISIBILITY static __c_node* __create_C_node(void *__mem, void *__c, __c_node *__next) { + return ::new(__mem) _C_node<_Cont>(__c, __next); + } + template _LIBCPP_INLINE_VISIBILITY void __insert_c(_Cont* __c) { - __c_node* __n = __insert_c(static_cast(__c)); - ::new(__n) _C_node<_Cont>(__n->__c_, __n->__next_); + __insert_c(static_cast(__c), &__create_C_node<_Cont>); } void __insert_i(void* __i); - __c_node* __insert_c(void* __c); + void __insert_c(void* __c, _InsertConstruct* __fn); void __erase_c(void* __c); void __insert_ic(void* __i, const void* __c); diff --git a/lib/libcxx/include/__errc b/lib/libcxx/include/__errc index d0f00b7f0b..a8ad29f364 100644 --- a/lib/libcxx/include/__errc +++ b/lib/libcxx/include/__errc @@ -1,10 +1,9 @@ // -*- C++ -*- //===---------------------------- __errc ----------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
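// In the __debug hunk above, __insert_c no longer returns a raw __c_node for
// the header to placement-new into; the template side now hands the library a
// plain function pointer (__create_C_node<_Cont>) and the out-of-line
// __insert_c(void*, _InsertConstruct*) calls it. The pattern, reduced to a
// stand-alone sketch with invented names:
#include <new>

struct Node { void *container; Node *next; };

// Non-template core: could live in a separate .cpp / shared library.
typedef Node *(NodeFactory)(void *mem, void *container, Node *next);

Node *register_container(void *container, NodeFactory *make) {
  alignas(Node) static char storage[sizeof(Node)];  // stand-in for real bookkeeping
  return make(storage, container, nullptr);         // construction is delegated
}

// Template side: the only part that knows the concrete node type.
template <class Cont>
Node *make_node(void *mem, void *container, Node *next) {
  (void)sizeof(Cont);                               // a real debug DB would build _C_node<Cont> here
  return ::new (mem) Node{container, next};
}

template <class Cont>
Node *track(Cont *c) {
  return register_container(static_cast<void *>(c), &make_node<Cont>);
}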
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/libcxx/include/__functional_03 b/lib/libcxx/include/__functional_03 index 0a3bfbaa3d..a90cbb75b2 100644 --- a/lib/libcxx/include/__functional_03 +++ b/lib/libcxx/include/__functional_03 @@ -1,10 +1,9 @@ // -*- C++ -*- //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/libcxx/include/__functional_base b/lib/libcxx/include/__functional_base index 032be99bf6..9587d7ab15 100644 --- a/lib/libcxx/include/__functional_base +++ b/lib/libcxx/include/__functional_base @@ -1,10 +1,9 @@ // -*- C++ -*- //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -126,7 +125,7 @@ struct __weak_result_type_imp // bool is true : public __maybe_derive_from_unary_function<_Tp>, public __maybe_derive_from_binary_function<_Tp> { - typedef typename _Tp::result_type result_type; + typedef _LIBCPP_NODEBUG_TYPE typename _Tp::result_type result_type; }; template @@ -147,19 +146,19 @@ struct __weak_result_type template struct __weak_result_type<_Rp ()> { - typedef _Rp result_type; + typedef _LIBCPP_NODEBUG_TYPE _Rp result_type; }; template struct __weak_result_type<_Rp (&)()> { - typedef _Rp result_type; + typedef _LIBCPP_NODEBUG_TYPE _Rp result_type; }; template struct __weak_result_type<_Rp (*)()> { - typedef _Rp result_type; + typedef _LIBCPP_NODEBUG_TYPE _Rp result_type; }; // 1 argument case @@ -611,7 +610,7 @@ _LIBCPP_INLINE_VAR constexpr size_t uses_allocator_v = uses_allocator<_Tp, _Allo template struct __uses_alloc_ctor_imp { - typedef typename __uncvref<_Alloc>::type _RawAlloc; + typedef _LIBCPP_NODEBUG_TYPE typename __uncvref<_Alloc>::type _RawAlloc; static const bool __ua = uses_allocator<_Tp, _RawAlloc>::value; static const bool __ic = is_constructible<_Tp, allocator_arg_t, _Alloc, _Args...>::value; diff --git a/lib/libcxx/include/__functional_base_03 b/lib/libcxx/include/__functional_base_03 index 8407dcfa39..e6dac90c84 100644 --- a/lib/libcxx/include/__functional_base_03 +++ b/lib/libcxx/include/__functional_base_03 @@ -1,10 +1,9 @@ // -*- C++ -*- //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
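// The __functional_base hunk above tags internal result_type typedefs with
// _LIBCPP_NODEBUG_TYPE, which per the __config change expands to
// __attribute__((nodebug)) only on Clang 9 or newer, so purely internal type
// aliases stop bloating debug info. A tiny sketch of the same annotation on
// user code (assumption: a Clang toolchain that accepts nodebug on typedefs;
// the wrapper type is invented):
#if defined(__clang__) && __clang_major__ >= 9
#define MY_NODEBUG_TYPE __attribute__((nodebug))
#else
#define MY_NODEBUG_TYPE
#endif

template <class T>
struct wrapper {
  // Implementation-detail alias; no debug-info entry is emitted for it.
  typedef MY_NODEBUG_TYPE T stored_type;
  stored_type value;
};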
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/libcxx/include/__hash_table b/lib/libcxx/include/__hash_table index 6f5b183105..0b953f58e9 100644 --- a/lib/libcxx/include/__hash_table +++ b/lib/libcxx/include/__hash_table @@ -1,10 +1,9 @@ // -*- C++ -*- //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -123,7 +122,7 @@ inline _LIBCPP_INLINE_VISIBILITY size_t __next_hash_pow2(size_t __n) { - return __n < 2 ? __n : (size_t(1) << (std::numeric_limits::digits - __clz(__n-1))); + return __n < 2 ? __n : (size_t(1) << (std::numeric_limits::digits - __libcpp_clz(__n-1))); } @@ -876,9 +875,9 @@ struct __enforce_unordered_container_requirements { template #ifndef _LIBCPP_CXX03_LANG _LIBCPP_DIAGNOSE_WARNING(!__invokable<_Equal const&, _Key const&, _Key const&>::value, - "the specified comparator type does not provide a const call operator") + "the specified comparator type does not provide a viable const call operator") _LIBCPP_DIAGNOSE_WARNING(!__invokable<_Hash const&, _Key const&>::value, - "the specified hash functor does not provide a const call operator") + "the specified hash functor does not provide a viable const call operator") #endif typename __enforce_unordered_container_requirements<_Key, _Hash, _Equal>::type __diagnose_unordered_container_requirements(int); @@ -1261,7 +1260,7 @@ public: void swap(__hash_table& __u) #if _LIBCPP_STD_VER <= 11 - _NOEXCEPT_DEBUG_( + _NOEXCEPT_( __is_nothrow_swappable::value && __is_nothrow_swappable::value && (!allocator_traits<__pointer_allocator>::propagate_on_container_swap::value || __is_nothrow_swappable<__pointer_allocator>::value) @@ -1269,7 +1268,7 @@ public: || __is_nothrow_swappable<__node_allocator>::value) ); #else - _NOEXCEPT_DEBUG_(__is_nothrow_swappable::value && __is_nothrow_swappable::value); + _NOEXCEPT_(__is_nothrow_swappable::value && __is_nothrow_swappable::value); #endif _LIBCPP_INLINE_VISIBILITY @@ -2249,7 +2248,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_handle_insert_unique( return _InsertReturnType{end(), false, _NodeHandle()}; pair __result = __node_insert_unique(__nh.__ptr_); if (__result.second) - __nh.__release(); + __nh.__release_ptr(); return _InsertReturnType{__result.first, __result.second, _VSTD::move(__nh)}; } @@ -2264,7 +2263,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_handle_insert_unique( return end(); pair __result = __node_insert_unique(__nh.__ptr_); if (__result.second) - __nh.__release(); + __nh.__release_ptr(); return __result.first; } @@ -2328,7 +2327,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_handle_insert_multi( if (__nh.empty()) return end(); iterator __result = __node_insert_multi(__nh.__ptr_); - __nh.__release(); + __nh.__release_ptr(); return __result; } @@ -2342,7 +2341,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_handle_insert_multi( if (__nh.empty()) return end(); iterator __result = __node_insert_multi(__hint, __nh.__ptr_); - __nh.__release(); + 
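The `__next_hash_pow2` hunk above only renames `__clz` to `__libcpp_clz`, but the underlying trick is easy to miss in the diff: round a bucket count up to the next power of two by counting the leading zeros of `n - 1`. A small sketch assuming GCC/Clang's `__builtin_clzl` and a `size_t` the same width as `unsigned long`:

```
#include <cassert>
#include <climits>
#include <cstddef>

// Round n up to the next power of two (n itself if it already is one).
// digits - clz(n - 1) is one past the index of the highest set bit of n - 1.
inline size_t next_hash_pow2(size_t n) {
    const int digits = sizeof(size_t) * CHAR_BIT;
    return n < 2 ? n : size_t(1) << (digits - __builtin_clzl(n - 1));
}

int main() {
    assert(next_hash_pow2(0) == 0);
    assert(next_hash_pow2(3) == 4);
    assert(next_hash_pow2(8) == 8);
    assert(next_hash_pow2(9) == 16);
}
```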
__nh.__release_ptr(); return __result; } @@ -2372,6 +2371,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::__node_handle_merge_multi( template void __hash_table<_Tp, _Hash, _Equal, _Alloc>::rehash(size_type __n) +_LIBCPP_DISABLE_UBSAN_UNSIGNED_INTEGER_CHECK { if (__n == 1) __n = 2; @@ -2807,7 +2807,7 @@ template void __hash_table<_Tp, _Hash, _Equal, _Alloc>::swap(__hash_table& __u) #if _LIBCPP_STD_VER <= 11 - _NOEXCEPT_DEBUG_( + _NOEXCEPT_( __is_nothrow_swappable::value && __is_nothrow_swappable::value && (!allocator_traits<__pointer_allocator>::propagate_on_container_swap::value || __is_nothrow_swappable<__pointer_allocator>::value) @@ -2815,7 +2815,7 @@ __hash_table<_Tp, _Hash, _Equal, _Alloc>::swap(__hash_table& __u) || __is_nothrow_swappable<__node_allocator>::value) ) #else - _NOEXCEPT_DEBUG_(__is_nothrow_swappable::value && __is_nothrow_swappable::value) + _NOEXCEPT_(__is_nothrow_swappable::value && __is_nothrow_swappable::value) #endif { _LIBCPP_ASSERT(__node_traits::propagate_on_container_swap::value || diff --git a/lib/libcxx/include/__libcpp_version b/lib/libcxx/include/__libcpp_version index e002b3628b..d58c55a31d 100644 --- a/lib/libcxx/include/__libcpp_version +++ b/lib/libcxx/include/__libcpp_version @@ -1 +1 @@ -8000 +9000 diff --git a/lib/libcxx/include/__locale b/lib/libcxx/include/__locale index cde9cc85f6..d382e4d8a9 100644 --- a/lib/libcxx/include/__locale +++ b/lib/libcxx/include/__locale @@ -1,10 +1,9 @@ // -*- C++ -*- //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -20,6 +19,7 @@ #include #include #if defined(_LIBCPP_MSVCRT_LIKE) +# include # include #elif defined(_AIX) # include @@ -35,6 +35,9 @@ # include #elif defined(__Fuchsia__) # include +#elif defined(__wasi__) +// WASI libc uses musl's locales support. +# include #elif defined(_LIBCPP_HAS_MUSL_LIBC) # include #endif @@ -64,28 +67,41 @@ private: #elif defined(_LIBCPP_MSVCRT_LIKE) struct __libcpp_locale_guard { __libcpp_locale_guard(locale_t __l) : - __status(_configthreadlocale(_ENABLE_PER_THREAD_LOCALE)), - __locale_collate(setlocale(LC_COLLATE, __l.__get_locale())), - __locale_ctype(setlocale(LC_CTYPE, __l.__get_locale())), - __locale_monetary(setlocale(LC_MONETARY, __l.__get_locale())), - __locale_numeric(setlocale(LC_NUMERIC, __l.__get_locale())), - __locale_time(setlocale(LC_TIME, __l.__get_locale())) - // LC_MESSAGES is not supported on Windows. - {} + __status(_configthreadlocale(_ENABLE_PER_THREAD_LOCALE)) { + // Setting the locale can be expensive even when the locale given is + // already the current locale, so do an explicit check to see if the + // current locale is already the one we want. + const char* __lc = __setlocale(nullptr); + // If every category is the same, the locale string will simply be the + // locale name, otherwise it will be a semicolon-separated string listing + // each category. In the second case, we know at least one category won't + // be what we want, so we only have to check the first case. 
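The `__release()` to `__release_ptr()` renames above are mechanical, but the call sites spell out an ownership protocol worth keeping in mind: the node handle owns its node until the container has actually linked it in, and only then forgets the pointer. A simplified stand-in for that hand-off, using `std::unique_ptr` in place of libc++'s raw `__ptr_` plus `__release_ptr()`; the classes here are illustrative, not libc++'s:

```
#include <map>
#include <memory>
#include <utility>

struct toy_node { int key; };

// Toy handle: owns its node until a container accepts it.
struct toy_node_handle {
    std::unique_ptr<toy_node> ptr_;
};

// Same shape as __node_handle_insert_unique above: ownership moves into the
// container only on success; a rejected duplicate stays owned by the handle
// and is freed when the handle is destroyed.
bool insert_unique(std::map<int, std::unique_ptr<toy_node>>& c, toy_node_handle& nh) {
    if (!nh.ptr_)
        return false;
    int key = nh.ptr_->key;
    auto result = c.emplace(key, nullptr);
    if (result.second)
        result.first->second = std::move(nh.ptr_);  // the "__release_ptr()" moment
    return result.second;
}
```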
+ if (strcmp(__l.__get_locale(), __lc) != 0) { + __locale_all = _strdup(__lc); + if (__locale_all == nullptr) + __throw_bad_alloc(); + __setlocale(__l.__get_locale()); + } + } ~__libcpp_locale_guard() { - setlocale(LC_COLLATE, __locale_collate); - setlocale(LC_CTYPE, __locale_ctype); - setlocale(LC_MONETARY, __locale_monetary); - setlocale(LC_NUMERIC, __locale_numeric); - setlocale(LC_TIME, __locale_time); - _configthreadlocale(__status); + // The CRT documentation doesn't explicitly say, but setlocale() does the + // right thing when given a semicolon-separated list of locale settings + // for the different categories in the same format as returned by + // setlocale(LC_ALL, nullptr). + if (__locale_all != nullptr) { + __setlocale(__locale_all); + free(__locale_all); + } + _configthreadlocale(__status); + } + static const char* __setlocale(const char* __locale) { + const char* __new_locale = setlocale(LC_ALL, __locale); + if (__new_locale == nullptr) + __throw_bad_alloc(); + return __new_locale; } int __status; - char* __locale_collate; - char* __locale_ctype; - char* __locale_monetary; - char* __locale_numeric; - char* __locale_time; + char* __locale_all = nullptr; }; #endif @@ -255,7 +271,10 @@ public: return do_compare(__lo1, __hi1, __lo2, __hi2); } + // FIXME(EricWF): The _LIBCPP_ALWAYS_INLINE is needed on Windows to work + // around a dllimport bug that expects an external instantiation. _LIBCPP_INLINE_VISIBILITY + _LIBCPP_ALWAYS_INLINE string_type transform(const char_type* __lo, const char_type* __hi) const { return do_transform(__lo, __hi); @@ -389,6 +408,11 @@ public: static const mask punct = _ISpunct; static const mask xdigit = _ISxdigit; static const mask blank = _ISblank; +#if defined(__mips__) + static const mask __regex_word = static_cast(_ISbit(15)); +#else + static const mask __regex_word = 0x80; +#endif #elif defined(_LIBCPP_MSVCRT_LIKE) typedef unsigned short mask; static const mask space = _SPACE; @@ -401,6 +425,7 @@ public: static const mask punct = _PUNCT; static const mask xdigit = _HEX; static const mask blank = _BLANK; + static const mask __regex_word = 0x80; # define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_PRINT #elif defined(__APPLE__) || defined(__FreeBSD__) || defined(__EMSCRIPTEN__) || defined(__NetBSD__) # ifdef __APPLE__ @@ -422,8 +447,12 @@ public: # if defined(__NetBSD__) static const mask blank = _CTYPE_BL; + // NetBSD defines classes up to 0x2000 + // see sys/ctype_bits.h, _CTYPE_Q + static const mask __regex_word = 0x8000; # else static const mask blank = _CTYPE_B; + static const mask __regex_word = 0x80; # endif #elif defined(__sun__) || defined(_AIX) typedef unsigned int mask; @@ -437,6 +466,7 @@ public: static const mask punct = _ISPUNCT; static const mask xdigit = _ISXDIGIT; static const mask blank = _ISBLANK; + static const mask __regex_word = 0x80; #elif defined(_NEWLIB_VERSION) // Same type as Newlib's _ctype_ array in newlib/libc/include/ctype.h. 
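The rewritten MSVCRT `__libcpp_locale_guard` above avoids calling `setlocale` at all when the requested locale is already active, and otherwise saves the whole `LC_ALL` string so a single call restores every category on destruction. A portable, hedged sketch of the same idea (plain `strdup`/`setlocale` only; the real guard also toggles the Windows-specific `_configthreadlocale` and uses `_strdup`):

```
#include <clocale>
#include <cstdlib>
#include <cstring>
#include <new>
#include <stdexcept>

// RAII guard: swap LC_ALL only if it actually differs, restore on destruction.
class scoped_locale {
public:
    explicit scoped_locale(const char* wanted) {
        const char* current = std::setlocale(LC_ALL, nullptr);  // query only
        if (current != nullptr && std::strcmp(current, wanted) != 0) {
            saved_ = strdup(current);        // may be a semicolon-separated list
            if (saved_ == nullptr)
                throw std::bad_alloc();
            if (std::setlocale(LC_ALL, wanted) == nullptr) {
                std::free(saved_);
                saved_ = nullptr;
                throw std::runtime_error("setlocale failed");
            }
        }
    }
    ~scoped_locale() {
        if (saved_ != nullptr) {
            std::setlocale(LC_ALL, saved_);  // one call restores every category
            std::free(saved_);
        }
    }
    scoped_locale(const scoped_locale&) = delete;
    scoped_locale& operator=(const scoped_locale&) = delete;
private:
    char* saved_ = nullptr;
};
```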
typedef char mask; @@ -450,6 +480,7 @@ public: static const mask punct = _P; static const mask xdigit = _X | _N; static const mask blank = _B; + static const mask __regex_word = 0x80; # define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_PRINT # define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_ALPHA # define _LIBCPP_CTYPE_MASK_IS_COMPOSITE_XDIGIT @@ -465,6 +496,7 @@ public: static const mask punct = 1<<7; static const mask xdigit = 1<<8; static const mask blank = 1<<9; + static const mask __regex_word = 1<<10; #endif static const mask alnum = alpha | digit; static const mask graph = alnum | punct; @@ -1230,8 +1262,6 @@ _LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname) _LIBCPP_EXTERN_TEMPLATE2(class _LIBCPP_EXTERN_TEMPLATE_TYPE_VIS codecvt_byname) -_LIBCPP_NORETURN _LIBCPP_FUNC_VIS void __throw_runtime_error(const char*); - template struct __narrow_to_utf8 { diff --git a/lib/libcxx/include/__mutex_base b/lib/libcxx/include/__mutex_base index da21a5f8eb..f828beaf78 100644 --- a/lib/libcxx/include/__mutex_base +++ b/lib/libcxx/include/__mutex_base @@ -1,10 +1,9 @@ // -*- C++ -*- //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -37,28 +36,24 @@ _LIBCPP_BEGIN_NAMESPACE_STD # endif #endif // _LIBCPP_THREAD_SAFETY_ANNOTATION + class _LIBCPP_TYPE_VIS _LIBCPP_THREAD_SAFETY_ANNOTATION(capability("mutex")) mutex { -#ifndef _LIBCPP_CXX03_LANG __libcpp_mutex_t __m_ = _LIBCPP_MUTEX_INITIALIZER; -#else - __libcpp_mutex_t __m_; -#endif public: _LIBCPP_INLINE_VISIBILITY -#ifndef _LIBCPP_CXX03_LANG - constexpr mutex() = default; + _LIBCPP_CONSTEXPR mutex() = default; + + mutex(const mutex&) = delete; + mutex& operator=(const mutex&) = delete; + +#if defined(_LIBCPP_HAS_TRIVIAL_MUTEX_DESTRUCTION) + ~mutex() = default; #else - mutex() _NOEXCEPT {__m_ = (__libcpp_mutex_t)_LIBCPP_MUTEX_INITIALIZER;} + ~mutex() _NOEXCEPT; #endif - ~mutex(); -private: - mutex(const mutex&);// = delete; - mutex& operator=(const mutex&);// = delete; - -public: void lock() _LIBCPP_THREAD_SAFETY_ANNOTATION(acquire_capability()); bool try_lock() _NOEXCEPT _LIBCPP_THREAD_SAFETY_ANNOTATION(try_acquire_capability(true)); void unlock() _NOEXCEPT _LIBCPP_THREAD_SAFETY_ANNOTATION(release_capability()); @@ -287,26 +282,20 @@ _LIBCPP_DECLARE_STRONG_ENUM_EPILOG(cv_status) class _LIBCPP_TYPE_VIS condition_variable { -#ifndef _LIBCPP_CXX03_LANG __libcpp_condvar_t __cv_ = _LIBCPP_CONDVAR_INITIALIZER; -#else - __libcpp_condvar_t __cv_; -#endif - public: _LIBCPP_INLINE_VISIBILITY -#ifndef _LIBCPP_CXX03_LANG - constexpr condition_variable() _NOEXCEPT = default; + _LIBCPP_CONSTEXPR condition_variable() _NOEXCEPT = default; + +#ifdef _LIBCPP_HAS_TRIVIAL_CONDVAR_DESTRUCTION + ~condition_variable() = default; #else - condition_variable() _NOEXCEPT {__cv_ = (__libcpp_condvar_t)_LIBCPP_CONDVAR_INITIALIZER;} -#endif ~condition_variable(); +#endif -private: - condition_variable(const condition_variable&); // = delete; - condition_variable& operator=(const condition_variable&); // = delete; + condition_variable(const condition_variable&) = delete; + 
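The `mutex` cleanup above makes the default constructor unconditionally `_LIBCPP_CONSTEXPR` and only declares a real destructor when destruction is non-trivial. The practical payoff is constant initialization: a namespace-scope mutex needs no runtime constructor and is safe to use from other dynamic initializers. A simplified stand-in, assuming pthreads and a `PTHREAD_MUTEX_INITIALIZER` that is usable in a constant expression (true on glibc, not guaranteed everywhere):

```
#include <pthread.h>

class my_mutex {
    pthread_mutex_t m_ = PTHREAD_MUTEX_INITIALIZER;
public:
    constexpr my_mutex() = default;   // enables constant initialization
    ~my_mutex() = default;            // assumes no pthread_mutex_destroy is needed
    my_mutex(const my_mutex&) = delete;
    my_mutex& operator=(const my_mutex&) = delete;

    void lock()     { pthread_mutex_lock(&m_); }
    void unlock()   { pthread_mutex_unlock(&m_); }
    bool try_lock() { return pthread_mutex_trylock(&m_) == 0; }
};

// Constant-initialized: no global constructor runs for this object.
my_mutex g_lock;
```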
condition_variable& operator=(const condition_variable&) = delete; -public: void notify_one() _NOEXCEPT; void notify_all() _NOEXCEPT; diff --git a/lib/libcxx/include/__node_handle b/lib/libcxx/include/__node_handle index a9cf3b7217..be1fe17a87 100644 --- a/lib/libcxx/include/__node_handle +++ b/lib/libcxx/include/__node_handle @@ -1,10 +1,9 @@ // -*- C++ -*- //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -57,7 +56,7 @@ private: optional __alloc_; _LIBCPP_INLINE_VISIBILITY - void __release() + void __release_ptr() { __ptr_ = nullptr; __alloc_ = _VSTD::nullopt; @@ -194,8 +193,7 @@ using __map_node_handle = __basic_node_handle< _NodeType, _Alloc, __map_node_handle_specifics>; template -_LIBCPP_TEMPLATE_VIS -struct __insert_return_type +struct _LIBCPP_TEMPLATE_VIS __insert_return_type { _Iterator position; bool inserted; diff --git a/lib/libcxx/include/__nullptr b/lib/libcxx/include/__nullptr index aa3b4d2145..45529a710b 100644 --- a/lib/libcxx/include/__nullptr +++ b/lib/libcxx/include/__nullptr @@ -1,10 +1,9 @@ // -*- C++ -*- //===--------------------------- __nullptr --------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/libcxx/include/__sso_allocator b/lib/libcxx/include/__sso_allocator index 8aca0495d7..3930128738 100644 --- a/lib/libcxx/include/__sso_allocator +++ b/lib/libcxx/include/__sso_allocator @@ -1,10 +1,9 @@ // -*- C++ -*- //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/libcxx/include/__std_stream b/lib/libcxx/include/__std_stream index db90795f66..5a9a470a97 100644 --- a/lib/libcxx/include/__std_stream +++ b/lib/libcxx/include/__std_stream @@ -1,10 +1,9 @@ // -*- C++ -*- //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
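`__insert_return_type` above is the implementation type behind the C++17 `insert_return_type` that `set::insert(node_type&&)` and the other node-handle overloads return. Its three members can be exercised entirely through the public API (C++17):

```
#include <cassert>
#include <set>
#include <utility>

int main() {
    std::set<int> s{1, 2, 3};

    auto nh = s.extract(2);            // node leaves the container, handle owns it
    assert(!nh.empty() && s.count(2) == 0);

    auto r = s.insert(std::move(nh));  // returns insert_return_type
    assert(r.inserted);                // the node was spliced back in
    assert(*r.position == 2);          // iterator to the re-inserted element
    assert(r.node.empty());            // the handle gave up ownership
}
```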
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/libcxx/include/__string b/lib/libcxx/include/__string index 1ddeec7149..a88b976be3 100644 --- a/lib/libcxx/include/__string +++ b/lib/libcxx/include/__string @@ -1,10 +1,9 @@ // -*- C++ -*- //===-------------------------- __string ----------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// diff --git a/lib/libcxx/include/__threading_support b/lib/libcxx/include/__threading_support index 2024900627..589fe2096f 100644 --- a/lib/libcxx/include/__threading_support +++ b/lib/libcxx/include/__threading_support @@ -1,10 +1,9 @@ // -*- C++ -*- //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -23,6 +22,8 @@ # include <__external_threading> #elif !defined(_LIBCPP_HAS_NO_THREADS) +typedef ::timespec __libcpp_timespec_t; + #if defined(_LIBCPP_HAS_THREAD_API_PTHREAD) # include # include @@ -149,7 +150,7 @@ int __libcpp_condvar_wait(__libcpp_condvar_t* __cv, __libcpp_mutex_t* __m); _LIBCPP_THREAD_ABI_VISIBILITY _LIBCPP_NO_THREAD_SAFETY_ANALYSIS int __libcpp_condvar_timedwait(__libcpp_condvar_t *__cv, __libcpp_mutex_t *__m, - timespec *__ts); + __libcpp_timespec_t *__ts); _LIBCPP_THREAD_ABI_VISIBILITY int __libcpp_condvar_destroy(__libcpp_condvar_t* __cv); @@ -157,7 +158,7 @@ int __libcpp_condvar_destroy(__libcpp_condvar_t* __cv); // Execute once _LIBCPP_THREAD_ABI_VISIBILITY int __libcpp_execute_once(__libcpp_exec_once_flag *flag, - void (*init_routine)(void)); + void (*init_routine)()); // Thread id _LIBCPP_THREAD_ABI_VISIBILITY @@ -288,7 +289,7 @@ int __libcpp_condvar_wait(__libcpp_condvar_t *__cv, __libcpp_mutex_t *__m) } int __libcpp_condvar_timedwait(__libcpp_condvar_t *__cv, __libcpp_mutex_t *__m, - timespec *__ts) + __libcpp_timespec_t *__ts) { return pthread_cond_timedwait(__cv, __m, __ts); } @@ -300,7 +301,7 @@ int __libcpp_condvar_destroy(__libcpp_condvar_t *__cv) // Execute once int __libcpp_execute_once(__libcpp_exec_once_flag *flag, - void (*init_routine)(void)) { + void (*init_routine)()) { return pthread_once(flag, init_routine); } @@ -357,7 +358,7 @@ void __libcpp_thread_sleep_for(const chrono::nanoseconds& __ns) { using namespace chrono; seconds __s = duration_cast(__ns); - timespec __ts; + __libcpp_timespec_t __ts; typedef decltype(__ts.tv_sec) ts_sec; _LIBCPP_CONSTEXPR ts_sec __ts_sec_max = numeric_limits::max(); diff --git a/lib/libcxx/include/__tree b/lib/libcxx/include/__tree index 814851085b..15b03ec857 100644 --- a/lib/libcxx/include/__tree +++ b/lib/libcxx/include/__tree @@ -1,10 +1,9 @@ // -*- C++ -*- 
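`__libcpp_timespec_t` above is a plain typedef for `::timespec`, but the neighbouring `__libcpp_thread_sleep_for` hunk shows the conversion that actually matters: split the nanosecond duration into whole seconds plus a remainder, clamp `tv_sec` so it cannot overflow, and retry after signal interruption. A self-contained sketch along those lines, assuming POSIX `nanosleep`:

```
#include <cerrno>
#include <chrono>
#include <ctime>
#include <limits>

void sleep_for_ns(std::chrono::nanoseconds ns) {
    using namespace std::chrono;
    if (ns <= nanoseconds::zero())
        return;
    seconds s = duration_cast<seconds>(ns);
    timespec ts;
    typedef decltype(ts.tv_sec) ts_sec;
    const ts_sec ts_sec_max = std::numeric_limits<ts_sec>::max();
    if (s.count() < ts_sec_max) {
        ts.tv_sec  = static_cast<ts_sec>(s.count());
        ts.tv_nsec = static_cast<long>((ns - s).count());
    } else {
        ts.tv_sec  = ts_sec_max;   // clamp instead of overflowing time_t
        ts.tv_nsec = 999999999;
    }
    while (nanosleep(&ts, &ts) == -1 && errno == EINTR)
        ;                          // resume with the remaining time
}
```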
//===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -27,6 +26,13 @@ _LIBCPP_PUSH_MACROS _LIBCPP_BEGIN_NAMESPACE_STD +#if defined(__GNUC__) && !defined(__clang__) // gcc.gnu.org/PR37804 +template class _LIBCPP_TEMPLATE_VIS map; +template class _LIBCPP_TEMPLATE_VIS multimap; +template class _LIBCPP_TEMPLATE_VIS set; +template class _LIBCPP_TEMPLATE_VIS multiset; +#endif + template class __tree; template class _LIBCPP_TEMPLATE_VIS __tree_iterator; @@ -965,7 +971,7 @@ private: template #ifndef _LIBCPP_CXX03_LANG _LIBCPP_DIAGNOSE_WARNING(!std::__invokable<_Compare const&, _Tp const&, _Tp const&>::value, - "the specified comparator type does not provide a const call operator") + "the specified comparator type does not provide a viable const call operator") #endif int __diagnose_non_const_comparator(); @@ -1090,8 +1096,8 @@ public: __tree(const value_compare& __comp, const allocator_type& __a); __tree(const __tree& __t); __tree& operator=(const __tree& __t); - template - void __assign_unique(_InputIterator __first, _InputIterator __last); + template + void __assign_unique(_ForwardIterator __first, _ForwardIterator __last); template void __assign_multi(_InputIterator __first, _InputIterator __last); #ifndef _LIBCPP_CXX03_LANG @@ -1326,10 +1332,7 @@ public: #endif // !_LIBCPP_CXX03_LANG _LIBCPP_INLINE_VISIBILITY - pair __node_insert_unique(__node_pointer __nd); - _LIBCPP_INLINE_VISIBILITY - iterator __node_insert_unique(const_iterator __p, - __node_pointer __nd); + pair __node_assign_unique(const __container_value_type& __v, __node_pointer __dest); _LIBCPP_INLINE_VISIBILITY iterator __node_insert_multi(__node_pointer __nd); @@ -1509,8 +1512,50 @@ private: _LIBCPP_INLINE_VISIBILITY void __move_assign_alloc(__tree&, false_type) _NOEXCEPT {} - __node_pointer __detach(); - static __node_pointer __detach(__node_pointer); + struct _DetachedTreeCache { + _LIBCPP_INLINE_VISIBILITY + explicit _DetachedTreeCache(__tree *__t) _NOEXCEPT : __t_(__t), + __cache_root_(__detach_from_tree(__t)) { + __advance(); + } + + _LIBCPP_INLINE_VISIBILITY + __node_pointer __get() const _NOEXCEPT { + return __cache_elem_; + } + + _LIBCPP_INLINE_VISIBILITY + void __advance() _NOEXCEPT { + __cache_elem_ = __cache_root_; + if (__cache_root_) { + __cache_root_ = __detach_next(__cache_root_); + } + } + + _LIBCPP_INLINE_VISIBILITY + ~_DetachedTreeCache() { + __t_->destroy(__cache_elem_); + if (__cache_root_) { + while (__cache_root_->__parent_ != nullptr) + __cache_root_ = static_cast<__node_pointer>(__cache_root_->__parent_); + __t_->destroy(__cache_root_); + } + } + + _DetachedTreeCache(_DetachedTreeCache const&) = delete; + _DetachedTreeCache& operator=(_DetachedTreeCache const&) = delete; + + private: + _LIBCPP_INLINE_VISIBILITY + static __node_pointer __detach_from_tree(__tree *__t) _NOEXCEPT; + _LIBCPP_INLINE_VISIBILITY + static __node_pointer __detach_next(__node_pointer) _NOEXCEPT; + + __tree *__t_; + __node_pointer __cache_root_; + __node_pointer __cache_elem_; + }; + template friend class _LIBCPP_TEMPLATE_VIS map; template friend class 
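`_DetachedTreeCache` above packages the old detach/reuse/cleanup dance into an RAII helper: detach every existing node once, hand them out one at a time for reuse, and let the destructor reclaim whatever was never reused, which is what removes the explicit try/catch blocks further down. A greatly simplified analogue on a singly linked list (not a red-black tree) to show the shape of the idea; all names are illustrative:

```
struct list_node { int value; list_node* next; };

// Takes over a whole chain of nodes up front; whatever the caller never
// reuses is freed in the destructor, so the reuse loop stays exception-safe
// without explicit try/catch.
class detached_cache {
public:
    explicit detached_cache(list_node*& head) : head_(head) { head = nullptr; }

    // Pop one reusable node, or nullptr when exhausted. The caller now owns
    // it and typically relinks it into the structure being rebuilt.
    list_node* take() {
        list_node* n = head_;
        if (n) {
            head_ = n->next;
            n->next = nullptr;
        }
        return n;
    }

    ~detached_cache() {
        while (head_) {
            list_node* next = head_->next;
            delete head_;
            head_ = next;
        }
    }

    detached_cache(const detached_cache&) = delete;
    detached_cache& operator=(const detached_cache&) = delete;

private:
    list_node* head_;
};
```

The real cache walks the detached tree with `__detach_next` instead of a `next` pointer, but the ownership story is the same: construct it, reuse nodes until either the input or the cache runs out, and let the destructor clean up the rest.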
_LIBCPP_TEMPLATE_VIS multimap; @@ -1548,13 +1593,13 @@ __tree<_Tp, _Compare, _Allocator>::__tree(const value_compare& __comp, // Precondition: size() != 0 template typename __tree<_Tp, _Compare, _Allocator>::__node_pointer -__tree<_Tp, _Compare, _Allocator>::__detach() +__tree<_Tp, _Compare, _Allocator>::_DetachedTreeCache::__detach_from_tree(__tree *__t) _NOEXCEPT { - __node_pointer __cache = static_cast<__node_pointer>(__begin_node()); - __begin_node() = __end_node(); - __end_node()->__left_->__parent_ = nullptr; - __end_node()->__left_ = nullptr; - size() = 0; + __node_pointer __cache = static_cast<__node_pointer>(__t->__begin_node()); + __t->__begin_node() = __t->__end_node(); + __t->__end_node()->__left_->__parent_ = nullptr; + __t->__end_node()->__left_ = nullptr; + __t->size() = 0; // __cache->__left_ == nullptr if (__cache->__right_ != nullptr) __cache = static_cast<__node_pointer>(__cache->__right_); @@ -1569,7 +1614,7 @@ __tree<_Tp, _Compare, _Allocator>::__detach() // This is no longer a red-black tree template typename __tree<_Tp, _Compare, _Allocator>::__node_pointer -__tree<_Tp, _Compare, _Allocator>::__detach(__node_pointer __cache) +__tree<_Tp, _Compare, _Allocator>::_DetachedTreeCache::__detach_next(__node_pointer __cache) _NOEXCEPT { if (__cache->__parent_ == nullptr) return nullptr; @@ -1603,45 +1648,23 @@ __tree<_Tp, _Compare, _Allocator>::operator=(const __tree& __t) } template -template +template void -__tree<_Tp, _Compare, _Allocator>::__assign_unique(_InputIterator __first, _InputIterator __last) +__tree<_Tp, _Compare, _Allocator>::__assign_unique(_ForwardIterator __first, _ForwardIterator __last) { - typedef iterator_traits<_InputIterator> _ITraits; + typedef iterator_traits<_ForwardIterator> _ITraits; typedef typename _ITraits::value_type _ItValueType; static_assert((is_same<_ItValueType, __container_value_type>::value), "__assign_unique may only be called with the containers value type"); - + static_assert(__is_forward_iterator<_ForwardIterator>::value, + "__assign_unique requires a forward iterator"); if (size() != 0) { - __node_pointer __cache = __detach(); -#ifndef _LIBCPP_NO_EXCEPTIONS - try - { -#endif // _LIBCPP_NO_EXCEPTIONS - for (; __cache != nullptr && __first != __last; ++__first) - { - __cache->__value_ = *__first; - __node_pointer __next = __detach(__cache); - __node_insert_unique(__cache); - __cache = __next; + _DetachedTreeCache __cache(this); + for (; __cache.__get() != nullptr && __first != __last; ++__first) { + if (__node_assign_unique(*__first, __cache.__get()).second) + __cache.__advance(); } -#ifndef _LIBCPP_NO_EXCEPTIONS - } - catch (...) - { - while (__cache->__parent_ != nullptr) - __cache = static_cast<__node_pointer>(__cache->__parent_); - destroy(__cache); - throw; - } -#endif // _LIBCPP_NO_EXCEPTIONS - if (__cache != nullptr) - { - while (__cache->__parent_ != nullptr) - __cache = static_cast<__node_pointer>(__cache->__parent_); - destroy(__cache); - } } for (; __first != __last; ++__first) __insert_unique(*__first); @@ -1660,33 +1683,11 @@ __tree<_Tp, _Compare, _Allocator>::__assign_multi(_InputIterator __first, _Input " or the nodes value type"); if (size() != 0) { - __node_pointer __cache = __detach(); -#ifndef _LIBCPP_NO_EXCEPTIONS - try - { -#endif // _LIBCPP_NO_EXCEPTIONS - for (; __cache != nullptr && __first != __last; ++__first) - { - __cache->__value_ = *__first; - __node_pointer __next = __detach(__cache); - __node_insert_multi(__cache); - __cache = __next; - } -#ifndef _LIBCPP_NO_EXCEPTIONS - } - catch (...) 
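`__assign_unique` above now takes `_ForwardIterator` and rejects weaker categories with a `static_assert`. A generic version of that compile-time gate can be written against the public iterator traits (standard names only; the libc++ internals use their own `__is_forward_iterator`):

```
#include <iterator>
#include <type_traits>

template <class It>
void assign_unique(It first, It last) {
    static_assert(
        std::is_base_of<std::forward_iterator_tag,
                        typename std::iterator_traits<It>::iterator_category>::value,
        "assign_unique requires at least a forward iterator");
    // ... reuse existing nodes, then insert whatever elements remain ...
    (void)first;
    (void)last;
}
```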
- { - while (__cache->__parent_ != nullptr) - __cache = static_cast<__node_pointer>(__cache->__parent_); - destroy(__cache); - throw; - } -#endif // _LIBCPP_NO_EXCEPTIONS - if (__cache != nullptr) - { - while (__cache->__parent_ != nullptr) - __cache = static_cast<__node_pointer>(__cache->__parent_); - destroy(__cache); + _DetachedTreeCache __cache(this); + for (; __cache.__get() && __first != __last; ++__first) { + __cache.__get()->__value_ = *__first; + __node_insert_multi(__cache.__get()); + __cache.__advance(); } } for (; __first != __last; ++__first) @@ -1784,33 +1785,11 @@ __tree<_Tp, _Compare, _Allocator>::__move_assign(__tree& __t, false_type) const_iterator __e = end(); if (size() != 0) { - __node_pointer __cache = __detach(); -#ifndef _LIBCPP_NO_EXCEPTIONS - try - { -#endif // _LIBCPP_NO_EXCEPTIONS - while (__cache != nullptr && __t.size() != 0) - { - __cache->__value_ = _VSTD::move(__t.remove(__t.begin())->__value_); - __node_pointer __next = __detach(__cache); - __node_insert_multi(__cache); - __cache = __next; - } -#ifndef _LIBCPP_NO_EXCEPTIONS - } - catch (...) - { - while (__cache->__parent_ != nullptr) - __cache = static_cast<__node_pointer>(__cache->__parent_); - destroy(__cache); - throw; - } -#endif // _LIBCPP_NO_EXCEPTIONS - if (__cache != nullptr) - { - while (__cache->__parent_ != nullptr) - __cache = static_cast<__node_pointer>(__cache->__parent_); - destroy(__cache); + _DetachedTreeCache __cache(this); + while (__cache.__get() != nullptr && __t.size() != 0) { + __cache.__get()->__value_ = _VSTD::move(__t.remove(__t.begin())->__value_); + __node_insert_multi(__cache.__get()); + __cache.__advance(); } } while (__t.size() != 0) @@ -2319,14 +2298,15 @@ __tree<_Tp, _Compare, _Allocator>::__insert_multi(const_iterator __p, const __co template pair::iterator, bool> -__tree<_Tp, _Compare, _Allocator>::__node_insert_unique(__node_pointer __nd) +__tree<_Tp, _Compare, _Allocator>::__node_assign_unique(const __container_value_type& __v, __node_pointer __nd) { __parent_pointer __parent; - __node_base_pointer& __child = __find_equal(__parent, __nd->__value_); + __node_base_pointer& __child = __find_equal(__parent, _NodeTypes::__get_key(__v)); __node_pointer __r = static_cast<__node_pointer>(__child); bool __inserted = false; if (__child == nullptr) { + __nd->__value_ = __v; __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__nd)); __r = __nd; __inserted = true; @@ -2334,22 +2314,6 @@ __tree<_Tp, _Compare, _Allocator>::__node_insert_unique(__node_pointer __nd) return pair(iterator(__r), __inserted); } -template -typename __tree<_Tp, _Compare, _Allocator>::iterator -__tree<_Tp, _Compare, _Allocator>::__node_insert_unique(const_iterator __p, - __node_pointer __nd) -{ - __parent_pointer __parent; - __node_base_pointer __dummy; - __node_base_pointer& __child = __find_equal(__p, __parent, __nd->__value_); - __node_pointer __r = static_cast<__node_pointer>(__child); - if (__child == nullptr) - { - __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__nd)); - __r = __nd; - } - return iterator(__r); -} template typename __tree<_Tp, _Compare, _Allocator>::iterator @@ -2408,7 +2372,7 @@ __tree<_Tp, _Compare, _Allocator>::__node_handle_insert_unique( __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__ptr)); - __nh.__release(); + __nh.__release_ptr(); return _InsertReturnType{iterator(__ptr), true, _NodeHandle()}; } @@ -2433,7 +2397,7 @@ __tree<_Tp, _Compare, _Allocator>::__node_handle_insert_unique( __insert_node_at(__parent, __child, 
static_cast<__node_base_pointer>(__ptr)); __r = __ptr; - __nh.__release(); + __nh.__release_ptr(); } return iterator(__r); } @@ -2498,7 +2462,7 @@ __tree<_Tp, _Compare, _Allocator>::__node_handle_insert_multi(_NodeHandle&& __nh __node_base_pointer& __child = __find_leaf_high( __parent, _NodeTypes::__get_key(__ptr->__value_)); __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__ptr)); - __nh.__release(); + __nh.__release_ptr(); return iterator(__ptr); } @@ -2517,7 +2481,7 @@ __tree<_Tp, _Compare, _Allocator>::__node_handle_insert_multi( __node_base_pointer& __child = __find_leaf(__hint, __parent, _NodeTypes::__get_key(__ptr->__value_)); __insert_node_at(__parent, __child, static_cast<__node_base_pointer>(__ptr)); - __nh.__release(); + __nh.__release_ptr(); return iterator(__ptr); } diff --git a/lib/libcxx/include/__tuple b/lib/libcxx/include/__tuple index 3b23d78afa..196f3c2b5a 100644 --- a/lib/libcxx/include/__tuple +++ b/lib/libcxx/include/__tuple @@ -1,10 +1,9 @@ // -*- C++ -*- //===----------------------------------------------------------------------===// // -// The LLVM Compiler Infrastructure -// -// This file is dual licensed under the MIT and the University of Illinois Open -// Source Licenses. See LICENSE.TXT for details. +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// @@ -54,27 +53,24 @@ template struct _LIBCPP_TEMPLATE_VIS tuple_size : publ template struct _LIBCPP_TEMPLATE_VIS tuple_size : public tuple_size<_Tp> {}; #endif -template class _LIBCPP_TEMPLATE_VIS tuple_element; +template struct _LIBCPP_TEMPLATE_VIS tuple_element; template -class _LIBCPP_TEMPLATE_VIS tuple_element<_Ip, const _Tp> +struct _LIBCPP_TEMPLATE_VIS tuple_element<_Ip, const _Tp> { -public: - typedef typename add_const::type>::type type; + typedef _LIBCPP_NODEBUG_TYPE typename add_const::type>::type type; }; template -class _LIBCPP_TEMPLATE_VIS tuple_element<_Ip, volatile _Tp> +struct _LIBCPP_TEMPLATE_VIS tuple_element<_Ip, volatile _Tp> { -public: - typedef typename add_volatile::type>::type type; + typedef _LIBCPP_NODEBUG_TYPE typename add_volatile::type>::type type; }; template -class _LIBCPP_TEMPLATE_VIS tuple_element<_Ip, const volatile _Tp> +struct _LIBCPP_TEMPLATE_VIS tuple_element<_Ip, const volatile _Tp> { -public: - typedef typename add_cv::type>::type type; + typedef _LIBCPP_NODEBUG_TYPE typename add_cv::type>::type type; }; template struct __tuple_like : false_type {}; @@ -103,7 +99,7 @@ namespace __detail { template struct __repeat; template struct __repeat<__integer_sequence<_Tp, _Np...>, _Extra...> { - typedef __integer_sequence<_Tp, + typedef _LIBCPP_NODEBUG_TYPE __integer_sequence<_Tp, _Np..., sizeof...(_Np) + _Np..., 2 * sizeof...(_Np) + _Np..., @@ -257,7 +253,7 @@ template struct __tuple_types {}; namespace __indexer_detail { template -struct __indexed { using type = _Tp; }; +struct __indexed { using type _LIBCPP_NODEBUG_TYPE = _Tp; }; template struct __indexer; @@ -272,7 +268,7 @@ __indexed<_Idx, _Tp> __at_index(__indexed<_Idx, _Tp> const&); } // namespace __indexer_detail template -using __type_pack_element = typename decltype( +using __type_pack_element _LIBCPP_NODEBUG_TYPE = typename decltype( __indexer_detail::__at_index<_Idx>( __indexer_detail::__indexer< __tuple_types<_Types...>, @@ -282,11 +278,10 @@ using __type_pack_element = 
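The `tuple_element` edits above are largely mechanical (class becomes struct, the typedefs gain `_LIBCPP_NODEBUG_TYPE`), but the cv-propagating partial specializations they touch are observable through the public API:

```
#include <tuple>
#include <type_traits>

static_assert(std::is_same<
                  std::tuple_element<0, const std::tuple<int, char>>::type,
                  const int>::value,
              "const on the tuple propagates to the element type");

static_assert(std::is_same<
                  std::tuple_element<1, volatile std::tuple<int, char>>::type,
                  volatile char>::value,
              "volatile propagates the same way");
```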
typename decltype( #endif template -class _LIBCPP_TEMPLATE_VIS tuple_element<_Ip, __tuple_types<_Types...>> +struct _LIBCPP_TEMPLATE_VIS tuple_element<_Ip, __tuple_types<_Types...>> { -public: static_assert(_Ip < sizeof...(_Types), "tuple_element index out of range"); - typedef __type_pack_element<_Ip, _Types...> type; + typedef _LIBCPP_NODEBUG_TYPE __type_pack_element<_Ip, _Types...> type; }; @@ -306,34 +301,34 @@ struct __apply_cv_mf { }; template <> struct __apply_cv_mf { - template using __apply = const _Tp; + template using __apply _LIBCPP_NODEBUG_TYPE = const _Tp; }; template <> struct __apply_cv_mf { - template using __apply = volatile _Tp; + template using __apply _LIBCPP_NODEBUG_TYPE = volatile _Tp; }; template <> struct __apply_cv_mf { - template using __apply = const volatile _Tp; + template using __apply _LIBCPP_NODEBUG_TYPE = const volatile _Tp; }; template <> struct __apply_cv_mf { - template using __apply = _Tp&; + template using __apply _LIBCPP_NODEBUG_TYPE = _Tp&; }; template <> struct __apply_cv_mf { - template using __apply = const _Tp&; + template using __apply _LIBCPP_NODEBUG_TYPE = const _Tp&; }; template <> struct __apply_cv_mf { - template using __apply = volatile _Tp&; + template using __apply _LIBCPP_NODEBUG_TYPE = volatile _Tp&; }; template <> struct __apply_cv_mf { - template using __apply = const volatile _Tp&; + template using __apply _LIBCPP_NODEBUG_TYPE = const volatile _Tp&; }; template ::type> -using __apply_cv_t = __apply_cv_mf< +using __apply_cv_t _LIBCPP_NODEBUG_TYPE = __apply_cv_mf< is_lvalue_reference<_Tp>::value, is_const<_RawTp>::value, is_volatile<_RawTp>::value>; @@ -352,7 +347,7 @@ template