diff --git a/CMakeLists.txt b/CMakeLists.txt index b9b9310946..d51f753ac0 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -537,6 +537,7 @@ set(ZIG_STAGE2_SOURCES "${CMAKE_SOURCE_DIR}/src/ir.zig" "${CMAKE_SOURCE_DIR}/src/libc_installation.zig" "${CMAKE_SOURCE_DIR}/src/libcxx.zig" + "${CMAKE_SOURCE_DIR}/src/libtsan.zig" "${CMAKE_SOURCE_DIR}/src/libunwind.zig" "${CMAKE_SOURCE_DIR}/src/link.zig" "${CMAKE_SOURCE_DIR}/src/link/C.zig" diff --git a/lib/libunwind/src/gcc_personality_v0.c b/lib/libunwind/src/gcc_personality_v0.c new file mode 100644 index 0000000000..c946497d75 --- /dev/null +++ b/lib/libunwind/src/gcc_personality_v0.c @@ -0,0 +1,234 @@ +//===-- gcc_personality_v0.c - Implement __gcc_personality_v0 -------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#if __ARM_EABI__ +#ifdef COMPILER_RT_ARMHF_TARGET +#define COMPILER_RT_ABI +#else +#define COMPILER_RT_ABI __attribute__((__pcs__("aapcs"))) +#endif +#else +#define COMPILER_RT_ABI +#endif + +#define compilerrt_abort() __builtin_unreachable() + +#include + +// Pointer encodings documented at: +// http://refspecs.freestandards.org/LSB_1.3.0/gLSB/gLSB/ehframehdr.html + +#define DW_EH_PE_omit 0xff // no data follows + +#define DW_EH_PE_absptr 0x00 +#define DW_EH_PE_uleb128 0x01 +#define DW_EH_PE_udata2 0x02 +#define DW_EH_PE_udata4 0x03 +#define DW_EH_PE_udata8 0x04 +#define DW_EH_PE_sleb128 0x09 +#define DW_EH_PE_sdata2 0x0A +#define DW_EH_PE_sdata4 0x0B +#define DW_EH_PE_sdata8 0x0C + +#define DW_EH_PE_pcrel 0x10 +#define DW_EH_PE_textrel 0x20 +#define DW_EH_PE_datarel 0x30 +#define DW_EH_PE_funcrel 0x40 +#define DW_EH_PE_aligned 0x50 +#define DW_EH_PE_indirect 0x80 // gcc extension + +// read a uleb128 encoded value and advance pointer +static uintptr_t readULEB128(const uint8_t **data) { + uintptr_t result = 0; + uintptr_t shift = 0; + unsigned char byte; + const uint8_t *p = *data; + do { + byte = *p++; + result |= (byte & 0x7f) << shift; + shift += 7; + } while (byte & 0x80); + *data = p; + return result; +} + +// read a pointer encoded value and advance pointer +static uintptr_t readEncodedPointer(const uint8_t **data, uint8_t encoding) { + const uint8_t *p = *data; + uintptr_t result = 0; + + if (encoding == DW_EH_PE_omit) + return 0; + + // first get value + switch (encoding & 0x0F) { + case DW_EH_PE_absptr: + result = *((const uintptr_t *)p); + p += sizeof(uintptr_t); + break; + case DW_EH_PE_uleb128: + result = readULEB128(&p); + break; + case DW_EH_PE_udata2: + result = *((const uint16_t *)p); + p += sizeof(uint16_t); + break; + case DW_EH_PE_udata4: + result = *((const uint32_t *)p); + p += sizeof(uint32_t); + break; + case DW_EH_PE_udata8: + result = *((const uint64_t *)p); + p += sizeof(uint64_t); + break; + case DW_EH_PE_sdata2: + result = *((const int16_t *)p); + p += sizeof(int16_t); + break; + case DW_EH_PE_sdata4: + result = *((const int32_t *)p); + p += sizeof(int32_t); + break; + case DW_EH_PE_sdata8: + result = *((const int64_t *)p); + p += sizeof(int64_t); + break; + case DW_EH_PE_sleb128: + default: + // not supported + compilerrt_abort(); + break; + } + + // then add relative offset + switch (encoding & 0x70) { + case DW_EH_PE_absptr: + // do nothing + break; + case DW_EH_PE_pcrel: + result += (uintptr_t)(*data); + break; + case DW_EH_PE_textrel: + case DW_EH_PE_datarel: + case DW_EH_PE_funcrel: + case DW_EH_PE_aligned: + default: + // not supported + compilerrt_abort(); + break; + } + + // then apply indirection + if (encoding & DW_EH_PE_indirect) { + result = *((const uintptr_t *)result); + } + + *data = p; + return result; +} + +#if defined(__arm__) && !defined(__USING_SJLJ_EXCEPTIONS__) && \ + !defined(__ARM_DWARF_EH__) +#define USING_ARM_EHABI 1 +_Unwind_Reason_Code __gnu_unwind_frame(struct _Unwind_Exception *, + struct _Unwind_Context *); +#endif + +static inline _Unwind_Reason_Code +continueUnwind(struct _Unwind_Exception *exceptionObject, + struct _Unwind_Context *context) { +#if USING_ARM_EHABI + // On ARM EHABI the personality routine is responsible for actually + // unwinding a single stack frame before returning (ARM EHABI Sec. 6.1). + if (__gnu_unwind_frame(exceptionObject, context) != _URC_OK) + return _URC_FAILURE; +#endif + return _URC_CONTINUE_UNWIND; +} + +// The C compiler makes references to __gcc_personality_v0 in +// the dwarf unwind information for translation units that use +// __attribute__((cleanup(xx))) on local variables. +// This personality routine is called by the system unwinder +// on each frame as the stack is unwound during a C++ exception +// throw through a C function compiled with -fexceptions. +#if __USING_SJLJ_EXCEPTIONS__ +// the setjump-longjump based exceptions personality routine has a +// different name +COMPILER_RT_ABI _Unwind_Reason_Code __gcc_personality_sj0( + int version, _Unwind_Action actions, uint64_t exceptionClass, + struct _Unwind_Exception *exceptionObject, struct _Unwind_Context *context) +#elif USING_ARM_EHABI +// The ARM EHABI personality routine has a different signature. +COMPILER_RT_ABI _Unwind_Reason_Code __gcc_personality_v0( + _Unwind_State state, struct _Unwind_Exception *exceptionObject, + struct _Unwind_Context *context) +#else +COMPILER_RT_ABI _Unwind_Reason_Code __gcc_personality_v0( + int version, _Unwind_Action actions, uint64_t exceptionClass, + struct _Unwind_Exception *exceptionObject, struct _Unwind_Context *context) +#endif +{ + // Since C does not have catch clauses, there is nothing to do during + // phase 1 (the search phase). +#if USING_ARM_EHABI + // After resuming from a cleanup we should also continue on to the next + // frame straight away. + if ((state & _US_ACTION_MASK) != _US_UNWIND_FRAME_STARTING) +#else + if (actions & _UA_SEARCH_PHASE) +#endif + return continueUnwind(exceptionObject, context); + + // There is nothing to do if there is no LSDA for this frame. + const uint8_t *lsda = (uint8_t *)_Unwind_GetLanguageSpecificData(context); + if (lsda == (uint8_t *)0) + return continueUnwind(exceptionObject, context); + + uintptr_t pc = (uintptr_t)_Unwind_GetIP(context) - 1; + uintptr_t funcStart = (uintptr_t)_Unwind_GetRegionStart(context); + uintptr_t pcOffset = pc - funcStart; + + // Parse LSDA header. + uint8_t lpStartEncoding = *lsda++; + if (lpStartEncoding != DW_EH_PE_omit) { + readEncodedPointer(&lsda, lpStartEncoding); + } + uint8_t ttypeEncoding = *lsda++; + if (ttypeEncoding != DW_EH_PE_omit) { + readULEB128(&lsda); + } + // Walk call-site table looking for range that includes current PC. + uint8_t callSiteEncoding = *lsda++; + uint32_t callSiteTableLength = readULEB128(&lsda); + const uint8_t *callSiteTableStart = lsda; + const uint8_t *callSiteTableEnd = callSiteTableStart + callSiteTableLength; + const uint8_t *p = callSiteTableStart; + while (p < callSiteTableEnd) { + uintptr_t start = readEncodedPointer(&p, callSiteEncoding); + uintptr_t length = readEncodedPointer(&p, callSiteEncoding); + uintptr_t landingPad = readEncodedPointer(&p, callSiteEncoding); + readULEB128(&p); // action value not used for C code + if (landingPad == 0) + continue; // no landing pad for this entry + if ((start <= pcOffset) && (pcOffset < (start + length))) { + // Found landing pad for the PC. + // Set Instruction Pointer to so we re-enter function + // at landing pad. The landing pad is created by the compiler + // to take two parameters in registers. + _Unwind_SetGR(context, __builtin_eh_return_data_regno(0), + (uintptr_t)exceptionObject); + _Unwind_SetGR(context, __builtin_eh_return_data_regno(1), 0); + _Unwind_SetIP(context, (funcStart + landingPad)); + return _URC_INSTALL_CONTEXT; + } + } + + // No landing pad found, continue unwinding. + return continueUnwind(exceptionObject, context); +} diff --git a/lib/tsan/interception/interception.h b/lib/tsan/interception/interception.h new file mode 100644 index 0000000000..d27a8ccf92 --- /dev/null +++ b/lib/tsan/interception/interception.h @@ -0,0 +1,304 @@ +//===-- interception.h ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of AddressSanitizer, an address sanity checker. +// +// Machinery for providing replacements/wrappers for system functions. +//===----------------------------------------------------------------------===// + +#ifndef INTERCEPTION_H +#define INTERCEPTION_H + +#include "sanitizer_common/sanitizer_internal_defs.h" + +#if !SANITIZER_LINUX && !SANITIZER_FREEBSD && !SANITIZER_MAC && \ + !SANITIZER_NETBSD && !SANITIZER_OPENBSD && !SANITIZER_WINDOWS && \ + !SANITIZER_FUCHSIA && !SANITIZER_RTEMS && !SANITIZER_SOLARIS +# error "Interception doesn't work on this operating system." +#endif + +// These typedefs should be used only in the interceptor definitions to replace +// the standard system types (e.g. SSIZE_T instead of ssize_t) +typedef __sanitizer::uptr SIZE_T; +typedef __sanitizer::sptr SSIZE_T; +typedef __sanitizer::sptr PTRDIFF_T; +typedef __sanitizer::s64 INTMAX_T; +typedef __sanitizer::u64 UINTMAX_T; +typedef __sanitizer::OFF_T OFF_T; +typedef __sanitizer::OFF64_T OFF64_T; + +// How to add an interceptor: +// Suppose you need to wrap/replace system function (generally, from libc): +// int foo(const char *bar, double baz); +// You'll need to: +// 1) define INTERCEPTOR(int, foo, const char *bar, double baz) { ... } in +// your source file. See the notes below for cases when +// INTERCEPTOR_WITH_SUFFIX(...) should be used instead. +// 2) Call "INTERCEPT_FUNCTION(foo)" prior to the first call of "foo". +// INTERCEPT_FUNCTION(foo) evaluates to "true" iff the function was +// intercepted successfully. +// You can access original function by calling REAL(foo)(bar, baz). +// By default, REAL(foo) will be visible only inside your interceptor, and if +// you want to use it in other parts of RTL, you'll need to: +// 3a) add DECLARE_REAL(int, foo, const char*, double) to a +// header file. +// However, if the call "INTERCEPT_FUNCTION(foo)" and definition for +// INTERCEPTOR(..., foo, ...) are in different files, you'll instead need to: +// 3b) add DECLARE_REAL_AND_INTERCEPTOR(int, foo, const char*, double) +// to a header file. + +// Notes: 1. Things may not work properly if macro INTERCEPTOR(...) {...} or +// DECLARE_REAL(...) are located inside namespaces. +// 2. On Mac you can also use: "OVERRIDE_FUNCTION(foo, zoo)" to +// effectively redirect calls from "foo" to "zoo". In this case +// you aren't required to implement +// INTERCEPTOR(int, foo, const char *bar, double baz) {...} +// but instead you'll have to add +// DECLARE_REAL(int, foo, const char *bar, double baz) in your +// source file (to define a pointer to overriden function). +// 3. Some Mac functions have symbol variants discriminated by +// additional suffixes, e.g. _$UNIX2003 (see +// https://developer.apple.com/library/mac/#releasenotes/Darwin/SymbolVariantsRelNotes/index.html +// for more details). To intercept such functions you need to use the +// INTERCEPTOR_WITH_SUFFIX(...) macro. + +// How it works: +// To replace system functions on Linux we just need to declare functions +// with same names in our library and then obtain the real function pointers +// using dlsym(). +// There is one complication. A user may also intercept some of the functions +// we intercept. To resolve this we declare our interceptors with __interceptor_ +// prefix, and then make actual interceptors weak aliases to __interceptor_ +// functions. +// +// This is not so on Mac OS, where the two-level namespace makes +// our replacement functions invisible to other libraries. This may be overcomed +// using the DYLD_FORCE_FLAT_NAMESPACE, but some errors loading the shared +// libraries in Chromium were noticed when doing so. +// Instead we create a dylib containing a __DATA,__interpose section that +// associates library functions with their wrappers. When this dylib is +// preloaded before an executable using DYLD_INSERT_LIBRARIES, it routes all +// the calls to interposed functions done through stubs to the wrapper +// functions. +// As it's decided at compile time which functions are to be intercepted on Mac, +// INTERCEPT_FUNCTION() is effectively a no-op on this system. + +#if SANITIZER_MAC +#include // For __DARWIN_ALIAS_C(). + +// Just a pair of pointers. +struct interpose_substitution { + const __sanitizer::uptr replacement; + const __sanitizer::uptr original; +}; + +// For a function foo() create a global pair of pointers { wrap_foo, foo } in +// the __DATA,__interpose section. +// As a result all the calls to foo() will be routed to wrap_foo() at runtime. +#define INTERPOSER(func_name) __attribute__((used)) \ +const interpose_substitution substitution_##func_name[] \ + __attribute__((section("__DATA, __interpose"))) = { \ + { reinterpret_cast(WRAP(func_name)), \ + reinterpret_cast(func_name) } \ +} + +// For a function foo() and a wrapper function bar() create a global pair +// of pointers { bar, foo } in the __DATA,__interpose section. +// As a result all the calls to foo() will be routed to bar() at runtime. +#define INTERPOSER_2(func_name, wrapper_name) __attribute__((used)) \ +const interpose_substitution substitution_##func_name[] \ + __attribute__((section("__DATA, __interpose"))) = { \ + { reinterpret_cast(wrapper_name), \ + reinterpret_cast(func_name) } \ +} + +# define WRAP(x) wrap_##x +# define WRAPPER_NAME(x) "wrap_"#x +# define INTERCEPTOR_ATTRIBUTE +# define DECLARE_WRAPPER(ret_type, func, ...) + +#elif SANITIZER_WINDOWS +# define WRAP(x) __asan_wrap_##x +# define WRAPPER_NAME(x) "__asan_wrap_"#x +# define INTERCEPTOR_ATTRIBUTE __declspec(dllexport) +# define DECLARE_WRAPPER(ret_type, func, ...) \ + extern "C" ret_type func(__VA_ARGS__); +# define DECLARE_WRAPPER_WINAPI(ret_type, func, ...) \ + extern "C" __declspec(dllimport) ret_type __stdcall func(__VA_ARGS__); +#elif SANITIZER_RTEMS +# define WRAP(x) x +# define WRAPPER_NAME(x) #x +# define INTERCEPTOR_ATTRIBUTE +# define DECLARE_WRAPPER(ret_type, func, ...) +#elif SANITIZER_FREEBSD || SANITIZER_NETBSD +# define WRAP(x) __interceptor_ ## x +# define WRAPPER_NAME(x) "__interceptor_" #x +# define INTERCEPTOR_ATTRIBUTE __attribute__((visibility("default"))) +// FreeBSD's dynamic linker (incompliantly) gives non-weak symbols higher +// priority than weak ones so weak aliases won't work for indirect calls +// in position-independent (-fPIC / -fPIE) mode. +# define DECLARE_WRAPPER(ret_type, func, ...) \ + extern "C" ret_type func(__VA_ARGS__) \ + __attribute__((alias("__interceptor_" #func), visibility("default"))); +#elif !SANITIZER_FUCHSIA +# define WRAP(x) __interceptor_ ## x +# define WRAPPER_NAME(x) "__interceptor_" #x +# define INTERCEPTOR_ATTRIBUTE __attribute__((visibility("default"))) +# define DECLARE_WRAPPER(ret_type, func, ...) \ + extern "C" ret_type func(__VA_ARGS__) \ + __attribute__((weak, alias("__interceptor_" #func), visibility("default"))); +#endif + +#if SANITIZER_FUCHSIA +// There is no general interception at all on Fuchsia. +// Sanitizer runtimes just define functions directly to preempt them, +// and have bespoke ways to access the underlying libc functions. +# include +# define INTERCEPTOR_ATTRIBUTE __attribute__((visibility("default"))) +# define REAL(x) __unsanitized_##x +# define DECLARE_REAL(ret_type, func, ...) +#elif SANITIZER_RTEMS +# define REAL(x) __real_ ## x +# define DECLARE_REAL(ret_type, func, ...) \ + extern "C" ret_type REAL(func)(__VA_ARGS__); +#elif !SANITIZER_MAC +# define PTR_TO_REAL(x) real_##x +# define REAL(x) __interception::PTR_TO_REAL(x) +# define FUNC_TYPE(x) x##_type + +# define DECLARE_REAL(ret_type, func, ...) \ + typedef ret_type (*FUNC_TYPE(func))(__VA_ARGS__); \ + namespace __interception { \ + extern FUNC_TYPE(func) PTR_TO_REAL(func); \ + } +# define ASSIGN_REAL(dst, src) REAL(dst) = REAL(src) +#else // SANITIZER_MAC +# define REAL(x) x +# define DECLARE_REAL(ret_type, func, ...) \ + extern "C" ret_type func(__VA_ARGS__); +# define ASSIGN_REAL(x, y) +#endif // SANITIZER_MAC + +#if !SANITIZER_FUCHSIA && !SANITIZER_RTEMS +# define DECLARE_REAL_AND_INTERCEPTOR(ret_type, func, ...) \ + DECLARE_REAL(ret_type, func, __VA_ARGS__) \ + extern "C" ret_type WRAP(func)(__VA_ARGS__); +// Declare an interceptor and its wrapper defined in a different translation +// unit (ex. asm). +# define DECLARE_EXTERN_INTERCEPTOR_AND_WRAPPER(ret_type, func, ...) \ + extern "C" ret_type WRAP(func)(__VA_ARGS__); \ + extern "C" ret_type func(__VA_ARGS__); +#else +# define DECLARE_REAL_AND_INTERCEPTOR(ret_type, func, ...) +# define DECLARE_EXTERN_INTERCEPTOR_AND_WRAPPER(ret_type, func, ...) +#endif + +// Generally, you don't need to use DEFINE_REAL by itself, as INTERCEPTOR +// macros does its job. In exceptional cases you may need to call REAL(foo) +// without defining INTERCEPTOR(..., foo, ...). For example, if you override +// foo with an interceptor for other function. +#if !SANITIZER_MAC && !SANITIZER_FUCHSIA && !SANITIZER_RTEMS +# define DEFINE_REAL(ret_type, func, ...) \ + typedef ret_type (*FUNC_TYPE(func))(__VA_ARGS__); \ + namespace __interception { \ + FUNC_TYPE(func) PTR_TO_REAL(func); \ + } +#else +# define DEFINE_REAL(ret_type, func, ...) +#endif + +#if SANITIZER_FUCHSIA + +// We need to define the __interceptor_func name just to get +// sanitizer_common/scripts/gen_dynamic_list.py to export func. +// But we don't need to export __interceptor_func to get that. +#define INTERCEPTOR(ret_type, func, ...) \ + extern "C"[[ gnu::alias(#func), gnu::visibility("hidden") ]] ret_type \ + __interceptor_##func(__VA_ARGS__); \ + extern "C" INTERCEPTOR_ATTRIBUTE ret_type func(__VA_ARGS__) + +#elif !SANITIZER_MAC + +#define INTERCEPTOR(ret_type, func, ...) \ + DEFINE_REAL(ret_type, func, __VA_ARGS__) \ + DECLARE_WRAPPER(ret_type, func, __VA_ARGS__) \ + extern "C" \ + INTERCEPTOR_ATTRIBUTE \ + ret_type WRAP(func)(__VA_ARGS__) + +// We don't need INTERCEPTOR_WITH_SUFFIX on non-Darwin for now. +#define INTERCEPTOR_WITH_SUFFIX(ret_type, func, ...) \ + INTERCEPTOR(ret_type, func, __VA_ARGS__) + +#else // SANITIZER_MAC + +#define INTERCEPTOR_ZZZ(suffix, ret_type, func, ...) \ + extern "C" ret_type func(__VA_ARGS__) suffix; \ + extern "C" ret_type WRAP(func)(__VA_ARGS__); \ + INTERPOSER(func); \ + extern "C" INTERCEPTOR_ATTRIBUTE ret_type WRAP(func)(__VA_ARGS__) + +#define INTERCEPTOR(ret_type, func, ...) \ + INTERCEPTOR_ZZZ(/*no symbol variants*/, ret_type, func, __VA_ARGS__) + +#define INTERCEPTOR_WITH_SUFFIX(ret_type, func, ...) \ + INTERCEPTOR_ZZZ(__DARWIN_ALIAS_C(func), ret_type, func, __VA_ARGS__) + +// Override |overridee| with |overrider|. +#define OVERRIDE_FUNCTION(overridee, overrider) \ + INTERPOSER_2(overridee, WRAP(overrider)) +#endif + +#if SANITIZER_WINDOWS +# define INTERCEPTOR_WINAPI(ret_type, func, ...) \ + typedef ret_type (__stdcall *FUNC_TYPE(func))(__VA_ARGS__); \ + namespace __interception { \ + FUNC_TYPE(func) PTR_TO_REAL(func); \ + } \ + extern "C" \ + INTERCEPTOR_ATTRIBUTE \ + ret_type __stdcall WRAP(func)(__VA_ARGS__) +#endif + +// ISO C++ forbids casting between pointer-to-function and pointer-to-object, +// so we use casting via an integral type __interception::uptr, +// assuming that system is POSIX-compliant. Using other hacks seem +// challenging, as we don't even pass function type to +// INTERCEPT_FUNCTION macro, only its name. +namespace __interception { +#if defined(_WIN64) +typedef unsigned long long uptr; +#else +typedef unsigned long uptr; +#endif // _WIN64 +} // namespace __interception + +#define INCLUDED_FROM_INTERCEPTION_LIB + +#if SANITIZER_LINUX || SANITIZER_FREEBSD || SANITIZER_NETBSD || \ + SANITIZER_OPENBSD || SANITIZER_SOLARIS + +# include "interception_linux.h" +# define INTERCEPT_FUNCTION(func) INTERCEPT_FUNCTION_LINUX_OR_FREEBSD(func) +# define INTERCEPT_FUNCTION_VER(func, symver) \ + INTERCEPT_FUNCTION_VER_LINUX_OR_FREEBSD(func, symver) +#elif SANITIZER_MAC +# include "interception_mac.h" +# define INTERCEPT_FUNCTION(func) INTERCEPT_FUNCTION_MAC(func) +# define INTERCEPT_FUNCTION_VER(func, symver) \ + INTERCEPT_FUNCTION_VER_MAC(func, symver) +#elif SANITIZER_WINDOWS +# include "interception_win.h" +# define INTERCEPT_FUNCTION(func) INTERCEPT_FUNCTION_WIN(func) +# define INTERCEPT_FUNCTION_VER(func, symver) \ + INTERCEPT_FUNCTION_VER_WIN(func, symver) +#endif + +#undef INCLUDED_FROM_INTERCEPTION_LIB + +#endif // INTERCEPTION_H diff --git a/lib/tsan/interception/interception_linux.cpp b/lib/tsan/interception/interception_linux.cpp new file mode 100644 index 0000000000..950cd51265 --- /dev/null +++ b/lib/tsan/interception/interception_linux.cpp @@ -0,0 +1,83 @@ +//===-- interception_linux.cpp ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of AddressSanitizer, an address sanity checker. +// +// Linux-specific interception methods. +//===----------------------------------------------------------------------===// + +#include "interception.h" + +#if SANITIZER_LINUX || SANITIZER_FREEBSD || SANITIZER_NETBSD || \ + SANITIZER_OPENBSD || SANITIZER_SOLARIS + +#include // for dlsym() and dlvsym() + +namespace __interception { + +#if SANITIZER_NETBSD +static int StrCmp(const char *s1, const char *s2) { + while (true) { + if (*s1 != *s2) + return false; + if (*s1 == 0) + return true; + s1++; + s2++; + } +} +#endif + +static void *GetFuncAddr(const char *name, uptr wrapper_addr) { +#if SANITIZER_NETBSD + // FIXME: Find a better way to handle renames + if (StrCmp(name, "sigaction")) + name = "__sigaction14"; +#endif + void *addr = dlsym(RTLD_NEXT, name); + if (!addr) { + // If the lookup using RTLD_NEXT failed, the sanitizer runtime library is + // later in the library search order than the DSO that we are trying to + // intercept, which means that we cannot intercept this function. We still + // want the address of the real definition, though, so look it up using + // RTLD_DEFAULT. + addr = dlsym(RTLD_DEFAULT, name); + + // In case `name' is not loaded, dlsym ends up finding the actual wrapper. + // We don't want to intercept the wrapper and have it point to itself. + if ((uptr)addr == wrapper_addr) + addr = nullptr; + } + return addr; +} + +bool InterceptFunction(const char *name, uptr *ptr_to_real, uptr func, + uptr wrapper) { + void *addr = GetFuncAddr(name, wrapper); + *ptr_to_real = (uptr)addr; + return addr && (func == wrapper); +} + +// Android and Solaris do not have dlvsym +#if !SANITIZER_ANDROID && !SANITIZER_SOLARIS && !SANITIZER_OPENBSD +static void *GetFuncAddr(const char *name, const char *ver) { + return dlvsym(RTLD_NEXT, name, ver); +} + +bool InterceptFunction(const char *name, const char *ver, uptr *ptr_to_real, + uptr func, uptr wrapper) { + void *addr = GetFuncAddr(name, ver); + *ptr_to_real = (uptr)addr; + return addr && (func == wrapper); +} +#endif // !SANITIZER_ANDROID + +} // namespace __interception + +#endif // SANITIZER_LINUX || SANITIZER_FREEBSD || SANITIZER_NETBSD || + // SANITIZER_OPENBSD || SANITIZER_SOLARIS diff --git a/lib/tsan/interception/interception_linux.h b/lib/tsan/interception/interception_linux.h new file mode 100644 index 0000000000..e578da0cf6 --- /dev/null +++ b/lib/tsan/interception/interception_linux.h @@ -0,0 +1,53 @@ +//===-- interception_linux.h ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of AddressSanitizer, an address sanity checker. +// +// Linux-specific interception methods. +//===----------------------------------------------------------------------===// + +#if SANITIZER_LINUX || SANITIZER_FREEBSD || SANITIZER_NETBSD || \ + SANITIZER_OPENBSD || SANITIZER_SOLARIS + +#if !defined(INCLUDED_FROM_INTERCEPTION_LIB) +# error "interception_linux.h should be included from interception library only" +#endif + +#ifndef INTERCEPTION_LINUX_H +#define INTERCEPTION_LINUX_H + +namespace __interception { +bool InterceptFunction(const char *name, uptr *ptr_to_real, uptr func, + uptr wrapper); +bool InterceptFunction(const char *name, const char *ver, uptr *ptr_to_real, + uptr func, uptr wrapper); +} // namespace __interception + +#define INTERCEPT_FUNCTION_LINUX_OR_FREEBSD(func) \ + ::__interception::InterceptFunction( \ + #func, \ + (::__interception::uptr *) & REAL(func), \ + (::__interception::uptr) & (func), \ + (::__interception::uptr) & WRAP(func)) + +// Android, Solaris and OpenBSD do not have dlvsym +#if !SANITIZER_ANDROID && !SANITIZER_SOLARIS && !SANITIZER_OPENBSD +#define INTERCEPT_FUNCTION_VER_LINUX_OR_FREEBSD(func, symver) \ + ::__interception::InterceptFunction( \ + #func, symver, \ + (::__interception::uptr *) & REAL(func), \ + (::__interception::uptr) & (func), \ + (::__interception::uptr) & WRAP(func)) +#else +#define INTERCEPT_FUNCTION_VER_LINUX_OR_FREEBSD(func, symver) \ + INTERCEPT_FUNCTION_LINUX_OR_FREEBSD(func) +#endif // !SANITIZER_ANDROID && !SANITIZER_SOLARIS + +#endif // INTERCEPTION_LINUX_H +#endif // SANITIZER_LINUX || SANITIZER_FREEBSD || SANITIZER_NETBSD || + // SANITIZER_OPENBSD || SANITIZER_SOLARIS diff --git a/lib/tsan/interception/interception_mac.cpp b/lib/tsan/interception/interception_mac.cpp new file mode 100644 index 0000000000..fb6eadcff5 --- /dev/null +++ b/lib/tsan/interception/interception_mac.cpp @@ -0,0 +1,18 @@ +//===-- interception_mac.cpp ------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of AddressSanitizer, an address sanity checker. +// +// Mac-specific interception methods. +//===----------------------------------------------------------------------===// + +#include "interception.h" + +#if SANITIZER_MAC + +#endif // SANITIZER_MAC diff --git a/lib/tsan/interception/interception_mac.h b/lib/tsan/interception/interception_mac.h new file mode 100644 index 0000000000..eddedb8959 --- /dev/null +++ b/lib/tsan/interception/interception_mac.h @@ -0,0 +1,27 @@ +//===-- interception_mac.h --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of AddressSanitizer, an address sanity checker. +// +// Mac-specific interception methods. +//===----------------------------------------------------------------------===// + +#if SANITIZER_MAC + +#if !defined(INCLUDED_FROM_INTERCEPTION_LIB) +# error "interception_mac.h should be included from interception.h only" +#endif + +#ifndef INTERCEPTION_MAC_H +#define INTERCEPTION_MAC_H + +#define INTERCEPT_FUNCTION_MAC(func) +#define INTERCEPT_FUNCTION_VER_MAC(func, symver) + +#endif // INTERCEPTION_MAC_H +#endif // SANITIZER_MAC diff --git a/lib/tsan/interception/interception_type_test.cpp b/lib/tsan/interception/interception_type_test.cpp new file mode 100644 index 0000000000..a611604a70 --- /dev/null +++ b/lib/tsan/interception/interception_type_test.cpp @@ -0,0 +1,39 @@ +//===-- interception_type_test.cpp ------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of AddressSanitizer, an address sanity checker. +// +// Compile-time tests of the internal type definitions. +//===----------------------------------------------------------------------===// + +#include "interception.h" + +#if SANITIZER_LINUX || SANITIZER_MAC + +#include +#include +#include + +COMPILER_CHECK(sizeof(::SIZE_T) == sizeof(size_t)); +COMPILER_CHECK(sizeof(::SSIZE_T) == sizeof(ssize_t)); +COMPILER_CHECK(sizeof(::PTRDIFF_T) == sizeof(ptrdiff_t)); +COMPILER_CHECK(sizeof(::INTMAX_T) == sizeof(intmax_t)); + +#if !SANITIZER_MAC +COMPILER_CHECK(sizeof(::OFF64_T) == sizeof(off64_t)); +#endif + +// The following are the cases when pread (and friends) is used instead of +// pread64. In those cases we need OFF_T to match off_t. We don't care about the +// rest (they depend on _FILE_OFFSET_BITS setting when building an application). +# if SANITIZER_ANDROID || !defined _FILE_OFFSET_BITS || \ + _FILE_OFFSET_BITS != 64 +COMPILER_CHECK(sizeof(::OFF_T) == sizeof(off_t)); +# endif + +#endif diff --git a/lib/tsan/interception/interception_win.cpp b/lib/tsan/interception/interception_win.cpp new file mode 100644 index 0000000000..1a1c327e61 --- /dev/null +++ b/lib/tsan/interception/interception_win.cpp @@ -0,0 +1,1022 @@ +//===-- interception_linux.cpp ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file is a part of AddressSanitizer, an address sanity checker. +// +// Windows-specific interception methods. +// +// This file is implementing several hooking techniques to intercept calls +// to functions. The hooks are dynamically installed by modifying the assembly +// code. +// +// The hooking techniques are making assumptions on the way the code is +// generated and are safe under these assumptions. +// +// On 64-bit architecture, there is no direct 64-bit jump instruction. To allow +// arbitrary branching on the whole memory space, the notion of trampoline +// region is used. A trampoline region is a memory space withing 2G boundary +// where it is safe to add custom assembly code to build 64-bit jumps. +// +// Hooking techniques +// ================== +// +// 1) Detour +// +// The Detour hooking technique is assuming the presence of an header with +// padding and an overridable 2-bytes nop instruction (mov edi, edi). The +// nop instruction can safely be replaced by a 2-bytes jump without any need +// to save the instruction. A jump to the target is encoded in the function +// header and the nop instruction is replaced by a short jump to the header. +// +// head: 5 x nop head: jmp +// func: mov edi, edi --> func: jmp short +// [...] real: [...] +// +// This technique is only implemented on 32-bit architecture. +// Most of the time, Windows API are hookable with the detour technique. +// +// 2) Redirect Jump +// +// The redirect jump is applicable when the first instruction is a direct +// jump. The instruction is replaced by jump to the hook. +// +// func: jmp