diff --git a/src/coreclr/nativeaot/Runtime/CMakeLists.txt b/src/coreclr/nativeaot/Runtime/CMakeLists.txt index 3ad3ceba7c8f0e..20f8430d048786 100644 --- a/src/coreclr/nativeaot/Runtime/CMakeLists.txt +++ b/src/coreclr/nativeaot/Runtime/CMakeLists.txt @@ -175,6 +175,7 @@ else() unix/cgroup.cpp unix/HardwareExceptions.cpp unix/UnixContext.cpp + unix/UnixSignals.cpp unix/UnwindHelpers.cpp unix/UnixNativeCodeManager.cpp ../libunwind/src/Unwind-EHABI.cpp diff --git a/src/coreclr/nativeaot/Runtime/ICodeManager.h b/src/coreclr/nativeaot/Runtime/ICodeManager.h index fe2a4fe6a6b1bd..3477ba1f932ca2 100644 --- a/src/coreclr/nativeaot/Runtime/ICodeManager.h +++ b/src/coreclr/nativeaot/Runtime/ICodeManager.h @@ -28,7 +28,7 @@ enum GCRefKind : unsigned char GCRK_Scalar = 0x00, GCRK_Object = 0x01, GCRK_Byref = 0x02, -#ifdef TARGET_ARM64 +#ifdef TARGET_64BIT // Composite return kinds for value types returned in two registers (encoded with two bits per register) GCRK_Scalar_Obj = (GCRK_Object << 2) | GCRK_Scalar, GCRK_Obj_Obj = (GCRK_Object << 2) | GCRK_Object, @@ -66,6 +66,35 @@ inline GCRefKind TransitionFrameFlagsToReturnKind(uint64_t transFrameFlags) return returnKind; } +#elif defined(TARGET_AMD64) + +// Verify that we can use bitwise shifts to convert from GCRefKind to PInvokeTransitionFrameFlags and back +C_ASSERT(PTFF_RAX_IS_GCREF == ((uint64_t)GCRK_Object << 16)); +C_ASSERT(PTFF_RAX_IS_BYREF == ((uint64_t)GCRK_Byref << 16)); +C_ASSERT(PTFF_RDX_IS_GCREF == ((uint64_t)GCRK_Scalar_Obj << 16)); +C_ASSERT(PTFF_RDX_IS_BYREF == ((uint64_t)GCRK_Scalar_Byref << 16)); + +inline uint64_t ReturnKindToTransitionFrameFlags(GCRefKind returnKind) +{ + if (returnKind == GCRK_Scalar) + return 0; + + return PTFF_SAVE_RAX | PTFF_SAVE_RDX | ((uint64_t)returnKind << 16); +} + +inline GCRefKind TransitionFrameFlagsToReturnKind(uint64_t transFrameFlags) +{ + GCRefKind returnKind = (GCRefKind)((transFrameFlags & (PTFF_RAX_IS_GCREF | PTFF_RAX_IS_BYREF | PTFF_RDX_IS_GCREF | PTFF_RDX_IS_BYREF)) >> 16); +#if defined(TARGET_UNIX) + ASSERT((returnKind == GCRK_Scalar) || ((transFrameFlags & PTFF_SAVE_RAX) && (transFrameFlags & PTFF_SAVE_RDX))); +#else + ASSERT((returnKind == GCRK_Scalar) || (transFrameFlags & PTFF_SAVE_RAX)); +#endif + return returnKind; +} + +#endif + // Extract individual GCRefKind components from a composite return kind inline GCRefKind ExtractReg0ReturnKind(GCRefKind returnKind) { @@ -78,7 +107,6 @@ inline GCRefKind ExtractReg1ReturnKind(GCRefKind returnKind) ASSERT(returnKind <= GCRK_LastValid); return (GCRefKind)(returnKind >> 2); } -#endif // TARGET_ARM64 // // MethodInfo is placeholder type used to allocate space for MethodInfo. 
Maximum size @@ -162,6 +190,8 @@ class ICodeManager virtual uintptr_t GetConservativeUpperBoundForOutgoingArgs(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet) = 0; + virtual bool IsUnwindable(PTR_VOID pvAddress) = 0; + virtual bool GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet, // in PTR_PTR_VOID * ppvRetAddrLocation, // out diff --git a/src/coreclr/nativeaot/Runtime/PalRedhawk.h b/src/coreclr/nativeaot/Runtime/PalRedhawk.h index 82293920996ee1..5e8751f694544e 100644 --- a/src/coreclr/nativeaot/Runtime/PalRedhawk.h +++ b/src/coreclr/nativeaot/Runtime/PalRedhawk.h @@ -596,8 +596,6 @@ REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalGetCompleteThreadContext(HANDLE hThread REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalSetThreadContext(HANDLE hThread, _Out_ CONTEXT * pCtx); REDHAWK_PALIMPORT void REDHAWK_PALAPI PalRestoreContext(CONTEXT * pCtx); -REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalGetThreadContext(HANDLE hThread, _Out_ PAL_LIMITED_CONTEXT * pCtx); - REDHAWK_PALIMPORT int32_t REDHAWK_PALAPI PalGetProcessCpuCount(); // Retrieves the entire range of memory dedicated to the calling thread's stack. This does @@ -683,6 +681,13 @@ REDHAWK_PALIMPORT uint64_t REDHAWK_PALAPI PalGetTickCount64(); REDHAWK_PALIMPORT void REDHAWK_PALAPI PalTerminateCurrentProcess(uint32_t exitCode); REDHAWK_PALIMPORT HANDLE REDHAWK_PALAPI PalGetModuleHandleFromPointer(_In_ void* pointer); +#ifdef TARGET_UNIX +struct UNIX_CONTEXT; +#define NATIVE_CONTEXT UNIX_CONTEXT +#else +#define NATIVE_CONTEXT CONTEXT +#endif + #ifdef TARGET_UNIX REDHAWK_PALIMPORT void REDHAWK_PALAPI PalSetHardwareExceptionHandler(PHARDWARE_EXCEPTION_HANDLER handler); #else @@ -693,8 +698,9 @@ typedef uint32_t (__stdcall *BackgroundCallback)(_In_opt_ void* pCallbackContext REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalStartBackgroundGCThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext); REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalStartFinalizerThread(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext); -typedef UInt32_BOOL (*PalHijackCallback)(HANDLE hThread, _In_ PAL_LIMITED_CONTEXT* pThreadContext, _In_opt_ void* pCallbackContext); -REDHAWK_PALIMPORT uint32_t REDHAWK_PALAPI PalHijack(HANDLE hThread, _In_ PalHijackCallback callback, _In_opt_ void* pCallbackContext); +typedef void (*PalHijackCallback)(_In_ NATIVE_CONTEXT* pThreadContext, _In_opt_ void* pThreadToHijack); +REDHAWK_PALIMPORT void REDHAWK_PALAPI PalHijack(HANDLE hThread, _In_opt_ void* pThreadToHijack); +REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalRegisterHijackCallback(_In_ PalHijackCallback callback); #ifdef FEATURE_ETW REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalEventEnabled(REGHANDLE regHandle, _In_ const EVENT_DESCRIPTOR* eventDescriptor); diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index 5a3197ff649c62..39fb367410c341 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -27,6 +27,10 @@ #include "RuntimeInstance.h" #include "rhbinder.h" +#ifdef TARGET_UNIX +#include "UnixContext.h" +#endif + // warning C4061: enumerator '{blah}' in switch of enum '{blarg}' is not explicitly handled by a case label #pragma warning(disable:4061) @@ -93,13 +97,11 @@ StackFrameIterator::StackFrameIterator(Thread * pThreadToWalk, PInvokeTransition STRESS_LOG0(LF_STACKWALK, LL_INFO10000, "----Init---- [ GC ]\n"); ASSERT(!pThreadToWalk->DangerousCrossThreadIsHijacked()); -#ifdef 
FEATURE_SUSPEND_REDIRECTION - if (pInitialTransitionFrame == REDIRECTED_THREAD_MARKER) + if (pInitialTransitionFrame == INTERRUPTED_THREAD_MARKER) { - InternalInit(pThreadToWalk, pThreadToWalk->GetRedirectionContext(), GcStackWalkFlags | ActiveStackFrame); + InternalInit(pThreadToWalk, pThreadToWalk->GetInterruptedContext(), GcStackWalkFlags | ActiveStackFrame); } else -#endif { InternalInit(pThreadToWalk, pInitialTransitionFrame, GcStackWalkFlags); } @@ -109,7 +111,14 @@ StackFrameIterator::StackFrameIterator(Thread * pThreadToWalk, PInvokeTransition StackFrameIterator::StackFrameIterator(Thread * pThreadToWalk, PTR_PAL_LIMITED_CONTEXT pCtx) { - STRESS_LOG0(LF_STACKWALK, LL_INFO10000, "----Init---- [ hijack ]\n"); + STRESS_LOG0(LF_STACKWALK, LL_INFO10000, "----Init with limited ctx---- [ hijack ]\n"); + InternalInit(pThreadToWalk, pCtx, 0); + PrepareToYieldFrame(); +} + +StackFrameIterator::StackFrameIterator(Thread* pThreadToWalk, NATIVE_CONTEXT* pCtx) +{ + STRESS_LOG0(LF_STACKWALK, LL_INFO10000, "----Init with native ctx---- [ hijack ]\n"); InternalInit(pThreadToWalk, pCtx, 0); PrepareToYieldFrame(); } @@ -283,15 +292,11 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PInvokeTransitionF if (pFrame->m_Flags & PTFF_SAVE_R11) { m_RegDisplay.pR11 = pPreservedRegsCursor++; } #endif // TARGET_AMD64 - if (pFrame->m_Flags & PTFF_RAX_IS_GCREF) - { - m_pHijackedReturnValue = (PTR_RtuObjectRef) m_RegDisplay.pRax; - m_HijackedReturnValueKind = GCRK_Object; - } - if (pFrame->m_Flags & PTFF_RAX_IS_BYREF) + GCRefKind retValueKind = TransitionFrameFlagsToReturnKind(pFrame->m_Flags); + if (retValueKind != GCRK_Scalar) { - m_pHijackedReturnValue = (PTR_RtuObjectRef) m_RegDisplay.pRax; - m_HijackedReturnValueKind = GCRK_Byref; + m_pHijackedReturnValue = (PTR_RtuObjectRef)m_RegDisplay.pRax; + m_HijackedReturnValueKind = retValueKind; } #endif // TARGET_ARM @@ -508,10 +513,9 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PTR_PAL_LIMITED_CO #endif // TARGET_ARM } -// Prepare to start a stack walk from the context listed in the supplied CONTEXT. -// The supplied context can describe a location in either managed or unmanaged code. In the -// latter case the iterator is left in an invalid state when this function returns. -void StackFrameIterator::InternalInit(Thread * pThreadToWalk, CONTEXT* pCtx, uint32_t dwFlags) +// Prepare to start a stack walk from the context listed in the supplied NATIVE_CONTEXT. +// The supplied context must describe a location in managed code. +void StackFrameIterator::InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pCtx, uint32_t dwFlags) { ASSERT((dwFlags & MethodStateCalculated) == 0); @@ -524,10 +528,8 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, CONTEXT* pCtx, uin // properly walk it in parallel. ResetNextExInfoForSP(pCtx->GetSp()); - // This codepath is used by the hijack stackwalk and we can get arbitrary ControlPCs from there. If this - // context has a non-managed control PC, then we're done. - if (!m_pInstance->IsManaged(dac_cast<PTR_VOID>(pCtx->GetIp()))) - return; + // This codepath is used by the hijack stackwalk. The IP must be in managed code. 
+ ASSERT(m_pInstance->IsManaged(dac_cast<PTR_VOID>(pCtx->GetIp()))); // // control state @@ -536,82 +538,90 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, CONTEXT* pCtx, uin m_RegDisplay.SP = pCtx->GetSp(); m_RegDisplay.IP = pCtx->GetIp(); +#ifdef TARGET_UNIX +#define PTR_TO_REG(ptr, reg) (&((ptr)->reg())) +#else +#define PTR_TO_REG(ptr, reg) (&((ptr)->reg)) +#endif + #ifdef TARGET_ARM64 - m_RegDisplay.pIP = PTR_TO_MEMBER(CONTEXT, pCtx, Pc); + m_RegDisplay.pIP = (PTR_PCODE)PTR_TO_REG(pCtx, Pc); // // preserved regs // - m_RegDisplay.pX19 = PTR_TO_MEMBER(CONTEXT, pCtx, X19); - m_RegDisplay.pX20 = PTR_TO_MEMBER(CONTEXT, pCtx, X20); - m_RegDisplay.pX21 = PTR_TO_MEMBER(CONTEXT, pCtx, X21); - m_RegDisplay.pX22 = PTR_TO_MEMBER(CONTEXT, pCtx, X22); - m_RegDisplay.pX23 = PTR_TO_MEMBER(CONTEXT, pCtx, X23); - m_RegDisplay.pX24 = PTR_TO_MEMBER(CONTEXT, pCtx, X24); - m_RegDisplay.pX25 = PTR_TO_MEMBER(CONTEXT, pCtx, X25); - m_RegDisplay.pX26 = PTR_TO_MEMBER(CONTEXT, pCtx, X26); - m_RegDisplay.pX27 = PTR_TO_MEMBER(CONTEXT, pCtx, X27); - m_RegDisplay.pX28 = PTR_TO_MEMBER(CONTEXT, pCtx, X28); - m_RegDisplay.pFP = PTR_TO_MEMBER(CONTEXT, pCtx, Fp); - m_RegDisplay.pLR = PTR_TO_MEMBER(CONTEXT, pCtx, Lr); + m_RegDisplay.pX19 = (PTR_UIntNative)PTR_TO_REG(pCtx, X19); + m_RegDisplay.pX20 = (PTR_UIntNative)PTR_TO_REG(pCtx, X20); + m_RegDisplay.pX21 = (PTR_UIntNative)PTR_TO_REG(pCtx, X21); + m_RegDisplay.pX22 = (PTR_UIntNative)PTR_TO_REG(pCtx, X22); + m_RegDisplay.pX23 = (PTR_UIntNative)PTR_TO_REG(pCtx, X23); + m_RegDisplay.pX24 = (PTR_UIntNative)PTR_TO_REG(pCtx, X24); + m_RegDisplay.pX25 = (PTR_UIntNative)PTR_TO_REG(pCtx, X25); + m_RegDisplay.pX26 = (PTR_UIntNative)PTR_TO_REG(pCtx, X26); + m_RegDisplay.pX27 = (PTR_UIntNative)PTR_TO_REG(pCtx, X27); + m_RegDisplay.pX28 = (PTR_UIntNative)PTR_TO_REG(pCtx, X28); + m_RegDisplay.pFP = (PTR_UIntNative)PTR_TO_REG(pCtx, Fp); + m_RegDisplay.pLR = (PTR_UIntNative)PTR_TO_REG(pCtx, Lr); // // scratch regs // - m_RegDisplay.pX0 = PTR_TO_MEMBER(CONTEXT, pCtx, X0); - m_RegDisplay.pX1 = PTR_TO_MEMBER(CONTEXT, pCtx, X1); - m_RegDisplay.pX2 = PTR_TO_MEMBER(CONTEXT, pCtx, X2); - m_RegDisplay.pX3 = PTR_TO_MEMBER(CONTEXT, pCtx, X3); - m_RegDisplay.pX4 = PTR_TO_MEMBER(CONTEXT, pCtx, X4); - m_RegDisplay.pX5 = PTR_TO_MEMBER(CONTEXT, pCtx, X5); - m_RegDisplay.pX6 = PTR_TO_MEMBER(CONTEXT, pCtx, X6); - m_RegDisplay.pX7 = PTR_TO_MEMBER(CONTEXT, pCtx, X7); - m_RegDisplay.pX8 = PTR_TO_MEMBER(CONTEXT, pCtx, X8); - m_RegDisplay.pX9 = PTR_TO_MEMBER(CONTEXT, pCtx, X9); - m_RegDisplay.pX10 = PTR_TO_MEMBER(CONTEXT, pCtx, X10); - m_RegDisplay.pX11 = PTR_TO_MEMBER(CONTEXT, pCtx, X11); - m_RegDisplay.pX12 = PTR_TO_MEMBER(CONTEXT, pCtx, X12); - m_RegDisplay.pX13 = PTR_TO_MEMBER(CONTEXT, pCtx, X13); - m_RegDisplay.pX14 = PTR_TO_MEMBER(CONTEXT, pCtx, X14); - m_RegDisplay.pX15 = PTR_TO_MEMBER(CONTEXT, pCtx, X15); - m_RegDisplay.pX16 = PTR_TO_MEMBER(CONTEXT, pCtx, X16); - m_RegDisplay.pX17 = PTR_TO_MEMBER(CONTEXT, pCtx, X17); - m_RegDisplay.pX18 = PTR_TO_MEMBER(CONTEXT, pCtx, X18); + m_RegDisplay.pX0 = (PTR_UIntNative)PTR_TO_REG(pCtx, X0); + m_RegDisplay.pX1 = (PTR_UIntNative)PTR_TO_REG(pCtx, X1); + m_RegDisplay.pX2 = (PTR_UIntNative)PTR_TO_REG(pCtx, X2); + m_RegDisplay.pX3 = (PTR_UIntNative)PTR_TO_REG(pCtx, X3); + m_RegDisplay.pX4 = (PTR_UIntNative)PTR_TO_REG(pCtx, X4); + m_RegDisplay.pX5 = (PTR_UIntNative)PTR_TO_REG(pCtx, X5); + m_RegDisplay.pX6 = (PTR_UIntNative)PTR_TO_REG(pCtx, X6); + m_RegDisplay.pX7 = (PTR_UIntNative)PTR_TO_REG(pCtx, X7); + m_RegDisplay.pX8 = (PTR_UIntNative)PTR_TO_REG(pCtx, 
X8); + m_RegDisplay.pX9 = (PTR_UIntNative)PTR_TO_REG(pCtx, X9); + m_RegDisplay.pX10 = (PTR_UIntNative)PTR_TO_REG(pCtx, X10); + m_RegDisplay.pX11 = (PTR_UIntNative)PTR_TO_REG(pCtx, X11); + m_RegDisplay.pX12 = (PTR_UIntNative)PTR_TO_REG(pCtx, X12); + m_RegDisplay.pX13 = (PTR_UIntNative)PTR_TO_REG(pCtx, X13); + m_RegDisplay.pX14 = (PTR_UIntNative)PTR_TO_REG(pCtx, X14); + m_RegDisplay.pX15 = (PTR_UIntNative)PTR_TO_REG(pCtx, X15); + m_RegDisplay.pX16 = (PTR_UIntNative)PTR_TO_REG(pCtx, X16); + m_RegDisplay.pX17 = (PTR_UIntNative)PTR_TO_REG(pCtx, X17); + m_RegDisplay.pX18 = (PTR_UIntNative)PTR_TO_REG(pCtx, X18); #elif defined(TARGET_X86) || defined(TARGET_AMD64) - m_RegDisplay.pIP = (PTR_PCODE)PTR_TO_MEMBER(CONTEXT, pCtx, Rip); + m_RegDisplay.pIP = (PTR_PCODE)PTR_TO_REG(pCtx, Rip); // // preserved regs // - m_RegDisplay.pRbp = (PTR_UIntNative)PTR_TO_MEMBER(CONTEXT, pCtx, Rbp); - m_RegDisplay.pRsi = (PTR_UIntNative)PTR_TO_MEMBER(CONTEXT, pCtx, Rsi); - m_RegDisplay.pRdi = (PTR_UIntNative)PTR_TO_MEMBER(CONTEXT, pCtx, Rdi); - m_RegDisplay.pRbx = (PTR_UIntNative)PTR_TO_MEMBER(CONTEXT, pCtx, Rbx); + m_RegDisplay.pRbp = (PTR_UIntNative)PTR_TO_REG(pCtx, Rbp); + m_RegDisplay.pRsi = (PTR_UIntNative)PTR_TO_REG(pCtx, Rsi); + m_RegDisplay.pRdi = (PTR_UIntNative)PTR_TO_REG(pCtx, Rdi); + m_RegDisplay.pRbx = (PTR_UIntNative)PTR_TO_REG(pCtx, Rbx); #ifdef TARGET_AMD64 - m_RegDisplay.pR12 = (PTR_UIntNative)PTR_TO_MEMBER(CONTEXT, pCtx, R12); - m_RegDisplay.pR13 = (PTR_UIntNative)PTR_TO_MEMBER(CONTEXT, pCtx, R13); - m_RegDisplay.pR14 = (PTR_UIntNative)PTR_TO_MEMBER(CONTEXT, pCtx, R14); - m_RegDisplay.pR15 = (PTR_UIntNative)PTR_TO_MEMBER(CONTEXT, pCtx, R15); + m_RegDisplay.pR12 = (PTR_UIntNative)PTR_TO_REG(pCtx, R12); + m_RegDisplay.pR13 = (PTR_UIntNative)PTR_TO_REG(pCtx, R13); + m_RegDisplay.pR14 = (PTR_UIntNative)PTR_TO_REG(pCtx, R14); + m_RegDisplay.pR15 = (PTR_UIntNative)PTR_TO_REG(pCtx, R15); #endif // TARGET_AMD64 // // scratch regs // - m_RegDisplay.pRax = (PTR_UIntNative)PTR_TO_MEMBER(CONTEXT, pCtx, Rax); - m_RegDisplay.pRcx = (PTR_UIntNative)PTR_TO_MEMBER(CONTEXT, pCtx, Rcx); - m_RegDisplay.pRdx = (PTR_UIntNative)PTR_TO_MEMBER(CONTEXT, pCtx, Rdx); + m_RegDisplay.pRax = (PTR_UIntNative)PTR_TO_REG(pCtx, Rax); + m_RegDisplay.pRcx = (PTR_UIntNative)PTR_TO_REG(pCtx, Rcx); + m_RegDisplay.pRdx = (PTR_UIntNative)PTR_TO_REG(pCtx, Rdx); #ifdef TARGET_AMD64 - m_RegDisplay.pR8 = (PTR_UIntNative)PTR_TO_MEMBER(CONTEXT, pCtx, R8); - m_RegDisplay.pR9 = (PTR_UIntNative)PTR_TO_MEMBER(CONTEXT, pCtx, R9); - m_RegDisplay.pR10 = (PTR_UIntNative)PTR_TO_MEMBER(CONTEXT, pCtx, R10); - m_RegDisplay.pR11 = (PTR_UIntNative)PTR_TO_MEMBER(CONTEXT, pCtx, R11); + m_RegDisplay.pR8 = (PTR_UIntNative)PTR_TO_REG(pCtx, R8); + m_RegDisplay.pR9 = (PTR_UIntNative)PTR_TO_REG(pCtx, R9); + m_RegDisplay.pR10 = (PTR_UIntNative)PTR_TO_REG(pCtx, R10); + m_RegDisplay.pR11 = (PTR_UIntNative)PTR_TO_REG(pCtx, R11); #endif // TARGET_AMD64 #else PORTABILITY_ASSERT("StackFrameIterator::InternalInit"); #endif // TARGET_ARM + +#undef PTR_TO_REG } PTR_VOID StackFrameIterator::HandleExCollide(PTR_ExInfo pExInfo) diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.h b/src/coreclr/nativeaot/Runtime/StackFrameIterator.h index ca0e48168db2f4..2e87767cb22058 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.h +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.h @@ -34,6 +34,7 @@ class StackFrameIterator public: StackFrameIterator() {} StackFrameIterator(Thread * pThreadToWalk, PInvokeTransitionFrame* pInitialTransitionFrame); + 
StackFrameIterator(Thread* pThreadToWalk, NATIVE_CONTEXT* pCtx); StackFrameIterator(Thread * pThreadToWalk, PTR_PAL_LIMITED_CONTEXT pCtx); bool IsValid(); @@ -82,7 +83,7 @@ class StackFrameIterator void InternalInit(Thread * pThreadToWalk, PTR_PInvokeTransitionFrame pFrame, uint32_t dwFlags); // GC stackwalk void InternalInit(Thread * pThreadToWalk, PTR_PAL_LIMITED_CONTEXT pCtx, uint32_t dwFlags); // EH and hijack stackwalk, and collided unwind - void InternalInit(Thread * pThreadToWalk, CONTEXT* pCtx, uint32_t dwFlags); // GC stackwalk of redirected thread + void InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pCtx, uint32_t dwFlags); // GC stackwalk of redirected thread void InternalInitForEH(Thread * pThreadToWalk, PAL_LIMITED_CONTEXT * pCtx, bool instructionFault); // EH stackwalk void InternalInitForStackTrace(); // Environment.StackTrace diff --git a/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc index 622546d8a35278..cb1f9830eb89aa 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc +++ b/src/coreclr/nativeaot/Runtime/amd64/AsmMacros.inc @@ -365,11 +365,11 @@ PTFF_SAVE_R14 equ 00000040h PTFF_SAVE_R15 equ 00000080h PTFF_SAVE_ALL_PRESERVED equ 000000F7h ;; NOTE: RBP is not included in this set! PTFF_SAVE_RSP equ 00008000h -PTFF_SAVE_RAX equ 00000100h ;; RAX is saved if it contains a GC ref and we're in hijack handler +PTFF_SAVE_RAX equ 00000100h ;; RAX is saved in hijack handler - in case it contains a GC ref PTFF_SAVE_ALL_SCRATCH equ 00007F00h PTFF_RAX_IS_GCREF equ 00010000h ;; iff PTFF_SAVE_RAX: set -> eax is Object, clear -> eax is scalar PTFF_RAX_IS_BYREF equ 00020000h ;; iff PTFF_SAVE_RAX: set -> eax is ByRef, clear -> eax is Object or scalar -PTFF_THREAD_ABORT equ 00040000h ;; indicates that ThreadAbortException should be thrown when returning from the transition +PTFF_THREAD_ABORT equ 00100000h ;; indicates that ThreadAbortException should be thrown when returning from the transition ;; These must match the TrapThreadsFlags enum TrapThreadsFlags_None equ 0 diff --git a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S index 04f0294ec1fa59..810c7e35b90cd2 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S +++ b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.S @@ -2,8 +2,210 @@ // The .NET Foundation licenses this file to you under the MIT license. .intel_syntax noprefix +#include "AsmOffsets.inc" // generated by the build from AsmOffsets.cpp #include <unixasmmacros.inc> +// +// See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves RAX/RDX and accepts the register +// bitmask in RCX +// +// On entry: +// - BITMASK: bitmask describing pushes, may be volatile register or constant value +// - RAX: managed function return value, may be an object or byref +// - preserved regs: need to stay preserved, may contain objects or byrefs +// +// INVARIANTS +// - The macro assumes it is called from a prolog, prior to a frame pointer being setup. +// - All preserved registers remain unchanged from their values in managed code. +// +.macro PUSH_PROBE_FRAME threadReg, trashReg, BITMASK + push_register rdx // save RDX, it might contain an objectref + push_register rax // save RAX, it might contain an objectref + lea \trashReg, [rsp + 0x18] + push_register \trashReg // save caller`s RSP + push_nonvol_reg r15 // save preserved registers + push_nonvol_reg r14 // .. + push_nonvol_reg r13 // .. + push_nonvol_reg r12 // .. + push_nonvol_reg rbx // .. 
+ push_register \BITMASK // save the register bitmask passed in by caller + push_register \threadReg // Thread * (unused by stackwalker) + push_nonvol_reg rbp // save caller`s RBP + mov \trashReg, [rsp + 11*8] // Find the return address + push_register \trashReg // save m_RIP + lea \trashReg, [rsp + 0] // trashReg == address of frame + + // allocate space for xmm0, xmm1 and alignment + alloc_stack 0x28 + + // save xmm0 and xmm1 in case they are used as return values + movdqa [rsp + 0x10], xmm0 + movdqa [rsp + 0] , xmm1 + + // link the frame into the Thread + mov [\threadReg + OFFSETOF__Thread__m_pDeferredTransitionFrame], \trashReg +.endm + +// +// Remove the frame from a previous call to PUSH_PROBE_FRAME from the top of the stack and restore preserved +// registers and return value to their values from before the probe was called (while also updating any +// object refs or byrefs). +.macro POP_PROBE_FRAME + movdqa xmm1, [rsp + 0] + movdqa xmm0, [rsp + 0x10] + add rsp, 0x28 + 8 // skip xmm0, xmm1 and discard RIP + pop rbp + pop rax // discard Thread* + pop rax // discard BITMASK + pop rbx + pop r12 + pop r13 + pop r14 + pop r15 + pop rax // discard caller RSP + pop rax + pop rdx +.endm + +// +// Macro to clear the hijack state. This is safe to do because the suspension code will not Unhijack this +// thread if it finds it at an IP that isn`t managed code. +// +// Register state on entry: +// R11: thread pointer +// +// Register state on exit: +// R9: trashed +// +.macro ClearHijackState + xor r9, r9 + mov [r11 + OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation], r9 + mov [r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress], r9 + mov [r11 + OFFSETOF__Thread__m_uHijackedReturnValueFlags], r9 +.endm + + +// +// The prolog for all GC suspension hijacks (normal and stress). Fixes up the hijacked return address, and +// clears the hijack state. +// +// Register state on entry: +// All registers correct for return to the original return address. +// +// Register state on exit: +// R11: thread pointer +// RCX: return value flags +// RAX, RDX preserved, other volatile regs trashed +// +.macro FixupHijackedCallstack + // preserve RAX, RDX as they may contain return values + push rax + push rdx + + // rax = GetThread(), makes nested calls + INLINE_GETTHREAD + mov r11, rax + + pop rdx + pop rax + + // + // Fix the stack by pushing the original return address + // + mov rcx, [r11 + OFFSETOF__Thread__m_pvHijackedReturnAddress] + push rcx + + mov rcx, [r11 + OFFSETOF__Thread__m_uHijackedReturnValueFlags] + + ClearHijackState +.endm + +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// +// RhpWaitForGCNoAbort -- rare path for WaitForGCCompletion +// +// +// INPUT: RDI: transition frame +// +// TRASHES: RCX, RDI, R8, R9, R10, R11 +// +////////////////////////////////////////////////////////////////////////////////////////////////////////////// +NESTED_ENTRY RhpWaitForGCNoAbort, _TEXT, NoHandler + END_PROLOGUE + + mov rdx, [rdi + OFFSETOF__PInvokeTransitionFrame__m_pThread] + + test dword ptr [rdx + OFFSETOF__Thread__m_ThreadStateFlags], TSF_DoNotTriggerGc + jnz Done + + // passing transition frame pointer in rdi + call C_FUNC(RhpWaitForGC2) + +Done: + ret + +NESTED_END RhpWaitForGCNoAbort, _TEXT + +// +// Set the Thread state and wait for a GC to complete. 
+// +// Register state on entry: +// RBX: thread pointer +// +// Register state on exit: +// RBX: thread pointer +// All other registers trashed +// + +.macro WaitForGCCompletion + test dword ptr [rbx + OFFSETOF__Thread__m_ThreadStateFlags], TSF_SuppressGcStress + TSF_DoNotTriggerGc + jnz LOCAL_LABEL(NoWait) + + mov rdi, [rbx + OFFSETOF__Thread__m_pDeferredTransitionFrame] + call C_FUNC(RhpWaitForGCNoAbort) +LOCAL_LABEL(NoWait): + +.endm + +// +// +// +// GC Probe Hijack target +// +// + +NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler + END_PROLOGUE + FixupHijackedCallstack + or ecx, DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_SAVE_RDX + jmp C_FUNC(RhpGcProbe) +NESTED_END RhpGcProbeHijack, _TEXT + +NESTED_ENTRY RhpGcProbe, _TEXT, NoHandler + test dword ptr [C_VAR(RhpTrapThreads)], TrapThreadsFlags_TrapThreads + jnz LOCAL_LABEL(RhpGcProbe_Trap) + ret +LOCAL_LABEL(RhpGcProbe_Trap): + PUSH_PROBE_FRAME r11, rax, rcx + END_PROLOGUE + + mov rbx, r11 + WaitForGCCompletion + + mov rax, [rbx + OFFSETOF__Thread__m_pDeferredTransitionFrame] + test dword ptr [rax + OFFSETOF__PInvokeTransitionFrame__m_Flags], PTFF_THREAD_ABORT + jnz LOCAL_LABEL(Abort) + POP_PROBE_FRAME + ret +LOCAL_LABEL(Abort): + POP_PROBE_FRAME + mov rcx, STATUS_REDHAWK_THREAD_ABORT + pop rdx // return address as exception RIP + jmp C_FUNC(RhpThrowHwEx) // Throw the ThreadAbortException as a special kind of hardware exception + +NESTED_END RhpGcProbe, _TEXT + + LEAF_ENTRY RhpGcPoll, _TEXT cmp dword ptr [C_VAR(RhpTrapThreads)], TrapThreadsFlags_None jne LOCAL_LABEL(RhpGcPoll_RarePath) diff --git a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm index ffb26f305a1996..2a83258629bd58 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm @@ -531,269 +531,6 @@ EXTERN g_fHasFastFxsave : BYTE FXSAVE_SIZE equ 512 -;; Trap to GC. -;; Set up the P/Invoke transition frame with the return address as the safe point. -;; All registers, both volatile and non-volatile, are preserved. 
-;; The function should be called not jumped because it's expecting the return address -NESTED_ENTRY RhpTrapToGC, _TEXT - - sizeof_OutgoingScratchSpace equ 20h - sizeof_PInvokeFrame equ OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs + 15*8 - sizeof_XmmAlignPad equ 8 - sizeof_XmmSave equ FXSAVE_SIZE - sizeof_MachineFrame equ 6*8 - sizeof_InitialPushedArgs equ 2*8 ;; eflags, return value - sizeof_FixedFrame equ sizeof_OutgoingScratchSpace + sizeof_PInvokeFrame + sizeof_XmmAlignPad + sizeof_XmmSave + sizeof_MachineFrame - - ;; On the stack on entry: - ;; [rsp ] -> Return address - - ;; save eflags before we trash them - pushfq - - ;; What we want to get to: - ;; - ;; [rsp ] -> outgoing scratch area - ;; - ;; [rsp + 20] -> m_RIP -------| - ;; [rsp + 28] -> m_FramePointer | - ;; [rsp + 30] -> m_pThread | - ;; [rsp + 38] -> m_Flags / m_dwAlignPad2 | - ;; [rsp + 40] -> rbx save | - ;; [rsp + 48] -> rsi save | - ;; [rsp + 50] -> rdi save | - ;; [rsp + 58] -> r12 save | - ;; [rsp + 60] -> r13 save | - ;; [rsp + 68] -> r14 save | PInvokeTransitionFrame - ;; [rsp + 70] -> r15 save | - ;; [rsp + 78] -> rsp save | - ;; [rsp + 80] -> rax save | - ;; [rsp + 88] -> rcx save | - ;; [rsp + 90] -> rdx save | - ;; [rsp + 98] -> r8 save | - ;; [rsp + a0] -> r9 save | - ;; [rsp + a8] -> r10 save | - ;; [rsp + b0] -> r11 save -------| - ;; - ;; [rsp + b8] -> [XmmAlignPad] - ;; - ;; [rsp + c0] -> FXSAVE area - ;; - ;; [rsp +2c0] | RIP | - ;; [rsp +2c8] | CS | - ;; [rsp +2d0] | EFLAGS | <-- 'machine frame' - ;; [rsp +2d8] | RSP | - ;; [rsp +2e0] | SS | - ;; [rsp +2e8] | padding | - ;; - ;; [rsp +2f0] [PSP] - ;; [rsp +2f8] [optional stack alignment] - ;; - ;; [PSP - 10] -> eflags save - ;; [PSP - 8] -> Return address - ;; [PSP] -> caller's frame - - test rsp, 0Fh - jz AlreadyAligned - - sub rsp, sizeof_XmmAlignPad + sizeof_XmmSave + sizeof_MachineFrame + 8 ; +8 to save PSP, - push r11 ; save incoming R11 into save location - lea r11, [rsp + 8 + sizeof_XmmAlignPad + sizeof_XmmSave + sizeof_MachineFrame + 8 + sizeof_InitialPushedArgs] - jmp PspCalculated - - AlreadyAligned: - - sub rsp, sizeof_XmmAlignPad + sizeof_XmmSave + sizeof_MachineFrame + 16 ; +8 to save RSP, +8 to re-align PSP, - push r11 ; save incoming R11 into save location - lea r11, [rsp + 8 + sizeof_XmmAlignPad + sizeof_XmmSave + sizeof_MachineFrame + 16 + sizeof_InitialPushedArgs] - - PspCalculated: - push r10 ; save incoming R10 into save location - xor r10d, r10d - - ;; - ;; Populate the 'machine frame' in the diagram above. We have only pushed up to the 'r10 save', so we have not - ;; yet pushed 0xA8 bytes of that diagram. 
- ;; - ;; [rsp + {offset-in-target-frame-layout-diagram} - {as-yet-unpushed-stack-size}] - mov [rsp + 2c0h - 0a8h], r10 ; init RIP to zero - mov [rsp + 2c8h - 0a8h], r10 ; init CS to zero - mov [rsp + 2d0h - 0a8h], r10 ; init EFLAGS to zero - mov [rsp + 2d8h - 0a8h], r11 ; save PSP in the 'machine frame' - mov [rsp + 2e0h - 0a8h], r10 ; init SS to zero - mov [rsp + 2f0h - 0a8h], r11 ; save PSP - - .pushframe - .allocstack sizeof_XmmAlignPad + sizeof_XmmSave + 2*8 ;; only 2 of the regs from the PInvokeTransitionFrame are on the stack - - push_vol_reg r9 - push_vol_reg r8 - push_vol_reg rdx - push_vol_reg rcx - push_vol_reg rax - push_vol_reg r11 ; PSP gets saved into the PInvokeTransitionFrame - push_nonvol_reg r15 - push_nonvol_reg r14 - push_nonvol_reg r13 - push_nonvol_reg r12 - push_nonvol_reg rdi - push_nonvol_reg rsi - push_nonvol_reg rbx - push_vol_reg PROBE_SAVE_FLAGS_EVERYTHING ; m_Flags / m_dwAlignPad2 - - ;; rdx <- GetThread(), TRASHES rcx - INLINE_GETTHREAD rdx, rcx - - push_vol_reg rdx ; m_pThread - push_nonvol_reg rbp ; m_FramePointer - push_vol_reg r10 ; m_RIP - - alloc_stack sizeof_OutgoingScratchSpace - END_PROLOGUE - - mov rbx, r11 ; put PSP into RBX - mov rsi, rdx ; put Thread* into RSI - - ; RBX is PSP - ; RSI is Thread* - - fxsave [rsp + 0c0h] - - cmp [g_fHasFastFxsave], 0 ; fast fxsave won't save the xmm registers, so we must do it - jz DontSaveXmmAgain - - ;; 0C0h -> offset of FXSAVE area - ;; 0A0h -> offset of xmm0 save area within the FXSAVE area - movdqa [rsp + 0c0h + 0a0h + 0*10h], xmm0 - movdqa [rsp + 0c0h + 0a0h + 1*10h], xmm1 - movdqa [rsp + 0c0h + 0a0h + 2*10h], xmm2 - movdqa [rsp + 0c0h + 0a0h + 3*10h], xmm3 - movdqa [rsp + 0c0h + 0a0h + 4*10h], xmm4 - movdqa [rsp + 0c0h + 0a0h + 5*10h], xmm5 - movdqa [rsp + 0c0h + 0a0h + 6*10h], xmm6 - movdqa [rsp + 0c0h + 0a0h + 7*10h], xmm7 - movdqa [rsp + 0c0h + 0a0h + 8*10h], xmm8 - movdqa [rsp + 0c0h + 0a0h + 9*10h], xmm9 - movdqa [rsp + 0c0h + 0a0h + 10*10h], xmm10 - movdqa [rsp + 0c0h + 0a0h + 11*10h], xmm11 - movdqa [rsp + 0c0h + 0a0h + 12*10h], xmm12 - movdqa [rsp + 0c0h + 0a0h + 13*10h], xmm13 - movdqa [rsp + 0c0h + 0a0h + 14*10h], xmm14 - movdqa [rsp + 0c0h + 0a0h + 15*10h], xmm15 - -DontSaveXmmAgain: - mov rax, [rbx - 8] - mov [rsp + 2c0h], rax ; save return address into 'machine frame' - mov [rsp + 20h], rax ; save return address into PInvokeTransitionFrame - - ; Early out if GC stress is currently suppressed. Do this after we have computed the real address to - ; return to but before we link the transition frame onto m_pDeferredTransitionFrame (because hitting this - ; condition implies we're running restricted callouts during a GC itself and we could end up - ; overwriting a co-op frame set by the code that caused the GC in the first place, e.g. a GC.Collect - ; call). - test dword ptr [rsi + OFFSETOF__Thread__m_ThreadStateFlags], TSF_SuppressGcStress + TSF_DoNotTriggerGc - jnz DoneWaitingForGc - - ; link the frame into the Thread - lea rcx, [rsp + sizeof_OutgoingScratchSpace] ; rcx <- PInvokeTransitionFrame* - mov [rsi + OFFSETOF__Thread__m_pDeferredTransitionFrame], rcx - - ;; - ;; Unhijack this thread, if necessary. 
- ;; - INLINE_THREAD_UNHIJACK rsi, rax, rcx ;; trashes RAX, RCX - -ifdef FEATURE_GC_STRESS - xor eax, eax - cmp [g_fGcStressStarted], eax - jz @F - - mov rdx, [rsp + 2c0h] - mov rcx, [g_pTheRuntimeInstance] - call RuntimeInstance__ShouldHijackLoopForGcStress - cmp al, 0 - je @F - - call REDHAWKGCINTERFACE__STRESSGC -@@: -endif ;; FEATURE_GC_STRESS - - lea rcx, [rsp + sizeof_OutgoingScratchSpace] ; calculate PInvokeTransitionFrame pointer - call RhpWaitForGCNoAbort - - DoneWaitingForGc: - - fxrstor [rsp + 0c0h] - - cmp [g_fHasFastFxsave], 0 - jz DontRestoreXmmAgain - - movdqa xmm0 , [rsp + 0c0h + 0a0h + 0*10h] - movdqa xmm1 , [rsp + 0c0h + 0a0h + 1*10h] - movdqa xmm2 , [rsp + 0c0h + 0a0h + 2*10h] - movdqa xmm3 , [rsp + 0c0h + 0a0h + 3*10h] - movdqa xmm4 , [rsp + 0c0h + 0a0h + 4*10h] - movdqa xmm5 , [rsp + 0c0h + 0a0h + 5*10h] - movdqa xmm6 , [rsp + 0c0h + 0a0h + 6*10h] - movdqa xmm7 , [rsp + 0c0h + 0a0h + 7*10h] - movdqa xmm8 , [rsp + 0c0h + 0a0h + 8*10h] - movdqa xmm9 , [rsp + 0c0h + 0a0h + 9*10h] - movdqa xmm10, [rsp + 0c0h + 0a0h + 10*10h] - movdqa xmm11, [rsp + 0c0h + 0a0h + 11*10h] - movdqa xmm12, [rsp + 0c0h + 0a0h + 12*10h] - movdqa xmm13, [rsp + 0c0h + 0a0h + 13*10h] - movdqa xmm14, [rsp + 0c0h + 0a0h + 14*10h] - movdqa xmm15, [rsp + 0c0h + 0a0h + 15*10h] - -DontRestoreXmmAgain: - add rsp, sizeof_OutgoingScratchSpace - mov eax, [rsp + OFFSETOF__PInvokeTransitionFrame__m_Flags] - test eax, PTFF_THREAD_ABORT - pop rax ; m_RIP - pop rbp ; m_FramePointer - pop rax ; m_pThread - pop rax ; m_Flags / m_dwAlign2 - pop rbx - pop rsi - pop rdi - pop r12 - pop r13 - pop r14 - pop r15 - pop rax ; RSP - pop rax ; RAX save - pop rcx - pop rdx - pop r8 - pop r9 - pop r10 - pop r11 - - ;; restore PSP - ;; 2F0h -> offset of the PSP area - ;; 0B8h -> offset of the end of the integer register area which is already popped - mov rsp, [rsp + 2f0h - 0b8h] - - ;; RSP is PSP at this point and the stack looks like this: - ;; [PSP - 10] -> eflags save - ;; [PSP - 8] -> return address - ;; [PSP] -> caller's frame - ;; - ;; The final step is to restore eflags and return - - lea rsp, [rsp - 10h] - jz @f ;; result of the test instruction before the pops above - popfq ;; restore flags - mov rcx, STATUS_REDHAWK_THREAD_ABORT - pop rdx ;; return address as exception RIP - jmp RhpThrowHwEx ;; Throw the ThreadAbortException as a special kind of hardware exception - -@@: - popfq ;; restore flags - ret - -NESTED_END RhpTrapToGC, _TEXT - ifdef FEATURE_GC_STRESS ;; ;; INVARIANT: Don't trash the argument registers, the binder codegen depends on this. 
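For readers who prefer not to trace the assembly, the new Unix probe path above works like this: RhpGcProbeHijack is reached via the hijacked return address with the method's return value still live in RAX/RDX (and possibly xmm0/xmm1); FixupHijackedCallstack restores the real return address and loads the return-value flags, and RhpGcProbe then publishes a probe frame and waits for the GC. The sketch below restates that control flow in C++. It is illustrative only: PushProbeFrame, PopProbeFrame, WaitForGCCompletion, and ThrowThreadAbort are hypothetical stand-ins for the PUSH_PROBE_FRAME/POP_PROBE_FRAME macros, the WaitForGCCompletion asm macro, and the RhpThrowHwEx tail call, and the TrapThreads bit value is an assumption based on the TrapThreadsFlags enum.

```cpp
// Illustrative sketch of the RhpGcProbeHijack/RhpGcProbe flow in GcProbe.S.
// The declarations below are stand-ins, not real runtime entry points.
#include <cstdint>

constexpr uint32_t TrapThreadsFlags_TrapThreads = 2;  // assumed bit value from the TrapThreadsFlags enum
constexpr uint64_t PTFF_THREAD_ABORT = 0x00100000;    // new value from this diff

struct ProbeFrame { uint64_t m_Flags; };

extern volatile uint32_t RhpTrapThreads;       // set by the suspending thread
ProbeFrame* PushProbeFrame(uint64_t bitmask);  // models PUSH_PROBE_FRAME: saves RAX/RDX, preserved regs, xmm0/xmm1
void PopProbeFrame();                          // models POP_PROBE_FRAME: restores regs, including updated refs
void WaitForGCCompletion(ProbeFrame* frame);   // models the WaitForGCCompletion macro above
void ThrowThreadAbort();                       // models the RhpThrowHwEx(STATUS_REDHAWK_THREAD_ABORT) tail call

// bitmask arrives with DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_RAX + PTFF_SAVE_RDX
// already OR-ed in by RhpGcProbeHijack.
void GcProbeSketch(uint64_t bitmask)
{
    // Fast path: a single test of the global trap flag.
    if ((RhpTrapThreads & TrapThreadsFlags_TrapThreads) == 0)
        return;

    // Slow path: publish the return value and preserved registers in a
    // transition frame so the GC can observe and update any object refs.
    ProbeFrame* frame = PushProbeFrame(bitmask);
    WaitForGCCompletion(frame);

    bool abort = (frame->m_Flags & PTFF_THREAD_ABORT) != 0;
    PopProbeFrame();
    if (abort)
        ThrowThreadAbort();  // raise ThreadAbortException at the interrupted point
}
```

Note how the abort flag must be read before POP_PROBE_FRAME tears the frame down, which is exactly why the asm tests PTFF_THREAD_ABORT before popping and only then branches to the throw path.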
diff --git a/src/coreclr/nativeaot/Runtime/amd64/PInvoke.asm b/src/coreclr/nativeaot/Runtime/amd64/PInvoke.asm index 5366fdaf4a95c8..ffa74efc257d3a 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/PInvoke.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/PInvoke.asm @@ -5,7 +5,7 @@ include asmmacros.inc ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; -;; RhpWaitForGCNoAbort -- rare path for RhpPInvokeReturn +;; RhpWaitForGCNoAbort -- rare path for WaitForGCCompletion ;; ;; ;; INPUT: RCX: transition frame diff --git a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm index 3921344e561711..b232e380fb075e 100644 --- a/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/arm64/GcProbe.asm @@ -187,64 +187,6 @@ __PPF_ThreadReg SETS "x2" EPILOG_RETURN MEND -;; In order to avoid trashing VFP registers across the loop hijack we must save all user registers, so that -;; registers used by the loop being hijacked will not be affected. Unlike ARM32 where neon registers (NQ0, ..., NQ15) -;; are fully covered by the floating point registers D0 ... D31, we have 32 neon registers Q0, ... Q31 on ARM64 -;; which are not fully covered by the register D0 ... D31. Therefore we must explicitly save all Q registers. -EXTRA_SAVE_SIZE equ (32*16) - - MACRO - ALLOC_LOOP_HIJACK_FRAME - - PROLOG_STACK_ALLOC EXTRA_SAVE_SIZE - - ;; Save all neon registers - PROLOG_NOP stp q0, q1, [sp] - PROLOG_NOP stp q2, q3, [sp, #0x20] - PROLOG_NOP stp q4, q5, [sp, #0x40] - PROLOG_NOP stp q6, q7, [sp, #0x60] - PROLOG_NOP stp q8, q9, [sp, #0x80] - PROLOG_NOP stp q10, q11, [sp, #0xA0] - PROLOG_NOP stp q12, q13, [sp, #0xC0] - PROLOG_NOP stp q14, q15, [sp, #0xE0] - PROLOG_NOP stp q16, q17, [sp, #0x100] - PROLOG_NOP stp q18, q19, [sp, #0x120] - PROLOG_NOP stp q20, q21, [sp, #0x140] - PROLOG_NOP stp q22, q23, [sp, #0x160] - PROLOG_NOP stp q24, q25, [sp, #0x180] - PROLOG_NOP stp q26, q27, [sp, #0x1A0] - PROLOG_NOP stp q28, q29, [sp, #0x1C0] - PROLOG_NOP stp q30, q31, [sp, #0x1E0] - - ALLOC_PROBE_FRAME 0, {false} - MEND - - MACRO - FREE_LOOP_HIJACK_FRAME - - FREE_PROBE_FRAME 0, {false} - - ;; restore all neon registers - PROLOG_NOP ldp q0, q1, [sp] - PROLOG_NOP ldp q2, q3, [sp, #0x20] - PROLOG_NOP ldp q4, q5, [sp, #0x40] - PROLOG_NOP ldp q6, q7, [sp, #0x60] - PROLOG_NOP ldp q8, q9, [sp, #0x80] - PROLOG_NOP ldp q10, q11, [sp, #0xA0] - PROLOG_NOP ldp q12, q13, [sp, #0xC0] - PROLOG_NOP ldp q14, q15, [sp, #0xE0] - PROLOG_NOP ldp q16, q17, [sp, #0x100] - PROLOG_NOP ldp q18, q19, [sp, #0x120] - PROLOG_NOP ldp q20, q21, [sp, #0x140] - PROLOG_NOP ldp q22, q23, [sp, #0x160] - PROLOG_NOP ldp q24, q25, [sp, #0x180] - PROLOG_NOP ldp q26, q27, [sp, #0x1A0] - PROLOG_NOP ldp q28, q29, [sp, #0x1C0] - PROLOG_NOP ldp q30, q31, [sp, #0x1E0] - - EPILOG_STACK_FREE EXTRA_SAVE_SIZE - MEND - ;; ;; Macro to clear the hijack state. This is safe to do because the suspension code will not Unhijack this ;; thread if it finds it at an IP that isn't managed code. 
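Before the rhbinder.h hunk below, it is worth seeing the new two-register return-kind encoding end to end: the composite GCRefKind values introduced in ICodeManager.h above shift into the PTFF_RAX_IS_*/PTFF_RDX_IS_* flag positions by exactly 16 bits, which is what the new C_ASSERTs verify. The following self-contained snippet mirrors the constants and conversion functions from this diff; the main() scaffolding is mine, and the PTFF_SAVE_RDX value is inferred from the scratch-register flag layout (RAX = 0x100, RCX = 0x200, RDX = 0x400).

```cpp
// Round-trip check of the AMD64 GCRefKind <-> PInvokeTransitionFrameFlags
// encoding from this diff: two bits per return register, shifted left by 16.
#include <cassert>
#include <cstdint>
#include <cstdio>

enum GCRefKind : unsigned char
{
    GCRK_Scalar = 0x00,
    GCRK_Object = 0x01,
    GCRK_Byref  = 0x02,
    // composite kinds: reg1 (RDX) in bits 3:2, reg0 (RAX) in bits 1:0
    GCRK_Scalar_Obj = (GCRK_Object << 2) | GCRK_Scalar,
    GCRK_Obj_Byref  = (GCRK_Byref << 2) | GCRK_Object,
};

const uint64_t PTFF_SAVE_RAX     = 0x00000100;
const uint64_t PTFF_SAVE_RDX     = 0x00000400; // inferred from the scratch-register flag layout
const uint64_t PTFF_RAX_IS_GCREF = 0x00010000;
const uint64_t PTFF_RAX_IS_BYREF = 0x00020000;
const uint64_t PTFF_RDX_IS_GCREF = 0x00040000;
const uint64_t PTFF_RDX_IS_BYREF = 0x00080000;

// The bitwise relationship the new C_ASSERTs in ICodeManager.h verify:
static_assert(PTFF_RAX_IS_GCREF == ((uint64_t)GCRK_Object << 16), "RAX kind bits line up");
static_assert(PTFF_RDX_IS_GCREF == ((uint64_t)GCRK_Scalar_Obj << 16), "RDX kind bits line up");

uint64_t ReturnKindToTransitionFrameFlags(GCRefKind returnKind)
{
    if (returnKind == GCRK_Scalar)
        return 0;
    return PTFF_SAVE_RAX | PTFF_SAVE_RDX | ((uint64_t)returnKind << 16);
}

GCRefKind TransitionFrameFlagsToReturnKind(uint64_t flags)
{
    return (GCRefKind)((flags & (PTFF_RAX_IS_GCREF | PTFF_RAX_IS_BYREF |
                                 PTFF_RDX_IS_GCREF | PTFF_RDX_IS_BYREF)) >> 16);
}

int main()
{
    // RAX holds an object, RDX a byref: both save bits and both kind bits are set.
    uint64_t flags = ReturnKindToTransitionFrameFlags(GCRK_Obj_Byref);
    assert(flags == (PTFF_SAVE_RAX | PTFF_SAVE_RDX | PTFF_RAX_IS_GCREF | PTFF_RDX_IS_BYREF));
    assert(TransitionFrameFlagsToReturnKind(flags) == GCRK_Obj_Byref);
    printf("flags = 0x%llx\n", (unsigned long long)flags);
    return 0;
}
```

This also makes the PTFF_THREAD_ABORT move from 0x00040000 to 0x00100000 obvious: its old position is now occupied by PTFF_RDX_IS_GCREF.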
diff --git a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h index 9d5e676efe1fe9..4372c5ce1922a1 100644 --- a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h +++ b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h @@ -374,9 +374,11 @@ enum PInvokeTransitionFrameFlags PTFF_SAVE_R11 = 0x00004000, PTFF_RAX_IS_GCREF = 0x00010000, // used by hijack handler to report return value of hijacked method - PTFF_RAX_IS_BYREF = 0x00020000, // used by hijack handler to report return value of hijacked method + PTFF_RAX_IS_BYREF = 0x00020000, + PTFF_RDX_IS_GCREF = 0x00040000, + PTFF_RDX_IS_BYREF = 0x00080000, - PTFF_THREAD_ABORT = 0x00040000, // indicates that ThreadAbortException should be thrown when returning from the transition + PTFF_THREAD_ABORT = 0x00100000, // indicates that ThreadAbortException should be thrown when returning from the transition }; #endif // TARGET_ARM diff --git a/src/coreclr/nativeaot/Runtime/portable.cpp b/src/coreclr/nativeaot/Runtime/portable.cpp index 12b622c0ab7050..af11b2acf8a09d 100644 --- a/src/coreclr/nativeaot/Runtime/portable.cpp +++ b/src/coreclr/nativeaot/Runtime/portable.cpp @@ -406,18 +406,6 @@ void * ReturnFromCallDescrThunk; // Return address hijacking // #if !defined (HOST_ARM64) -COOP_PINVOKE_HELPER(void, RhpGcProbeHijackScalar, ()) -{ - ASSERT_UNCONDITIONALLY("NYI"); -} -COOP_PINVOKE_HELPER(void, RhpGcProbeHijackObject, ()) -{ - ASSERT_UNCONDITIONALLY("NYI"); -} -COOP_PINVOKE_HELPER(void, RhpGcProbeHijackByref, ()) -{ - ASSERT_UNCONDITIONALLY("NYI"); -} COOP_PINVOKE_HELPER(void, RhpGcStressHijackScalar, ()) { ASSERT_UNCONDITIONALLY("NYI"); diff --git a/src/coreclr/nativeaot/Runtime/thread.cpp b/src/coreclr/nativeaot/Runtime/thread.cpp index 3500e46f48a4e0..3e536fb24f26a6 100644 --- a/src/coreclr/nativeaot/Runtime/thread.cpp +++ b/src/coreclr/nativeaot/Runtime/thread.cpp @@ -77,7 +77,6 @@ void Thread::WaitForGC(PInvokeTransitionFrame* pTransitionFrame) // set preemptive mode VolatileStoreWithoutBarrier(&m_pTransitionFrame, pTransitionFrame); - Unhijack(); RedhawkGCInterface::WaitForGCCompletion(); // must be in cooperative mode when checking the trap flag @@ -140,8 +139,6 @@ void Thread::EnablePreemptiveMode() ASSERT(m_pDeferredTransitionFrame != NULL); #endif - Unhijack(); - // set preemptive mode VolatileStoreWithoutBarrier(&m_pTransitionFrame, m_pDeferredTransitionFrame); } @@ -288,8 +285,8 @@ void Thread::Construct() #ifdef FEATURE_SUSPEND_REDIRECTION m_redirectionContextBuffer = NULL; - m_redirectionContext = NULL; #endif //FEATURE_SUSPEND_REDIRECTION + m_interruptedContext = NULL; } bool Thread::IsInitialized() @@ -388,10 +385,12 @@ void GcScanWasmShadowStack(void * pfnEnumCallback, void * pvCallbackData) void Thread::GcScanRoots(void * pfnEnumCallback, void * pvCallbackData) { + this->CrossThreadUnhijack(); + #ifdef HOST_WASM GcScanWasmShadowStack(pfnEnumCallback, pvCallbackData); #else - StackFrameIterator frameIterator(this, GetTransitionFrame()); + StackFrameIterator frameIterator(this, GetTransitionFrame()); GcScanRootsWorker(pfnEnumCallback, pvCallbackData, frameIterator); #endif } @@ -440,22 +439,21 @@ void Thread::GcScanRootsWorker(void * pfnEnumCallback, void * pvCallbackData, St if (frameIterator.GetHijackedReturnValueLocation(&pHijackedReturnValue, &returnValueKind)) { -#ifdef TARGET_ARM64 GCRefKind reg0Kind = ExtractReg0ReturnKind(returnValueKind); - GCRefKind reg1Kind = ExtractReg1ReturnKind(returnValueKind); - - // X0 and X1 are saved next to each other in this order if (reg0Kind != GCRK_Scalar) { 
RedhawkGCInterface::EnumGcRef(pHijackedReturnValue, reg0Kind, pfnEnumCallback, pvCallbackData); } + +#if defined(TARGET_ARM64) || defined(TARGET_UNIX) + GCRefKind reg1Kind = ExtractReg1ReturnKind(returnValueKind); if (reg1Kind != GCRK_Scalar) { + // X0/X1 or RAX/RDX are saved in the hijack frame next to each other in this order RedhawkGCInterface::EnumGcRef(pHijackedReturnValue + 1, reg1Kind, pfnEnumCallback, pvCallbackData); } -#else - RedhawkGCInterface::EnumGcRef(pHijackedReturnValue, returnValueKind, pfnEnumCallback, pvCallbackData); -#endif +#endif // TARGET_ARM64 || TARGET_UNIX + } #ifndef DACCESS_COMPILE @@ -547,7 +545,14 @@ void Thread::GcScanRootsWorker(void * pfnEnumCallback, void * pvCallbackData, St EXTERN_C void FASTCALL RhpSuspendRedirected(); -#ifndef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_UNIX) +EXTERN_C void FASTCALL RhpGcProbeHijack(); + +static void* NormalHijackTargets[1] = +{ + reinterpret_cast<void*>(RhpGcProbeHijack) +}; +#else // TARGET_ARM64 || TARGET_UNIX EXTERN_C void FASTCALL RhpGcProbeHijackScalar(); EXTERN_C void FASTCALL RhpGcProbeHijackObject(); EXTERN_C void FASTCALL RhpGcProbeHijackByref(); @@ -558,14 +563,7 @@ static void* NormalHijackTargets[3] = reinterpret_cast<void*>(RhpGcProbeHijackObject), // GCRK_Object = 1, reinterpret_cast<void*>(RhpGcProbeHijackByref) // GCRK_Byref = 2, }; -#else // TARGET_ARM64 -EXTERN_C void FASTCALL RhpGcProbeHijack(); - -static void* NormalHijackTargets[1] = -{ - reinterpret_cast<void*>(RhpGcProbeHijack) -}; -#endif // TARGET_ARM64 +#endif // TARGET_ARM64 || TARGET_UNIX #ifdef FEATURE_GC_STRESS #ifndef TARGET_ARM64 @@ -607,60 +605,95 @@ bool Thread::IsHijackTarget(void * address) return false; } -bool Thread::Hijack() +void Thread::Hijack() { ASSERT(ThreadStore::GetCurrentThread() == ThreadStore::GetSuspendingThread()); - ASSERT_MSG(ThreadStore::GetSuspendingThread() != this, "You may not hijack a thread from itself."); if (m_hPalThread == INVALID_HANDLE_VALUE) { // cannot proceed - return false; + return; } - // requires THREAD_SUSPEND_RESUME / THREAD_GET_CONTEXT / THREAD_SET_CONTEXT permissions +#if defined(TARGET_ARM64) && defined(TARGET_UNIX) + // TODO: RhpGcProbe and related asm helpers NYI for ARM64/UNIX. + // Disabling hijacking for now. + return; +#endif - Thread* pCurrentThread = ThreadStore::GetCurrentThread(); - uint32_t result = PalHijack(m_hPalThread, HijackCallback, this); - return result == 0; + // PalHijack will call HijackCallback or make the target thread call it. + // It may also do nothing if the target thread is in an inconvenient state. + PalHijack(m_hPalThread, this); } -UInt32_BOOL Thread::HijackCallback(HANDLE /*hThread*/, PAL_LIMITED_CONTEXT* pThreadContext, void* pCallbackContext) +void Thread::HijackCallback(NATIVE_CONTEXT* pThreadContext, void* pThreadToHijack) { - Thread* pThread = (Thread*) pCallbackContext; + // If we are no longer trying to suspend, no need to do anything. + // This is just an optimization. It is ok to race with setting the trap flag here. + // If we need to suspend, we will be called again. + if (!ThreadStore::IsTrapThreadsRequested()) + return; + + Thread* pThread = (Thread*) pThreadToHijack; + if (pThread == NULL) + { + pThread = ThreadStore::GetCurrentThread(); + + ASSERT(pThread != NULL); + ASSERT(pThread != ThreadStore::GetSuspendingThread()); + } // we have a thread stopped, and we do not know where exactly. // it could be in a system call or in our own runtime holding locks // current thread should not block or allocate while we determine whether the location is in managed code. 
- if (pThread->CacheTransitionFrameForSuspend()) + + if (pThread->m_pTransitionFrame != NULL) { // This thread has already made it to preemptive (posted a transition frame) // we do not need to hijack it - return true; + return; } - void* pvAddress = (void*)pThreadContext->IP; + void* pvAddress = (void*)pThreadContext->GetIp(); RuntimeInstance* runtime = GetRuntimeInstance(); if (!runtime->IsManaged(pvAddress)) { // Running in cooperative mode, but not managed. // We cannot continue. - return false; + return; + } + + if (pThread->IsDoNotTriggerGcSet()) + { + return; } ICodeManager* codeManager = runtime->GetCodeManagerForAddress(pvAddress); + + // we may be able to do a GC stack walk right where the thread is now, + // as long as it is at a GC safe point and we can unwind the stack at that location. - if (codeManager->IsSafePoint(pvAddress)) + if (codeManager->IsSafePoint(pvAddress) && + codeManager->IsUnwindable(pvAddress)) { + // if we are not given a thread to hijack, + // perform an in-line wait on the current thread + if (pThreadToHijack == NULL) + { + ASSERT(pThread->m_interruptedContext == NULL); + pThread->InlineSuspend(pThreadContext); + return; + } + #ifdef FEATURE_SUSPEND_REDIRECTION if (pThread->Redirect()) { - return true; + return; } #endif //FEATURE_SUSPEND_REDIRECTION } - return pThread->InternalHijack(pThreadContext, NormalHijackTargets); + pThread->HijackReturnAddress(pThreadContext, NormalHijackTargets); } #ifdef FEATURE_GC_STRESS @@ -701,87 +734,109 @@ void Thread::HijackForGcStress(PAL_LIMITED_CONTEXT * pSuspendCtx) } if (bForceGC || pInstance->ShouldHijackCallsiteForGcStress(ip)) { - pCurrentThread->InternalHijack(pSuspendCtx, GcStressHijackTargets); + pCurrentThread->HijackReturnAddress(pSuspendCtx, GcStressHijackTargets); } } #endif // FEATURE_GC_STRESS -// This function is called in one of two scenarios: -// 1) from a thread to place a return hijack onto its own stack. This is only done for GC stress cases -// via Thread::HijackForGcStress above. -// 2) from another thread to place a return hijack onto this thread's stack. In this case the target -// thread is OS suspended someplace in managed code. The only constraint on the suspension is that the -// stack be crawlable enough to yield the location of the return address. -bool Thread::InternalHijack(PAL_LIMITED_CONTEXT * pSuspendCtx, void * pvHijackTargets[]) +// This function is called from a thread to place a return hijack onto its own stack for GC stress cases +// via Thread::HijackForGcStress above. The only constraint on the suspension is that the +// stack be crawlable enough to yield the location of the return address. +void Thread::HijackReturnAddress(PAL_LIMITED_CONTEXT* pSuspendCtx, void* pvHijackTargets[]) { - bool fSuccess = false; - if (IsDoNotTriggerGcSet()) - return false; + return; StackFrameIterator frameIterator(this, pSuspendCtx); - - if (frameIterator.IsValid()) + if (!frameIterator.IsValid()) { - frameIterator.CalculateCurrentMethodState(); + return; + } - PTR_PTR_VOID ppvRetAddrLocation; - GCRefKind retValueKind; + HijackReturnAddressWorker(&frameIterator, pvHijackTargets); +} - if (frameIterator.GetCodeManager()->GetReturnAddressHijackInfo(frameIterator.GetMethodInfo(), - frameIterator.GetRegisterSet(), - &ppvRetAddrLocation, - &retValueKind)) - { - // ARM64 epilogs have a window between loading the hijackable return address into LR and the RET instruction. 
- // We cannot hijack or unhijack a thread while it is suspended in that window unless we implement hijacking - // via LR register modification. Therefore it is important to check our ability to hijack the thread before - // unhijacking it. - CrossThreadUnhijack(); - - void* pvRetAddr = *ppvRetAddrLocation; - ASSERT(ppvRetAddrLocation != NULL); - ASSERT(pvRetAddr != NULL); - - ASSERT(StackFrameIterator::IsValidReturnAddress(pvRetAddr)); - - m_ppvHijackedReturnAddressLocation = ppvRetAddrLocation; - m_pvHijackedReturnAddress = pvRetAddr; -#ifdef TARGET_ARM64 - m_uHijackedReturnValueFlags = ReturnKindToTransitionFrameFlags(retValueKind); - *ppvRetAddrLocation = pvHijackTargets[0]; +// This function is called in one of two scenarios: +// 1) from another thread to place a return hijack onto this thread's stack. In this case the target +// thread is OS suspended at pSuspendCtx in managed code. +// 2) from a thread to place a return hijack onto its own stack for GC suspension. In this case the target +// thread is interrupted at pSuspendCtx in managed code via a signal or similar. +void Thread::HijackReturnAddress(NATIVE_CONTEXT* pSuspendCtx, void * pvHijackTargets[]) +{ + ASSERT(!IsDoNotTriggerGcSet()); + + StackFrameIterator frameIterator(this, pSuspendCtx); + ASSERT(frameIterator.IsValid()); + + HijackReturnAddressWorker(&frameIterator, pvHijackTargets); +} + +void Thread::HijackReturnAddressWorker(StackFrameIterator* frameIterator, void* pvHijackTargets[]) +{ + PTR_PTR_VOID ppvRetAddrLocation; + GCRefKind retValueKind; + + frameIterator->CalculateCurrentMethodState(); + if (frameIterator->GetCodeManager()->GetReturnAddressHijackInfo(frameIterator->GetMethodInfo(), + frameIterator->GetRegisterSet(), + &ppvRetAddrLocation, + &retValueKind)) + { + ASSERT(ppvRetAddrLocation != NULL); + + // check if hijack location is the same + if (m_ppvHijackedReturnAddressLocation == ppvRetAddrLocation) + return; + + // ARM64 epilogs have a window between loading the hijackable return address into LR and the RET instruction. + // We cannot hijack or unhijack a thread while it is suspended in that window unless we implement hijacking + // via LR register modification. Therefore it is important to check our ability to hijack the thread before + // unhijacking it. 
+ CrossThreadUnhijack(); + + void* pvRetAddr = *ppvRetAddrLocation; + ASSERT(pvRetAddr != NULL); + ASSERT(StackFrameIterator::IsValidReturnAddress(pvRetAddr)); + + m_ppvHijackedReturnAddressLocation = ppvRetAddrLocation; + m_pvHijackedReturnAddress = pvRetAddr; +#if defined(TARGET_ARM64) || defined(TARGET_UNIX) + m_uHijackedReturnValueFlags = ReturnKindToTransitionFrameFlags(retValueKind); + *ppvRetAddrLocation = pvHijackTargets[0]; #else - void* pvHijackTarget = pvHijackTargets[retValueKind]; - ASSERT_MSG(IsHijackTarget(pvHijackTarget), "unexpected method used as hijack target"); - *ppvRetAddrLocation = pvHijackTarget; + void* pvHijackTarget = pvHijackTargets[retValueKind]; + ASSERT_MSG(IsHijackTarget(pvHijackTarget), "unexpected method used as hijack target"); + *ppvRetAddrLocation = pvHijackTarget; #endif - fSuccess = true; - } - } - STRESS_LOG3(LF_STACKWALK, LL_INFO10000, "InternalHijack: TgtThread = %llx, IP = %p, result = %d\n", - GetPalThreadIdForLogging(), pSuspendCtx->GetIp(), fSuccess); + STRESS_LOG2(LF_STACKWALK, LL_INFO10000, "InternalHijack: TgtThread = %llx, IP = %p\n", + GetPalThreadIdForLogging(), frameIterator->GetRegisterSet()->GetIP()); + } +} - return fSuccess; +NATIVE_CONTEXT* Thread::GetInterruptedContext() +{ + ASSERT(m_interruptedContext != NULL); + return m_interruptedContext; } #ifdef FEATURE_SUSPEND_REDIRECTION -CONTEXT* Thread::GetRedirectionContext() + +NATIVE_CONTEXT* Thread::EnsureRedirectionContext() { - if (m_redirectionContext == NULL) + if (m_redirectionContextBuffer == NULL) { - m_redirectionContext = PalAllocateCompleteOSContext(&m_redirectionContextBuffer); + m_interruptedContext = PalAllocateCompleteOSContext(&m_redirectionContextBuffer); } - return m_redirectionContext; + return m_interruptedContext; } bool Thread::Redirect() { - if (IsDoNotTriggerGcSet()) - return false; + ASSERT(!IsDoNotTriggerGcSet()); - CONTEXT* redirectionContext = GetRedirectionContext(); + NATIVE_CONTEXT* redirectionContext = EnsureRedirectionContext(); if (redirectionContext == NULL) return false; @@ -802,24 +857,55 @@ bool Thread::Redirect() } #endif //FEATURE_SUSPEND_REDIRECTION +bool Thread::InlineSuspend(NATIVE_CONTEXT* interruptedContext) +{ + ASSERT(!IsDoNotTriggerGcSet()); + + Unhijack(); + + m_interruptedContext = interruptedContext; + WaitForGC(INTERRUPTED_THREAD_MARKER); + m_interruptedContext = NULL; + + return true; +} + // This is the standard Unhijack, which is only allowed to be called on your own thread. // Note that all the asm-implemented Unhijacks should also only be operating on their // own thread. void Thread::Unhijack() { ASSERT(ThreadStore::GetCurrentThread() == this); + ASSERT(IsCurrentThreadInCooperativeMode()); + UnhijackWorker(); } -// This unhijack routine is only called from Thread::InternalHijack() to undo a possibly existing -// hijack before placing a new one. Although there are many code sequences (here and in asm) to -// perform an unhijack operation, they will never execute concurrently. A thread may unhijack itself -// at any time so long as it does so from unmanaged code. This ensures that another thread will not -// suspend it and attempt to unhijack it, since we only suspend threads that are executing managed -// code. +// This unhijack routine is called to undo a hijack that is potentially on a different thread. 
+// +// Although there are many code sequences (here and in asm) to +// perform an unhijack operation, they will never execute concurrently: +// +// - A thread may unhijack itself at any time so long as it does so from unmanaged code while in coop mode. +// This ensures that the coop thread can access its stack synchronously. +// Unhijacking from unmanaged code ensures that another thread will not attempt to hijack it, +// since we only hijack threads that are executing managed code. +// +// - A GC thread may access a thread asynchronously, including unhijacking it. +// An asynchronously accessed thread must be in preemptive mode and should not +// access the managed portion of its stack. +// +// - A thread that owns the suspension can access another thread as long as the other thread is +// in preemptive mode or suspended in managed code. +// Either way the other thread cannot be accessing its hijack. +// void Thread::CrossThreadUnhijack() { - ASSERT((ThreadStore::GetCurrentThread() == this) || DebugIsSuspended()); + ASSERT(((ThreadStore::GetCurrentThread() == this) && IsCurrentThreadInCooperativeMode()) || + ThreadStore::GetCurrentThread()->IsGCSpecial() || + ThreadStore::GetCurrentThread() == ThreadStore::GetSuspendingThread() + ); + UnhijackWorker(); } @@ -840,26 +926,8 @@ void Thread::UnhijackWorker() // Clear the hijack state. m_ppvHijackedReturnAddressLocation = NULL; m_pvHijackedReturnAddress = NULL; -#ifdef TARGET_ARM64 m_uHijackedReturnValueFlags = 0; -#endif -} - -#if _DEBUG -bool Thread::DebugIsSuspended() -{ - ASSERT(ThreadStore::GetCurrentThread() != this); -#if 0 - PalSuspendThread(m_hPalThread); - uint32_t suspendCount = PalResumeThread(m_hPalThread); - return (suspendCount > 0); -#else - // @TODO: I don't trust the above implementation, so I want to implement this myself - // by marking the thread state as "yes, we suspended it" and checking that state here. - return true; -#endif } -#endif // @TODO: it would be very, very nice if we did not have to bleed knowledge of hijacking // and hijack state to other components in the runtime. For now, these are only used @@ -990,10 +1058,10 @@ EXTERN_C NOINLINE void FASTCALL RhpGcPoll2(PInvokeTransitionFrame* pFrame) EXTERN_C NOINLINE void FASTCALL RhpSuspendRedirected() { Thread* pThread = ThreadStore::GetCurrentThread(); - pThread->WaitForGC(REDIRECTED_THREAD_MARKER); + pThread->WaitForGC(INTERRUPTED_THREAD_MARKER); // restore execution at interrupted location - PalRestoreContext(pThread->GetRedirectionContext()); + PalRestoreContext(pThread->GetInterruptedContext()); UNREACHABLE(); } diff --git a/src/coreclr/nativeaot/Runtime/thread.h b/src/coreclr/nativeaot/Runtime/thread.h index 4aab6d88eaa78d..95cc8e5521c99b 100644 --- a/src/coreclr/nativeaot/Runtime/thread.h +++ b/src/coreclr/nativeaot/Runtime/thread.h @@ -8,6 +8,10 @@ class ThreadStore; class CLREventStatic; class Thread; +#ifdef TARGET_UNIX +#include "UnixContext.h" +#endif + // The offsets of some fields in the thread (in particular, m_pTransitionFrame) are known to the compiler and get // inlined into the code. Let's make sure they don't change just because we enable/disable server GC in a particular // runtime build. 
@@ -28,7 +32,10 @@ class Thread; #endif // HOST_64BIT #define TOP_OF_STACK_MARKER ((PInvokeTransitionFrame*)(ptrdiff_t)-1) -#define REDIRECTED_THREAD_MARKER ((PInvokeTransitionFrame*)(ptrdiff_t)-2) + +// the thread has been interrupted and context for the interruption point +// can be retrieved via GetInterruptedContext() +#define INTERRUPTED_THREAD_MARKER ((PInvokeTransitionFrame*)(ptrdiff_t)-2) enum SyncRequestResult { @@ -76,7 +83,7 @@ struct ThreadBuffer void ** m_ppvHijackedReturnAddressLocation; void * m_pvHijackedReturnAddress; #ifdef HOST_64BIT - uintptr_t m_uHijackedReturnValueFlags; // used on ARM64 only; however, ARM64 and AMD64 share field offsets + uintptr_t m_uHijackedReturnValueFlags; // used on ARM64 and UNIX only; however, ARM64 and AMD64 share field offsets #endif // HOST_64BIT PTR_ExInfo m_pExInfoStackHead; Object* m_threadAbortException; // ThreadAbortException instance -set only during thread abort @@ -88,10 +95,11 @@ struct ThreadBuffer uint64_t m_uPalThreadIdForLogging; // @TODO: likely debug-only EEThreadId m_threadId; PTR_VOID m_pThreadStressLog; // pointer to head of thread's StressLogChunks + NATIVE_CONTEXT* m_interruptedContext; // context for an asynchronously interrupted thread. #ifdef FEATURE_SUSPEND_REDIRECTION - uint8_t* m_redirectionContextBuffer; // storage for redirection context, allocated on demand - CONTEXT* m_redirectionContext; // legacy context somewhere inside the context buffer + uint8_t* m_redirectionContextBuffer; // storage for redirection context, allocated on demand #endif //FEATURE_SUSPEND_REDIRECTION + #ifdef FEATURE_GC_STRESS uint32_t m_uRand; // current per-thread random number #endif // FEATURE_GC_STRESS @@ -134,8 +142,11 @@ class Thread : private ThreadBuffer void ClearState(ThreadStateFlags flags); bool IsStateSet(ThreadStateFlags flags); - static UInt32_BOOL HijackCallback(HANDLE hThread, PAL_LIMITED_CONTEXT* pThreadContext, void* pCallbackContext); - bool InternalHijack(PAL_LIMITED_CONTEXT * pSuspendCtx, void * pvHijackTargets[]); + static void HijackCallback(NATIVE_CONTEXT* pThreadContext, void* pThreadToHijack); + void HijackReturnAddress(PAL_LIMITED_CONTEXT* pSuspendCtx, void * pvHijackTargets[]); + void HijackReturnAddress(NATIVE_CONTEXT* pSuspendCtx, void* pvHijackTargets[]); + void HijackReturnAddressWorker(StackFrameIterator* frameIterator, void* pvHijackTargets[]); + bool InlineSuspend(NATIVE_CONTEXT* interruptedContext); #ifdef FEATURE_SUSPEND_REDIRECTION bool Redirect(); @@ -146,9 +157,6 @@ class Thread : private ThreadBuffer void CrossThreadUnhijack(); void UnhijackWorker(); void EnsureRuntimeInitialized(); -#ifdef _DEBUG - bool DebugIsSuspended(); -#endif // // SyncState members @@ -176,7 +184,7 @@ class Thread : private ThreadBuffer bool GcScanRoots(GcScanRootsCallbackFunc * pfnCallback, void * token, PTR_PAL_LIMITED_CONTEXT pInitialContext); #endif - bool Hijack(); + void Hijack(); void Unhijack(); #ifdef FEATURE_GC_STRESS static void HijackForGcStress(PAL_LIMITED_CONTEXT * pSuspendCtx); @@ -263,8 +271,10 @@ class Thread : private ThreadBuffer Object* GetThreadStaticStorageForModule(uint32_t moduleIndex); bool SetThreadStaticStorageForModule(Object* pStorage, uint32_t moduleIndex); + NATIVE_CONTEXT* GetInterruptedContext(); + #ifdef FEATURE_SUSPEND_REDIRECTION - CONTEXT* GetRedirectionContext(); + NATIVE_CONTEXT* EnsureRedirectionContext(); #endif //FEATURE_SUSPEND_REDIRECTION }; diff --git a/src/coreclr/nativeaot/Runtime/threadstore.cpp b/src/coreclr/nativeaot/Runtime/threadstore.cpp index 
205aa4b423737f..46c10f7fbc5427 100644 --- a/src/coreclr/nativeaot/Runtime/threadstore.cpp +++ b/src/coreclr/nativeaot/Runtime/threadstore.cpp @@ -82,6 +82,9 @@ ThreadStore * ThreadStore::Create(RuntimeInstance * pRuntimeInstance) if (NULL == pNewThreadStore) return NULL; + if (!PalRegisterHijackCallback(Thread::HijackCallback)) + return NULL; + pNewThreadStore->m_pRuntimeInstance = pRuntimeInstance; pNewThreadStore.SuppressRelease(); @@ -229,14 +232,6 @@ void ThreadStore::SuspendAllThreads(bool waitForGCEvent) keepWaiting = true; pTargetThread->Hijack(); } - else if (pTargetThread->DangerousCrossThreadIsHijacked()) - { - // Once a thread is safely in preemptive mode, we must wait until it is also - // unhijacked. This is done because, otherwise, we might race on into the - // stackwalk and find the hijack still on the stack, which will cause the - // stackwalking code to crash. - keepWaiting = true; - } } END_FOREACH_THREAD @@ -251,6 +246,8 @@ void ThreadStore::SuspendAllThreads(bool waitForGCEvent) // too long (we probably don't need a 15ms wait here). Instead, we'll just burn some // cycles. // @TODO: need tuning for spin + // @TODO: need tuning for this whole loop as well. + // We are likely too aggressive with interruptions, which may result in longer pauses. YieldProcessorNormalizedForPreSkylakeCount(normalizationInfo, 10000); } } diff --git a/src/coreclr/nativeaot/Runtime/unix/HardwareExceptions.cpp b/src/coreclr/nativeaot/Runtime/unix/HardwareExceptions.cpp index 3c1ce8d13eedaa..f4bfc35840e57f 100644 --- a/src/coreclr/nativeaot/Runtime/unix/HardwareExceptions.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/HardwareExceptions.cpp @@ -8,9 +8,8 @@ #include "daccess.h" #include "regdisplay.h" #include "UnixContext.h" - -#include #include "HardwareExceptions.h" +#include "UnixSignals.h" #if !HAVE_SIGINFO_T #error Cannot handle hardware exceptions on this platform @@ -49,8 +48,6 @@ struct sigaction g_previousSIGSEGV; struct sigaction g_previousSIGFPE; -typedef void (*SignalHandler)(int code, siginfo_t *siginfo, void *context); - // Exception handler for hardware exceptions static PHARDWARE_EXCEPTION_HANDLER g_hardwareExceptionHandler = NULL; @@ -537,53 +534,6 @@ bool HardwareExceptionHandler(int code, siginfo_t *siginfo, void *context, void* return false; } -// Add handler for hardware exception signal -bool AddSignalHandler(int signal, SignalHandler handler, struct sigaction* previousAction) -{ - struct sigaction newAction; - - newAction.sa_flags = SA_RESTART; - newAction.sa_handler = NULL; - newAction.sa_sigaction = handler; - newAction.sa_flags |= SA_SIGINFO; - - sigemptyset(&newAction.sa_mask); - - if (sigaction(signal, NULL, previousAction) == -1) - { - ASSERT_UNCONDITIONALLY("Failed to get previous signal handler"); - return false; - } - - if (previousAction->sa_flags & SA_ONSTACK) - { - // If the previous signal handler uses an alternate stack, we need to use it too - // so that when we chain-call the previous handler, it is called on the kind of - // stack it expects. - // We also copy the signal mask to make sure that if some signals were blocked - // from execution on the alternate stack by the previous action, we honor that.
- newAction.sa_flags |= SA_ONSTACK; - newAction.sa_mask = previousAction->sa_mask; - } - - if (sigaction(signal, &newAction, previousAction) == -1) - { - ASSERT_UNCONDITIONALLY("Failed to install signal handler"); - return false; - } - - return true; -} - -// Restore original handler for hardware exception signal -void RestoreSignalHandler(int signal_id, struct sigaction *previousAction) -{ - if (-1 == sigaction(signal_id, previousAction, NULL)) - { - ASSERT_UNCONDITIONALLY("RestoreSignalHandler: sigaction() call failed"); - } -} - // Handler for the SIGSEGV signal void SIGSEGVHandler(int code, siginfo_t *siginfo, void *context) { diff --git a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp index aa5cfeb3cde21d..0c776a204dc57f 100644 --- a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp @@ -14,6 +14,8 @@ #include #include "gcenv.h" #include "holder.h" +#include "UnixSignals.h" +#include "UnixContext.h" #include "HardwareExceptions.h" #include "cgroupcpu.h" @@ -544,9 +546,13 @@ REDHAWK_PALEXPORT void REDHAWK_PALAPI PalSleep(uint32_t milliseconds) REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI __stdcall PalSwitchToThread() { - // sched_yield yields to another thread in the current process. This implementation - // won't work well for cross-process synchronization. - return sched_yield() == 0; + // sched_yield yields to another thread in the current process. + sched_yield(); + + // The return value of sched_yield indicates the success of the call and does not tell whether a context switch happened. + // On Linux sched_yield is documented as never failing. + // Since we do not know if there was a context switch, we will just return `false`. + return false; } extern "C" UInt32_BOOL CloseHandle(HANDLE handle) @@ -942,12 +948,76 @@ extern "C" uint16_t RtlCaptureStackBackTrace(uint32_t arg1, uint32_t arg2, void* return 0; } -typedef uint32_t (__stdcall *HijackCallback)(HANDLE hThread, _In_ PAL_LIMITED_CONTEXT* pThreadContext, _In_opt_ void* pCallbackContext); +static PalHijackCallback g_pHijackCallback; +static struct sigaction g_previousActivationHandler; -REDHAWK_PALEXPORT uint32_t REDHAWK_PALAPI PalHijack(HANDLE hThread, _In_ HijackCallback callback, _In_opt_ void* pCallbackContext) +static void ActivationHandler(int code, siginfo_t* siginfo, void* context) { - // UNIXTODO: Implement PalHijack - return E_FAIL; + // Only accept activations from the current process + if (g_pHijackCallback != NULL && (siginfo->si_pid == getpid() +#ifdef HOST_OSX + // On OSX si_pid is sometimes 0. It was confirmed by Apple to be expected, as the si_pid is tracked at the process level. So when multiple + // signals are in flight in the same process at the same time, it may be overwritten / zeroed. + || siginfo->si_pid == 0 +#endif + )) + { + // Make sure that errno is not modified + int savedErrNo = errno; + g_pHijackCallback((NATIVE_CONTEXT*)context, NULL); + errno = savedErrNo; + } + else + { + // Call the original handler when it is not ignored or default (terminate). 
+ if (g_previousActivationHandler.sa_flags & SA_SIGINFO) + { + _ASSERTE(g_previousActivationHandler.sa_sigaction != NULL); + g_previousActivationHandler.sa_sigaction(code, siginfo, context); + } + else + { + if (g_previousActivationHandler.sa_handler != SIG_IGN && + g_previousActivationHandler.sa_handler != SIG_DFL) + { + _ASSERTE(g_previousActivationHandler.sa_handler != NULL); + g_previousActivationHandler.sa_handler(code); + } + } + } +} + +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalRegisterHijackCallback(_In_ PalHijackCallback callback) +{ + ASSERT(g_pHijackCallback == NULL); + g_pHijackCallback = callback; + + return AddSignalHandler(INJECT_ACTIVATION_SIGNAL, ActivationHandler, &g_previousActivationHandler); +} + +REDHAWK_PALEXPORT void REDHAWK_PALAPI PalHijack(HANDLE hThread, _In_opt_ void* pThreadToHijack) +{ + ThreadUnixHandle* threadHandle = (ThreadUnixHandle*)hThread; + int status = pthread_kill(*threadHandle->GetObject(), INJECT_ACTIVATION_SIGNAL); + // We can get EAGAIN when printing a stack overflow stack trace and when other threads hit + // stack overflow too. Those are held in the sigsegv_handler with blocked signals until + // the process exits. + +#ifdef __APPLE__ + // On Apple, signals are not allowed to be sent to dispatch queue threads via pthread_kill + if (status == ENOTSUP) + { + return; + } +#endif + + if ((status != 0) && (status != EAGAIN)) + { + // Failure to send the signal is fatal. There are only two cases when sending + // the signal can fail: first, if the signal ID is invalid, and second, + // if the thread doesn't exist anymore. + abort(); + } } extern "C" uint32_t WaitForSingleObjectEx(HANDLE handle, uint32_t milliseconds, UInt32_BOOL alertable) diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp index 7db90464aa26f8..d18b2edad59036 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp @@ -12,10 +12,6 @@ #include -#if HAVE_UCONTEXT_T -#include <ucontext.h> -#endif // HAVE_UCONTEXT_T - #include "UnixContext.h" #include "UnwindHelpers.h" @@ -51,11 +47,25 @@ int unw_get_save_loc(unw_cursor_t*, int, unw_save_loc_t*) #ifdef HOST_ARM64 -#define MCREG_Pc(mc) ((mc)->__ss.__pc) -#define MCREG_Sp(mc) ((mc)->__ss.__sp) -#define MCREG_Lr(mc) ((mc)->__ss.__lr) #define MCREG_X0(mc) ((mc)->__ss.__x[0]) #define MCREG_X1(mc) ((mc)->__ss.__x[1]) +#define MCREG_X2(mc) ((mc)->__ss.__x[2]) +#define MCREG_X3(mc) ((mc)->__ss.__x[3]) +#define MCREG_X4(mc) ((mc)->__ss.__x[4]) +#define MCREG_X5(mc) ((mc)->__ss.__x[5]) +#define MCREG_X6(mc) ((mc)->__ss.__x[6]) +#define MCREG_X7(mc) ((mc)->__ss.__x[7]) +#define MCREG_X8(mc) ((mc)->__ss.__x[8]) +#define MCREG_X9(mc) ((mc)->__ss.__x[9]) +#define MCREG_X10(mc) ((mc)->__ss.__x[10]) +#define MCREG_X11(mc) ((mc)->__ss.__x[11]) +#define MCREG_X12(mc) ((mc)->__ss.__x[12]) +#define MCREG_X13(mc) ((mc)->__ss.__x[13]) +#define MCREG_X14(mc) ((mc)->__ss.__x[14]) +#define MCREG_X15(mc) ((mc)->__ss.__x[15]) +#define MCREG_X16(mc) ((mc)->__ss.__x[16]) +#define MCREG_X17(mc) ((mc)->__ss.__x[17]) +#define MCREG_X18(mc) ((mc)->__ss.__x[18]) #define MCREG_X19(mc) ((mc)->__ss.__x[19]) #define MCREG_X20(mc) ((mc)->__ss.__x[20]) #define MCREG_X21(mc) ((mc)->__ss.__x[21]) @@ -67,6 +77,9 @@ int unw_get_save_loc(unw_cursor_t*, int, unw_save_loc_t*) #define MCREG_X27(mc) ((mc)->__ss.__x[27]) #define MCREG_X28(mc) ((mc)->__ss.__x[28]) #define MCREG_Fp(mc) ((mc)->__ss.__fp) +#define MCREG_Lr(mc) ((mc)->__ss.__lr) +#define MCREG_Sp(mc) ((mc)->__ss.__sp)
+#define MCREG_Pc(mc) ((mc)->__ss.__pc) #elif HOST_AMD64 // HOST_ARM64 @@ -172,11 +185,25 @@ int unw_get_save_loc(unw_cursor_t*, int, unw_save_loc_t*) #if defined(HOST_ARM64) -#define MCREG_Pc(mc) ((mc).pc) -#define MCREG_Sp(mc) ((mc).sp) -#define MCREG_Lr(mc) ((mc).regs[30]) #define MCREG_X0(mc) ((mc).regs[0]) #define MCREG_X1(mc) ((mc).regs[1]) +#define MCREG_X2(mc) ((mc).regs[2]) +#define MCREG_X3(mc) ((mc).regs[3]) +#define MCREG_X4(mc) ((mc).regs[4]) +#define MCREG_X5(mc) ((mc).regs[5]) +#define MCREG_X6(mc) ((mc).regs[6]) +#define MCREG_X7(mc) ((mc).regs[7]) +#define MCREG_X8(mc) ((mc).regs[8]) +#define MCREG_X9(mc) ((mc).regs[9]) +#define MCREG_X10(mc) ((mc).regs[10]) +#define MCREG_X11(mc) ((mc).regs[11]) +#define MCREG_X12(mc) ((mc).regs[12]) +#define MCREG_X13(mc) ((mc).regs[13]) +#define MCREG_X14(mc) ((mc).regs[14]) +#define MCREG_X15(mc) ((mc).regs[15]) +#define MCREG_X16(mc) ((mc).regs[16]) +#define MCREG_X17(mc) ((mc).regs[17]) +#define MCREG_X18(mc) ((mc).regs[18]) #define MCREG_X19(mc) ((mc).regs[19]) #define MCREG_X20(mc) ((mc).regs[20]) #define MCREG_X21(mc) ((mc).regs[21]) @@ -188,6 +215,9 @@ int unw_get_save_loc(unw_cursor_t*, int, unw_save_loc_t*) #define MCREG_X27(mc) ((mc).regs[27]) #define MCREG_X28(mc) ((mc).regs[28]) #define MCREG_Fp(mc) ((mc).regs[29]) +#define MCREG_Lr(mc) ((mc).regs[30]) +#define MCREG_Sp(mc) ((mc).sp) +#define MCREG_Pc(mc) ((mc).pc) #else @@ -634,7 +664,7 @@ uint64_t GetPC(void* context) #endif // HOST_AMD64 // Find LSDA and start address for a function at address controlPC -bool FindProcInfo(uintptr_t controlPC, uintptr_t* startAddress, uintptr_t* lsda) +bool FindProcInfo(uintptr_t controlPC, uintptr_t* startAddress, uintptr_t* endAddress, uintptr_t* lsda) { unw_proc_info_t procInfo; @@ -652,6 +682,7 @@ bool FindProcInfo(uintptr_t controlPC, uintptr_t* startAddress, uintptr_t* lsda) *lsda = procInfo.lsda; #endif *startAddress = procInfo.start_ip; + *endAddress = procInfo.end_ip; return true; } @@ -661,3 +692,63 @@ bool VirtualUnwind(REGDISPLAY* pRegisterSet) { return UnwindHelpers::StepFrame(pRegisterSet); } + +#ifdef TARGET_ARM64 + + uint64_t& UNIX_CONTEXT::X0() { return (uint64_t&)MCREG_X0(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X1() { return (uint64_t&)MCREG_X1(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X2() { return (uint64_t&)MCREG_X2(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X3() { return (uint64_t&)MCREG_X3(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X4() { return (uint64_t&)MCREG_X4(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X5() { return (uint64_t&)MCREG_X5(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X6() { return (uint64_t&)MCREG_X6(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X7() { return (uint64_t&)MCREG_X7(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X8() { return (uint64_t&)MCREG_X8(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X9() { return (uint64_t&)MCREG_X9(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X10() { return (uint64_t&)MCREG_X10(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X11() { return (uint64_t&)MCREG_X11(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X12() { return (uint64_t&)MCREG_X12(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X13() { return (uint64_t&)MCREG_X13(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X14() { return (uint64_t&)MCREG_X14(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X15() { return (uint64_t&)MCREG_X15(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X16() { return (uint64_t&)MCREG_X16(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X17() { return 
(uint64_t&)MCREG_X17(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X18() { return (uint64_t&)MCREG_X18(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X19() { return (uint64_t&)MCREG_X19(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X20() { return (uint64_t&)MCREG_X20(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X21() { return (uint64_t&)MCREG_X21(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X22() { return (uint64_t&)MCREG_X22(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X23() { return (uint64_t&)MCREG_X23(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X24() { return (uint64_t&)MCREG_X24(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X25() { return (uint64_t&)MCREG_X25(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X26() { return (uint64_t&)MCREG_X26(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X27() { return (uint64_t&)MCREG_X27(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::X28() { return (uint64_t&)MCREG_X28(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::Fp() { return (uint64_t&)MCREG_Fp(ctx.uc_mcontext); } // X29 + uint64_t& UNIX_CONTEXT::Lr() { return (uint64_t&)MCREG_Lr(ctx.uc_mcontext); } // X30 + uint64_t& UNIX_CONTEXT::Sp() { return (uint64_t&)MCREG_Sp(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::Pc() { return (uint64_t&)MCREG_Pc(ctx.uc_mcontext); } + +#elif defined(TARGET_AMD64) + uint64_t& UNIX_CONTEXT::Rax(){ return (uint64_t&)MCREG_Rax(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::Rcx(){ return (uint64_t&)MCREG_Rcx(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::Rdx(){ return (uint64_t&)MCREG_Rdx(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::Rbx(){ return (uint64_t&)MCREG_Rbx(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::Rsp(){ return (uint64_t&)MCREG_Rsp(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::Rbp(){ return (uint64_t&)MCREG_Rbp(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::Rsi(){ return (uint64_t&)MCREG_Rsi(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::Rdi(){ return (uint64_t&)MCREG_Rdi(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R8(){ return (uint64_t&)MCREG_R8(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R9(){ return (uint64_t&)MCREG_R9(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R10(){ return (uint64_t&)MCREG_R10(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R11(){ return (uint64_t&)MCREG_R11(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R12(){ return (uint64_t&)MCREG_R12(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R13(){ return (uint64_t&)MCREG_R13(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R14(){ return (uint64_t&)MCREG_R14(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R15(){ return (uint64_t&)MCREG_R15(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::Rip(){ return (uint64_t&)MCREG_Rip(ctx.uc_mcontext); } + +#else + PORTABILITY_ASSERT("UNIX_CONTEXT"); +#endif // TARGET_ARM64 + diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixContext.h b/src/coreclr/nativeaot/Runtime/unix/UnixContext.h index dec744a7282d09..112dfcf86ad9d5 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixContext.h +++ b/src/coreclr/nativeaot/Runtime/unix/UnixContext.h @@ -4,13 +4,15 @@ #ifndef __UNIX_CONTEXT_H__ #define __UNIX_CONTEXT_H__ +#include <ucontext.h> + // Convert Unix native context to PAL_LIMITED_CONTEXT void NativeContextToPalContext(const void* context, PAL_LIMITED_CONTEXT* palContext); // Redirect Unix native context to the PAL_LIMITED_CONTEXT and also set the first two argument registers void RedirectNativeContext(void* context, const PAL_LIMITED_CONTEXT* palContext, uintptr_t arg0Reg, uintptr_t arg1Reg); // Find LSDA and start address for a function at address controlPC -bool
FindProcInfo(uintptr_t controlPC, uintptr_t* startAddress, uintptr_t* lsda); +bool FindProcInfo(uintptr_t controlPC, uintptr_t* startAddress, uintptr_t* endAddress, uintptr_t* lsda); // Virtually unwind stack to the caller of the context specified by the REGDISPLAY bool VirtualUnwind(REGDISPLAY* pRegisterSet); @@ -22,4 +24,73 @@ uint64_t GetRegisterValueByIndex(void* context, uint32_t index); uint64_t GetPC(void* context); #endif // HOST_AMD64 +struct UNIX_CONTEXT +{ + ucontext_t ctx; + +#ifdef TARGET_ARM64 + + uint64_t& X0(); + uint64_t& X1(); + uint64_t& X2(); + uint64_t& X3(); + uint64_t& X4(); + uint64_t& X5(); + uint64_t& X6(); + uint64_t& X7(); + uint64_t& X8(); + uint64_t& X9(); + uint64_t& X10(); + uint64_t& X11(); + uint64_t& X12(); + uint64_t& X13(); + uint64_t& X14(); + uint64_t& X15(); + uint64_t& X16(); + uint64_t& X17(); + uint64_t& X18(); + uint64_t& X19(); + uint64_t& X20(); + uint64_t& X21(); + uint64_t& X22(); + uint64_t& X23(); + uint64_t& X24(); + uint64_t& X25(); + uint64_t& X26(); + uint64_t& X27(); + uint64_t& X28(); + uint64_t& Fp(); // X29 + uint64_t& Lr(); // X30 + uint64_t& Sp(); + uint64_t& Pc(); + + uintptr_t GetIp() { return (uintptr_t)Pc(); } + uintptr_t GetSp() { return (uintptr_t)Sp(); } + +#elif defined(TARGET_AMD64) + uint64_t& Rax(); + uint64_t& Rcx(); + uint64_t& Rdx(); + uint64_t& Rbx(); + uint64_t& Rsp(); + uint64_t& Rbp(); + uint64_t& Rsi(); + uint64_t& Rdi(); + uint64_t& R8 (); + uint64_t& R9 (); + uint64_t& R10(); + uint64_t& R11(); + uint64_t& R12(); + uint64_t& R13(); + uint64_t& R14(); + uint64_t& R15(); + uint64_t& Rip(); + + uintptr_t GetIp() { return (uintptr_t)Rip(); } + uintptr_t GetSp() { return (uintptr_t)Rsp(); } +#else + PORTABILITY_ASSERT("UNIX_CONTEXT"); +#endif // TARGET_ARM64 +}; + #endif // __UNIX_CONTEXT_H__ diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp index 4aa8e9ab7343ff..d35423e66e7473 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp @@ -63,10 +63,10 @@ bool UnixNativeCodeManager::FindMethodInfo(PTR_VOID ControlPC, } UnixNativeMethodInfo * pMethodInfo = (UnixNativeMethodInfo *)pMethodInfoOut; - uintptr_t startAddress; + uintptr_t startAddress, endAddress; uintptr_t lsda; - if (!FindProcInfo((uintptr_t)ControlPC, &startAddress, &lsda)) + if (!FindProcInfo((uintptr_t)ControlPC, &startAddress, &endAddress, &lsda)) { return false; } @@ -127,19 +127,9 @@ PTR_VOID UnixNativeCodeManager::GetFramePointer(MethodInfo * pMethodInfo, return NULL; } -bool UnixNativeCodeManager::IsSafePoint(PTR_VOID pvAddress) +uint32_t UnixNativeCodeManager::GetCodeOffset(MethodInfo* pMethodInfo, PTR_VOID address, /*out*/ PTR_UInt8* gcInfo) { - // @TODO: IsSafePoint - return false; -} - -void UnixNativeCodeManager::EnumGcRefs(MethodInfo * pMethodInfo, - PTR_VOID safePointAddress, - REGDISPLAY * pRegisterSet, - GCEnumContext * hCallback, - bool isActiveStackFrame) -{ - UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; + UnixNativeMethodInfo* pNativeMethodInfo = (UnixNativeMethodInfo*)pMethodInfo; PTR_UInt8 p = pNativeMethodInfo->pMainLSDA; @@ -151,23 +141,71 @@ void UnixNativeCodeManager::EnumGcRefs(MethodInfo * pMethodInfo, if ((unwindBlockFlags & UBF_FUNC_HAS_EHINFO) != 0) p += sizeof(int32_t); - uint32_t codeOffset = (uint32_t)(PINSTRToPCODE(dac_cast<TADDR>(safePointAddress)) - PINSTRToPCODE(dac_cast<TADDR>(pNativeMethodInfo->pMethodStartAddress))); + *gcInfo =
p; + + uint32_t codeOffset = (uint32_t)(PINSTRToPCODE(dac_cast<TADDR>(address)) - PINSTRToPCODE(dac_cast<TADDR>(pNativeMethodInfo->pMethodStartAddress))); + return codeOffset; +} + +bool UnixNativeCodeManager::IsSafePoint(PTR_VOID pvAddress) +{ + MethodInfo pMethodInfo; + if (!FindMethodInfo(pvAddress, &pMethodInfo)) + { + return false; + } + + PTR_UInt8 gcInfo; + uint32_t codeOffset = GetCodeOffset(&pMethodInfo, pvAddress, &gcInfo); GcInfoDecoder decoder( - GCInfoToken(p), + GCInfoToken(gcInfo), + GcInfoDecoderFlags(DECODE_INTERRUPTIBILITY), + codeOffset + ); + + return decoder.IsInterruptible(); +} + +void UnixNativeCodeManager::EnumGcRefs(MethodInfo * pMethodInfo, + PTR_VOID safePointAddress, + REGDISPLAY * pRegisterSet, + GCEnumContext * hCallback, + bool isActiveStackFrame) +{ + PTR_UInt8 gcInfo; + uint32_t codeOffset = GetCodeOffset(pMethodInfo, safePointAddress, &gcInfo); + + if (!isActiveStackFrame) + { + // If we are not in the active method, we are currently pointing + // to the return address. That may not be reachable after a call (if the call does not return) + // or may be reachable via a jump and thus have a different live set. + // Therefore we simply adjust the offset to be inside of the call instruction. + // NOTE: The GcInfoDecoder depends on this; if you change it, you must + // revisit the GcInfoEncoder/Decoder + codeOffset--; + } + + GcInfoDecoder decoder( + GCInfoToken(gcInfo), GcInfoDecoderFlags(DECODE_GC_LIFETIMES | DECODE_SECURITY_OBJECT | DECODE_VARARG), - codeOffset - 1 // TODO: isActiveStackFrame + codeOffset ); ICodeManagerFlags flags = (ICodeManagerFlags)0; - if (pNativeMethodInfo->executionAborted) + if (((UnixNativeMethodInfo*)pMethodInfo)->executionAborted) flags = ICodeManagerFlags::ExecutionAborted; + if (IsFilter(pMethodInfo)) flags = (ICodeManagerFlags)(flags | ICodeManagerFlags::NoReportUntracked); + if (isActiveStackFrame) + flags = (ICodeManagerFlags)(flags | ICodeManagerFlags::ActiveStackFrame); + if (!decoder.EnumerateLiveSlots( pRegisterSet, - false /* reportScratchSlots */, + isActiveStackFrame /* reportScratchSlots */, flags, hCallback->pCallback, hCallback @@ -281,7 +319,7 @@ bool UnixNativeCodeManager::UnwindStackFrame(MethodInfo * pMethodInfo, *ppPreviousTransitionFrame = NULL; - if (!VirtualUnwind(pRegisterSet)) + if (!VirtualUnwind(pRegisterSet)) { return false; } @@ -289,13 +327,316 @@ bool UnixNativeCodeManager::UnwindStackFrame(MethodInfo * pMethodInfo, return true; } +bool UnixNativeCodeManager::IsUnwindable(PTR_VOID pvAddress) +{ + // VirtualUnwind can't unwind epilogues. + return TrailingEpilogueInstructionsCount(pvAddress) == 0; +} + +#define SIZE64_PREFIX 0x48 +#define ADD_IMM8_OP 0x83 +#define ADD_IMM32_OP 0x81 +#define JMP_IMM8_OP 0xeb +#define JMP_IMM32_OP 0xe9 +#define JMP_IND_OP 0xff +#define LEA_OP 0x8d +#define REPNE_PREFIX 0xf2 +#define REP_PREFIX 0xf3 +#define POP_OP 0x58 +#define RET_OP 0xc3 +#define RET_OP_2 0xc2 +#define INT3_OP 0xcc + +#define IS_REX_PREFIX(x) (((x) & 0xf0) == 0x40) + +// When stopped in an epilogue, returns the count of remaining stack-consuming instructions. +// Otherwise returns: +// 0 - not in an epilogue, +// -1 - unknown. +int UnixNativeCodeManager::TrailingEpilogueInstructionsCount(PTR_VOID pvAddress) +{ +#ifdef TARGET_AMD64 + + // + // Everything below is inspired by the code in minkernel\ntos\rtl\amd64\exdsptch.c file from Windows + // For details see similar code in OOPStackUnwinderAMD64::UnwindEpilogue + // + // + // + // A canonical epilogue sequence consists of the following operations: + // + // 1.
Optional cleanup of fixed and dynamic stack allocations, which is + // considered to be outside of the epilogue region. + // + // add rsp, imm + // or + // lea rsp, disp[fp] + // + // 2. Zero or more pop nonvolatile-integer-register[0..15] instructions. + // + // pop r64 + // or + // REX.R pop r64 + // + // 3. An optional one-byte pop r64 to a volatile register to clean up an + // RFLAGS register pushed with pushfq. + // + // pop rcx + // + // 4. A control transfer instruction (ret or jump, in a case of a tailcall) + // For the purpose of inferring the state of the stack, ret and jump can be + // considered the same. + // + // ret 0 + // or + // jmp imm + // or + // jmp [target] + // + // 5. Occasionally we may see a breakpoint, possibly placed by the debugger. + // In such case we do not know what instruction it was and return -1 (unknown) + // + // int 3 + // + + // if we are in an epilogue, there will be at least one instruction left. + int trailingEpilogueInstructions = 1; + uint8_t* pNextByte = (uint8_t*)pvAddress; + + // + // Check for any number of: + // + // pop nonvolatile-integer-register[0..15]. + // + + while (true) + { + if ((pNextByte[0] & 0xf8) == POP_OP) + { + pNextByte += 1; + trailingEpilogueInstructions++; + } + else if (IS_REX_PREFIX(pNextByte[0]) && ((pNextByte[1] & 0xf8) == POP_OP)) + { + pNextByte += 2; + trailingEpilogueInstructions++; + } + else + { + break; + } + } + + // + // A REPNE prefix may optionally precede a control transfer + // instruction with no effect on unwinding. + // + + if (pNextByte[0] == REPNE_PREFIX) + { + pNextByte += 1; + } + + if (((pNextByte[0] == RET_OP) || + (pNextByte[0] == RET_OP_2)) || + (((pNextByte[0] == REP_PREFIX) && (pNextByte[1] == RET_OP)))) + { + // + // A return is an unambiguous indication of an epilogue. + // + return trailingEpilogueInstructions; + } + + if ((pNextByte[0] == JMP_IMM8_OP) || + (pNextByte[0] == JMP_IMM32_OP)) + { + // + // An unconditional branch to a target that is equal to the start of + // or outside of this routine is logically a call to another function. + // + + size_t branchTarget = (size_t)pNextByte; + if (pNextByte[0] == JMP_IMM8_OP) + { + branchTarget += 2 + (int8_t)pNextByte[1]; + } + else + { + uint32_t delta = + (uint32_t)pNextByte[1] | + ((uint32_t)pNextByte[2] << 8) | + ((uint32_t)pNextByte[3] << 16) | + ((uint32_t)pNextByte[4] << 24); + + branchTarget += 5 + (int32_t)delta; + } + + // + // Determine whether the branch target refers to code within this + // function. If not, then it is an epilogue indicator. + // + // A branch to the start of self implies a recursive call, so + // is treated as an epilogue. + // + + size_t startAddress; + size_t endAddress; + uintptr_t lsda; + + bool result = FindProcInfo((uintptr_t)pvAddress, &startAddress, &endAddress, &lsda); + ASSERT(result); + + if (branchTarget < startAddress || branchTarget >= endAddress) + { + return trailingEpilogueInstructions; + } + } + else if ((pNextByte[0] == JMP_IND_OP) && (pNextByte[1] == 0x25)) + { + // + // An unconditional jump indirect. + // + // This is a jmp outside of the function, probably a tail call + // to an import function. + // + + return trailingEpilogueInstructions; + } + else if (((pNextByte[0] & 0xf8) == SIZE64_PREFIX) && + (pNextByte[1] == 0xff) && + (pNextByte[2] & 0x38) == 0x20) + { + // + // This is an indirect jump opcode: 0x48 0xff /4. The 64-bit + // flag (REX.W) is always redundant here, so its presence is + // overloaded to indicate a branch out of the function - a tail + // call. 
+ // + // Such an opcode is an unambiguous epilogue indication. + // + + return trailingEpilogueInstructions; + } + else if (pNextByte[0] == INT3_OP) + { + // + // A breakpoint, possibly placed by the debugger - we do not know what was here. + // + return -1; + } + +#endif + + return 0; +} + +// Convert the return kind that was encoded by RyuJIT to the +// enum used by the runtime. +GCRefKind GetGcRefKind(ReturnKind returnKind) +{ + ASSERT((returnKind >= RT_Scalar) && (returnKind <= RT_ByRef_ByRef)); + + return (GCRefKind)returnKind; +} + bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet, // in PTR_PTR_VOID * ppvRetAddrLocation, // out GCRefKind * pRetValueKind) // out { - // @TODO: GetReturnAddressHijackInfo + UnixNativeMethodInfo* pNativeMethodInfo = (UnixNativeMethodInfo*)pMethodInfo; + + PTR_UInt8 p = pNativeMethodInfo->pLSDA; + + uint8_t unwindBlockFlags = *p++; + + if ((unwindBlockFlags & UBF_FUNC_HAS_ASSOCIATED_DATA) != 0) + p += sizeof(int32_t); + + // Check whether this is a funclet + if ((unwindBlockFlags & UBF_FUNC_KIND_MASK) != UBF_FUNC_KIND_ROOT) + return false; + + // Skip hijacking a reverse-pinvoke method - it doesn't get us much because we already synchronize + // with the GC on the way back to native code. + if ((unwindBlockFlags & UBF_FUNC_REVERSE_PINVOKE) != 0) + return false; + + if ((unwindBlockFlags & UBF_FUNC_HAS_EHINFO) != 0) + p += sizeof(int32_t); + + // Decode the GC info for the current method to determine its return type + GcInfoDecoderFlags flags = DECODE_RETURN_KIND; +#if defined(TARGET_ARM) || defined(TARGET_ARM64) + flags = (GcInfoDecoderFlags)(flags | DECODE_HAS_TAILCALLS); +#endif // TARGET_ARM || TARGET_ARM64 + + GcInfoDecoder decoder(GCInfoToken(p), flags); + *pRetValueKind = GetGcRefKind(decoder.GetReturnKind()); + + int epilogueInstructions = TrailingEpilogueInstructionsCount((PTR_VOID)pRegisterSet->IP); + if (epilogueInstructions < 0) + { + // cannot determine, possibly a breakpoint instruction + return false; + } + else if (epilogueInstructions > 0) + { + *ppvRetAddrLocation = (PTR_PTR_VOID)(pRegisterSet->GetSP() + (sizeof(TADDR) * (epilogueInstructions - 1))); + return true; + } + + ASSERT(IsUnwindable((PTR_VOID)pRegisterSet->IP)); + + // Unwind the current method context to the caller's context to get its stack pointer + // and obtain the location of the return address on the stack +#if defined(TARGET_AMD64) + + if (!VirtualUnwind(pRegisterSet)) + { + return false; + } + + *ppvRetAddrLocation = (PTR_PTR_VOID)(pRegisterSet->GetSP() - sizeof(TADDR)); + return true; + +#elif defined(TARGET_ARM64) + + if (decoder.HasTailCalls()) + { + // Do not hijack functions that have tail calls, since there are two problems: + // 1. When a function that tail calls another one is hijacked, the LR may be + // stored at a different location in the stack frame of the tail call target. + // So just by performing the tail call, the hijacked location becomes invalid and + // unhijacking would corrupt the stack by writing to that location. + // 2. There is a small window after the caller pops LR from the stack in its + // epilog and before the tail called function pushes LR in its prolog when + // the hijacked return address would not be on the stack and so we would + // not be able to unhijack.
+ return false; + } + + PTR_UIntNative pLR = pRegisterSet->pLR; + if (!VirtualUnwind(pRegisterSet)) + { + return false; + } + + if (pRegisterSet->pLR == pLR) + { + // This is the case when we are either: + // + // 1) In a leaf method that does not push LR on stack, OR + // 2) In the prolog/epilog of a non-leaf method that has not yet pushed LR on stack + // or has LR already popped off. + return false; + } + + *ppvRetAddrLocation = (PTR_PTR_VOID)pRegisterSet->pLR; + return true; +#else return false; +#endif // defined(TARGET_AMD64) } PTR_VOID UnixNativeCodeManager::RemapHardwareFaultToGCSafePoint(MethodInfo * pMethodInfo, PTR_VOID controlPC) diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.h b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.h index 709bd381e06f24..7257b209129e78 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.h +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.h @@ -34,6 +34,8 @@ class UnixNativeCodeManager : public ICodeManager PTR_VOID GetFramePointer(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet); + uint32_t GetCodeOffset(MethodInfo* pMethodInfo, PTR_VOID address, PTR_UInt8* gcInfo); + bool IsSafePoint(PTR_VOID pvAddress); void EnumGcRefs(MethodInfo * pMethodInfo, @@ -49,6 +51,10 @@ class UnixNativeCodeManager : public ICodeManager uintptr_t GetConservativeUpperBoundForOutgoingArgs(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet); + bool IsUnwindable(PTR_VOID pvAddress); + + int TrailingEpilogueInstructionsCount(PTR_VOID pvAddress); + bool GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet, // in PTR_PTR_VOID * ppvRetAddrLocation, // out diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixSignals.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixSignals.cpp new file mode 100644 index 00000000000000..33852920653ff6 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/unix/UnixSignals.cpp @@ -0,0 +1,56 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include "CommonTypes.h" +#include "PalRedhawkCommon.h" +#include "CommonMacros.h" +#include "config.h" + +#include "UnixSignals.h" + +// Add handler for hardware exception signal +bool AddSignalHandler(int signal, SignalHandler handler, struct sigaction* previousAction) +{ + struct sigaction newAction; + + newAction.sa_flags = SA_RESTART; + newAction.sa_handler = NULL; + newAction.sa_sigaction = handler; + newAction.sa_flags |= SA_SIGINFO; + + sigemptyset(&newAction.sa_mask); + + if (sigaction(signal, NULL, previousAction) == -1) + { + ASSERT_UNCONDITIONALLY("Failed to get previous signal handler"); + return false; + } + + if (previousAction->sa_flags & SA_ONSTACK) + { + // If the previous signal handler uses an alternate stack, we need to use it too + // so that when we chain-call the previous handler, it is called on the kind of + // stack it expects. + // We also copy the signal mask to make sure that if some signals were blocked + // from execution on the alternate stack by the previous action, we honor that. 
+ newAction.sa_flags |= SA_ONSTACK; + newAction.sa_mask = previousAction->sa_mask; + } + + if (sigaction(signal, &newAction, previousAction) == -1) + { + ASSERT_UNCONDITIONALLY("Failed to install signal handler"); + return false; + } + + return true; +} + +// Restore original handler for hardware exception signal +void RestoreSignalHandler(int signal_id, struct sigaction *previousAction) +{ + if (-1 == sigaction(signal_id, previousAction, NULL)) + { + ASSERT_UNCONDITIONALLY("RestoreSignalHandler: sigaction() call failed"); + } +} diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixSignals.h b/src/coreclr/nativeaot/Runtime/unix/UnixSignals.h new file mode 100644 index 00000000000000..60e08d461e3830 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/unix/UnixSignals.h @@ -0,0 +1,20 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#ifndef __UNIX_SIGNALS_H__ +#define __UNIX_SIGNALS_H__ + +#include <signal.h> + +#ifdef SIGRTMIN +#define INJECT_ACTIVATION_SIGNAL SIGRTMIN +#else +#define INJECT_ACTIVATION_SIGNAL SIGUSR1 +#endif + +typedef void (*SignalHandler)(int code, siginfo_t* siginfo, void* context); + +bool AddSignalHandler(int signal, SignalHandler handler, struct sigaction* previousAction); +void RestoreSignalHandler(int signal_id, struct sigaction* previousAction); + +#endif // __UNIX_SIGNALS_H__ diff --git a/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp b/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp index 7fe9b91637a65c..6fdd94d6bd0c8e 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp @@ -58,6 +58,7 @@ struct Registers_REGDISPLAY : REGDISPLAY switch (regNum) { case UNW_REG_IP: + case UNW_X86_64_RIP: return IP; case UNW_REG_SP: return SP; @@ -104,6 +105,7 @@ struct Registers_REGDISPLAY : REGDISPLAY switch (regNum) { case UNW_REG_IP: + case UNW_X86_64_RIP: IP = value; pIP = (PTR_PCODE)location; return; @@ -178,7 +180,7 @@ struct Registers_REGDISPLAY : REGDISPLAY return true; if (regNum < 0) return false; - if (regNum > 15) + if (regNum > 16) return false; return true; } diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc index 47e0e386df5246..260a2ca533dc4b 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosamd64.inc @@ -252,6 +252,9 @@ C_FUNC(\Name): #define TSF_SuppressGcStress 0x08 #define TSF_DoNotTriggerGc 0x10 +// This must match HwExceptionCode.STATUS_REDHAWK_THREAD_ABORT +#define STATUS_REDHAWK_THREAD_ABORT 0x43 + // // Rename fields of nested structs // @@ -269,11 +272,10 @@ C_FUNC(\Name): #define PTFF_SAVE_R15 0x00000080 #define PTFF_SAVE_ALL_PRESERVED 0x000000F1 // NOTE: RBP is not included in this set!
#define PTFF_SAVE_RSP 0x00008000 -#define PTFF_SAVE_RAX 0x00000100 // RAX is saved if it contains a GC ref and we're in hijack handler +#define PTFF_SAVE_RAX 0x00000100 // RAX is saved in hijack handler - in case it contains a GC ref +#define PTFF_SAVE_RDX 0x00000400 // RDX is saved in hijack handler - in case it contains a GC ref #define PTFF_SAVE_ALL_SCRATCH 0x00007F00 -#define PTFF_RAX_IS_GCREF 0x00010000 // iff PTFF_SAVE_RAX: set -> eax is Object, clear -> eax is scalar -#define PTFF_RAX_IS_BYREF 0x00020000 // iff PTFF_SAVE_RAX: set -> eax is ByRef, clear -> eax is Object or scalar -#define PTFF_THREAD_ABORT 0x00040000 // indicates that ThreadAbortException should be thrown when returning from the transition +#define PTFF_THREAD_ABORT 0x00100000 // indicates that ThreadAbortException should be thrown when returning from the transition // These must match the TrapThreadsFlags enum #define TrapThreadsFlags_None 0 @@ -318,7 +320,7 @@ C_FUNC(\Name): DEFAULT_FRAME_SAVE_FLAGS = PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_RSP .macro PUSH_COOP_PINVOKE_FRAME trashReg - push_nonvol_reg rbp // push RBP frame + push_nonvol_reg rbp // push RBP frame // TODO: do we need this? It is not done on Windows. mov rbp, rsp lea \trashReg, [rsp + 0x10] push_register \trashReg // save caller's RSP diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp index 7738a7455c757c..70ec2175fe79a9 100644 --- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp @@ -676,14 +676,21 @@ bool CoffNativeCodeManager::UnwindStackFrame(MethodInfo * pMethodInfo, return true; } +bool CoffNativeCodeManager::IsUnwindable(PTR_VOID pvAddress) +{ + // RtlVirtualUnwind can always unwind. + return true; +} + // Convert the return kind that was encoded by RyuJIT to the // enum used by the runtime.
GCRefKind GetGcRefKind(ReturnKind returnKind) { - static_assert((GCRefKind)ReturnKind::RT_Scalar == GCRK_Scalar, "ReturnKind::RT_Scalar does not match GCRK_Scalar"); - static_assert((GCRefKind)ReturnKind::RT_Object == GCRK_Object, "ReturnKind::RT_Object does not match GCRK_Object"); - static_assert((GCRefKind)ReturnKind::RT_ByRef == GCRK_Byref, "ReturnKind::RT_ByRef does not match GCRK_Byref"); - ASSERT((returnKind == RT_Scalar) || (returnKind == GCRK_Object) || (returnKind == GCRK_Byref)); +#ifdef TARGET_ARM64 + ASSERT((returnKind >= RT_Scalar) && (returnKind <= RT_ByRef_ByRef)); +#else + ASSERT((returnKind >= RT_Scalar) && (returnKind <= RT_ByRef)); +#endif return (GCRefKind)returnKind; } @@ -724,7 +731,7 @@ bool CoffNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn #endif // TARGET_ARM || TARGET_ARM64 GcInfoDecoder decoder(GCInfoToken(p), flags); - GCRefKind gcRefKind = GetGcRefKind(decoder.GetReturnKind()); + *pRetValueKind = GetGcRefKind(decoder.GetReturnKind()); // Unwind the current method context to the caller's context to get its stack pointer // and obtain the location of the return address on the stack @@ -750,7 +757,6 @@ bool CoffNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn NULL); *ppvRetAddrLocation = (PTR_PTR_VOID)(context.Rsp - sizeof (PVOID)); - *pRetValueKind = gcRefKind; return true; #elif defined(TARGET_ARM64) @@ -799,7 +805,6 @@ bool CoffNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn } *ppvRetAddrLocation = (PTR_PTR_VOID)contextPointers.Lr; - *pRetValueKind = gcRefKind; return true; #else return false; diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h index d8606f62367327..1598f640d18c9a 100644 --- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h +++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.h @@ -87,6 +87,8 @@ class CoffNativeCodeManager : public ICodeManager uintptr_t GetConservativeUpperBoundForOutgoingArgs(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet); + bool IsUnwindable(PTR_VOID pvAddress); + bool GetReturnAddressHijackInfo(MethodInfo * pMethodInfo, REGDISPLAY * pRegisterSet, // in PTR_PTR_VOID * ppvRetAddrLocation, // out diff --git a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp index 631fbb61c4dd50..e6195e5b3d9174 100644 --- a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp @@ -430,111 +430,45 @@ REDHAWK_PALEXPORT void REDHAWK_PALAPI PalRestoreContext(CONTEXT * pCtx) RtlRestoreContext(pCtx, NULL); } -REDHAWK_PALEXPORT _Success_(return) bool REDHAWK_PALAPI PalGetThreadContext(HANDLE hThread, _Out_ PAL_LIMITED_CONTEXT * pCtx) -{ - CONTEXT win32ctx; - - win32ctx.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_EXCEPTION_REQUEST; - - if (!GetThreadContext(hThread, &win32ctx)) - return false; +static PalHijackCallback g_pHijackCallback; - // The CONTEXT_SERVICE_ACTIVE and CONTEXT_EXCEPTION_ACTIVE output flags indicate we suspended the thread - // at a point where the kernel cannot guarantee a completely accurate context. We'll fail the request in - // this case (which should force our caller to resume the thread and try again -- since this is a fairly - // narrow window we're highly likely to succeed next time). 
- // Note: in some cases (x86 WOW64, ARM32 on ARM64) the OS will not set the CONTEXT_EXCEPTION_REPORTING flag - // if the thread is executing in kernel mode (i.e. in the middle of a syscall or exception handling). - // Therefore, we should treat the absence of the CONTEXT_EXCEPTION_REPORTING flag as an indication that - // it is not safe to manipulate with the current state of the thread context. - if ((win32ctx.ContextFlags & CONTEXT_EXCEPTION_REPORTING) == 0 || - (win32ctx.ContextFlags & (CONTEXT_SERVICE_ACTIVE | CONTEXT_EXCEPTION_ACTIVE))) - return false; +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalRegisterHijackCallback(_In_ PalHijackCallback callback) +{ + ASSERT(g_pHijackCallback == NULL); + g_pHijackCallback = callback; -#ifdef HOST_X86 - pCtx->IP = win32ctx.Eip; - pCtx->Rsp = win32ctx.Esp; - pCtx->Rbp = win32ctx.Ebp; - pCtx->Rdi = win32ctx.Edi; - pCtx->Rsi = win32ctx.Esi; - pCtx->Rax = win32ctx.Eax; - pCtx->Rbx = win32ctx.Ebx; -#elif defined(HOST_AMD64) - pCtx->IP = win32ctx.Rip; - pCtx->Rsp = win32ctx.Rsp; - pCtx->Rbp = win32ctx.Rbp; - pCtx->Rdi = win32ctx.Rdi; - pCtx->Rsi = win32ctx.Rsi; - pCtx->Rax = win32ctx.Rax; - pCtx->Rbx = win32ctx.Rbx; - pCtx->R12 = win32ctx.R12; - pCtx->R13 = win32ctx.R13; - pCtx->R14 = win32ctx.R14; - pCtx->R15 = win32ctx.R15; -#elif defined(HOST_ARM) - pCtx->IP = win32ctx.Pc; - pCtx->R0 = win32ctx.R0; - pCtx->R4 = win32ctx.R4; - pCtx->R5 = win32ctx.R5; - pCtx->R6 = win32ctx.R6; - pCtx->R7 = win32ctx.R7; - pCtx->R8 = win32ctx.R8; - pCtx->R9 = win32ctx.R9; - pCtx->R10 = win32ctx.R10; - pCtx->R11 = win32ctx.R11; - pCtx->SP = win32ctx.Sp; - pCtx->LR = win32ctx.Lr; -#elif defined(HOST_ARM64) - pCtx->IP = win32ctx.Pc; - pCtx->X0 = win32ctx.X0; - pCtx->X1 = win32ctx.X1; - // TODO: Copy X2-X7 when we start supporting HVA's - pCtx->X19 = win32ctx.X19; - pCtx->X20 = win32ctx.X20; - pCtx->X21 = win32ctx.X21; - pCtx->X22 = win32ctx.X22; - pCtx->X23 = win32ctx.X23; - pCtx->X24 = win32ctx.X24; - pCtx->X25 = win32ctx.X25; - pCtx->X26 = win32ctx.X26; - pCtx->X27 = win32ctx.X27; - pCtx->X28 = win32ctx.X28; - pCtx->SP = win32ctx.Sp; - pCtx->LR = win32ctx.Lr; - pCtx->FP = win32ctx.Fp; -#else -#error Unsupported platform -#endif return true; } - -REDHAWK_PALEXPORT uint32_t REDHAWK_PALAPI PalHijack(HANDLE hThread, _In_ PalHijackCallback callback, _In_opt_ void* pCallbackContext) +REDHAWK_PALEXPORT void REDHAWK_PALAPI PalHijack(HANDLE hThread, _In_opt_ void* pThreadToHijack) { - if (hThread == INVALID_HANDLE_VALUE) - { - return (uint32_t)E_INVALIDARG; - } - + _ASSERTE(hThread != INVALID_HANDLE_VALUE); if (SuspendThread(hThread) == (DWORD)-1) { - return HRESULT_FROM_WIN32(GetLastError()); + return; } - PAL_LIMITED_CONTEXT ctx; - HRESULT result; - if (!PalGetThreadContext(hThread, &ctx)) - { - result = HRESULT_FROM_WIN32(GetLastError()); - } - else + CONTEXT win32ctx; + win32ctx.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_EXCEPTION_REQUEST; + + if (GetThreadContext(hThread, &win32ctx)) { - result = callback(hThread, &ctx, pCallbackContext) ? S_OK : E_FAIL; + // The CONTEXT_SERVICE_ACTIVE and CONTEXT_EXCEPTION_ACTIVE output flags indicate we suspended the thread + // at a point where the kernel cannot guarantee a completely accurate context. We'll fail the request in + // this case (which should force our caller to resume the thread and try again -- since this is a fairly + // narrow window we're highly likely to succeed next time). 
+ // Note: in some cases (x86 WOW64, ARM32 on ARM64) the OS will not set the CONTEXT_EXCEPTION_REPORTING flag + // if the thread is executing in kernel mode (i.e. in the middle of a syscall or exception handling). + // Therefore, we should treat the absence of the CONTEXT_EXCEPTION_REPORTING flag as an indication that + // it is not safe to manipulate the current state of the thread context. + if ((win32ctx.ContextFlags & CONTEXT_EXCEPTION_REPORTING) != 0 && + ((win32ctx.ContextFlags & (CONTEXT_SERVICE_ACTIVE | CONTEXT_EXCEPTION_ACTIVE)) == 0)) + { + g_pHijackCallback(&win32ctx, pThreadToHijack); + } } ResumeThread(hThread); - - return result; } REDHAWK_PALEXPORT bool REDHAWK_PALAPI PalStartBackgroundWork(_In_ BackgroundCallback callback, _In_opt_ void* pCallbackContext, BOOL highPriority) diff --git a/src/coreclr/nativeaot/libunwind/src/DwarfParser.hpp b/src/coreclr/nativeaot/libunwind/src/DwarfParser.hpp index a2ebf3bb0e189b..ab4e64ebce6324 100644 --- a/src/coreclr/nativeaot/libunwind/src/DwarfParser.hpp +++ b/src/coreclr/nativeaot/libunwind/src/DwarfParser.hpp @@ -385,7 +385,7 @@ bool CFI_Parser<A>::parseInstructions(A &addressSpace, pint_t instructions, static_cast<uint64_t>(instructionsEnd)); // see DWARF Spec, section 6.4.2 for details on unwind opcodes - while ((p < instructionsEnd) && (codeOffset < pcoffset)) { + while ((p < instructionsEnd) && (codeOffset <= pcoffset)) { uint64_t reg; uint64_t reg2; int64_t offset; diff --git a/src/coreclr/nativeaot/libunwind/src/Registers.hpp b/src/coreclr/nativeaot/libunwind/src/Registers.hpp index e2f05fb0990d0d..fba7699a174d22 100644 --- a/src/coreclr/nativeaot/libunwind/src/Registers.hpp +++ b/src/coreclr/nativeaot/libunwind/src/Registers.hpp @@ -406,7 +406,7 @@ inline bool Registers_x86_64::validRegister(int regNum) const { return true; if (regNum < 0) return false; - if (regNum > 15) + if (regNum > 16) return false; return true; } @@ -414,6 +414,7 @@ inline bool Registers_x86_64::validRegister(int regNum) const { inline uint64_t Registers_x86_64::getRegister(int regNum) const { switch (regNum) { case UNW_REG_IP: + case UNW_X86_64_RIP: return _registers.__rip; case UNW_REG_SP: return _registers.__rsp; @@ -456,6 +457,7 @@ inline uint64_t Registers_x86_64::getRegister(int regNum) const { inline uint64_t Registers_x86_64::getRegisterLocation(int regNum) const { switch (regNum) { case UNW_REG_IP: + case UNW_X86_64_RIP: return _registerLocations.__rip; case UNW_REG_SP: return _registerLocations.__rsp; @@ -498,6 +500,7 @@ inline uint64_t Registers_x86_64::getRegisterLocation(int regNum) const { inline void Registers_x86_64::setRegister(int regNum, uint64_t value, uint64_t location) { switch (regNum) { case UNW_REG_IP: + case UNW_X86_64_RIP: _registers.__rip = value; _registerLocations.__rip = location; return; @@ -576,6 +579,7 @@ inline void Registers_x86_64::setRegister(int regNum, uint64_t value, uint64_t l inline const char *Registers_x86_64::getRegisterName(int regNum) { switch (regNum) { case UNW_REG_IP: + case UNW_X86_64_RIP: return "rip"; case UNW_REG_SP: return "rsp"; diff --git a/src/tests/nativeaot/SmokeTests/UnitTests/BasicThreading.cs b/src/tests/nativeaot/SmokeTests/UnitTests/BasicThreading.cs index 57964b6dead0a7..0b62338d536282 100644 --- a/src/tests/nativeaot/SmokeTests/UnitTests/BasicThreading.cs +++ b/src/tests/nativeaot/SmokeTests/UnitTests/BasicThreading.cs @@ -507,7 +507,7 @@ public static int Run() TestMaxStackSize(); TestStartShutdown(); - + TestConcurrentIsBackgroundProperty(); return (s_failed == 0) ?
BasicThreading.Pass : BasicThreading.Fail;
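To make the AMD64 epilogue handling in UnixNativeCodeManager.cpp above concrete, here is a worked example. It is a simplified sketch under assumed byte patterns (CountSketch and the literal bytes are invented for illustration; the real logic is TrailingEpilogueInstructionsCount together with GetReturnAddressHijackInfo): suppose a thread is interrupted at the first pop of an epilogue consisting of pop r14; pop r15; ret. The scanner reports 3 remaining instructions, and the hijack code then finds the return-address slot above the two pending pops, at RSP + 8 * (3 - 1).

#include <cassert>
#include <cstdint>

// Simplified scanner mirroring the pop/ret loop above; handles only the
// plain-pop-then-ret shape needed for this example.
static int CountSketch(const uint8_t* p)
{
    int count = 1;                           // at least the final ret remains
    while (true)
    {
        if ((p[0] & 0xf8) == 0x58)           // pop rax..rdi
        {
            p += 1; count++;
        }
        else if (((p[0] & 0xf0) == 0x40) && ((p[1] & 0xf8) == 0x58))
        {
            p += 2; count++;                 // REX-prefixed pop r8..r15
        }
        else
        {
            break;
        }
    }
    assert(p[0] == 0xc3);                    // this sketch expects a plain ret
    return count;
}

int main()
{
    // 41 5e = pop r14, 41 5f = pop r15, c3 = ret; interrupted at the first pop
    const uint8_t epilogue[] = { 0x41, 0x5e, 0x41, 0x5f, 0xc3 };
    int n = CountSketch(epilogue);
    assert(n == 3);                          // pop, pop, ret still to execute

    // Two 8-byte pops still separate RSP from the return address, so the
    // hijackable slot sits at RSP + 8 * (n - 1), matching the formula above.
    uintptr_t rsp = 0x00007ffc0000ff00;      // pretend stack pointer
    uintptr_t retAddrSlot = rsp + sizeof(uintptr_t) * (n - 1);
    assert(retAddrSlot == rsp + 16);
    return 0;
}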