diff --git a/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Windows.targets b/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Windows.targets
index 18c3c1f017a9ff..8035484bf04a53 100644
--- a/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Windows.targets
+++ b/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Windows.targets
@@ -99,6 +99,12 @@ The .NET Foundation licenses this file to you under the MIT license.
       
       
       
+      
+      
+      
+      
+      
+      
     
 
     
diff --git a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp
index 325128c4e01fcd..dc0e6fe1b75bef 100644
--- a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp
+++ b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp
@@ -536,6 +536,61 @@ int32_t __stdcall RhpVectoredExceptionHandler(PEXCEPTION_POINTERS pExPtrs)
         return EXCEPTION_CONTINUE_SEARCH;
     }
 
+    // the following would work on ARM64 as well, but there is no way to test right now.
+#ifdef TARGET_AMD64
+
+#ifndef STATUS_RETURN_ADDRESS_HIJACK_ATTEMPT
+#define STATUS_RETURN_ADDRESS_HIJACK_ATTEMPT ((uintptr_t)0x80000033L)
+#endif
+
+    if (faultCode == STATUS_RETURN_ADDRESS_HIJACK_ATTEMPT)
+    {
+        Thread * pThread = ThreadStore::GetCurrentThreadIfAvailable();
+        if (pThread == NULL || !pThread->IsCurrentThreadInCooperativeMode())
+        {
+            // if we are not in coop mode, this cannot be our hijack
+            // Perhaps some other runtime is responsible.
+            return EXCEPTION_CONTINUE_SEARCH;
+        }
+
+        // Sanity check.
+        if (!pThread->IsHijacked())
+        {
+            _ASSERTE(!"The thread should be hijacked by us.");
+            RhFailFast();
+        }
+
+        PCONTEXT interruptedContext = pExPtrs->ContextRecord;
+        bool areShadowStacksEnabled = PalAreShadowStacksEnabled();
+        if (areShadowStacksEnabled)
+        {
+            // OS should have fixed the SP value to the same as we've stashed for the hijacked thread
+            _ASSERTE(*(size_t *)interruptedContext->GetSp() == (uintptr_t)pThread->GetHijackedReturnAddress());
+
+            // When CET is enabled, the interruption happens on the ret instruction in the callee.
+            // We need to "pop" rsp to the caller, as if the ret has consumed it.
+            interruptedContext->SetSp(interruptedContext->GetSp() + 8);
+        }
+
+        // Change the IP to be at the original return site, as if we have returned to the caller.
+        // That IP is an interruptible safe point, so we can suspend right there.
+        uintptr_t origIp = interruptedContext->GetIp();
+        interruptedContext->SetIp((uintptr_t)pThread->GetHijackedReturnAddress());
+
+        pThread->InlineSuspend(interruptedContext);
+
+        if (areShadowStacksEnabled)
+        {
+            // Undo the "pop", so that the ret could now succeed.
+            interruptedContext->SetSp(interruptedContext->GetSp() - 8);
+            interruptedContext->SetIp(origIp);
+        }
+
+        ASSERT(!pThread->IsHijacked());
+        return EXCEPTION_CONTINUE_EXECUTION;
+    }
+#endif // TARGET_AMD64    (support for STATUS_RETURN_ADDRESS_HIJACK_ATTEMPT)
+
     uintptr_t faultingIP = pExPtrs->ContextRecord->GetIp();
 
     ICodeManager * pCodeManager = GetRuntimeInstance()->GetCodeManagerForAddress((PTR_VOID)faultingIP);
diff --git a/src/coreclr/nativeaot/Runtime/PalRedhawk.h b/src/coreclr/nativeaot/Runtime/PalRedhawk.h
index 9257324bd1589b..56bb1fb66c4eba 100644
--- a/src/coreclr/nativeaot/Runtime/PalRedhawk.h
+++ b/src/coreclr/nativeaot/Runtime/PalRedhawk.h
@@ -436,6 +436,7 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT {
     uintptr_t GetIp() { return Pc; }
     uintptr_t GetLr() { return Lr; }
     uintptr_t GetSp() { return Sp; }
+    void SetSp(uintptr_t sp) { Sp = sp; } // counterpart of GetSp(): overwrites the Sp field of this context
 
     template 
     void ForEachPossibleObjectRef(F lambda)
@@ -665,6 +666,7 @@ REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalVirtualProtect(_In_ void* pAddre
 REDHAWK_PALIMPORT void PalFlushInstructionCache(_In_ void* pAddress, size_t size);
 REDHAWK_PALIMPORT void REDHAWK_PALAPI PalSleep(uint32_t milliseconds);
 REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalSwitchToThread();
+REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalAreShadowStacksEnabled();
 REDHAWK_PALIMPORT HANDLE REDHAWK_PALAPI PalCreateEventW(_In_opt_ LPSECURITY_ATTRIBUTES pEventAttributes, UInt32_BOOL manualReset, UInt32_BOOL initialState, _In_opt_z_ LPCWSTR pName);
 REDHAWK_PALIMPORT uint64_t REDHAWK_PALAPI PalGetTickCount64();
 REDHAWK_PALIMPORT HANDLE REDHAWK_PALAPI PalGetModuleHandleFromPointer(_In_ void* pointer);
@@ -692,6 +694,7 @@ REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalStartEventPipeHelperThread(_In_ Backgro
 typedef void (*PalHijackCallback)(_In_ NATIVE_CONTEXT* pThreadContext, _In_opt_ void* pThreadToHijack);
 REDHAWK_PALIMPORT void REDHAWK_PALAPI PalHijack(HANDLE hThread, _In_opt_ void* pThreadToHijack);
 REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalRegisterHijackCallback(_In_ PalHijackCallback callback);
+REDHAWK_PALIMPORT HijackFunc* REDHAWK_PALAPI PalGetHijackTarget(_In_ HijackFunc* defaultHijackTarget);
 #endif
 
 REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalAllocateThunksFromTemplate(_In_ HANDLE hTemplateModule, uint32_t templateRva, size_t templateSize, _Outptr_result_bytebuffer_(templateSize) void** newThunksOut);
diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp
index ae073e57c7ecd0..2574ea39a16633 100644
--- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp
+++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp
@@ -277,28 +277,8 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PInvokeTransitionF
 
 #endif // defined(USE_PORTABLE_HELPERS)
 
-    // This function guarantees that the final initialized context will refer to a managed
-    // frame.  In the rare case where the PC does not refer to managed code (and refers to an
-    // assembly thunk instead), unwind through the thunk sequence to find the nearest managed
-    // frame.
-    // NOTE: When thunks are present, the thunk sequence may report a conservative GC reporting
-    // lower bound that must be applied when processing the managed frame.
-
-    ReturnAddressCategory category = CategorizeUnadjustedReturnAddress(m_ControlPC);
-
-    if (category == InManagedCode)
-    {
-        ASSERT(m_pInstance->IsManaged(m_ControlPC));
-    }
-    else if (IsNonEHThunk(category))
-    {
-        UnwindNonEHThunkSequence();
-        ASSERT(m_pInstance->IsManaged(m_ControlPC));
-    }
-    else
-    {
-        FAILFAST_OR_DAC_FAIL_UNCONDITIONALLY("PInvokeTransitionFrame PC points to an unexpected assembly thunk kind.");
-    }
+    // adjust for thunks, if needed
+    EnsureInitializedToManagedFrame();
 
     STRESS_LOG1(LF_STACKWALK, LL_INFO10000, "   %p\n", m_ControlPC);
 }
@@ -484,7 +464,13 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PTR_PAL_LIMITED_CO
 }
 
 // Prepare to start a stack walk from the context listed in the supplied NATIVE_CONTEXT.
-// The supplied context can describe a location in managed code.
+// NOTE: When a return address hijack is executed, the PC in the NATIVE_CONTEXT
+// matches the hijacked return address.  This PC is not guaranteed to be in managed code
+// since the hijacked return address may refer to a location where an assembly thunk called
+// into managed code.
+// NOTE: When the PC is in an assembly thunk, this function will unwind to the next managed
+// frame and may publish a conservative stack range (if and only if any of the unwound
+// thunks report a conservative range).
 void StackFrameIterator::InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pCtx, uint32_t dwFlags)
 {
     ASSERT((dwFlags & MethodStateCalculated) == 0);
@@ -498,8 +484,9 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pC
     // properly walk it in parallel.
     ResetNextExInfoForSP(pCtx->GetSp());
 
-    // This codepath is used by the hijack stackwalk. The IP must be in managed code.
-    ASSERT(m_pInstance->IsManaged(dac_cast(pCtx->GetIp())));
+    // This codepath is used by the hijack stackwalk. The IP must be in managed code
+    // or in a conservatively reported assembly thunk.
+    ASSERT(IsValidReturnAddress((void*)pCtx->GetIp()));
 
     //
     // control state
@@ -616,6 +603,35 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pC
 #endif // TARGET_ARM
 
 #undef PTR_TO_REG
+
+    // adjust for thunks, if needed
+    EnsureInitializedToManagedFrame();
+}
+
+void StackFrameIterator::EnsureInitializedToManagedFrame()
+{
+    // Postcondition: the initialized context refers to a managed frame. The PC usually is in
+    // managed code already; in the rare case where it is in an assembly thunk instead, the thunk
+    // sequence is unwound to reach the nearest managed frame.
+    // NOTE: such a thunk sequence may report a conservative GC reporting lower bound that must
+    // be applied when processing the managed frame.
+    ReturnAddressCategory initialCategory = CategorizeUnadjustedReturnAddress(m_ControlPC);
+
+    if (initialCategory == InManagedCode)
+    {
+        ASSERT(m_pInstance->IsManaged(m_ControlPC));
+        return;
+    }
+
+    if (IsNonEHThunk(initialCategory))
+    {
+        UnwindNonEHThunkSequence();
+        ASSERT(m_pInstance->IsManaged(m_ControlPC));
+    }
+    else
+    {
+        FAILFAST_OR_DAC_FAIL_UNCONDITIONALLY("Unadjusted initial PC points to an unexpected assembly thunk kind.");
+    }
+}
 
 PTR_VOID StackFrameIterator::HandleExCollide(PTR_ExInfo pExInfo)
diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.h b/src/coreclr/nativeaot/Runtime/StackFrameIterator.h
index cf7f524de8dbbe..77cef2133b5c16 100644
--- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.h
+++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.h
@@ -86,6 +86,7 @@ class StackFrameIterator
     void InternalInit(Thread * pThreadToWalk, PTR_PInvokeTransitionFrame pFrame, uint32_t dwFlags); // GC stackwalk
     void InternalInit(Thread * pThreadToWalk, PTR_PAL_LIMITED_CONTEXT pCtx, uint32_t dwFlags);  // EH and hijack stackwalk, and collided unwind
     void InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pCtx, uint32_t dwFlags);  // GC stackwalk of redirected thread
+    void EnsureInitializedToManagedFrame();
 
     void InternalInitForEH(Thread * pThreadToWalk, PAL_LIMITED_CONTEXT * pCtx, bool instructionFault); // EH stackwalk
     void InternalInitForStackTrace();  // Environment.StackTrace
diff --git a/src/coreclr/nativeaot/Runtime/amd64/AsmOffsetsCpu.h b/src/coreclr/nativeaot/Runtime/amd64/AsmOffsetsCpu.h
index 8dd52b3acfa85f..fe98a2eafc7c39 100644
--- a/src/coreclr/nativeaot/Runtime/amd64/AsmOffsetsCpu.h
+++ b/src/coreclr/nativeaot/Runtime/amd64/AsmOffsetsCpu.h
@@ -58,6 +58,7 @@ PLAT_ASM_OFFSET(0f0, PAL_LIMITED_CONTEXT, Xmm15)
 
 PLAT_ASM_SIZEOF(130, REGDISPLAY)
 PLAT_ASM_OFFSET(78, REGDISPLAY, SP)
+PLAT_ASM_OFFSET(80, REGDISPLAY, IP)
 
 PLAT_ASM_OFFSET(18, REGDISPLAY, pRbx)
 PLAT_ASM_OFFSET(20, REGDISPLAY, pRbp)
diff --git a/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm b/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm
index 86c3f408665b69..facd8e983e6796 100644
--- a/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm
+++ b/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm
@@ -15,6 +15,10 @@ include asmmacros.inc
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 NESTED_ENTRY RhpThrowHwEx, _TEXT
 
+ALTERNATE_ENTRY RhpThrowHwExGEHCONT ; this needs to be an EHCONT target since we'll be context-jumping here.
+
+.GEHCONT RhpThrowHwExGEHCONT
+
         SIZEOF_XmmSaves equ SIZEOF__PAL_LIMITED_CONTEXT - OFFSETOF__PAL_LIMITED_CONTEXT__Xmm6
         STACKSIZEOF_ExInfo equ ((SIZEOF__ExInfo + 15) AND (NOT 15))
 
@@ -486,8 +490,9 @@ endif
         INLINE_THREAD_UNHIJACK rdx, rcx, r9                         ;; Thread in rdx, trashes rcx and r9
 
         mov     rcx, [rsp + rsp_offsetof_arguments + 18h]           ;; rcx <- current ExInfo *
+        mov     r10, [r8 + OFFSETOF__REGDISPLAY__IP]                ;; r10 <- original IP value
         mov     r8, [r8 + OFFSETOF__REGDISPLAY__SP]                 ;; r8 <- resume SP value
-        xor     r9d, r9d                                            ;; r9 <- 0
+        xor     r9, r9                                              ;; r9 <- 0
 
    @@:  mov     rcx, [rcx + OFFSETOF__ExInfo__m_pPrevExInfo]        ;; rcx <- next ExInfo
         cmp     rcx, r9
@@ -497,6 +502,20 @@ endif
 
    @@:  mov     [rdx + OFFSETOF__Thread__m_pExInfoStackHead], rcx   ;; store the new head on the Thread
 
+   ;; Sanity check: if we have shadow stack, it should agree with what we have in rsp
+   LOCAL_STACK_USE equ 118h
+   ifdef _DEBUG
+        rdsspq  r9
+        test    r9, r9
+        jz      @f
+        mov     r9, [r9]
+        cmp     [rsp + LOCAL_STACK_USE], r9
+        je      @f
+        int     3
+   @@:
+        xor     r9, r9                                              ;; r9 <- 0
+   endif
+
         test    [RhpTrapThreads], TrapThreadsFlags_AbortInProgress
         jz      @f
 
@@ -507,12 +526,28 @@ endif
         ;; It was the ThreadAbortException, so rethrow it
         mov     rcx, STATUS_REDHAWK_THREAD_ABORT
         mov     rdx, rax                                            ;; rdx <- continuation address as exception RIP
-        mov     rsp, r8                                             ;; reset the SP to resume SP value
-        jmp     RhpThrowHwEx ;; Throw the ThreadAbortException as a special kind of hardware exception
+        mov     rax, RhpThrowHwEx                                   ;; Throw the ThreadAbortException as a special kind of hardware exception
 
-        ;; reset RSP and jump to the continuation address
+        ;; reset RSP and jump to RAX
    @@:  mov     rsp, r8                                             ;; reset the SP to resume SP value
-        jmp     rax
+
+        ;; if we have a shadow stack, then we need to reconcile it with the rsp change we have just made
+        rdsspq  r9
+        test    r9, r9
+        jz      NoSSP
+
+        ;; Find the shadow stack pointer for the frame we are going to restore to.
+        ;; The SSP we search is pointing to the return address of the frame represented
+        ;; by the passed in context. So we search for the instruction pointer from 
+        ;; the context and return one slot up from there.
+        ;; (Same logic as in GetSSPForFrameOnCurrentStack)
+        xor     r11, r11
+   @@:  inc     r11
+        cmp     [r9 + r11 * 8 - 8], r10
+        jne     @b
+
+        incsspq r11
+NoSSP:  jmp     rax
 
 
 NESTED_END RhpCallCatchFunclet, _TEXT
diff --git a/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm b/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm
index 7e2715d3dd7685..fe09d2a73022a7 100644
--- a/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm
+++ b/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm
@@ -251,7 +251,7 @@ RhpGcStressProbe  endp
 
 endif ;; FEATURE_GC_STRESS
 
-FASTCALL_FUNC RhpGcProbeHijack, 0
+_RhpGcProbeHijack@0  proc public
         HijackFixupProlog
         test        [RhpTrapThreads], TrapThreadsFlags_TrapThreads
         jnz         WaitForGC
@@ -261,18 +261,18 @@ WaitForGC:
         or          ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX
         jmp         RhpWaitForGC
 
-FASTCALL_ENDFUNC
+_RhpGcProbeHijack@0  endp
 
 ifdef FEATURE_GC_STRESS
-FASTCALL_FUNC RhpGcStressHijack, 0
+_RhpGcStressHijack@0  proc public
 
         HijackFixupProlog
         or          ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX
         jmp         RhpGcStressProbe
 
-FASTCALL_ENDFUNC
+_RhpGcStressHijack@0  endp
 
-FASTCALL_FUNC RhpHijackForGcStress, 0
+_RhpHijackForGcStress@0  proc public
         push        ebp
         mov         ebp, esp
 
@@ -307,7 +307,7 @@ FASTCALL_FUNC RhpHijackForGcStress, 0
         pop         edx
         pop         ebp
         ret
-FASTCALL_ENDFUNC
+_RhpHijackForGcStress@0  endp
 endif ;; FEATURE_GC_STRESS
 
         end
diff --git a/src/coreclr/nativeaot/Runtime/inc/CommonTypes.h b/src/coreclr/nativeaot/Runtime/inc/CommonTypes.h
index 71cd8fcd0bf2b9..1b6e92b1788f27 100644
--- a/src/coreclr/nativeaot/Runtime/inc/CommonTypes.h
+++ b/src/coreclr/nativeaot/Runtime/inc/CommonTypes.h
@@ -61,4 +61,8 @@ typedef struct _GUID {
 } GUID;
 #endif // FEATURE_EVENT_TRACE && !_INC_WINDOWS
 
+// Hijack funcs are not called, they are "returned to". And when done, they return to the actual caller.
+// Thus they cannot have any parameters or return anything.
+typedef void HijackFunc();
+
 #endif // __COMMON_TYPES_H__
diff --git a/src/coreclr/nativeaot/Runtime/thread.cpp b/src/coreclr/nativeaot/Runtime/thread.cpp
index c3a72d1846d959..b796b052182260 100644
--- a/src/coreclr/nativeaot/Runtime/thread.cpp
+++ b/src/coreclr/nativeaot/Runtime/thread.cpp
@@ -572,13 +572,13 @@ void Thread::GcScanRootsWorker(ScanFunc * pfnEnumCallback, ScanContext * pvCallb
 
 #ifdef FEATURE_HIJACK
 
-EXTERN_C void FASTCALL RhpGcProbeHijack();
-EXTERN_C void FASTCALL RhpGcStressHijack();
+EXTERN_C void RhpGcProbeHijack();
+EXTERN_C void RhpGcStressHijack();
 
 // static
 bool Thread::IsHijackTarget(void* address)
 {
-    if (&RhpGcProbeHijack == address)
+    if (PalGetHijackTarget(/*defaultHijackTarget*/&RhpGcProbeHijack) == address)
         return true;
 #ifdef FEATURE_GC_STRESS
     if (&RhpGcStressHijack == address)
@@ -697,7 +697,9 @@ void Thread::HijackCallback(NATIVE_CONTEXT* pThreadContext, void* pThreadToHijac
 #endif //FEATURE_SUSPEND_REDIRECTION
     }
 
-    pThread->HijackReturnAddress(pThreadContext, &RhpGcProbeHijack);
+    pThread->HijackReturnAddress(
+        pThreadContext,
+        PalGetHijackTarget(/*defaultHijackTarget*/&RhpGcProbeHijack));
 }
 
 #ifdef FEATURE_GC_STRESS
diff --git a/src/coreclr/nativeaot/Runtime/thread.h b/src/coreclr/nativeaot/Runtime/thread.h
index f5a1c82e59697d..4c0a21e9f9ab7f 100644
--- a/src/coreclr/nativeaot/Runtime/thread.h
+++ b/src/coreclr/nativeaot/Runtime/thread.h
@@ -173,16 +173,9 @@ class Thread : private RuntimeThreadLocals
 #ifdef FEATURE_HIJACK
     static void HijackCallback(NATIVE_CONTEXT* pThreadContext, void* pThreadToHijack);
 
-    //
-    // Hijack funcs are not called, they are "returned to". And when done, they return to the actual caller.
-    // Thus they cannot have any parameters or return anything.
-    //
-    typedef void FASTCALL HijackFunc();
-
     void HijackReturnAddress(PAL_LIMITED_CONTEXT* pSuspendCtx, HijackFunc* pfnHijackFunction);
     void HijackReturnAddress(NATIVE_CONTEXT* pSuspendCtx, HijackFunc* pfnHijackFunction);
     void HijackReturnAddressWorker(StackFrameIterator* frameIterator, HijackFunc* pfnHijackFunction);
-    bool InlineSuspend(NATIVE_CONTEXT* interruptedContext);
     void CrossThreadUnhijack();
     void UnhijackWorker();
 #else // FEATURE_HIJACK
@@ -209,6 +202,7 @@ class Thread : private RuntimeThreadLocals
     static uint64_t s_DeadThreadsNonAllocBytes;
 
 public:
+    bool InlineSuspend(NATIVE_CONTEXT* interruptedContext);
 
     static uint64_t GetDeadThreadsNonAllocBytes();
 
diff --git a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp
index ac3dd24a267047..8b3c6af9fe48f9 100644
--- a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp
+++ b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp
@@ -636,6 +636,11 @@ REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI __stdcall PalSwitchToThread()
     return false;
 }
 
+REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalAreShadowStacksEnabled()
+{
+    return false; // this PAL implementation unconditionally reports shadow stacks as disabled
+}
+
 extern "C" UInt32_BOOL CloseHandle(HANDLE handle)
 {
     if ((handle == NULL) || (handle == INVALID_HANDLE_VALUE))
@@ -1070,6 +1075,11 @@ REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalRegisterHijackCallback(_In_ PalH
     return AddSignalHandler(INJECT_ACTIVATION_SIGNAL, ActivationHandler, &g_previousActivationHandler);
 }
 
+REDHAWK_PALEXPORT HijackFunc* REDHAWK_PALAPI PalGetHijackTarget(_In_ HijackFunc* defaultHijackTarget)
+{
+    return defaultHijackTarget; // this PAL has no alternative hijack target; the caller's default is returned as-is
+}
+
 REDHAWK_PALEXPORT void REDHAWK_PALAPI PalHijack(HANDLE hThread, _In_opt_ void* pThreadToHijack)
 {
     ThreadUnixHandle* threadHandle = (ThreadUnixHandle*)hThread;
diff --git a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp
index 94424b17562be9..fec8c5fb7f10d2 100644
--- a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp
+++ b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp
@@ -61,6 +61,11 @@ static HMODULE LoadKernel32dll()
     return LoadLibraryExW(L"kernel32", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32);
 }
 
+static HMODULE LoadNtdlldll()
+{
+    return LoadLibraryExW(L"ntdll.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); // same pattern as LoadKernel32dll; the result is returned unchecked
+}
+
 void InitializeCurrentProcessCpuCount()
 {
     DWORD count;
@@ -327,6 +332,20 @@ REDHAWK_PALEXPORT HANDLE REDHAWK_PALAPI PalCreateEventW(_In_opt_ LPSECURITY_ATTR
     return CreateEventW(pEventAttributes, manualReset, initialState, pName);
 }
 
+REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalAreShadowStacksEnabled()
+{
+#if defined(TARGET_AMD64)
+    // The SSP is null when CET shadow stacks are not enabled. On processors that don't support shadow stacks, reading
+    // the SSP is a no-op and the intrinsic returns 0. CET shadow stacks are enabled or disabled for all threads, so
+    // the result is the same from any thread.
+    return _rdsspq() != 0;
+#else
+    // When implementing PalAreShadowStacksEnabled() on other architectures, review all the places where this is used.
+    return false;
+#endif
+}
+
+
 #ifdef TARGET_X86
 
 #define EXCEPTION_HIJACK  0xe0434f4e    // 0xe0000000 | 'COM'+1
@@ -487,6 +506,10 @@ REDHAWK_PALEXPORT CONTEXT* PalAllocateCompleteOSContext(_Out_ uint8_t** contextB
         context = context | CONTEXT_XSTATE;
     }
 
+    // the context does not need XSTATE_MASK_CET_U because we should not be using
+    // redirection when CET is enabled and should not be here.
+    _ASSERTE(!PalAreShadowStacksEnabled());
+
     // Retrieve contextSize by passing NULL for Buffer
     DWORD contextSize = 0;
     ULONG64 xStateCompactionMask = XSTATE_MASK_LEGACY | XSTATE_MASK_AVX | XSTATE_MASK_MPX | XSTATE_MASK_AVX512;
@@ -580,8 +603,6 @@ REDHAWK_PALIMPORT void REDHAWK_PALAPI PopulateControlSegmentRegisters(CONTEXT* p
 
 static PalHijackCallback g_pHijackCallback;
 
-#ifdef FEATURE_SPECIAL_USER_MODE_APC
-
 // These declarations are for a new special user-mode APC feature introduced in Windows. These are not yet available in Windows
 // SDK headers, so some names below are prefixed with "CLONE_" to avoid conflicts in the future. Once the prefixed declarations
 // become available in the Windows SDK headers, the prefixed declarations below can be removed in favor of the SDK ones.
@@ -611,6 +632,8 @@ static const CLONE_QUEUE_USER_APC_FLAGS SpecialUserModeApcWithContextFlags = (CL
                                     (CLONE_QUEUE_USER_APC_FLAGS_SPECIAL_USER_APC |
                                      CLONE_QUEUE_USER_APC_CALLBACK_DATA_CONTEXT);
 
+static void* g_returnAddressHijackTarget = NULL;
+
 static void NTAPI ActivationHandler(ULONG_PTR parameter)
 {
     CLONE_APC_CALLBACK_DATA* data = (CLONE_APC_CALLBACK_DATA*)parameter;
@@ -619,7 +642,6 @@ static void NTAPI ActivationHandler(ULONG_PTR parameter)
     Thread* pThread = (Thread*)data->Parameter;
     pThread->SetActivationPending(false);
 }
-#endif
 
 REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalRegisterHijackCallback(_In_ PalHijackCallback callback)
 {
@@ -629,6 +651,55 @@ REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalRegisterHijackCallback(_In_ PalH
     return true;
 }
 
+void InitHijackingAPIs() // resolves the OS entry points used for hijacking: g_pfnQueueUserAPC2Proc and g_returnAddressHijackTarget
+{
+    HMODULE hKernel32 = LoadKernel32dll();
+
+#ifdef HOST_AMD64
+    typedef BOOL (WINAPI *IsWow64Process2Proc)(HANDLE hProcess, USHORT *pProcessMachine, USHORT *pNativeMachine);
+
+    IsWow64Process2Proc pfnIsWow64Process2Proc = (IsWow64Process2Proc)GetProcAddress(hKernel32, "IsWow64Process2");
+    USHORT processMachine, hostMachine;
+    if (pfnIsWow64Process2Proc != nullptr &&
+        (*pfnIsWow64Process2Proc)(GetCurrentProcess(), &processMachine, &hostMachine) &&
+        (hostMachine == IMAGE_FILE_MACHINE_ARM64) &&
+        !IsWindowsVersionOrGreater(10, 0, 26100))
+    {
+        // Special user-mode APCs are broken on WOW64 processes (x64 running on an Arm64 machine) with Windows older than 11.0.26100 (24H2), so do not use them there.
+        g_pfnQueueUserAPC2Proc = NULL;
+    }
+    else
+#endif // HOST_AMD64
+    {
+        g_pfnQueueUserAPC2Proc = (QueueUserAPC2Proc)GetProcAddress(hKernel32, "QueueUserAPC2");
+    }
+
+    if (PalAreShadowStacksEnabled())
+    {
+        // When shadow stacks are enabled, support for special user-mode APCs is required (checked by assert only).
+        _ASSERTE(g_pfnQueueUserAPC2Proc != NULL);
+
+        HMODULE hModNtdll = LoadNtdlldll();
+        typedef void* (*PFN_RtlGetReturnAddressHijackTarget)(void);
+
+        void* rtlGetReturnAddressHijackTarget = GetProcAddress(hModNtdll, "RtlGetReturnAddressHijackTarget"); // a NULL result leaves g_returnAddressHijackTarget unchanged
+        if (rtlGetReturnAddressHijackTarget != NULL)
+        {
+            g_returnAddressHijackTarget = ((PFN_RtlGetReturnAddressHijackTarget)rtlGetReturnAddressHijackTarget)();
+        }
+
+        if (g_returnAddressHijackTarget == NULL) // only asserted; PalGetHijackTarget returns its default target while this stays NULL
+        {
+            _ASSERTE(!"RtlGetReturnAddressHijackTarget must provide a target when shadow stacks are enabled");
+        }
+    }
+}
+
+REDHAWK_PALEXPORT HijackFunc* REDHAWK_PALAPI PalGetHijackTarget(_In_ HijackFunc* defaultHijackTarget)
+{
+    return (g_returnAddressHijackTarget != NULL) ? (HijackFunc*)g_returnAddressHijackTarget : defaultHijackTarget; // the target recorded by InitHijackingAPIs, if any, takes precedence over the default
+}
+
 REDHAWK_PALEXPORT void REDHAWK_PALAPI PalHijack(HANDLE hThread, _In_opt_ void* pThreadToHijack)
 {
     _ASSERTE(hThread != INVALID_HANDLE_VALUE);
@@ -637,28 +708,10 @@ REDHAWK_PALEXPORT void REDHAWK_PALAPI PalHijack(HANDLE hThread, _In_opt_ void* p
 
     // initialize g_pfnQueueUserAPC2Proc on demand.
     // Note that only one thread at a time may perform suspension (guaranteed by the thread store lock)
-    // so simple conditional assignment is ok.
+    // so simple condition check is ok.
     if (g_pfnQueueUserAPC2Proc == QUEUE_USER_APC2_UNINITIALIZED)
     {
-        HMODULE hKernel32 = LoadKernel32dll();
-#ifdef HOST_AMD64
-        typedef BOOL (WINAPI *IsWow64Process2Proc)(HANDLE hProcess, USHORT *pProcessMachine, USHORT *pNativeMachine);
-
-        IsWow64Process2Proc pfnIsWow64Process2Proc = (IsWow64Process2Proc)GetProcAddress(hKernel32, "IsWow64Process2");
-        USHORT processMachine, hostMachine;
-        if (pfnIsWow64Process2Proc != nullptr &&
-            (*pfnIsWow64Process2Proc)(GetCurrentProcess(), &processMachine, &hostMachine) &&
-            (hostMachine == IMAGE_FILE_MACHINE_ARM64) &&
-            !IsWindowsVersionOrGreater(10, 0, 26100))
-        {
-            // Special user-mode APCs are broken on WOW64 processes (x64 running on Arm64 machine) with Windows older than 11.0.26100 (24H2)
-            g_pfnQueueUserAPC2Proc = NULL;
-        }
-        else
-#endif // HOST_AMD64
-        {
-            g_pfnQueueUserAPC2Proc = (QueueUserAPC2Proc)GetProcAddress(hKernel32, "QueueUserAPC2");
-        }
+        InitHijackingAPIs();
     }
 
     if (g_pfnQueueUserAPC2Proc)
diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp
index 1183d1e825b581..942a8df13cdd67 100644
--- a/src/coreclr/vm/excep.cpp
+++ b/src/coreclr/vm/excep.cpp
@@ -6765,11 +6765,14 @@ VEH_ACTION WINAPI CLRVectoredExceptionHandler(PEXCEPTION_POINTERS pExceptionInfo
     {
         if (pThread == NULL || !pThread->PreemptiveGCDisabled())
         {
-            // We are not running managed code, so this cannot be our hijack
+            // if we are not in coop mode, this cannot be our hijack
             // Perhaps some other runtime is responsible.
             return VEH_CONTINUE_SEARCH;
         }
 
+        // Sanity check. The thread should be hijacked by us.
+        _ASSERTE_ALL_BUILDS(pThread->HasThreadState(Thread::TS_Hijacked));
+
         PCONTEXT interruptedContext = pExceptionInfo->ContextRecord;
         bool areShadowStacksEnabled = Thread::AreShadowStacksEnabled();
         if (areShadowStacksEnabled)