Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid long busy-waiting between hijack retries. #103212

Merged
merged 3 commits into from
Jun 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/coreclr/minipal/Unix/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
set(SOURCES
doublemapping.cpp
dn-u16.cpp
${CLR_SRC_NATIVE_DIR}/minipal/time.c
)

if(NOT CLR_CROSS_COMPONENTS_BUILD)
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/minipal/Windows/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ set(SOURCES
doublemapping.cpp
dn-u16.cpp
${CLR_SRC_NATIVE_DIR}/minipal/utf8.c
${CLR_SRC_NATIVE_DIR}/minipal/time.c
)

if(NOT CLR_CROSS_COMPONENTS_BUILD)
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/nativeaot/Runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ set(COMMON_RUNTIME_SOURCES
${GC_DIR}/softwarewritewatch.cpp

${CLR_SRC_NATIVE_DIR}/minipal/cpufeatures.c
${CLR_SRC_NATIVE_DIR}/minipal/time.c
)

set(SERVER_GC_SOURCES
Expand Down
65 changes: 26 additions & 39 deletions src/coreclr/nativeaot/Runtime/threadstore.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "RuntimeInstance.h"
#include "TargetPtrs.h"
#include "yieldprocessornormalized.h"
#include <minipal/time.h>

#include "slist.inl"

Expand Down Expand Up @@ -224,30 +225,6 @@ void ThreadStore::UnlockThreadStore()
m_Lock.Leave();
}

// exponential spinwait with an approximate time limit for waiting in microsecond range.
// when iteration == -1, only usecLimit is used
void SpinWait(int iteration, int usecLimit)
{
int64_t startTicks = PalQueryPerformanceCounter();
int64_t ticksPerSecond = PalQueryPerformanceFrequency();
int64_t endTicks = startTicks + (usecLimit * ticksPerSecond) / 1000000;

int l = iteration >= 0 ? min(iteration, 30): 30;
for (int i = 0; i < l; i++)
{
for (int j = 0; j < (1 << i); j++)
{
System_YieldProcessor();
}

int64_t currentTicks = PalQueryPerformanceCounter();
if (currentTicks > endTicks)
{
break;
}
}
}

void ThreadStore::SuspendAllThreads(bool waitForGCEvent)
{
Thread * pThisThread = GetCurrentThreadIfAvailable();
Expand All @@ -265,16 +242,14 @@ void ThreadStore::SuspendAllThreads(bool waitForGCEvent)
// reason for this is that we essentially implement Dekker's algorithm, which requires write ordering.
PalFlushProcessWriteBuffers();

int retries = 0;
int prevRemaining = 0;
int remaining = 0;
bool observeOnly = false;
int prevRemaining = INT32_MAX;
bool observeOnly = true;
uint32_t rehijackDelay = 8;
uint32_t usecsSinceYield = 0;

while(true)
{
prevRemaining = remaining;
remaining = 0;

int remaining = 0;
FOREACH_THREAD(pTargetThread)
{
if (pTargetThread == pThisThread)
Expand All @@ -293,30 +268,42 @@ void ThreadStore::SuspendAllThreads(bool waitForGCEvent)
}
END_FOREACH_THREAD

if (!remaining)
if (remaining == 0)
break;

// if we see progress or have just done a hijacking pass
// do not hijack in the next iteration
if (remaining < prevRemaining || !observeOnly)
{
// 5 usec delay, then check for more progress
SpinWait(-1, 5);
minipal_microdelay(5, &usecsSinceYield);
observeOnly = true;
}
else
{
SpinWait(retries++, 100);
minipal_microdelay(rehijackDelay, &usecsSinceYield);
observeOnly = false;

// make sure our spining is not starving other threads, but not too often,
// this can cause a 1-15 msec delay, depending on OS, and that is a lot while
// very rarely needed, since threads are supposed to be releasing their CPUs
if ((retries & 127) == 0)
// double up rehijack delay in case we are rehjacking too often
// up to 100 usec, as that should be enough to make progress.
if (rehijackDelay < 100)
{
PalSwitchToThread();
rehijackDelay *= 2;
}
}

prevRemaining = remaining;

// If we see 1 msec of uninterrupted wait, it is a concern.
// Since we are stopping threads, there should be free cores to run on. Perhaps
// some thread that we need to stop needs to run on the same core as ours.
// Let's yield the timeslice to make sure such threads can run.
// We will not do this often though, since this can introduce arbitrary delays.
if (usecsSinceYield > 1000)
{
PalSwitchToThread();
usecsSinceYield = 0;
}
}

#if defined(TARGET_ARM) || defined(TARGET_ARM64)
Expand Down
1 change: 0 additions & 1 deletion src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1049,7 +1049,6 @@ REDHAWK_PALEXPORT void REDHAWK_PALAPI PalHijack(HANDLE hThread, _In_opt_ void* p
// stack overflow too. Those are held in the sigsegv_handler with blocked signals until
// the process exits.
// ESRCH may happen on some OSes when the thread is exiting.
// The thread should leave cooperative mode, but we could have seen it in its earlier state.
if ((status == EAGAIN)
|| (status == ESRCH)
#ifdef __APPLE__
Expand Down
8 changes: 7 additions & 1 deletion src/coreclr/pal/src/exception/signal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -893,7 +893,13 @@ PAL_ERROR InjectActivationInternal(CorUnix::CPalThread* pThread)
}
#endif

if ((status != 0) && (status != EAGAIN))
// ESRCH may happen on some OSes when the thread is exiting.
if (status == EAGAIN || status == ESRCH)
{
return ERROR_CANCELLED;
}

if (status != 0)
{
// Failure to send the signal is fatal. There are only two cases when sending
// the signal can fail. First, if the signal ID is invalid and second,
Expand Down
89 changes: 35 additions & 54 deletions src/coreclr/vm/threadsuspend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

#include "finalizerthread.h"
#include "dbginterface.h"
#include <minipal/time.h>

#define HIJACK_NONINTERRUPTIBLE_THREADS

Expand Down Expand Up @@ -2188,6 +2189,8 @@ void Thread::RareDisablePreemptiveGC()
#if defined(FEATURE_HIJACK) && !defined(TARGET_UNIX)
ResetThreadState(Thread::TS_GCSuspendRedirected);
#endif
// make sure this is cleared - in case a signal is lost or somehow we did not act on it
m_hasPendingActivation = false;

DWORD status = GCHeapUtilities::GetGCHeap()->WaitUntilGCComplete();
if (status != S_OK)
Expand Down Expand Up @@ -3207,44 +3210,6 @@ COR_PRF_SUSPEND_REASON GCSuspendReasonToProfSuspendReason(ThreadSuspend::SUSPEND
}
#endif // PROFILING_SUPPORTED

static int64_t QueryPerformanceCounter()
{
LARGE_INTEGER ts;
QueryPerformanceCounter(&ts);
return ts.QuadPart;
}

static int64_t QueryPerformanceFrequency()
{
LARGE_INTEGER ts;
QueryPerformanceFrequency(&ts);
return ts.QuadPart;
}

// exponential spinwait with an approximate time limit for waiting in microsecond range.
// when iteration == -1, only usecLimit is used
void SpinWait(int iteration, int usecLimit)
{
int64_t startTicks = QueryPerformanceCounter();
int64_t ticksPerSecond = QueryPerformanceFrequency();
int64_t endTicks = startTicks + (usecLimit * ticksPerSecond) / 1000000;

int l = iteration >= 0 ? min(iteration, 30): 30;
for (int i = 0; i < l; i++)
{
for (int j = 0; j < (1 << i); j++)
{
System_YieldProcessor();
}

int64_t currentTicks = QueryPerformanceCounter();
if (currentTicks > endTicks)
{
break;
}
}
}

//************************************************************************************
//
// SuspendRuntime is responsible for ensuring that all managed threads reach a
Expand Down Expand Up @@ -3335,16 +3300,14 @@ void ThreadSuspend::SuspendAllThreads()
// See VSW 475315 and 488918 for details.
::FlushProcessWriteBuffers();

int retries = 0;
int prevRemaining = 0;
int remaining = 0;
bool observeOnly = false;
int prevRemaining = INT32_MAX;
bool observeOnly = true;
uint32_t rehijackDelay = 8;
uint32_t usecsSinceYield = 0;

while(true)
{
prevRemaining = remaining;
remaining = 0;

int remaining = 0;
Thread* pTargetThread = NULL;
while ((pTargetThread = ThreadStore::GetThreadList(pTargetThread)) != NULL)
{
Expand All @@ -3361,30 +3324,42 @@ void ThreadSuspend::SuspendAllThreads()
}
}

if (!remaining)
if (remaining == 0)
break;

// if we see progress or have just done a hijacking pass
// do not hijack in the next iteration
if (remaining < prevRemaining || !observeOnly)
{
// 5 usec delay, then check for more progress
SpinWait(-1, 5);
minipal_microdelay(5, &usecsSinceYield);
observeOnly = true;
}
else
{
SpinWait(retries++, 100);
minipal_microdelay(rehijackDelay, &usecsSinceYield);
observeOnly = false;

// make sure our spining is not starving other threads, but not too often,
// this can cause a 1-15 msec delay, depending on OS, and that is a lot while
// very rarely needed, since threads are supposed to be releasing their CPUs
if ((retries & 127) == 0)
// double up rehijack delay in case we are rehjacking too often
// up to 100 usec, as that should be enough to make progress.
if (rehijackDelay < 100)
{
SwitchToThread();
rehijackDelay *= 2;
}
}

prevRemaining = remaining;

// If we see 1 msec of uninterrupted wait, it is a concern.
// Since we are stopping threads, there should be free cores to run on. Perhaps
// some thread that we need to stop needs to run on the same core as ours.
// Let's yield the timeslice to make sure such threads can run.
// We will not do this often though, since this can introduce arbitrary delays.
if (usecsSinceYield > 1000)
{
SwitchToThread();
usecsSinceYield = 0;
}
}

#if defined(TARGET_ARM) || defined(TARGET_ARM64)
Expand Down Expand Up @@ -5930,7 +5905,13 @@ bool Thread::InjectActivation(ActivationReason reason)
if (hThread != INVALID_HANDLE_VALUE)
{
m_hasPendingActivation = true;
return ::PAL_InjectActivation(hThread);
BOOL success = ::PAL_InjectActivation(hThread);
if (!success)
{
m_hasPendingActivation = false;
}

return success;
}
}

Expand Down
5 changes: 5 additions & 0 deletions src/native/minipal/configure.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,9 @@ check_function_exists(sysctlbyname HAVE_SYSCTLBYNAME)
check_symbol_exists(arc4random_buf "stdlib.h" HAVE_ARC4RANDOM_BUF)
check_symbol_exists(O_CLOEXEC fcntl.h HAVE_O_CLOEXEC)

check_symbol_exists(
clock_gettime_nsec_np
time.h
HAVE_CLOCK_GETTIME_NSEC_NP)

configure_file(${CMAKE_CURRENT_LIST_DIR}/minipalconfig.h.in ${CMAKE_CURRENT_BINARY_DIR}/minipalconfig.h)
1 change: 1 addition & 0 deletions src/native/minipal/minipalconfig.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@
#cmakedefine01 HAVE_AUXV_HWCAP_H
#cmakedefine01 HAVE_O_CLOEXEC
#cmakedefine01 HAVE_SYSCTLBYNAME
#cmakedefine01 HAVE_CLOCK_GETTIME_NSEC_NP

#endif
Loading
Loading