Skip to content

Commit 1c4c009

Browse files
authored
Randomized allocation sampling (#104955)
* Add Randomized Allocation Sampling This feature allows profilers to do allocation profiling based off randomized samples. It has better theoretical and empirically observed accuracy than our current allocation profiling approaches while also maintaining low performance overhead. It is designed for use in production profiling scenarios. For more information about usage and implementation, see the included doc docs/design/features/RandomizedAllocationSampling.md Much of this code was originally written in #100356 by @chrisnas, but for logistical reasons we superseded it with this new PR.
1 parent 8d21e13 commit 1c4c009

35 files changed

+2358
-23
lines changed

docs/design/features/RandomizedAllocationSampling.md

Lines changed: 317 additions & 0 deletions
Large diffs are not rendered by default.

src/coreclr/inc/eventtracebase.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1331,17 +1331,19 @@ namespace ETW
13311331
#define ETWLoaderStaticLoad 0 // Static reference load
13321332
#define ETWLoaderDynamicLoad 1 // Dynamic assembly load
13331333

1334+
#if defined (FEATURE_EVENT_TRACE)
1335+
EXTERN_C DOTNET_TRACE_CONTEXT MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_DOTNET_Context;
1336+
EXTERN_C DOTNET_TRACE_CONTEXT MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_DOTNET_Context;
1337+
EXTERN_C DOTNET_TRACE_CONTEXT MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_DOTNET_Context;
1338+
EXTERN_C DOTNET_TRACE_CONTEXT MICROSOFT_WINDOWS_DOTNETRUNTIME_STRESS_PROVIDER_DOTNET_Context;
1339+
#endif // FEATURE_EVENT_TRACE
1340+
13341341
#if defined(FEATURE_EVENT_TRACE) && !defined(HOST_UNIX)
13351342
//
13361343
// The ONE and only ONE global instantiation of this class
13371344
//
13381345
extern ETW::CEtwTracer * g_pEtwTracer;
13391346

1340-
EXTERN_C DOTNET_TRACE_CONTEXT MICROSOFT_WINDOWS_DOTNETRUNTIME_PROVIDER_DOTNET_Context;
1341-
EXTERN_C DOTNET_TRACE_CONTEXT MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_DOTNET_Context;
1342-
EXTERN_C DOTNET_TRACE_CONTEXT MICROSOFT_WINDOWS_DOTNETRUNTIME_RUNDOWN_PROVIDER_DOTNET_Context;
1343-
EXTERN_C DOTNET_TRACE_CONTEXT MICROSOFT_WINDOWS_DOTNETRUNTIME_STRESS_PROVIDER_DOTNET_Context;
1344-
13451347
//
13461348
// Special Handling of Startup events
13471349
//

src/coreclr/nativeaot/Runtime/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ set(COMMON_RUNTIME_SOURCES
4949
${GC_DIR}/handletablescan.cpp
5050
${GC_DIR}/objecthandle.cpp
5151
${GC_DIR}/softwarewritewatch.cpp
52+
${CLR_SRC_NATIVE_DIR}/minipal/xoshiro128pp.c
5253
)
5354

5455
set(SERVER_GC_SOURCES

src/coreclr/nativeaot/Runtime/GCHelpers.cpp

Lines changed: 78 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,12 @@
2929

3030
#include "gcdesc.h"
3131

32+
#ifdef FEATURE_EVENT_TRACE
33+
#include "clretwallmain.h"
34+
#else // FEATURE_EVENT_TRACE
35+
#include "etmdummy.h"
36+
#endif // FEATURE_EVENT_TRACE
37+
3238
#define RH_LARGE_OBJECT_SIZE 85000
3339

3440
MethodTable g_FreeObjectEEType;
@@ -471,6 +477,24 @@ EXTERN_C int64_t QCALLTYPE RhGetTotalAllocatedBytesPrecise()
471477
return allocated;
472478
}
473479

480+
void FireAllocationSampled(GC_ALLOC_FLAGS flags, size_t size, size_t samplingBudgetOffset, Object* orObject)
481+
{
482+
#ifdef FEATURE_EVENT_TRACE
483+
void* typeId = GetLastAllocEEType();
484+
// Note: Just as for AllocationTick, the type name cannot be retrieved
485+
WCHAR* name = nullptr;
486+
487+
if (typeId != nullptr)
488+
{
489+
unsigned int allocKind =
490+
(flags & GC_ALLOC_PINNED_OBJECT_HEAP) ? 2 :
491+
(flags & GC_ALLOC_LARGE_OBJECT_HEAP) ? 1 :
492+
0; // SOH
493+
FireEtwAllocationSampled(allocKind, GetClrInstanceId(), typeId, name, (BYTE*)orObject, size, samplingBudgetOffset);
494+
}
495+
#endif
496+
}
497+
474498
static Object* GcAllocInternal(MethodTable* pEEType, uint32_t uFlags, uintptr_t numElements, Thread* pThread)
475499
{
476500
ASSERT(!pThread->IsDoNotTriggerGcSet());
@@ -539,8 +563,47 @@ static Object* GcAllocInternal(MethodTable* pEEType, uint32_t uFlags, uintptr_t
539563
// Save the MethodTable for instrumentation purposes.
540564
tls_pLastAllocationEEType = pEEType;
541565

542-
Object* pObject = GCHeapUtilities::GetGCHeap()->Alloc(pThread->GetAllocContext(), cbSize, uFlags);
543-
pThread->GetEEAllocContext()->UpdateCombinedLimit();
566+
// check for dynamic allocation sampling
567+
ee_alloc_context* pEEAllocContext = pThread->GetEEAllocContext();
568+
gc_alloc_context* pAllocContext = pEEAllocContext->GetGCAllocContext();
569+
bool isSampled = false;
570+
size_t availableSpace = 0;
571+
size_t samplingBudget = 0;
572+
573+
bool isRandomizedSamplingEnabled = ee_alloc_context::IsRandomizedSamplingEnabled();
574+
if (isRandomizedSamplingEnabled)
575+
{
576+
// The number of bytes we can allocate before we need to emit a sampling event.
577+
// This calculation is only valid if combined_limit < alloc_limit.
578+
samplingBudget = (size_t)(pEEAllocContext->combined_limit - pAllocContext->alloc_ptr);
579+
580+
// The number of bytes available in the current allocation context
581+
availableSpace = (size_t)(pAllocContext->alloc_limit - pAllocContext->alloc_ptr);
582+
583+
// Check to see if the allocated object overlaps a sampled byte
584+
// in this AC. This happens when both:
585+
// 1) The AC contains a sampled byte (combined_limit < alloc_limit)
586+
// 2) The object is large enough to overlap it (samplingBudget < aligned_size)
587+
//
588+
// Note that the AC could have no remaining space for allocations (alloc_ptr =
589+
// alloc_limit = combined_limit). When a thread hasn't done any SOH allocations
590+
// yet it also starts in an empty state where alloc_ptr = alloc_limit =
591+
// combined_limit = nullptr. The (1) check handles both of these situations
592+
// properly as an empty AC cannot have a sampled byte inside of it.
593+
isSampled =
594+
(pEEAllocContext->combined_limit < pAllocContext->alloc_limit) &&
595+
(samplingBudget < cbSize);
596+
597+
// if the object overflows the AC, we need to sample the remaining bytes
598+
// the sampling budget only included at most the bytes inside the AC
599+
if (cbSize > availableSpace && !isSampled)
600+
{
601+
samplingBudget = ee_alloc_context::ComputeGeometricRandom() + availableSpace;
602+
isSampled = (samplingBudget < cbSize);
603+
}
604+
}
605+
606+
Object* pObject = GCHeapUtilities::GetGCHeap()->Alloc(pAllocContext, cbSize, uFlags);
544607
if (pObject == NULL)
545608
return NULL;
546609

@@ -551,6 +614,19 @@ static Object* GcAllocInternal(MethodTable* pEEType, uint32_t uFlags, uintptr_t
551614
((Array*)pObject)->InitArrayLength((uint32_t)numElements);
552615
}
553616

617+
if (isSampled)
618+
{
619+
FireAllocationSampled((GC_ALLOC_FLAGS)uFlags, cbSize, samplingBudget, pObject);
620+
}
621+
622+
// There are a variety of conditions that may have invalidated the previous combined_limit value
623+
// such as not allocating the object in the AC memory region (UOH allocations), moving the AC, adding
624+
// extra alignment padding, allocating a new AC, or allocating an object that consumed the sampling budget.
625+
// Rather than test for all the different invalidation conditions individually we conservatively always
626+
// recompute it. If sampling isn't enabled this inlined function is just trivially setting
627+
// combined_limit=alloc_limit.
628+
pEEAllocContext->UpdateCombinedLimit(isRandomizedSamplingEnabled);
629+
554630
if (uFlags & GC_ALLOC_USER_OLD_HEAP)
555631
GCHeapUtilities::GetGCHeap()->PublishObject((uint8_t*)pObject);
556632

src/coreclr/nativeaot/Runtime/disabledeventtrace.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,11 @@
1212

1313
void EventTracing_Initialize() { }
1414

15+
bool IsRuntimeProviderEnabled(uint8_t level, uint64_t keyword)
16+
{
17+
return false;
18+
}
19+
1520
void ETW::GCLog::FireGcStart(ETW_GC_INFO * pGcInfo) { }
1621

1722
#ifdef FEATURE_ETW

src/coreclr/nativeaot/Runtime/eventpipe/gen-eventing-event-inc.lst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
# Native runtime events supported by aot runtime.
22

3+
AllocationSampled
34
BGC1stConEnd
45
BGC1stNonConEnd
56
BGC1stSweepEnd

src/coreclr/nativeaot/Runtime/eventtrace.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,11 @@ DOTNET_TRACE_CONTEXT MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_DOTNET_Con
3737
MICROSOFT_WINDOWS_DOTNETRUNTIME_PRIVATE_PROVIDER_EVENTPIPE_Context
3838
};
3939

40+
bool IsRuntimeProviderEnabled(uint8_t level, uint64_t keyword)
41+
{
42+
return RUNTIME_PROVIDER_CATEGORY_ENABLED(level, keyword);
43+
}
44+
4045
volatile LONGLONG ETW::GCLog::s_l64LastClientSequenceNumber = 0;
4146

4247
//---------------------------------------------------------------------------------------
@@ -300,4 +305,4 @@ void EventPipeEtwCallbackDotNETRuntimePrivate(
300305
_Inout_opt_ PVOID CallbackContext)
301306
{
302307
EtwCallbackCommon(DotNETRuntimePrivate, ControlCode, Level, MatchAnyKeyword, FilterData, true);
303-
}
308+
}

src/coreclr/nativeaot/Runtime/eventtracebase.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ void InitializeEventTracing();
3030

3131
#ifdef FEATURE_EVENT_TRACE
3232

33+
bool IsRuntimeProviderEnabled(uint8_t level, uint64_t keyword);
34+
3335
// !!!!!!! NOTE !!!!!!!!
3436
// The flags must match those in the ETW manifest exactly
3537
// !!!!!!! NOTE !!!!!!!!
@@ -102,6 +104,7 @@ struct ProfilingScanContext;
102104
#define CLR_GCHEAPSURVIVALANDMOVEMENT_KEYWORD 0x400000
103105
#define CLR_MANAGEDHEAPCOLLECT_KEYWORD 0x800000
104106
#define CLR_GCHEAPANDTYPENAMES_KEYWORD 0x1000000
107+
#define CLR_ALLOCATIONSAMPLING_KEYWORD 0x80000000000
105108

106109
//
107110
// Using KEYWORDZERO means when checking the events category ignore the keyword

src/coreclr/nativeaot/Runtime/gctoclreventsink.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include "common.h"
55
#include "gctoclreventsink.h"
66
#include "thread.h"
7+
#include "eventtracebase.h"
78

89
GCToCLREventSink g_gcToClrEventSink;
910

@@ -174,6 +175,14 @@ void GCToCLREventSink::FireGCAllocationTick_V4(uint64_t allocationAmount,
174175
{
175176
LIMITED_METHOD_CONTRACT;
176177

178+
#ifdef FEATURE_EVENT_TRACE
179+
if (IsRuntimeProviderEnabled(TRACE_LEVEL_INFORMATION, CLR_ALLOCATIONSAMPLING_KEYWORD))
180+
{
181+
// skip AllocationTick if AllocationSampled is emitted
182+
return;
183+
}
184+
#endif // FEATURE_EVENT_TRACE
185+
177186
void * typeId = GetLastAllocEEType();
178187
WCHAR * name = nullptr;
179188

src/coreclr/nativeaot/Runtime/thread.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,13 @@ static Thread* g_RuntimeInitializingThread;
3535

3636
#endif //!DACCESS_COMPILE
3737

38+
ee_alloc_context::PerThreadRandom::PerThreadRandom()
39+
{
40+
minipal_xoshiro128pp_init(&random_state, (uint32_t)PalGetTickCount64());
41+
}
42+
43+
thread_local ee_alloc_context::PerThreadRandom ee_alloc_context::t_random = PerThreadRandom();
44+
3845
PInvokeTransitionFrame* Thread::GetTransitionFrame()
3946
{
4047
if (ThreadStore::GetSuspendingThread() == this)

0 commit comments

Comments
 (0)