From b9645dfc1c34b808e714fd90f6dc6feeea676a7f Mon Sep 17 00:00:00 2001
From: Andy Ayers
Date: Thu, 18 Nov 2021 19:08:40 -0800
Subject: [PATCH 1/2] Enable QJFL and OSR by default for x64 and arm64

Change these default values when the jit targets x64 or arm64:
* COMPlus_TC_QuickJitForLoops=1
* COMPlus_TC_OnStackReplacement=1

The upshot is that on x64/arm64 more methods will be jitted at Tier0,
and we will rely on OSR to get out of long-running Tier0 methods.

Other architectures continue to use the old behavior for now, as
OSR is not yet supported for x86 or arm.
---
 src/coreclr/inc/clrconfigvalues.h | 4 ++++
 src/coreclr/jit/jitconfigvalues.h | 6 +++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h
index a14ba1ec98da4b..1d3e23272a1b5a 100644
--- a/src/coreclr/inc/clrconfigvalues.h
+++ b/src/coreclr/inc/clrconfigvalues.h
@@ -570,7 +570,11 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_HillClimbing_GainExponent,
 #ifdef FEATURE_TIERED_COMPILATION
 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TieredCompilation, W("TieredCompilation"), 1, "Enables tiered compilation")
 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TC_QuickJit, W("TC_QuickJit"), 1, "For methods that would be jitted, enable using quick JIT when appropriate.")
+#if defined(TARGET_AMD64) || defined(TARGET_ARM64)
+RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TC_QuickJitForLoops, W("TC_QuickJitForLoops"), 1, "When quick JIT is enabled, quick JIT may also be used for methods that contain loops.")
+#else // !(defined(TARGET_AMD64) || defined(TARGET_ARM64))
 RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TC_QuickJitForLoops, W("TC_QuickJitForLoops"), 0, "When quick JIT is enabled, quick JIT may also be used for methods that contain loops.")
+#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64)
 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TC_AggressiveTiering, W("TC_AggressiveTiering"), 0, "Transition through tiers aggressively.")
 RETAIL_CONFIG_DWORD_INFO(INTERNAL_TC_BackgroundWorkerTimeoutMs, W("TC_BackgroundWorkerTimeoutMs"), 4000, "How long in milliseconds the background worker thread may remain idle before exiting.")
 RETAIL_CONFIG_DWORD_INFO(INTERNAL_TC_CallCountThreshold, W("TC_CallCountThreshold"), 30, "Number of times a method must be called in tier 0 after which it is promoted to the next tier.")
diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h
index 73b71a07a8c492..490aa2cf185b8e 100644
--- a/src/coreclr/jit/jitconfigvalues.h
+++ b/src/coreclr/jit/jitconfigvalues.h
@@ -515,8 +515,12 @@ CONFIG_STRING(JitGuardedDevirtualizationRange, W("JitGuardedDevirtualizationRang
 CONFIG_INTEGER(JitRandomGuardedDevirtualization, W("JitRandomGuardedDevirtualization"), 0)
 #endif // DEBUG
 
-// Enable insertion of patchpoints into Tier0 methods with loops.
+// Enable insertion of patchpoints into Tier0 methods, switching to optimized where needed.
+#if defined(TARGET_AMD64) || defined(TARGET_ARM64)
+CONFIG_INTEGER(TC_OnStackReplacement, W("TC_OnStackReplacement"), 1)
+#else
 CONFIG_INTEGER(TC_OnStackReplacement, W("TC_OnStackReplacement"), 0)
+#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64)
 // Initial patchpoint counter value used by jitted code
 CONFIG_INTEGER(TC_OnStackReplacement_InitialCounter, W("TC_OnStackReplacement_InitialCounter"), 1000)
 // Enable partial compilation for Tier0 methods

From 203d36b31d99a5c5853cba5ee93ea85a9fcbabce Mon Sep 17 00:00:00 2001
From: Andy Ayers
Date: Wed, 19 Jan 2022 11:03:37 -0800
Subject: [PATCH 2/2] force extra warmup

---
 eng/testing/performance/performance-setup.ps1 | 2 +-
 eng/testing/performance/performance-setup.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/eng/testing/performance/performance-setup.ps1 b/eng/testing/performance/performance-setup.ps1
index d85325ee1349d5..101c91c709fee8 100644
--- a/eng/testing/performance/performance-setup.ps1
+++ b/eng/testing/performance/performance-setup.ps1
@@ -51,7 +51,7 @@ if ($Internal) {
 Default { $Queue = "Windows.10.Amd64.19H1.Tiger.Perf" }
 }
 $PerfLabArguments = "--upload-to-perflab-container"
- $ExtraBenchmarkDotNetArguments = ""
+ $ExtraBenchmarkDotNetArguments = "--warmupCount 30"
 $Creator = ""
 $HelixSourcePrefix = "official"
 }
diff --git a/eng/testing/performance/performance-setup.sh b/eng/testing/performance/performance-setup.sh
index b58a031bac3cda..d07033028fc560 100755
--- a/eng/testing/performance/performance-setup.sh
+++ b/eng/testing/performance/performance-setup.sh
@@ -201,7 +201,7 @@ if [[ "$internal" == true ]]; then
 perflab_arguments="--upload-to-perflab-container"
 helix_source_prefix="official"
 creator=
- extra_benchmark_dotnet_arguments=
+ extra_benchmark_dotnet_arguments="--warmupCount 30"
 
 if [[ "$architecture" == "arm64" ]]; then
 queue=Ubuntu.1804.Arm64.Perf