Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix an issue with the last level cache values on Linux running on certain AMD Processors #108492

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
3 changes: 2 additions & 1 deletion src/coreclr/gc/gcconfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,8 @@ class GCConfigStringHolder
STRING_CONFIG(GCPath, "GCPath", "System.GC.Path", "Specifies the path of the standalone GC implementation.") \
INT_CONFIG (GCSpinCountUnit, "GCSpinCountUnit", NULL, 0, "Specifies the spin count unit used by the GC.") \
INT_CONFIG (GCDynamicAdaptationMode, "GCDynamicAdaptationMode", "System.GC.DynamicAdaptationMode", 1, "Enable the GC to dynamically adapt to application sizes.") \
INT_CONFIG (GCDTargetTCP, "GCDTargetTCP", "System.GC.DTargetTCP", 0, "Specifies the target tcp for DATAS")
INT_CONFIG (GCDTargetTCP, "GCDTargetTCP", "System.GC.DTargetTCP", 0, "Specifies the target tcp for DATAS") \
BOOL_CONFIG (GCCacheSizeFromSysConf, "GCCacheSizeFromSysConf", NULL, false, "Specifies using sysconf to retrieve the last level cache size for Unix.")

// This class is responsible for retrieving configuration information
// for how the GC should operate.
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/gc/unix/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
set(CMAKE_INCLUDE_CURRENT_DIR ON)
include_directories("../env")
include_directories("..")

include(configure.cmake)

Expand Down
147 changes: 91 additions & 56 deletions src/coreclr/gc/unix/gcenv.unix.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
#include "gcenv.structs.h"
#include "gcenv.base.h"
#include "gcenv.os.h"
#include "gcenv.ee.h"
#include "gcenv.unix.inl"
#include "volatile.h"
#include "gcconfig.h"
#include "numasupport.h"

#if HAVE_SWAPCTL
Expand Down Expand Up @@ -862,10 +864,10 @@ bool ReadMemoryValueFromFile(const char* filename, uint64_t* val)
return result;
}

static size_t GetLogicalProcessorCacheSizeFromOS()
static void GetLogicalProcessorCacheSizeFromSysConf(size_t* cacheLevel, size_t* cacheSize)
{
size_t cacheLevel = 0;
size_t cacheSize = 0;
assert (cacheLevel != nullptr);
assert (cacheSize != nullptr);

#if defined(_SC_LEVEL1_DCACHE_SIZE) || defined(_SC_LEVEL2_CACHE_SIZE) || defined(_SC_LEVEL3_CACHE_SIZE) || defined(_SC_LEVEL4_CACHE_SIZE)
const int cacheLevelNames[] =
Expand All @@ -881,47 +883,105 @@ static size_t GetLogicalProcessorCacheSizeFromOS()
long size = sysconf(cacheLevelNames[i]);
if (size > 0)
{
cacheSize = (size_t)size;
cacheLevel = i + 1;
*cacheSize = (size_t)size;
*cacheLevel = i + 1;
break;
}
}
#endif
}

static void GetLogicalProcessorCacheSizeFromSysFs(size_t* cacheLevel, size_t* cacheSize)
{
assert (cacheLevel != nullptr);
assert (cacheSize != nullptr);

#if defined(TARGET_LINUX) && !defined(HOST_ARM) && !defined(HOST_X86)
if (cacheSize == 0)
//
// Retrieve cachesize via sysfs by reading the file /sys/devices/system/cpu/cpu0/cache/index{LastLevelCache}/size
// for the platform. Currently musl and arm64 should be the only cases to use
mrsharm marked this conversation as resolved.
Show resolved Hide resolved
// this method to determine cache size.
//
size_t level;
char path_to_size_file[] = "/sys/devices/system/cpu/cpu0/cache/index-/size";
char path_to_level_file[] = "/sys/devices/system/cpu/cpu0/cache/index-/level";
int index = 40;
assert(path_to_size_file[index] == '-');
assert(path_to_level_file[index] == '-');

for (int i = 0; i < 5; i++)
{
//
// Fallback to retrieve cachesize via /sys/.. if sysconf was not available
// for the platform. Currently musl and arm64 should be only cases to use
// this method to determine cache size.
//
size_t level;
char path_to_size_file[] = "/sys/devices/system/cpu/cpu0/cache/index-/size";
char path_to_level_file[] = "/sys/devices/system/cpu/cpu0/cache/index-/level";
int index = 40;
assert(path_to_size_file[index] == '-');
assert(path_to_level_file[index] == '-');

for (int i = 0; i < 5; i++)
{
path_to_size_file[index] = (char)(48 + i);
path_to_size_file[index] = (char)(48 + i);

uint64_t cache_size_from_sys_file = 0;
uint64_t cache_size_from_sys_file = 0;

if (ReadMemoryValueFromFile(path_to_size_file, &cache_size_from_sys_file))
{
cacheSize = std::max(cacheSize, (size_t)cache_size_from_sys_file);
if (ReadMemoryValueFromFile(path_to_size_file, &cache_size_from_sys_file))
{
*cacheSize = std::max(*cacheSize, (size_t)cache_size_from_sys_file);

path_to_level_file[index] = (char)(48 + i);
if (ReadMemoryValueFromFile(path_to_level_file, &level))
{
cacheLevel = level;
}
path_to_level_file[index] = (char)(48 + i);
if (ReadMemoryValueFromFile(path_to_level_file, &level))
{
*cacheLevel = level;
}
}
}
#endif
}

// Estimates a cache size from the number of logical processors in the
// process affinity set. Used when no cache size could be obtained by other means.
//
// Parameters:
//  cacheLevel - must be non-null; it is validated but not written by this function.
//  cacheSize  - must be non-null; on Linux targets it receives the estimated size in bytes.
//
// On targets where the guard below is not satisfied, this function only validates
// its arguments and leaves both outputs untouched.
static void GetLogicalProcessorCacheSizeFromHeuristic(size_t* cacheLevel, size_t* cacheSize)
{
    assert (cacheLevel != nullptr);
    assert (cacheSize != nullptr);

#if (defined(TARGET_LINUX) && !defined(TARGET_APPLE))
    {
        // Estimated size by logical CPU count:
        //   1 ~ 4   :  4 MB
        //   5 ~ 16  :  8 MB
        //  17 ~ 64  : 16 MB
        //  65+      : 32 MB
        const DWORD cpuCount = g_processAffinitySet.Count();

        // Start from the largest bucket and narrow it down for smaller CPU counts.
        size_t estimateInMB = 32;
        if (cpuCount < 5)
        {
            estimateInMB = 4;
        }
        else if (cpuCount < 17)
        {
            estimateInMB = 8;
        }
        else if (cpuCount < 65)
        {
            estimateInMB = 16;
        }

        // Convert megabytes to bytes.
        *cacheSize = estimateInMB * (1024 * 1024);
    }
#endif
}

static size_t GetLogicalProcessorCacheSizeFromOS()
{
size_t cacheLevel = 0;
size_t cacheSize = 0;

if (GCConfig::GetGCCacheSizeFromSysConf())
{
GetLogicalProcessorCacheSizeFromSysConf(&cacheLevel, &cacheSize);
}

if (cacheSize == 0)
{
GetLogicalProcessorCacheSizeFromSysFs(&cacheLevel, &cacheSize);
if (cacheSize == 0)
{
GetLogicalProcessorCacheSizeFromHeuristic(&cacheLevel, &cacheSize);
}
}

#if HAVE_SYSCTLBYNAME
if (cacheSize == 0)
Expand All @@ -948,32 +1008,7 @@ static size_t GetLogicalProcessorCacheSizeFromOS()
#if (defined(HOST_ARM64) || defined(HOST_LOONGARCH64)) && !defined(TARGET_APPLE)
if (cacheLevel != 3)
{
// We expect to get the L3 cache size for Arm64 but currently expected to be missing that info
// from most of the machines.
// Hence, just use the following heuristics at best depending on the CPU count
// 1 ~ 4 : 4 MB
// 5 ~ 16 : 8 MB
// 17 ~ 64 : 16 MB
// 65+ : 32 MB
DWORD logicalCPUs = g_processAffinitySet.Count();
if (logicalCPUs < 5)
{
cacheSize = 4;
}
else if (logicalCPUs < 17)
{
cacheSize = 8;
}
else if (logicalCPUs < 65)
{
cacheSize = 16;
}
else
{
cacheSize = 32;
}

cacheSize *= (1024 * 1024);
GetLogicalProcessorCacheSizeFromHeuristic(&cacheLevel, &cacheSize);
}
#endif

Expand Down