Skip to content

Commit

Permalink
Enable TLS on linux/arm64 only for static resolver (#106052)
Browse files Browse the repository at this point in the history
Fix the arm64 behavior of assuming that a static resolver is always possible for Arm64 TLS.

Instead, detect whether the static resolver path was taken, and if it was, use the static path.

In addition, this change adds a new config variable DOTNET_DisableOptimizedThreadStaticAccess which can be used to mitigate this issue in case issues are found in other optimized thread static access paths.

Finally, the test for this relies on adding a new switch to the corerun utility, which can pre-load a set of .so files into the process.

---------

Co-authored-by: Kunal Pathak <[email protected]>
Co-authored-by: Ubuntu <azureuser@davidwr-arm64-l2.sql0fxglchme5jhu4spsf5nbmb.xx.internal.cloudapp.net>
Co-authored-by: Jan Kotas <[email protected]>
  • Loading branch information
4 people authored Aug 7, 2024
1 parent 194fec5 commit ade568b
Show file tree
Hide file tree
Showing 11 changed files with 301 additions and 0 deletions.
17 changes: 17 additions & 0 deletions src/coreclr/hosts/corerun/corerun.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -472,6 +472,7 @@ static void display_usage()
W(" -p, --property - Property to pass to runtime during initialization.\n")
W(" If a property value contains spaces, quote the entire argument.\n")
W(" May be supplied multiple times. Format: <key>=<value>.\n")
W(" -l, --preload - path to shared library to load before loading the CLR.\n")
W(" -d, --debug - causes corerun to wait for a debugger to attach before executing.\n")
W(" -e, --env - path to a .env file with environment variables that corerun should set.\n")
W(" -?, -h, --help - show this help.\n")
Expand Down Expand Up @@ -569,6 +570,22 @@ static bool parse_args(
config.user_defined_keys.push_back(std::move(key));
config.user_defined_values.push_back(std::move(value));
}
else if (pal::strcmp(option, W("l")) == 0 || (pal::strcmp(option, W("preload")) == 0))
{
i++;
if (i >= argc)
{
pal::fprintf(stderr, W("Option %s: missing shared library path\n"), arg);
break;
}

string_t library = argv[i];
pal::mod_t hMod;
if (!pal::try_load_library(library, hMod))
{
break;
}
}
else if (pal::strcmp(option, W("d")) == 0 || (pal::strcmp(option, W("debug")) == 0))
{
config.wait_to_debug = true;
Expand Down
23 changes: 23 additions & 0 deletions src/coreclr/hosts/corerun/corerun.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,17 @@ namespace pal
return hMod != nullptr;
}

// Loads the module located at `path` with LoadLibraryExW and hands the
// resulting handle back through `hMod`.
//
// Returns true when the module was loaded. On failure, `hMod` is null, a
// message containing the path and the GetLastError() code is written to
// stderr, and false is returned.
inline bool try_load_library(const pal::string_t& path, pal::mod_t& hMod)
{
    hMod = (pal::mod_t)::LoadLibraryExW(path.c_str(), nullptr, 0);
    if (hMod != nullptr)
        return true;

    // Report the failure right away so the last-error value still belongs to the load attempt.
    pal::fprintf(stderr, W("Failed to load: '%s'. Error: 0x%08x\n"), path.c_str(), ::GetLastError());
    return false;
}

inline bool try_load_coreclr(const pal::string_t& core_root, pal::mod_t& hMod)
{
pal::string_t coreclr_path = core_root;
Expand Down Expand Up @@ -600,6 +611,18 @@ namespace pal
return hMod != nullptr;
}

// Loads the shared library located at `path` with dlopen (RTLD_NOW | RTLD_LOCAL)
// and hands the resulting handle back through `hMod`.
//
// Returns true when the library was loaded. On failure, `hMod` is null, a
// message containing the path and the dlerror() text is written to stderr,
// and false is returned.
inline bool try_load_library(const pal::string_t& path, pal::mod_t& hMod)
{
    hMod = (pal::mod_t)dlopen(path.c_str(), RTLD_NOW | RTLD_LOCAL);
    if (hMod != nullptr)
        return true;

    pal::fprintf(stderr, W("Failed to load: '%s'. Error: %s\n"), path.c_str(), dlerror());
    return false;
}


inline bool try_load_coreclr(const pal::string_t& core_root, pal::mod_t& hMod)
{
pal::string_t coreclr_path = core_root;
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/inc/clrconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,8 @@ CONFIG_STRING_INFO(INTERNAL_TailCallMax, W("TailCallMax"), "")
RETAIL_CONFIG_STRING_INFO(EXTERNAL_TailCallOpt, W("TailCallOpt"), "")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TailCallLoopOpt, W("TailCallLoopOpt"), 1, "Convert recursive tail calls to loops")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_Jit_NetFx40PInvokeStackResilience, W("NetFx40_PInvokeStackResilience"), (DWORD)-1, "Makes P/Invoke resilient against mismatched signature and calling convention (significant perf penalty).")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_DisableOptimizedThreadStaticAccess, W("DisableOptimizedThreadStaticAccess"), (DWORD)0, "Disable the OptimizedThreadStaticAccess feature.")
CONFIG_DWORD_INFO(EXTERNAL_AssertNotStaticTlsResolver, W("AssertNotStaticTlsResolver"), (DWORD)0, "Assert if we attempt to use the static tls resolver path.")

// AltJitAssertOnNYI should be 0 on targets where JIT is under development or bring up stage, so as to facilitate fallback to main JIT on hitting a NYI.
#if defined(TARGET_X86)
Expand Down
14 changes: 14 additions & 0 deletions src/coreclr/vm/arm64/asmhelpers.S
Original file line number Diff line number Diff line change
Expand Up @@ -794,4 +794,18 @@ LEAF_ENTRY GetThreadStaticsVariableOffset, _TEXT
EPILOG_RETURN
LEAF_END GetThreadStaticsVariableOffset, _TEXT
// ------------------------------------------------------------------

// ------------------------------------------------------------------
// size_t GetTLSResolverAddress()
//
// Helper to get the TLS resolver address. This will be then used to determine if we have a static or dynamic resolver.
//
// Returns (in x0) the value loaded from the TLS descriptor slot of t_ThreadStatics.
// The C++ caller treats that value as a pointer to the resolver's code and inspects
// the instructions found there.
LEAF_ENTRY GetTLSResolverAddress, _TEXT
PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -32
// x0 = page address of the TLS descriptor for t_ThreadStatics
adrp x0, :tlsdesc:t_ThreadStatics
// x1 = value stored at the descriptor's low-12-bit offset within that page (the resolver address)
ldr x1, [x0, #:tlsdesc_lo12:t_ThreadStatics]
// Move the loaded resolver address into the return register
mov x0, x1
EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32
EPILOG_RETURN
LEAF_END GetTLSResolverAddress, _TEXT
// ------------------------------------------------------------------
#endif // !TARGET_OSX
3 changes: 3 additions & 0 deletions src/coreclr/vm/eeconfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ HRESULT EEConfig::Init()
fJitFramed = false;
fJitMinOpts = false;
fJitEnableOptionalRelocs = false;
fDisableOptimizedThreadStaticAccess = false;
fPInvokeRestoreEsp = (DWORD)-1;

fStressLog = false;
Expand Down Expand Up @@ -503,6 +504,8 @@ HRESULT EEConfig::sync()
iJitOptimizeType = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_JitOptimizeType);
if (iJitOptimizeType > OPT_RANDOM) iJitOptimizeType = OPT_DEFAULT;

fDisableOptimizedThreadStaticAccess = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_DisableOptimizedThreadStaticAccess) != 0;

#ifdef TARGET_X86
fPInvokeRestoreEsp = CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_Jit_NetFx40PInvokeStackResilience);
#endif
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/vm/eeconfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ class EEConfig
bool JitFramed(void) const {LIMITED_METHOD_CONTRACT; return fJitFramed; }
bool JitMinOpts(void) const {LIMITED_METHOD_CONTRACT; return fJitMinOpts; }
bool JitEnableOptionalRelocs(void) const {LIMITED_METHOD_CONTRACT; return fJitEnableOptionalRelocs; }
bool DisableOptimizedThreadStaticAccess(void) const {LIMITED_METHOD_CONTRACT; return fDisableOptimizedThreadStaticAccess; }

// Tiered Compilation config
#if defined(FEATURE_TIERED_COMPILATION)
Expand Down Expand Up @@ -459,6 +460,7 @@ class EEConfig
bool fJitFramed; // Enable/Disable EBP based frames
bool fJitMinOpts; // Enable MinOpts for all jitted methods
bool fJitEnableOptionalRelocs; // Allow optional relocs
bool fDisableOptimizedThreadStaticAccess; // Disable OptimizedThreadStatic access

unsigned iJitOptimizeType; // 0=Blended,1=SmallCode,2=FastCode, default is 0=Blended

Expand Down
40 changes: 40 additions & 0 deletions src/coreclr/vm/threadstatics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -784,6 +784,10 @@ void FreeTLSIndicesForLoaderAllocator(LoaderAllocator *pLoaderAllocator)

static void* GetTlsIndexObjectAddress();

#if !defined(TARGET_OSX) && defined(TARGET_UNIX) && defined(TARGET_ARM64)
extern "C" size_t GetTLSResolverAddress();
#endif // !TARGET_OSX && TARGET_UNIX && TARGET_ARM64

bool CanJITOptimizeTLSAccess()
{
LIMITED_METHOD_CONTRACT;
Expand All @@ -799,6 +803,36 @@ bool CanJITOptimizeTLSAccess()
// Optimization is disabled for FreeBSD/arm64
#elif defined(FEATURE_INTERPRETER)
// Optimization is disabled when interpreter may be used
#elif !defined(TARGET_OSX) && defined(TARGET_UNIX) && defined(TARGET_ARM64)
// Optimization is enabled for linux/arm64 only for static resolver.
// For static resolver, the TP offset is same for all threads.
// For dynamic resolver, TP offset returned is for the current thread and
// will be different for the other threads.
uint32_t* resolverAddress = reinterpret_cast<uint32_t*>(GetTLSResolverAddress());
int ip = 0;
if ((resolverAddress[ip] == 0xd503201f) || (resolverAddress[ip] == 0xd503241f))
{
// nop might not be present in older resolver, so skip it.

// nop or hint 32
ip++;
}

if (
// ldr x0, [x0, #8]
(resolverAddress[ip] == 0xf9400400) &&
// ret
(resolverAddress[ip + 1] == 0xd65f03c0)
)
{
optimizeThreadStaticAccess = true;
#ifdef _DEBUG
if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_AssertNotStaticTlsResolver) != 0)
{
_ASSERTE(!"Detected static resolver in use when not expected");
}
#endif
}
#else
optimizeThreadStaticAccess = true;
#if !defined(TARGET_OSX) && defined(TARGET_UNIX) && defined(TARGET_AMD64)
Expand All @@ -808,6 +842,12 @@ bool CanJITOptimizeTLSAccess()
optimizeThreadStaticAccess = GetTlsIndexObjectAddress() != nullptr;
#endif // !TARGET_OSX && TARGET_UNIX && TARGET_AMD64
#endif

if (g_pConfig->DisableOptimizedThreadStaticAccess())
{
optimizeThreadStaticAccess = false;
}

return optimizeThreadStaticAccess;
}

Expand Down
8 changes: 8 additions & 0 deletions src/tests/JIT/Directed/tls/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Licensed to the .NET Foundation under one or more agreements.
# The .NET Foundation licenses this file to you under the MIT license.

# Add the directory named by INC_PLATFORM_DIR to the include search path.
include_directories(${INC_PLATFORM_DIR})

# Build testtls.cpp into a shared library named "usetls".
add_library(usetls SHARED testtls.cpp)

# Install the usetls library into the "bin" destination.
install (TARGETS usetls DESTINATION bin)
130 changes: 130 additions & 0 deletions src/tests/JIT/Directed/tls/TestTLSWithLoadedDlls.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.


// This test verifies that the runtime behaves correctly when its TLS infrastructure is forced
// to use a dynamic resolver. A private config variable is used to validate that behavior on Linux Arm64,
// together with a set of multithreaded tasks that has been known to crash the runtime when this case is handled incorrectly.

using System;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Reflection;
using System.Runtime.InteropServices;
using System.Runtime.Loader;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

namespace TestTLSWithLoadedDlls
{
/// <summary>
/// Test driver with two modes:
/// <list type="bullet">
/// <item>With the single argument "RunLotsOfTasks" it runs batches of short async tasks and returns 100.</item>
/// <item>Otherwise it copies the native "usetls" library into numbered sub-directories, launches corerun
/// with one "-l" (preload) option per copy, re-running this assembly in "RunLotsOfTasks" mode, and
/// returns the child's exit code.</item>
/// </list>
/// </summary>
static class TLSWithLoadedDlls
{
    /// <summary>
    /// Runs <paramref name="loopCount"/> batches of 100 tasks; each task awaits a 1 ms delay,
    /// and the loop awaits 20 ms between batches.
    /// </summary>
    /// <param name="loopCount">Number of batches to run.</param>
    static async Task DoLotsOfAsyncWork(int loopCount)
    {
        for (int batch = 0; batch < loopCount; batch++)
        {
            Console.WriteLine("Starting a new batch of tasks...");

            // The range value itself is unused; it only determines how many tasks are created.
            Task[] tasks = Enumerable.Range(1, 100).Select(_ => Task.Run(async () =>
            {
                await Task.Delay(1);
            })).ToArray();

            await Task.WhenAll(tasks);

            Console.WriteLine("Batch of tasks completed. Main loop sleeping for 20 ms...");
            await Task.Delay(20);
        }
    }

    /// <summary>
    /// Entry point. See the class summary for the two modes.
    /// </summary>
    /// <param name="args">Either empty (parent mode) or the single value "RunLotsOfTasks" (child mode).</param>
    /// <returns>100 in child mode; the child process exit code in parent mode.</returns>
    static int Main(string[] args)
    {
        if ((args.Length == 1) && (args[0] == "RunLotsOfTasks"))
        {
            DoLotsOfAsyncWork(100).GetAwaiter().GetResult();
            return 100;
        }

        // Windows does not have a really long command line length limit, and doesn't have a
        // problem with many TLS-using images being loaded, so fewer copies are used there.
        int countOfLibTlsToLoad = OperatingSystem.IsWindows() ? 10 : 60;

        string useTlsFileName = GetSharedLibraryFileNameForCurrentPlatform("usetls");
        string testAssemblyPath = Assembly.GetExecutingAssembly().Location;
        string testDirectory = Path.GetDirectoryName(testAssemblyPath);
        string useTlsFilePath = Path.Combine(testDirectory, useTlsFileName);

        using Process process = new Process();
        process.StartInfo.FileName = GetCorerunPath();
        process.StartInfo.UseShellExecute = false;
        process.StartInfo.EnvironmentVariables["DOTNET_AssertNotStaticTlsResolver"] = "1";

        // Arguments are passed through ArgumentList so that paths containing spaces
        // reach corerun as single arguments without any manual quoting.
        for (int i = 0; i < countOfLibTlsToLoad; i++)
        {
            // Each copy lives in its own numbered directory so that every "-l" option
            // names a distinct file.
            string tlsNumberSpecificPath = Path.Combine(testDirectory, i.ToString());
            string finalUseTlsPath = Path.Combine(tlsNumberSpecificPath, useTlsFileName);

            Directory.CreateDirectory(tlsNumberSpecificPath);
            if (!File.Exists(finalUseTlsPath))
            {
                File.Copy(useTlsFilePath, finalUseTlsPath);
            }

            process.StartInfo.ArgumentList.Add("-l");
            process.StartInfo.ArgumentList.Add(finalUseTlsPath);
        }

        process.StartInfo.ArgumentList.Add(testAssemblyPath);
        process.StartInfo.ArgumentList.Add("RunLotsOfTasks");

        Console.WriteLine($"Launching {process.StartInfo.FileName} {string.Join(" ", process.StartInfo.ArgumentList)}");

        process.Start();
        process.WaitForExit();
        return process.ExitCode;
    }

    /// <summary>
    /// Builds the path of the corerun executable inside the directory named by the
    /// CORE_ROOT environment variable.
    /// </summary>
    /// <exception cref="InvalidOperationException">CORE_ROOT is not set or is empty.</exception>
    private static string GetCorerunPath()
    {
        string coreRoot = Environment.GetEnvironmentVariable("CORE_ROOT");
        if (string.IsNullOrEmpty(coreRoot))
        {
            throw new InvalidOperationException("The CORE_ROOT environment variable must be set to locate corerun.");
        }

        string corerunName = OperatingSystem.IsWindows() ? "CoreRun.exe" : "corerun";
        return Path.Combine(coreRoot, corerunName);
    }

    /// <summary>
    /// Returns the file name prefix and suffix used for shared libraries on the current OS:
    /// ("", ".dll") on Windows, ("lib", ".dylib") on macOS, and ("lib", ".so") otherwise.
    /// </summary>
    public static (string, string) GetSharedLibraryPrefixSuffix()
    {
        if (OperatingSystem.IsWindows())
            return (string.Empty, ".dll");

        if (OperatingSystem.IsMacOS())
            return ("lib", ".dylib");

        return ("lib", ".so");
    }

    /// <summary>
    /// Decorates <paramref name="libraryName"/> with the current platform's shared library
    /// prefix and suffix (for example "usetls" becomes "libusetls.so" on Linux).
    /// </summary>
    public static string GetSharedLibraryFileNameForCurrentPlatform(string libraryName)
    {
        (string prefix, string suffix) = GetSharedLibraryPrefixSuffix();
        return prefix + libraryName + suffix;
    }
}
}
16 changes: 16 additions & 0 deletions src/tests/JIT/Directed/tls/TestTLSWithLoadedDlls.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Test project for TestTLSWithLoadedDlls.cs. The test launches corerun as a child process
     and preloads copies of the native "usetls" library into it. -->
<PropertyGroup>
<CLRTestPriority>0</CLRTestPriority>
<RequiresProcessIsolation>true</RequiresProcessIsolation>
<ReferenceXUnitWrapperGenerator>false</ReferenceXUnitWrapperGenerator>
<NativeAotIncompatible>true</NativeAotIncompatible>
</PropertyGroup>
<PropertyGroup>
<DebugType>PdbOnly</DebugType>
<Optimize>True</Optimize>
</PropertyGroup>
<ItemGroup>
<!-- Compiles the .cs file that shares this project's name. -->
<Compile Include="$(MSBuildProjectName).cs" />
<!-- References the CMake project in this directory, which builds the native usetls library. -->
<CMakeProjectReference Include="CMakeLists.txt" />
</ItemGroup>
</Project>
46 changes: 46 additions & 0 deletions src/tests/JIT/Directed/tls/testtls.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

// DLLEXPORT marks a symbol as exported from the shared library:
// __declspec(dllexport) under MSVC, default ELF/Mach-O visibility elsewhere.
#if defined(_MSC_VER)
#define DLLEXPORT __declspec(dllexport)
#else
#define DLLEXPORT __attribute__((visibility("default")))
#endif // _MSC_VER

// Seventeen independent thread-local integers. They are kept as separate
// variables (not an array) so the library carries that many distinct
// thread-local objects.
thread_local int tls0;
thread_local int tls1;
thread_local int tls2;
thread_local int tls3;
thread_local int tls4;
thread_local int tls5;
thread_local int tls6;
thread_local int tls7;
thread_local int tls8;
thread_local int tls9;
thread_local int tls10;
thread_local int tls11;
thread_local int tls12;
thread_local int tls13;
thread_local int tls14;
thread_local int tls15;
thread_local int tls16;

// Resets every thread-local variable above to zero for the calling thread.
extern "C" DLLEXPORT void initializeTLS()
{
    tls0 = tls1 = tls2 = tls3 = 0;
    tls4 = tls5 = tls6 = tls7 = 0;
    tls8 = tls9 = tls10 = tls11 = 0;
    tls12 = tls13 = tls14 = tls15 = 0;
    tls16 = 0;
}

0 comments on commit ade568b

Please sign in to comment.