Skip to content

Commit

Permalink
Add support for --instruction-set:native
Browse files Browse the repository at this point in the history
This allows compiling for the ISA extensions that the currently running CPU supports.

Fixes dotnet#73246.
  • Loading branch information
MichalStrehovsky committed Jun 21, 2023
1 parent 0c77cbe commit a695971
Show file tree
Hide file tree
Showing 17 changed files with 927 additions and 817 deletions.
6 changes: 2 additions & 4 deletions src/coreclr/nativeaot/Runtime/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ set(COMMON_RUNTIME_SOURCES
${GC_DIR}/handletablescan.cpp
${GC_DIR}/objecthandle.cpp
${GC_DIR}/softwarewritewatch.cpp

${CLR_SRC_NATIVE_DIR}/minipal/cpufeatures.c
)

set(SERVER_GC_SOURCES
Expand Down Expand Up @@ -118,10 +120,6 @@ if (WIN32)
list(APPEND FULL_RUNTIME_SOURCES windows/CoffNativeCodeManager.cpp)

set(ASM_SUFFIX asm)

if (CLR_CMAKE_TARGET_ARCH_I386 OR CLR_CMAKE_TARGET_ARCH_AMD64)
set(RUNTIME_SOURCES_ARCH_ASM ${ARCH_SOURCES_DIR}/GC.${ASM_SUFFIX})
endif()
else()

include_directories(unix)
Expand Down
3 changes: 2 additions & 1 deletion src/coreclr/nativeaot/Runtime/MiscHelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
#include "GCMemoryHelpers.inl"
#include "yieldprocessornormalized.h"
#include "RhConfig.h"
#include <minipal/cpufeatures.h>

COOP_PINVOKE_HELPER(void, RhDebugBreak, ())
{
Expand Down Expand Up @@ -411,6 +412,6 @@ COOP_PINVOKE_HELPER(uint32_t, RhGetKnobValues, (char *** pResultKeys, char *** p
// Exported (NATIVEAOT_API, C linkage) helper, compiled only for x86/x64 targets, that forwards a
// CPUID query to the underlying cpuid primitive.
//   cpuInfo       - passed through unchanged to the cpuid primitive; presumably the 4-int
//                   EAX/EBX/ECX/EDX result buffer, as in `__cpuidex(int cpuInfo[4], ...)` - TODO confirm
//   functionId    - passed through as the CPUID function (leaf) id
//   subFunctionId - passed through as the CPUID sub-function (sub-leaf) id
#if defined(TARGET_X86) || defined(TARGET_AMD64)
EXTERN_C NATIVEAOT_API void __cdecl RhCpuIdEx(int* cpuInfo, int functionId, int subFunctionId)
{
// NOTE(review): this text comes from a rendered diff hunk ("2 additions & 1 deletion") whose
// +/- markers were lost. The __cpuidex call is presumably the removed line and the
// minipal_cpuidex call its replacement, so only one of the two exists in the real file - confirm
// against the repository before relying on this listing.
__cpuidex(cpuInfo, functionId, subFunctionId);
minipal_cpuidex(cpuInfo, functionId, subFunctionId);
}
#endif
26 changes: 0 additions & 26 deletions src/coreclr/nativeaot/Runtime/PalRedhawk.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
#include <sal.h>
#include <stdarg.h>
#include "gcenv.structs.h" // CRITICAL_SECTION
#include "IntrinsicConstants.h"
#include "PalRedhawkCommon.h"

#ifndef PAL_REDHAWK_INCLUDED
Expand Down Expand Up @@ -780,31 +779,6 @@ REDHAWK_PALIMPORT char* PalCopyTCharAsChar(const TCHAR* toCopy);
REDHAWK_PALIMPORT int32_t __cdecl _stricmp(const char *string1, const char *string2);
#endif // TARGET_UNIX

#if defined(HOST_X86) || defined(HOST_AMD64)

#ifdef TARGET_UNIX
// MSVC directly defines intrinsics for __cpuid and __cpuidex matching the below signatures
// We define matching signatures for use on Unix platforms.
//
// IMPORTANT: Unlike MSVC, Unix does not explicitly zero ECX for __cpuid

REDHAWK_PALIMPORT void __cpuid(int cpuInfo[4], int function_id);
REDHAWK_PALIMPORT void __cpuidex(int cpuInfo[4], int function_id, int subFunction_id);
#else
#include <intrin.h>
#endif

REDHAWK_PALIMPORT uint32_t REDHAWK_PALAPI xmmYmmStateSupport();
REDHAWK_PALIMPORT uint32_t REDHAWK_PALAPI avx512StateSupport();
REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalIsAvxEnabled();
REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalIsAvx512Enabled();

#endif // defined(HOST_X86) || defined(HOST_AMD64)

#if defined(HOST_ARM64)
REDHAWK_PALIMPORT void REDHAWK_PALAPI PAL_GetCpuCapabilityFlags(int* flags);
#endif //defined(HOST_ARM64)

#include "PalRedhawkInline.h"

#endif // !PAL_REDHAWK_INCLUDED
36 changes: 0 additions & 36 deletions src/coreclr/nativeaot/Runtime/amd64/GC.asm

This file was deleted.

1 change: 0 additions & 1 deletion src/coreclr/nativeaot/Runtime/amd64/GcProbe.asm
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,6 @@ RuntimeInstance__ShouldHijackLoopForGcStress equ ?ShouldHijackLoopForGcStress@Ru
EXTERN RuntimeInstance__ShouldHijackLoopForGcStress : PROC

EXTERN g_fGcStressStarted : DWORD
EXTERN g_fHasFastFxsave : BYTE

;;
;; INVARIANT: Don't trash the argument registers, the binder codegen depends on this.
Expand Down
202 changes: 2 additions & 200 deletions src/coreclr/nativeaot/Runtime/startup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
#include "stressLog.h"
#include "RestrictedCallouts.h"
#include "yieldprocessornormalized.h"
#include <minipal/cpufeatures.h>

#ifdef FEATURE_PERFTRACING
#include "EventPipeInterface.h"
Expand All @@ -48,9 +49,6 @@ static bool DetectCPUFeatures();

extern RhConfig * g_pRhConfig;

EXTERN_C bool g_fHasFastFxsave;
bool g_fHasFastFxsave = false;

CrstStatic g_ThunkPoolLock;

#if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64)
Expand Down Expand Up @@ -180,203 +178,7 @@ static bool InitDLL(HANDLE hPalInstance)
bool DetectCPUFeatures()
{
#if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64)

#if defined(HOST_X86) || defined(HOST_AMD64)

int cpuidInfo[4];

const int CPUID_EAX = 0;
const int CPUID_EBX = 1;
const int CPUID_ECX = 2;
const int CPUID_EDX = 3;

__cpuid(cpuidInfo, 0x00000000);
uint32_t maxCpuId = static_cast<uint32_t>(cpuidInfo[CPUID_EAX]);

if (maxCpuId >= 1)
{
__cpuid(cpuidInfo, 0x00000001);

const int requiredBaselineEdxFlags = (1 << 25) // SSE
| (1 << 26); // SSE2

if ((cpuidInfo[CPUID_EDX] & requiredBaselineEdxFlags) == requiredBaselineEdxFlags)
{
g_cpuFeatures |= XArchIntrinsicConstants_VectorT128;

if ((cpuidInfo[CPUID_ECX] & (1 << 25)) != 0) // AESNI
{
g_cpuFeatures |= XArchIntrinsicConstants_Aes;
}

if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // PCLMULQDQ
{
g_cpuFeatures |= XArchIntrinsicConstants_Pclmulqdq;
}

if ((cpuidInfo[CPUID_ECX] & (1 << 0)) != 0) // SSE3
{
g_cpuFeatures |= XArchIntrinsicConstants_Sse3;

if ((cpuidInfo[CPUID_ECX] & (1 << 9)) != 0) // SSSE3
{
g_cpuFeatures |= XArchIntrinsicConstants_Ssse3;

if ((cpuidInfo[CPUID_ECX] & (1 << 19)) != 0) // SSE4.1
{
g_cpuFeatures |= XArchIntrinsicConstants_Sse41;

if ((cpuidInfo[CPUID_ECX] & (1 << 20)) != 0) // SSE4.2
{
g_cpuFeatures |= XArchIntrinsicConstants_Sse42;

if ((cpuidInfo[CPUID_ECX] & (1 << 22)) != 0) // MOVBE
{
g_cpuFeatures |= XArchIntrinsicConstants_Movbe;
}

if ((cpuidInfo[CPUID_ECX] & (1 << 23)) != 0) // POPCNT
{
g_cpuFeatures |= XArchIntrinsicConstants_Popcnt;
}

const int requiredAvxEcxFlags = (1 << 27) // OSXSAVE
| (1 << 28); // AVX

if ((cpuidInfo[CPUID_ECX] & requiredAvxEcxFlags) == requiredAvxEcxFlags)
{
if (PalIsAvxEnabled() && (xmmYmmStateSupport() == 1)) // XGETBV == 11
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx;

if ((cpuidInfo[CPUID_ECX] & (1 << 12)) != 0) // FMA
{
g_cpuFeatures |= XArchIntrinsicConstants_Fma;
}

if (maxCpuId >= 0x07)
{
__cpuidex(cpuidInfo, 0x00000007, 0x00000000);

if ((cpuidInfo[CPUID_EBX] & (1 << 5)) != 0) // AVX2
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx2;
g_cpuFeatures |= XArchIntrinsicConstants_VectorT256;

if (PalIsAvx512Enabled() && (avx512StateSupport() == 1)) // XGETBV XCR0[7:5] == 111
{
if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0) // AVX512F
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512f;
g_cpuFeatures |= XArchIntrinsicConstants_VectorT512;

bool isAVX512_VLSupported = false;
if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0) // AVX512VL
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512f_vl;
isAVX512_VLSupported = true;
}

if ((cpuidInfo[CPUID_EBX] & (1 << 30)) != 0) // AVX512BW
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512bw;
if (isAVX512_VLSupported) // AVX512BW_VL
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512bw_vl;
}
}

if ((cpuidInfo[CPUID_EBX] & (1 << 28)) != 0) // AVX512CD
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512cd;
if (isAVX512_VLSupported) // AVX512CD_VL
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512cd_vl;
}
}

if ((cpuidInfo[CPUID_EBX] & (1 << 17)) != 0) // AVX512DQ
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512dq;
if (isAVX512_VLSupported) // AVX512DQ_VL
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512dq_vl;
}
}

if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0) // AVX512VBMI
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512Vbmi;
if (isAVX512_VLSupported) // AVX512VBMI_VL
{
g_cpuFeatures |= XArchIntrinsicConstants_Avx512Vbmi_vl;
}
}
}
}

__cpuidex(cpuidInfo, 0x00000007, 0x00000001);

if ((cpuidInfo[CPUID_EAX] & (1 << 4)) != 0) // AVX-VNNI
{
g_cpuFeatures |= XArchIntrinsicConstants_AvxVnni;
}
}
}
}
}
}
}
}
}
}

if (maxCpuId >= 0x07)
{
__cpuidex(cpuidInfo, 0x00000007, 0x00000000);

if ((cpuidInfo[CPUID_EBX] & (1 << 3)) != 0) // BMI1
{
g_cpuFeatures |= XArchIntrinsicConstants_Bmi1;
}

if ((cpuidInfo[CPUID_EBX] & (1 << 8)) != 0) // BMI2
{
g_cpuFeatures |= XArchIntrinsicConstants_Bmi2;
}

if ((cpuidInfo[CPUID_EDX] & (1 << 14)) != 0)
{
g_cpuFeatures |= XArchIntrinsicConstants_Serialize; // SERIALIZE
}
}
}

__cpuid(cpuidInfo, 0x80000000);
uint32_t maxCpuIdEx = static_cast<uint32_t>(cpuidInfo[CPUID_EAX]);

if (maxCpuIdEx >= 0x80000001)
{
__cpuid(cpuidInfo, 0x80000001);

if ((cpuidInfo[CPUID_ECX] & (1 << 5)) != 0) // LZCNT
{
g_cpuFeatures |= XArchIntrinsicConstants_Lzcnt;
}

#ifdef HOST_AMD64
// AMD has a "fast" mode for fxsave/fxrstor, which omits the saving of xmm registers. The OS will enable this mode
// if it is supported. So if we continue to use fxsave/fxrstor, we must manually save/restore the xmm registers.
// fxsr_opt is bit 25 of CPUID_EDX
if ((cpuidInfo[CPUID_EDX] & (1 << 25)) != 0)
g_fHasFastFxsave = true;
#endif
}
#endif // HOST_X86 || HOST_AMD64

#if defined(HOST_ARM64)
PAL_GetCpuCapabilityFlags (&g_cpuFeatures);
#endif
g_cpuFeatures = minipal_getcpufeatures();

if ((g_cpuFeatures & g_requiredCpuFeatures) != g_requiredCpuFeatures)
{
Expand Down
Loading

0 comments on commit a695971

Please sign in to comment.