From ff010bbb88abc55ec20e31a257fa8f64177f9018 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Thu, 3 Jul 2025 12:24:42 -0700 Subject: [PATCH 01/10] Add GC tracking for APX EGPRs for LINUX AMD64 --- .../Runtime/ExceptionServices/AsmOffsets.cs | 4 +- src/coreclr/debug/ee/debugger.cpp | 20 ++++++++ src/coreclr/gcinfo/gcinfodumper.cpp | 51 ++++++++++++++++++- src/coreclr/inc/gcinfotypes.h | 6 +-- src/coreclr/inc/regdisp.h | 39 +++++++++++++- .../nativeaot/Runtime/amd64/AsmOffsetsCpu.h | 14 ++--- src/coreclr/nativeaot/Runtime/regdisplay.h | 18 +++++++ src/coreclr/pal/inc/pal.h | 39 +++++++------- src/coreclr/unwinder/amd64/unwinder.cpp | 2 +- src/coreclr/vm/amd64/asmconstants.h | 7 +++ src/coreclr/vm/amd64/cgenamd64.cpp | 13 +++++ src/coreclr/vm/gcinfodecoder.cpp | 38 ++++++++++++-- .../GC/Regress/GPRStressR16toR31.cs | 40 +++++++++++++++ 13 files changed, 252 insertions(+), 39 deletions(-) create mode 100644 src/tests/JIT/Methodical/GC/Regress/GPRStressR16toR31.cs diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs index df3da7a876b787..89786829d5ec8a 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs @@ -16,7 +16,7 @@ class AsmOffsets // Debug build offsets #if TARGET_AMD64 #if TARGET_UNIX - public const int SIZEOF__REGDISPLAY = 0x1b90; + public const int SIZEOF__REGDISPLAY = 0x1c10; public const int OFFSETOF__REGDISPLAY__SP = 0x1b78; public const int OFFSETOF__REGDISPLAY__ControlPC = 0x1b80; #else // TARGET_UNIX @@ -82,7 +82,7 @@ class AsmOffsets // Release build offsets #if TARGET_AMD64 #if TARGET_UNIX - public const int SIZEOF__REGDISPLAY = 0x1b80; + public const int SIZEOF__REGDISPLAY = 0x1c00; public const int OFFSETOF__REGDISPLAY__SP = 0x1b70; public const int 
OFFSETOF__REGDISPLAY__ControlPC = 0x1b78; #else // TARGET_UNIX diff --git a/src/coreclr/debug/ee/debugger.cpp b/src/coreclr/debug/ee/debugger.cpp index 914b4abf61957a..abed7d6f74f4de 100644 --- a/src/coreclr/debug/ee/debugger.cpp +++ b/src/coreclr/debug/ee/debugger.cpp @@ -16850,6 +16850,26 @@ void FuncEvalFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloa pRD->pCurrentContextPointers->R14 = &(pDE->m_context.R14); pRD->pCurrentContextPointers->R15 = &(pDE->m_context.R15); +#if defined(TARGET_UNIX) + // This would mean we need to update winnt.h in windows sdk. + pRD->volatileCurrContextPointers.R16 = &(pDE->m_context.R16); + pRD->volatileCurrContextPointers.R17 = &(pDE->m_context.R17); + pRD->volatileCurrContextPointers.R18 = &(pDE->m_context.R18); + pRD->volatileCurrContextPointers.R19 = &(pDE->m_context.R19); + pRD->volatileCurrContextPointers.R20 = &(pDE->m_context.R20); + pRD->volatileCurrContextPointers.R21 = &(pDE->m_context.R21); + pRD->volatileCurrContextPointers.R22 = &(pDE->m_context.R22); + pRD->volatileCurrContextPointers.R23 = &(pDE->m_context.R23); + pRD->volatileCurrContextPointers.R24 = &(pDE->m_context.R24); + pRD->volatileCurrContextPointers.R25 = &(pDE->m_context.R25); + pRD->volatileCurrContextPointers.R26 = &(pDE->m_context.R26); + pRD->volatileCurrContextPointers.R27 = &(pDE->m_context.R27); + pRD->volatileCurrContextPointers.R28 = &(pDE->m_context.R28); + pRD->volatileCurrContextPointers.R29 = &(pDE->m_context.R29); + pRD->volatileCurrContextPointers.R30 = &(pDE->m_context.R30); + pRD->volatileCurrContextPointers.R31 = &(pDE->m_context.R31); +#endif // TARGET_UNIX + // SyncRegDisplayToCurrentContext() sets the pRD->SP and pRD->ControlPC on AMD64. 
SyncRegDisplayToCurrentContext(pRD); diff --git a/src/coreclr/gcinfo/gcinfodumper.cpp b/src/coreclr/gcinfo/gcinfodumper.cpp index c22850c2b0f10c..5dc5ed13bea598 100644 --- a/src/coreclr/gcinfo/gcinfodumper.cpp +++ b/src/coreclr/gcinfo/gcinfodumper.cpp @@ -131,6 +131,28 @@ BOOL GcInfoDumper::ReportPointerRecord ( REG(r13, R13), REG(r14, R14), REG(r15, R15), +#if defined(TARGET_UNIX) +#undef REG +#define REG(reg, field) { offsetof(Amd64VolatileContextPointer, field) } + REG(r16, R16), + REG(r17, R17), + REG(r18, R18), + REG(r19, R19), + REG(r20, R20), + REG(r21, R21), + REG(r22, R22), + REG(r23, R23), + REG(r24, R24), + REG(r25, R25), + REG(r26, R26), + REG(r27, R27), + REG(r28, R28), + REG(r29, R29), + REG(r30, R30), + REG(r31, R31), + REG(r16, R16), + REG(r16, R16), +#endif // TARGET_UNIX #elif defined(TARGET_ARM) #undef REG #define REG(reg, field) { offsetof(ArmVolatileContextPointer, field) } @@ -294,7 +316,7 @@ PORTABILITY_ASSERT("GcInfoDumper::ReportPointerRecord is not implemented on this #if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64) BYTE* pContext = (BYTE*)&(pRD->volatileCurrContextPointers); -#else +#else // TARGET_ARM || TARGET_ARM64 || TARGET_RISCV64 || TARGET_LOONGARCH64 BYTE* pContext = (BYTE*)pRD->pCurrentContext; #endif @@ -390,7 +412,12 @@ PORTABILITY_ASSERT("GcInfoDumper::ReportPointerRecord is not implemented on this { continue; } -#endif +#elif defined(TARGET_AMD64) && defined(TARGET_UNIX) + if (ctx != 0 && iEncodedReg > 15) + { + break; + } +#endif // TARGET_AMD64 || TARGET_UNIX { _ASSERTE(iReg < nCONTEXTRegisters); #ifdef TARGET_ARM @@ -414,6 +441,19 @@ PORTABILITY_ASSERT("GcInfoDumper::ReportPointerRecord is not implemented on this { pReg = (SIZE_T*)((BYTE*)pRD->pCurrentContext + rgRegisters[iReg].cbContextOffset); } +#elif defined(TARGET_AMD64) && defined(TARGET_UNIX) + if (ctx == 0 && iReg == 16) + { + pContext = (BYTE*)&(pRD->volatileCurrContextPointers); + } + if (ctx == 0 && iReg 
>= 16) + { + pReg = *(SIZE_T**)(pContext + rgRegisters[iReg].cbContextOffset); + } + else + { + pReg = (SIZE_T*)(pContext + rgRegisters[iReg].cbContextOffset); + } #else pReg = (SIZE_T*)(pContext + rgRegisters[iReg].cbContextOffset); #endif @@ -664,6 +704,13 @@ GcInfoDumper::EnumerateStateChangesResults GcInfoDumper::EnumerateStateChanges ( *(ppCurrentRax + iReg) = &regdisp.pCurrentContext->Rax + iReg; *(ppCallerRax + iReg) = &regdisp.pCallerContext ->Rax + iReg; } +#if defined(TARGET_UNIX) + ULONG64 **ppVolatileReg = &regdisp.volatileCurrContextPointers.R16; + for (iReg = 0; iReg < 16; iReg++) + { + *(ppVolatileReg+iReg) = &regdisp.pCurrentContext->R16 + iReg; + } +#endif // TARGET_UNIX #elif defined(TARGET_ARM) FILL_REGS(pCurrentContext->R0, 16); FILL_REGS(pCallerContext->R0, 16); diff --git a/src/coreclr/inc/gcinfotypes.h b/src/coreclr/inc/gcinfotypes.h index 4d5690505e7986..b3bdd5e9befd9b 100644 --- a/src/coreclr/inc/gcinfotypes.h +++ b/src/coreclr/inc/gcinfotypes.h @@ -595,7 +595,7 @@ struct AMD64GcInfoEncoding { static const int SIZE_OF_STACK_AREA_ENCBASE = 3; static const int SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE = 4; static const int REVERSE_PINVOKE_FRAME_ENCBASE = 6; - static const int NUM_REGISTERS_ENCBASE = 2; + static const int NUM_REGISTERS_ENCBASE = 3; static const int NUM_STACK_SLOTS_ENCBASE = 2; static const int NUM_UNTRACKED_SLOTS_ENCBASE = 1; static const int NORM_PROLOG_SIZE_ENCBASE = 5; @@ -603,8 +603,8 @@ struct AMD64GcInfoEncoding { static const int NORM_CODE_OFFSET_DELTA_ENCBASE = 3; static const int INTERRUPTIBLE_RANGE_DELTA1_ENCBASE = 6; static const int INTERRUPTIBLE_RANGE_DELTA2_ENCBASE = 6; - static const int REGISTER_ENCBASE = 3; - static const int REGISTER_DELTA_ENCBASE = 2; + static const int REGISTER_ENCBASE = 5; + static const int REGISTER_DELTA_ENCBASE = 5; static const int STACK_SLOT_ENCBASE = 6; static const int STACK_SLOT_DELTA_ENCBASE = 4; static const int NUM_SAFE_POINTS_ENCBASE = 2; diff --git a/src/coreclr/inc/regdisp.h 
b/src/coreclr/inc/regdisp.h index 7a5cf9d9d0cec4..36f0495d3423f6 100644 --- a/src/coreclr/inc/regdisp.h +++ b/src/coreclr/inc/regdisp.h @@ -197,6 +197,33 @@ typedef struct _Arm64VolatileContextPointer } Arm64VolatileContextPointer; #endif //TARGET_ARM64 +#if defined(TARGET_AMD64) +typedef struct _Amd64VolatileContextPointer +{ + union { + struct { + PDWORD64 R16; + PDWORD64 R17; + PDWORD64 R18; + PDWORD64 R19; + PDWORD64 R20; + PDWORD64 R21; + PDWORD64 R22; + PDWORD64 R23; + PDWORD64 R24; + PDWORD64 R25; + PDWORD64 R26; + PDWORD64 R27; + PDWORD64 R28; + PDWORD64 R29; + PDWORD64 R30; + PDWORD64 R31; + }; + PDWORD64 R[16]; + }; +} Amd64VolatileContextPointer; +#endif //TARGET_AMD64 + #if defined(TARGET_LOONGARCH64) typedef struct _LoongArch64VolatileContextPointer { @@ -253,6 +280,10 @@ struct REGDISPLAY : public REGDISPLAY_BASE { LoongArch64VolatileContextPointer volatileCurrContextPointers; #endif +#if defined(TARGET_AMD64) && defined(TARGET_UNIX) + Amd64VolatileContextPointer volatileCurrContextPointers; +#endif + #ifdef TARGET_RISCV64 RiscV64VolatileContextPointer volatileCurrContextPointers; #endif @@ -563,7 +594,11 @@ inline void FillRegDisplay(const PREGDISPLAY pRD, PT_CONTEXT pctx, PT_CONTEXT pC // Fill volatile context pointers. They can be used by GC in the case of the leaf frame for (int i=0; i < 18; i++) pRD->volatileCurrContextPointers.X[i] = &pctx->X[i]; -#elif defined(TARGET_LOONGARCH64) // TARGET_ARM64 +#elif defined(TARGET_AMD64) && defined(TARGET_UNIX) // TARGET_ARM64 + // Fill volatile context pointers. 
They can be used by GC in the case of the leaf frame + for (int i=0; i < 16; i++) + pRD->volatileCurrContextPointers.R[i] = &pctx->R[i]; +#elif defined(TARGET_LOONGARCH64) // TARGET_ADM64 && TARGET_UNIX pRD->volatileCurrContextPointers.A0 = &pctx->A0; pRD->volatileCurrContextPointers.A1 = &pctx->A1; pRD->volatileCurrContextPointers.A2 = &pctx->A2; @@ -664,7 +699,7 @@ inline size_t * getRegAddr (unsigned regNum, PTR_CONTEXT regs) return (PTR_size_t)(PTR_BYTE(regs) + OFFSET_OF_REGISTERS[regNum]); #elif defined(TARGET_AMD64) - _ASSERTE(regNum < 16); + _ASSERTE(regNum < 32); return (size_t *)&regs->Rax + regNum; #elif defined(TARGET_ARM) _ASSERTE(regNum < 16); diff --git a/src/coreclr/nativeaot/Runtime/amd64/AsmOffsetsCpu.h b/src/coreclr/nativeaot/Runtime/amd64/AsmOffsetsCpu.h index afeb2a408851a4..7bf55871558c0e 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/AsmOffsetsCpu.h +++ b/src/coreclr/nativeaot/Runtime/amd64/AsmOffsetsCpu.h @@ -73,7 +73,7 @@ PLAT_ASM_OFFSET(90, REGDISPLAY, Xmm) #else // !UNIX_AMD64_ABI -PLAT_ASM_SIZEOF(190, ExInfo) +PLAT_ASM_SIZEOF(210, ExInfo) PLAT_ASM_OFFSET(0, ExInfo, m_pPrevExInfo) PLAT_ASM_OFFSET(8, ExInfo, m_pExContext) PLAT_ASM_OFFSET(10, ExInfo, m_exception) @@ -81,7 +81,7 @@ PLAT_ASM_OFFSET(18, ExInfo, m_kind) PLAT_ASM_OFFSET(19, ExInfo, m_passNumber) PLAT_ASM_OFFSET(1c, ExInfo, m_idxCurClause) PLAT_ASM_OFFSET(20, ExInfo, m_frameIter) -PLAT_ASM_OFFSET(188, ExInfo, m_notifyDebuggerSP) +PLAT_ASM_OFFSET(208, ExInfo, m_notifyDebuggerSP) PLAT_ASM_OFFSET(0, PInvokeTransitionFrame, m_RIP) PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_FramePointer) @@ -89,12 +89,12 @@ PLAT_ASM_OFFSET(10, PInvokeTransitionFrame, m_pThread) PLAT_ASM_OFFSET(18, PInvokeTransitionFrame, m_Flags) PLAT_ASM_OFFSET(20, PInvokeTransitionFrame, m_PreservedRegs) -PLAT_ASM_SIZEOF(168, StackFrameIterator) +PLAT_ASM_SIZEOF(1e8, StackFrameIterator) PLAT_ASM_OFFSET(10, StackFrameIterator, m_FramePointer) PLAT_ASM_OFFSET(18, StackFrameIterator, m_ControlPC) PLAT_ASM_OFFSET(20, 
StackFrameIterator, m_RegDisplay) -PLAT_ASM_OFFSET(158, StackFrameIterator, m_OriginalControlPC) -PLAT_ASM_OFFSET(160, StackFrameIterator, m_pPreviousTransitionFrame) +PLAT_ASM_OFFSET(1d8, StackFrameIterator, m_OriginalControlPC) +PLAT_ASM_OFFSET(1e0, StackFrameIterator, m_pPreviousTransitionFrame) PLAT_ASM_SIZEOF(50, PAL_LIMITED_CONTEXT) PLAT_ASM_OFFSET(0, PAL_LIMITED_CONTEXT, IP) @@ -110,8 +110,8 @@ PLAT_ASM_OFFSET(38, PAL_LIMITED_CONTEXT, R13) PLAT_ASM_OFFSET(40, PAL_LIMITED_CONTEXT, R14) PLAT_ASM_OFFSET(48, PAL_LIMITED_CONTEXT, R15) -PLAT_ASM_SIZEOF(88, REGDISPLAY) -PLAT_ASM_OFFSET(78, REGDISPLAY, SP) +PLAT_ASM_SIZEOF(108, REGDISPLAY) +PLAT_ASM_OFFSET(f8, REGDISPLAY, SP) PLAT_ASM_OFFSET(18, REGDISPLAY, pRbx) PLAT_ASM_OFFSET(20, REGDISPLAY, pRbp) diff --git a/src/coreclr/nativeaot/Runtime/regdisplay.h b/src/coreclr/nativeaot/Runtime/regdisplay.h index 41eb41bf746975..311502ee06954b 100644 --- a/src/coreclr/nativeaot/Runtime/regdisplay.h +++ b/src/coreclr/nativeaot/Runtime/regdisplay.h @@ -27,6 +27,24 @@ struct REGDISPLAY PTR_uintptr_t pR13; PTR_uintptr_t pR14; PTR_uintptr_t pR15; +#if defined(TARGET_UNIX) + PTR_uintptr_t pR16; + PTR_uintptr_t pR17; + PTR_uintptr_t pR18; + PTR_uintptr_t pR19; + PTR_uintptr_t pR20; + PTR_uintptr_t pR21; + PTR_uintptr_t pR22; + PTR_uintptr_t pR23; + PTR_uintptr_t pR24; + PTR_uintptr_t pR25; + PTR_uintptr_t pR26; + PTR_uintptr_t pR27; + PTR_uintptr_t pR28; + PTR_uintptr_t pR29; + PTR_uintptr_t pR30; + PTR_uintptr_t pR31; +#endif //TARGET_UNIX #endif // TARGET_AMD64 uintptr_t SP; diff --git a/src/coreclr/pal/inc/pal.h b/src/coreclr/pal/inc/pal.h index 8da66ba362488a..50e2bfe729126a 100644 --- a/src/coreclr/pal/inc/pal.h +++ b/src/coreclr/pal/inc/pal.h @@ -1464,24 +1464,29 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT { M512 Zmm31; }; - struct + // XSTATE_APX + union { - DWORD64 R16; - DWORD64 R17; - DWORD64 R18; - DWORD64 R19; - DWORD64 R20; - DWORD64 R21; - DWORD64 R22; - DWORD64 R23; - DWORD64 R24; - DWORD64 R25; - DWORD64 R26; - 
DWORD64 R27; - DWORD64 R28; - DWORD64 R29; - DWORD64 R30; - DWORD64 R31; + struct + { + DWORD64 R16; + DWORD64 R17; + DWORD64 R18; + DWORD64 R19; + DWORD64 R20; + DWORD64 R21; + DWORD64 R22; + DWORD64 R23; + DWORD64 R24; + DWORD64 R25; + DWORD64 R26; + DWORD64 R27; + DWORD64 R28; + DWORD64 R29; + DWORD64 R30; + DWORD64 R31; + }; + DWORD64 R[16]; }; } CONTEXT, *PCONTEXT, *LPCONTEXT; diff --git a/src/coreclr/unwinder/amd64/unwinder.cpp b/src/coreclr/unwinder/amd64/unwinder.cpp index 57acbd30ab06e8..bc9cfcab03dbe0 100644 --- a/src/coreclr/unwinder/amd64/unwinder.cpp +++ b/src/coreclr/unwinder/amd64/unwinder.cpp @@ -242,7 +242,7 @@ BOOL DacUnwindStackFrame(CONTEXT * pContext, KNONVOLATILE_CONTEXT_POINTERS* pCon if (res && pContextPointers) { - for (int i = 0; i < 16; i++) + for (int i = 0; i < 32; i++) { *(&pContextPointers->Rax + i) = &pContext->Rax + i; } diff --git a/src/coreclr/vm/amd64/asmconstants.h b/src/coreclr/vm/amd64/asmconstants.h index 96bb2f26d96239..8ca7dc04a45d90 100644 --- a/src/coreclr/vm/amd64/asmconstants.h +++ b/src/coreclr/vm/amd64/asmconstants.h @@ -419,6 +419,13 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Xmm15 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__VectorRegister == offsetof(CONTEXT, VectorRegister[0])); +// TBD APX: is this needed for unix? 
+// #if defined(UNIX_AMD64_ABI) && !defined(HOST_WINDOWS) +// #define OFFSETOF__CONTEXT__R16 (8*6 + 4*2 + 2*6 + 4 + 8*6 + 8*16 + 8 + 2*16 + 8*16 + 16*16 + 96 + 128*26 + 8*8 + 8*8*7 + 8*16*16 + 8*8*8 + 8*32*16 + 8*64*16) +// ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__R16 +// == offsetof(CONTEXT, R16)); +// #endif // UNIX_AMD64_ABI && !HOST_WINDOWS + #define SIZEOF__FaultingExceptionFrame (0x20 + SIZEOF__CONTEXT + 16) ASMCONSTANTS_C_ASSERT(SIZEOF__FaultingExceptionFrame == sizeof(FaultingExceptionFrame)); diff --git a/src/coreclr/vm/amd64/cgenamd64.cpp b/src/coreclr/vm/amd64/cgenamd64.cpp index 7fa9d44d1baa6b..9cdd5c49df91af 100644 --- a/src/coreclr/vm/amd64/cgenamd64.cpp +++ b/src/coreclr/vm/amd64/cgenamd64.cpp @@ -59,6 +59,11 @@ void ClearRegDisplayArgumentAndScratchRegisters(REGDISPLAY * pRD) pContextPointers->R9 = NULL; pContextPointers->R10 = NULL; pContextPointers->R11 = NULL; + +#if defined(TARGET_UNIX) + for (int i=0; i < 16; i++) + pRD->volatileCurrContextPointers.R[i] = NULL; +#endif // TARGET_UNIX } void TransitionFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloats) @@ -175,6 +180,8 @@ void FaultingExceptionFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool u pRD->pCurrentContextPointers->R14 = &m_ctx.R14; pRD->pCurrentContextPointers->R15 = &m_ctx.R15; + // TBD APX: Do we need to update the EGPR context? + pRD->IsCallerContextValid = FALSE; pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary. } @@ -219,6 +226,11 @@ void ResumableFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFlo pRD->pCurrentContextPointers->R14 = &m_Regs->R14; pRD->pCurrentContextPointers->R15 = &m_Regs->R15; +#if defined(TARGET_UNIX) + for (int i = 0; i < 16; i++) + pRD->volatileCurrContextPointers.R[i] = &m_Regs->R[i]; +#endif // TARGET_UNIX + pRD->IsCallerContextValid = FALSE; pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary. 
@@ -226,6 +238,7 @@ void ResumableFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFlo } // The HijackFrame has to know the registers that are pushed by OnHijackTripThread +// TBD APX: How to check for APX registers here? void HijackFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloats) { CONTRACTL { diff --git a/src/coreclr/vm/gcinfodecoder.cpp b/src/coreclr/vm/gcinfodecoder.cpp index 00bfa6b96f6d63..0b313e491749e7 100644 --- a/src/coreclr/vm/gcinfodecoder.cpp +++ b/src/coreclr/vm/gcinfodecoder.cpp @@ -1488,18 +1488,28 @@ template OBJECTREF* TGcInfoDecoder::Ge PREGDISPLAY pRD ) { +#if defined(TARGET_UNIX) + _ASSERTE(regNum >= 0 && regNum <= 32); +#else // TARGET_UNIX _ASSERTE(regNum >= 0 && regNum <= 16); +#endif // TARGET_UNIX _ASSERTE(regNum != 4); // rsp #ifdef FEATURE_NATIVEAOT PTR_uintptr_t* ppRax = &pRD->pRax; if (regNum > 4) regNum--; // rsp is skipped in NativeAOT RegDisplay -#else +#else // FEATURE_NATIVEAOT // The fields of KNONVOLATILE_CONTEXT_POINTERS are in the same order as // the processor encoding numbers. - ULONGLONG **ppRax = &pRD->pCurrentContextPointers->Rax; -#endif +#if defined(TARGET_UNIX) + if(regNum >= 16) + { + ppRax = &pRD->volatileCurrContextPointers.R16; + return (OBJECTREF*)*(ppRax + regNum - 16); + } +#endif // TARGET_UNIX +#endif // FEATURE_NATIVEAOT return (OBJECTREF*)*(ppRax + regNum); } @@ -1510,12 +1520,22 @@ template OBJECTREF* TGcInfoDecoder::Ge PREGDISPLAY pRD ) { +#if defined(TARGET_UNIX) + _ASSERTE(regNum >= 0 && regNum <= 32); +#else // TARGET_UNIX _ASSERTE(regNum >= 0 && regNum <= 16); +#endif // TARGET_UNIX _ASSERTE(regNum != 4); // rsp // The fields of CONTEXT are in the same order as // the processor encoding numbers. 
- +#if defined(TARGET_UNIX) + if (regNum >= 16) + { + ULONGLONG *pRax = &pRD->pCurrentContext->R16; + return (OBJECTREF*)(pRax + regNum - 16); + } +#endif // TARGET_UNIX ULONGLONG *pRax = &pRD->pCurrentContext->Rax; return (OBJECTREF*)(pRax + regNum); @@ -1524,10 +1544,14 @@ template OBJECTREF* TGcInfoDecoder::Ge template bool TGcInfoDecoder::IsScratchRegister(int regNum, PREGDISPLAY pRD) { +#if defined(TARGET_UNIX) + _ASSERTE(regNum >= 0 && regNum <= 32); +#else // TARGET_UNIX _ASSERTE(regNum >= 0 && regNum <= 16); +#endif // TARGET_UNIX _ASSERTE(regNum != 4); // rsp - UINT16 PreservedRegMask = + UINT32 PreservedRegMask = (1 << 3) // rbx | (1 << 5) // rbp #ifndef UNIX_AMD64_ABI @@ -1568,7 +1592,11 @@ template void TGcInfoDecoder::ReportRe { GCINFODECODER_CONTRACT; +#if defined(TARGET_UNIX) + _ASSERTE(regNum >= 0 && regNum <= 32); +#else // TARGET_UNIX _ASSERTE(regNum >= 0 && regNum <= 16); +#endif // TARGET_UNIX _ASSERTE(regNum != 4); // rsp LOG((LF_GCROOTS, LL_INFO1000, "Reporting " FMT_REG, regNum )); diff --git a/src/tests/JIT/Methodical/GC/Regress/GPRStressR16toR31.cs b/src/tests/JIT/Methodical/GC/Regress/GPRStressR16toR31.cs new file mode 100644 index 00000000000000..de115fab1a9c78 --- /dev/null +++ b/src/tests/JIT/Methodical/GC/Regress/GPRStressR16toR31.cs @@ -0,0 +1,40 @@ +using System; +using System.Runtime.CompilerServices; + +class GPRStressR16toR31 +{ + [MethodImpl(MethodImplOptions.NoInlining)] + static void StressRegisters() + { + // 32 reference variables to force JIT to use all GPRs + object o0 = new object(), o1 = new object(), o2 = new object(), o3 = new object(); + object o4 = new object(), o5 = new object(), o6 = new object(), o7 = new object(); + object o8 = new object(), o9 = new object(), o10 = new object(), o11 = new object(); + object o12 = new object(), o13 = new object(), o14 = new object(), o15 = new object(); + object o16 = new object(), o17 = new object(), o18 = new object(), o19 = new object(); + object o20 = new object(), o21 = new 
object(), o22 = new object(), o23 = new object(); + object o24 = new object(), o25 = new object(), o26 = new object(), o27 = new object(); + object o28 = new object(), o29 = new object(), o30 = new object(), o31 = new object(); + + // Use all variables in a way that prevents optimization + for (int i = 0; i < 10000; i++) + { + GC.Collect(); + GC.KeepAlive(o0); GC.KeepAlive(o1); GC.KeepAlive(o2); GC.KeepAlive(o3); + GC.KeepAlive(o4); GC.KeepAlive(o5); GC.KeepAlive(o6); GC.KeepAlive(o7); + GC.KeepAlive(o8); GC.KeepAlive(o9); GC.KeepAlive(o10); GC.KeepAlive(o11); + GC.KeepAlive(o12); GC.KeepAlive(o13); GC.KeepAlive(o14); GC.KeepAlive(o15); + GC.KeepAlive(o16); GC.KeepAlive(o17); GC.KeepAlive(o18); GC.KeepAlive(o19); + GC.KeepAlive(o20); GC.KeepAlive(o21); GC.KeepAlive(o22); GC.KeepAlive(o23); + GC.KeepAlive(o24); GC.KeepAlive(o25); GC.KeepAlive(o26); GC.KeepAlive(o27); + GC.KeepAlive(o28); GC.KeepAlive(o29); GC.KeepAlive(o30); GC.KeepAlive(o31); + } + } + + static int Main() + { + StressRegisters(); + Console.WriteLine("Test Passed"); + return 100; + } +} \ No newline at end of file From 28b9dd700748835c4cd4608f6be83dc99ae81842 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Thu, 3 Jul 2025 15:47:53 -0700 Subject: [PATCH 02/10] Get correct register context for APX --- src/coreclr/inc/regdisp.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/src/coreclr/inc/regdisp.h b/src/coreclr/inc/regdisp.h index 36f0495d3423f6..735fd5a7cd2cf5 100644 --- a/src/coreclr/inc/regdisp.h +++ b/src/coreclr/inc/regdisp.h @@ -197,7 +197,7 @@ typedef struct _Arm64VolatileContextPointer } Arm64VolatileContextPointer; #endif //TARGET_ARM64 -#if defined(TARGET_AMD64) +#if defined(TARGET_AMD64) && defined(TARGET_UNIX) typedef struct _Amd64VolatileContextPointer { union { @@ -222,7 +222,7 @@ typedef struct _Amd64VolatileContextPointer PDWORD64 R[16]; }; } Amd64VolatileContextPointer; -#endif //TARGET_AMD64 +#endif //TARGET_AMD64 && TARGET_UNIX #if 
defined(TARGET_LOONGARCH64) typedef struct _LoongArch64VolatileContextPointer @@ -698,8 +698,18 @@ inline size_t * getRegAddr (unsigned regNum, PTR_CONTEXT regs) }; return (PTR_size_t)(PTR_BYTE(regs) + OFFSET_OF_REGISTERS[regNum]); -#elif defined(TARGET_AMD64) +#elif defined(TARGET_AMD64) && defined(TARGET_UNIX) _ASSERTE(regNum < 32); + if (regNum < 16) + { + return (size_t *)&regs->Rax + regNum; + } + else + { + return (size_t *)&regs->R16 + (regNum - 16); + } +#elif defined(TARGET_AMD64) + _ASSERTE(regNum < 16); return (size_t *)&regs->Rax + regNum; #elif defined(TARGET_ARM) _ASSERTE(regNum < 16); From 2cf1452a3be099c7262ba412573baa0e4f04b273 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Wed, 9 Jul 2025 13:47:08 -0700 Subject: [PATCH 03/10] Remove dead code --- src/coreclr/debug/ee/debugger.cpp | 20 -------------------- src/coreclr/inc/gcinfotypes.h | 2 +- 2 files changed, 1 insertion(+), 21 deletions(-) diff --git a/src/coreclr/debug/ee/debugger.cpp b/src/coreclr/debug/ee/debugger.cpp index abed7d6f74f4de..914b4abf61957a 100644 --- a/src/coreclr/debug/ee/debugger.cpp +++ b/src/coreclr/debug/ee/debugger.cpp @@ -16850,26 +16850,6 @@ void FuncEvalFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloa pRD->pCurrentContextPointers->R14 = &(pDE->m_context.R14); pRD->pCurrentContextPointers->R15 = &(pDE->m_context.R15); -#if defined(TARGET_UNIX) - // This would mean we need to update winnt.h in windows sdk. 
- pRD->volatileCurrContextPointers.R16 = &(pDE->m_context.R16); - pRD->volatileCurrContextPointers.R17 = &(pDE->m_context.R17); - pRD->volatileCurrContextPointers.R18 = &(pDE->m_context.R18); - pRD->volatileCurrContextPointers.R19 = &(pDE->m_context.R19); - pRD->volatileCurrContextPointers.R20 = &(pDE->m_context.R20); - pRD->volatileCurrContextPointers.R21 = &(pDE->m_context.R21); - pRD->volatileCurrContextPointers.R22 = &(pDE->m_context.R22); - pRD->volatileCurrContextPointers.R23 = &(pDE->m_context.R23); - pRD->volatileCurrContextPointers.R24 = &(pDE->m_context.R24); - pRD->volatileCurrContextPointers.R25 = &(pDE->m_context.R25); - pRD->volatileCurrContextPointers.R26 = &(pDE->m_context.R26); - pRD->volatileCurrContextPointers.R27 = &(pDE->m_context.R27); - pRD->volatileCurrContextPointers.R28 = &(pDE->m_context.R28); - pRD->volatileCurrContextPointers.R29 = &(pDE->m_context.R29); - pRD->volatileCurrContextPointers.R30 = &(pDE->m_context.R30); - pRD->volatileCurrContextPointers.R31 = &(pDE->m_context.R31); -#endif // TARGET_UNIX - // SyncRegDisplayToCurrentContext() sets the pRD->SP and pRD->ControlPC on AMD64. 
SyncRegDisplayToCurrentContext(pRD); diff --git a/src/coreclr/inc/gcinfotypes.h b/src/coreclr/inc/gcinfotypes.h index b3bdd5e9befd9b..3c769a2967717f 100644 --- a/src/coreclr/inc/gcinfotypes.h +++ b/src/coreclr/inc/gcinfotypes.h @@ -595,7 +595,7 @@ struct AMD64GcInfoEncoding { static const int SIZE_OF_STACK_AREA_ENCBASE = 3; static const int SIZE_OF_EDIT_AND_CONTINUE_PRESERVED_AREA_ENCBASE = 4; static const int REVERSE_PINVOKE_FRAME_ENCBASE = 6; - static const int NUM_REGISTERS_ENCBASE = 3; + static const int NUM_REGISTERS_ENCBASE = 2; static const int NUM_STACK_SLOTS_ENCBASE = 2; static const int NUM_UNTRACKED_SLOTS_ENCBASE = 1; static const int NORM_PROLOG_SIZE_ENCBASE = 5; From 9a5563e98fd26092805b56b74177e363ddde2c12 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Thu, 10 Jul 2025 09:39:43 -0700 Subject: [PATCH 04/10] revert the gcInfoEncoding for AMD64 --- src/coreclr/inc/gcinfotypes.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/inc/gcinfotypes.h b/src/coreclr/inc/gcinfotypes.h index 3c769a2967717f..4d5690505e7986 100644 --- a/src/coreclr/inc/gcinfotypes.h +++ b/src/coreclr/inc/gcinfotypes.h @@ -603,8 +603,8 @@ struct AMD64GcInfoEncoding { static const int NORM_CODE_OFFSET_DELTA_ENCBASE = 3; static const int INTERRUPTIBLE_RANGE_DELTA1_ENCBASE = 6; static const int INTERRUPTIBLE_RANGE_DELTA2_ENCBASE = 6; - static const int REGISTER_ENCBASE = 5; - static const int REGISTER_DELTA_ENCBASE = 5; + static const int REGISTER_ENCBASE = 3; + static const int REGISTER_DELTA_ENCBASE = 2; static const int STACK_SLOT_ENCBASE = 6; static const int STACK_SLOT_DELTA_ENCBASE = 4; static const int NUM_SAFE_POINTS_ENCBASE = 2; From 3629e56d5f6bd0139a05d006875b97f2f89d340e Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Thu, 10 Jul 2025 13:08:40 -0700 Subject: [PATCH 05/10] remove comments and TBD --- src/coreclr/vm/amd64/asmconstants.h | 7 ------- src/coreclr/vm/amd64/cgenamd64.cpp | 3 --- 2 files changed, 10 deletions(-) diff --git 
a/src/coreclr/vm/amd64/asmconstants.h b/src/coreclr/vm/amd64/asmconstants.h index 8ca7dc04a45d90..96bb2f26d96239 100644 --- a/src/coreclr/vm/amd64/asmconstants.h +++ b/src/coreclr/vm/amd64/asmconstants.h @@ -419,13 +419,6 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__Xmm15 ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__VectorRegister == offsetof(CONTEXT, VectorRegister[0])); -// TBD APX: is this needed for unix? -// #if defined(UNIX_AMD64_ABI) && !defined(HOST_WINDOWS) -// #define OFFSETOF__CONTEXT__R16 (8*6 + 4*2 + 2*6 + 4 + 8*6 + 8*16 + 8 + 2*16 + 8*16 + 16*16 + 96 + 128*26 + 8*8 + 8*8*7 + 8*16*16 + 8*8*8 + 8*32*16 + 8*64*16) -// ASMCONSTANTS_C_ASSERT(OFFSETOF__CONTEXT__R16 -// == offsetof(CONTEXT, R16)); -// #endif // UNIX_AMD64_ABI && !HOST_WINDOWS - #define SIZEOF__FaultingExceptionFrame (0x20 + SIZEOF__CONTEXT + 16) ASMCONSTANTS_C_ASSERT(SIZEOF__FaultingExceptionFrame == sizeof(FaultingExceptionFrame)); diff --git a/src/coreclr/vm/amd64/cgenamd64.cpp b/src/coreclr/vm/amd64/cgenamd64.cpp index ba9497a8854566..d72288329f1532 100644 --- a/src/coreclr/vm/amd64/cgenamd64.cpp +++ b/src/coreclr/vm/amd64/cgenamd64.cpp @@ -188,8 +188,6 @@ void FaultingExceptionFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool u pRD->pCurrentContextPointers->R14 = &m_ctx.R14; pRD->pCurrentContextPointers->R15 = &m_ctx.R15; - // TBD APX: Do we need to update the EGPR context? - pRD->IsCallerContextValid = FALSE; pRD->IsCallerSPValid = FALSE; // Don't add usage of this field. This is only temporary. } @@ -246,7 +244,6 @@ void ResumableFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFlo } // The HijackFrame has to know the registers that are pushed by OnHijackTripThread -// TBD APX: How to check for APX registers here? 
void HijackFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFloats) { CONTRACTL { From 0241da3242249a6ac842a3fd31178f96ad08f9aa Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Tue, 15 Jul 2025 12:23:41 -0700 Subject: [PATCH 06/10] Avoid EGPR context processing during cross compilation --- src/coreclr/inc/regdisp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/inc/regdisp.h b/src/coreclr/inc/regdisp.h index 735fd5a7cd2cf5..517f0a3c1da417 100644 --- a/src/coreclr/inc/regdisp.h +++ b/src/coreclr/inc/regdisp.h @@ -594,11 +594,11 @@ inline void FillRegDisplay(const PREGDISPLAY pRD, PT_CONTEXT pctx, PT_CONTEXT pC // Fill volatile context pointers. They can be used by GC in the case of the leaf frame for (int i=0; i < 18; i++) pRD->volatileCurrContextPointers.X[i] = &pctx->X[i]; -#elif defined(TARGET_AMD64) && defined(TARGET_UNIX) // TARGET_ARM64 +#elif defined(TARGET_AMD64) && defined(TARGET_UNIX) && defined(HOST_UNIX) // TARGET_ARM64 // Fill volatile context pointers. 
They can be used by GC in the case of the leaf frame for (int i=0; i < 16; i++) pRD->volatileCurrContextPointers.R[i] = &pctx->R[i]; -#elif defined(TARGET_LOONGARCH64) // TARGET_ADM64 && TARGET_UNIX +#elif defined(TARGET_LOONGARCH64) // TARGET_AMD64 && TARGET_UNIX && HOST_UNIX pRD->volatileCurrContextPointers.A0 = &pctx->A0; pRD->volatileCurrContextPointers.A1 = &pctx->A1; pRD->volatileCurrContextPointers.A2 = &pctx->A2; From 33792cc5d81d105561466ed088114af797e103be Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Tue, 15 Jul 2025 12:59:14 -0700 Subject: [PATCH 07/10] remove cross compile support from regdisp --- src/coreclr/inc/regdisp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/inc/regdisp.h b/src/coreclr/inc/regdisp.h index 517f0a3c1da417..a6ff66d3f1fa2b 100644 --- a/src/coreclr/inc/regdisp.h +++ b/src/coreclr/inc/regdisp.h @@ -698,7 +698,7 @@ inline size_t * getRegAddr (unsigned regNum, PTR_CONTEXT regs) }; return (PTR_size_t)(PTR_BYTE(regs) + OFFSET_OF_REGISTERS[regNum]); -#elif defined(TARGET_AMD64) && defined(TARGET_UNIX) +#elif defined(TARGET_AMD64) && defined(TARGET_UNIX) && defined(HOST_UNIX) _ASSERTE(regNum < 32); if (regNum < 16) { From 7f9f4c32aaacf9540bc215c8ef80a1dd3ad5c29e Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Tue, 15 Jul 2025 14:10:21 -0700 Subject: [PATCH 08/10] Only process EGPR in gcinfodumper if HOST_UNIX && TARGET_UNIX && TARGET_AMD64 --- src/coreclr/gcinfo/gcinfodumper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/gcinfo/gcinfodumper.cpp b/src/coreclr/gcinfo/gcinfodumper.cpp index 5dc5ed13bea598..8af407d6b06fcb 100644 --- a/src/coreclr/gcinfo/gcinfodumper.cpp +++ b/src/coreclr/gcinfo/gcinfodumper.cpp @@ -704,7 +704,7 @@ GcInfoDumper::EnumerateStateChangesResults GcInfoDumper::EnumerateStateChanges ( *(ppCurrentRax + iReg) = &regdisp.pCurrentContext->Rax + iReg; *(ppCallerRax + iReg) = &regdisp.pCallerContext ->Rax + iReg; } -#if defined(TARGET_UNIX) +#if 
defined(TARGET_UNIX) && defined(HOST_UNIX) ULONG64 **ppVolatileReg = &regdisp.volatileCurrContextPointers.R16; for (iReg = 0; iReg < 16; iReg++) { From 943308e4974a7c200940973fe0e79ba4e1760d57 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Tue, 15 Jul 2025 23:59:06 -0700 Subject: [PATCH 09/10] process EGPRs in VM only if APX is supported. Also, add AMD64VolatileContextPointers for windows --- .../src/System/Runtime/ExceptionServices/AsmOffsets.cs | 2 +- src/coreclr/inc/regdisp.h | 6 +++--- src/coreclr/vm/codeman.h | 10 ++++++++++ src/coreclr/vm/gcinfodecoder.cpp | 8 ++------ 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs index 89786829d5ec8a..479de96b10df41 100644 --- a/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs +++ b/src/coreclr/System.Private.CoreLib/src/System/Runtime/ExceptionServices/AsmOffsets.cs @@ -20,7 +20,7 @@ class AsmOffsets public const int OFFSETOF__REGDISPLAY__SP = 0x1b78; public const int OFFSETOF__REGDISPLAY__ControlPC = 0x1b80; #else // TARGET_UNIX - public const int SIZEOF__REGDISPLAY = 0xbf0; + public const int SIZEOF__REGDISPLAY = 0xc70; public const int OFFSETOF__REGDISPLAY__SP = 0xbd8; public const int OFFSETOF__REGDISPLAY__ControlPC = 0xbe0; #endif // TARGET_UNIX diff --git a/src/coreclr/inc/regdisp.h b/src/coreclr/inc/regdisp.h index a6ff66d3f1fa2b..0c976dc4fb8a25 100644 --- a/src/coreclr/inc/regdisp.h +++ b/src/coreclr/inc/regdisp.h @@ -197,7 +197,7 @@ typedef struct _Arm64VolatileContextPointer } Arm64VolatileContextPointer; #endif //TARGET_ARM64 -#if defined(TARGET_AMD64) && defined(TARGET_UNIX) +#if defined(TARGET_AMD64) typedef struct _Amd64VolatileContextPointer { union { @@ -222,7 +222,7 @@ typedef struct _Amd64VolatileContextPointer PDWORD64 R[16]; }; } Amd64VolatileContextPointer; -#endif 
//TARGET_AMD64 && TARGET_UNIX +#endif //TARGET_AMD64 #if defined(TARGET_LOONGARCH64) typedef struct _LoongArch64VolatileContextPointer @@ -280,7 +280,7 @@ struct REGDISPLAY : public REGDISPLAY_BASE { LoongArch64VolatileContextPointer volatileCurrContextPointers; #endif -#if defined(TARGET_AMD64) && defined(TARGET_UNIX) +#if defined(TARGET_AMD64) Amd64VolatileContextPointer volatileCurrContextPointers; #endif diff --git a/src/coreclr/vm/codeman.h b/src/coreclr/vm/codeman.h index 9720f029e30c2a..13e22174d99217 100644 --- a/src/coreclr/vm/codeman.h +++ b/src/coreclr/vm/codeman.h @@ -70,6 +70,9 @@ Module Name: #ifdef TARGET_X86 #include "gc_unwind_x86.h" #endif +#ifdef TARGET_AMD64 +#include +#endif class MethodDesc; class ICorJitCompiler; @@ -2198,6 +2201,13 @@ class EEJitManager final : public EECodeGenManager return m_CPUCompileFlags; } +#if defined(TARGET_AMD64) + inline bool IsAPXSupported() + { + return m_CPUCompileFlags.IsSet(InstructionSet_APX); + } +#endif // TARGET_AMD64 + private : Crst m_JitLoadLock; diff --git a/src/coreclr/vm/gcinfodecoder.cpp b/src/coreclr/vm/gcinfodecoder.cpp index 0b313e491749e7..ecb99472cc3591 100644 --- a/src/coreclr/vm/gcinfodecoder.cpp +++ b/src/coreclr/vm/gcinfodecoder.cpp @@ -1502,13 +1502,11 @@ template OBJECTREF* TGcInfoDecoder::Ge // The fields of KNONVOLATILE_CONTEXT_POINTERS are in the same order as // the processor encoding numbers. ULONGLONG **ppRax = &pRD->pCurrentContextPointers->Rax; -#if defined(TARGET_UNIX) - if(regNum >= 16) + if(ExecutionManager::GetEEJitManager()->IsAPXSupported() && regNum >= 16) { ppRax = &pRD->volatileCurrContextPointers.R16; return (OBJECTREF*)*(ppRax + regNum - 16); } -#endif // TARGET_UNIX #endif // FEATURE_NATIVEAOT return (OBJECTREF*)*(ppRax + regNum); @@ -1529,13 +1527,11 @@ template OBJECTREF* TGcInfoDecoder::Ge // The fields of CONTEXT are in the same order as // the processor encoding numbers. 
-#if defined(TARGET_UNIX) - if (regNum >= 16) + if (ExecutionManager::GetEEJitManager()->IsAPXSupported() && regNum >= 16) { ULONGLONG *pRax = &pRD->pCurrentContext->R16; return (OBJECTREF*)(pRax + regNum - 16); } -#endif // TARGET_UNIX ULONGLONG *pRax = &pRD->pCurrentContext->Rax; return (OBJECTREF*)(pRax + regNum); From d4cffc712ce2cf37b4d4c1afd7c2c017358ad126 Mon Sep 17 00:00:00 2001 From: Khushal Modi Date: Thu, 17 Jul 2025 00:51:00 -0700 Subject: [PATCH 10/10] Introduce IsAPXSupported and cached cpuFeatures in VM, GC and NAOT. Also fix small bugs and remove ifdefs for linux wherever possible. --- src/coreclr/gc/gc.cpp | 15 ++++++++++++- src/coreclr/gc/gcimpl.h | 5 +++++ src/coreclr/gc/vxsort/do_vxsort.h | 2 +- src/coreclr/gc/vxsort/isa_detection.cpp | 8 +++---- src/coreclr/gcinfo/gcinfodumper.cpp | 21 +++++++++---------- .../nativeaot/Runtime/amd64/AsmOffsetsCpu.h | 20 +++++++++--------- src/coreclr/nativeaot/Runtime/regdisplay.h | 15 +++++++++++-- src/coreclr/nativeaot/Runtime/startup.cpp | 2 +- src/coreclr/unwinder/amd64/unwinder.cpp | 4 +++- src/coreclr/vm/amd64/cgenamd64.cpp | 7 +++++-- src/coreclr/vm/codeman.cpp | 5 ++++- src/coreclr/vm/codeman.h | 18 +++++++++------- src/coreclr/vm/gcinfodecoder.cpp | 9 +++----- 13 files changed, 84 insertions(+), 47 deletions(-) diff --git a/src/coreclr/gc/gc.cpp b/src/coreclr/gc/gc.cpp index 3845048743a2de..06965d8ab2fd4f 100644 --- a/src/coreclr/gc/gc.cpp +++ b/src/coreclr/gc/gc.cpp @@ -27,6 +27,7 @@ #include "handletable.inl" #include "gcenv.inl" #include "gceventstatus.h" +#include #ifdef __INTELLISENSE__ #if defined(FEATURE_SVR_GC) @@ -143,6 +144,15 @@ bool g_built_with_svr_gc = true; bool g_built_with_svr_gc = false; #endif // FEATURE_SVR_GC +// Stores the ISA capability of the hardware +int cpuFeatures = 0; +#if defined(TARGET_AMD64) +inline bool IsAPXSupported() +{ + return (cpuFeatures & XArchIntrinsicConstants_Apx); +} +#endif // TARGET_AMD64 + #if defined(BUILDENV_DEBUG) uint8_t g_build_variant = 0; #elif
defined(BUILDENV_CHECKED) @@ -14698,7 +14708,7 @@ HRESULT gc_heap::initialize_gc (size_t soh_segment_size, #endif // __linux__ #ifdef USE_VXSORT - InitSupportedInstructionSet ((int32_t)GCConfig::GetGCEnabledInstructionSets()); + InitSupportedInstructionSet ((int32_t)GCConfig::GetGCEnabledInstructionSets(), cpuFeatures); #endif if (!init_semi_shared()) @@ -49286,6 +49296,9 @@ HRESULT GCHeap::Initialize() return CLR_E_GC_BAD_HARD_LIMIT; } + // initialize the cpuFeatures from minipal + cpuFeatures = minipal_getcpufeatures(); + uint32_t nhp = 1; uint32_t nhp_from_config = 0; uint32_t max_nhp_from_config = (uint32_t)GCConfig::GetMaxHeapCount(); diff --git a/src/coreclr/gc/gcimpl.h b/src/coreclr/gc/gcimpl.h index 93c856a2145f0d..fa3996e6acf0df 100644 --- a/src/coreclr/gc/gcimpl.h +++ b/src/coreclr/gc/gcimpl.h @@ -40,6 +40,11 @@ extern bool g_fFinalizerRunOnShutDown; extern bool g_built_with_svr_gc; extern uint8_t g_build_variant; extern VOLATILE(int32_t) g_no_gc_lock; +// Stores the mask for supported instruction sets +extern int cpuFeatures; +#if defined(TARGET_AMD64) +extern inline bool IsAPXSupported(); +#endif // TARGET_AMD64 class GCHeap : public IGCHeapInternal { diff --git a/src/coreclr/gc/vxsort/do_vxsort.h b/src/coreclr/gc/vxsort/do_vxsort.h index edd803f310f492..6044aa6ae2e52c 100644 --- a/src/coreclr/gc/vxsort/do_vxsort.h +++ b/src/coreclr/gc/vxsort/do_vxsort.h @@ -8,7 +8,7 @@ enum class InstructionSet AVX512F = 1, }; -void InitSupportedInstructionSet (int32_t configSetting); +void InitSupportedInstructionSet (int32_t configSetting, int cpuFeatures); bool IsSupportedInstructionSet (InstructionSet instructionSet); void do_vxsort_avx2 (uint8_t** low, uint8_t** high, uint8_t *range_low, uint8_t *range_high); diff --git a/src/coreclr/gc/vxsort/isa_detection.cpp b/src/coreclr/gc/vxsort/isa_detection.cpp index b069c8be9bee04..5172b6a314d93c 100644 --- a/src/coreclr/gc/vxsort/isa_detection.cpp +++ b/src/coreclr/gc/vxsort/isa_detection.cpp @@ -13,9 +13,9 @@ enum class 
SupportedISA AVX512F = 1 << (int)InstructionSet::AVX512F }; -SupportedISA DetermineSupportedISA() +SupportedISA DetermineSupportedISA(int cpuFeatures) { - int cpuFeatures = minipal_getcpufeatures(); + // int cpuFeatures = minipal_getcpufeatures(); if ((cpuFeatures & XArchIntrinsicConstants_Avx2) != 0) { if ((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) @@ -39,9 +39,9 @@ bool IsSupportedInstructionSet (InstructionSet instructionSet) return ((int)s_supportedISA & (1 << (int)instructionSet)) != 0; } -void InitSupportedInstructionSet (int32_t configSetting) +void InitSupportedInstructionSet (int32_t configSetting, int cpuFeatures) { - s_supportedISA = (SupportedISA)((int)DetermineSupportedISA() & configSetting); + s_supportedISA = (SupportedISA)((int)DetermineSupportedISA(cpuFeatures) & configSetting); // we are assuming that AVX2 can be used if AVX512F can, // so if AVX2 is disabled, we need to disable AVX512F as well if (!((int)s_supportedISA & (int)SupportedISA::AVX2)) diff --git a/src/coreclr/gcinfo/gcinfodumper.cpp b/src/coreclr/gcinfo/gcinfodumper.cpp index 8af407d6b06fcb..62cb3053eb7e01 100644 --- a/src/coreclr/gcinfo/gcinfodumper.cpp +++ b/src/coreclr/gcinfo/gcinfodumper.cpp @@ -131,7 +131,6 @@ BOOL GcInfoDumper::ReportPointerRecord ( REG(r13, R13), REG(r14, R14), REG(r15, R15), -#if defined(TARGET_UNIX) #undef REG #define REG(reg, field) { offsetof(Amd64VolatileContextPointer, field) } REG(r16, R16), @@ -150,9 +149,6 @@ BOOL GcInfoDumper::ReportPointerRecord ( REG(r29, R29), REG(r30, R30), REG(r31, R31), - REG(r16, R16), - REG(r16, R16), -#endif // TARGET_UNIX #elif defined(TARGET_ARM) #undef REG #define REG(reg, field) { offsetof(ArmVolatileContextPointer, field) } @@ -412,12 +408,12 @@ PORTABILITY_ASSERT("GcInfoDumper::ReportPointerRecord is not implemented on this { continue; } -#elif defined(TARGET_AMD64) && defined(TARGET_UNIX) - if (ctx != 0 && iEncodedReg > 15) +#elif defined(TARGET_AMD64) + if ((ctx != 0 && iEncodedReg > 15) || 
!IsAPXSupported()) { break; } -#endif // TARGET_AMD64 || TARGET_UNIX +#endif // TARGET_AMD64 { _ASSERTE(iReg < nCONTEXTRegisters); #ifdef TARGET_ARM @@ -441,7 +437,7 @@ PORTABILITY_ASSERT("GcInfoDumper::ReportPointerRecord is not implemented on this { pReg = (SIZE_T*)((BYTE*)pRD->pCurrentContext + rgRegisters[iReg].cbContextOffset); } -#elif defined(TARGET_AMD64) && defined(TARGET_UNIX) +#elif defined(TARGET_AMD64) if (ctx == 0 && iReg == 16) { pContext = (BYTE*)&(pRD->volatileCurrContextPointers); @@ -705,10 +701,13 @@ GcInfoDumper::EnumerateStateChangesResults GcInfoDumper::EnumerateStateChanges ( *(ppCallerRax + iReg) = &regdisp.pCallerContext ->Rax + iReg; } #if defined(TARGET_UNIX) && defined(HOST_UNIX) - ULONG64 **ppVolatileReg = &regdisp.volatileCurrContextPointers.R16; - for (iReg = 0; iReg < 16; iReg++) + if (IsAPXSupported()) { - *(ppVolatileReg+iReg) = &regdisp.pCurrentContext->R16 + iReg; + ULONG64 **ppVolatileReg = &regdisp.volatileCurrContextPointers.R16; + for (iReg = 0; iReg < 16; iReg++) + { + *(ppVolatileReg+iReg) = &regdisp.pCurrentContext->R16 + iReg; + } } #endif // TARGET_UNIX #elif defined(TARGET_ARM) diff --git a/src/coreclr/nativeaot/Runtime/amd64/AsmOffsetsCpu.h b/src/coreclr/nativeaot/Runtime/amd64/AsmOffsetsCpu.h index 7bf55871558c0e..945ec257cc4c64 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/AsmOffsetsCpu.h +++ b/src/coreclr/nativeaot/Runtime/amd64/AsmOffsetsCpu.h @@ -8,7 +8,7 @@ // NOTE: the offsets MUST be in hex notation WITHOUT the 0x prefix #ifndef UNIX_AMD64_ABI -PLAT_ASM_SIZEOF(250, ExInfo) +PLAT_ASM_SIZEOF(2d0, ExInfo) PLAT_ASM_OFFSET(0, ExInfo, m_pPrevExInfo) PLAT_ASM_OFFSET(8, ExInfo, m_pExContext) PLAT_ASM_OFFSET(10, ExInfo, m_exception) @@ -16,7 +16,7 @@ PLAT_ASM_OFFSET(18, ExInfo, m_kind) PLAT_ASM_OFFSET(19, ExInfo, m_passNumber) PLAT_ASM_OFFSET(1c, ExInfo, m_idxCurClause) PLAT_ASM_OFFSET(20, ExInfo, m_frameIter) -PLAT_ASM_OFFSET(240, ExInfo, m_notifyDebuggerSP) +PLAT_ASM_OFFSET(2c0, ExInfo, m_notifyDebuggerSP) PLAT_ASM_OFFSET(0,
PInvokeTransitionFrame, m_RIP) PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_FramePointer) @@ -24,12 +24,12 @@ PLAT_ASM_OFFSET(10, PInvokeTransitionFrame, m_pThread) PLAT_ASM_OFFSET(18, PInvokeTransitionFrame, m_Flags) PLAT_ASM_OFFSET(20, PInvokeTransitionFrame, m_PreservedRegs) -PLAT_ASM_SIZEOF(220, StackFrameIterator) +PLAT_ASM_SIZEOF(2A0, StackFrameIterator) PLAT_ASM_OFFSET(10, StackFrameIterator, m_FramePointer) PLAT_ASM_OFFSET(18, StackFrameIterator, m_ControlPC) PLAT_ASM_OFFSET(20, StackFrameIterator, m_RegDisplay) -PLAT_ASM_OFFSET(210, StackFrameIterator, m_OriginalControlPC) -PLAT_ASM_OFFSET(218, StackFrameIterator, m_pPreviousTransitionFrame) +PLAT_ASM_OFFSET(290, StackFrameIterator, m_OriginalControlPC) +PLAT_ASM_OFFSET(298, StackFrameIterator, m_pPreviousTransitionFrame) PLAT_ASM_SIZEOF(100, PAL_LIMITED_CONTEXT) PLAT_ASM_OFFSET(0, PAL_LIMITED_CONTEXT, IP) @@ -56,10 +56,10 @@ PLAT_ASM_OFFSET(0d0, PAL_LIMITED_CONTEXT, Xmm13) PLAT_ASM_OFFSET(0e0, PAL_LIMITED_CONTEXT, Xmm14) PLAT_ASM_OFFSET(0f0, PAL_LIMITED_CONTEXT, Xmm15) -PLAT_ASM_SIZEOF(130, REGDISPLAY) -PLAT_ASM_OFFSET(78, REGDISPLAY, SP) -PLAT_ASM_OFFSET(80, REGDISPLAY, IP) -PLAT_ASM_OFFSET(88, REGDISPLAY, SSP) +PLAT_ASM_SIZEOF(1b0, REGDISPLAY) +PLAT_ASM_OFFSET(f8, REGDISPLAY, SP) +PLAT_ASM_OFFSET(100, REGDISPLAY, IP) +PLAT_ASM_OFFSET(108, REGDISPLAY, SSP) PLAT_ASM_OFFSET(18, REGDISPLAY, pRbx) PLAT_ASM_OFFSET(20, REGDISPLAY, pRbp) @@ -69,7 +69,7 @@ PLAT_ASM_OFFSET(58, REGDISPLAY, pR12) PLAT_ASM_OFFSET(60, REGDISPLAY, pR13) PLAT_ASM_OFFSET(68, REGDISPLAY, pR14) PLAT_ASM_OFFSET(70, REGDISPLAY, pR15) -PLAT_ASM_OFFSET(90, REGDISPLAY, Xmm) +PLAT_ASM_OFFSET(110, REGDISPLAY, Xmm) #else // !UNIX_AMD64_ABI diff --git a/src/coreclr/nativeaot/Runtime/regdisplay.h b/src/coreclr/nativeaot/Runtime/regdisplay.h index 311502ee06954b..83dcf8aa48daa4 100644 --- a/src/coreclr/nativeaot/Runtime/regdisplay.h +++ b/src/coreclr/nativeaot/Runtime/regdisplay.h @@ -4,9 +4,22 @@ #ifndef __regdisplay_h__ #define __regdisplay_h__ 
+#if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64) +// This field is inspected from the generated code to determine what intrinsics are available. +EXTERN_C int g_cpuFeatures; +#endif + #if defined(TARGET_X86) || defined(TARGET_AMD64) #include "PalLimitedContext.h" // Fp128 +#include + +#if defined(TARGET_AMD64) +inline bool IsAPXSupported() +{ + return (g_cpuFeatures & XArchIntrinsicConstants_Apx); +} +#endif // TARGET_AMD64 struct REGDISPLAY { @@ -27,7 +40,6 @@ struct REGDISPLAY PTR_uintptr_t pR13; PTR_uintptr_t pR14; PTR_uintptr_t pR15; -#if defined(TARGET_UNIX) PTR_uintptr_t pR16; PTR_uintptr_t pR17; PTR_uintptr_t pR18; @@ -44,7 +56,6 @@ struct REGDISPLAY PTR_uintptr_t pR29; PTR_uintptr_t pR30; PTR_uintptr_t pR31; -#endif //TARGET_UNIX #endif // TARGET_AMD64 uintptr_t SP; diff --git a/src/coreclr/nativeaot/Runtime/startup.cpp b/src/coreclr/nativeaot/Runtime/startup.cpp index c80ae2069abe48..6ceea8eaade598 100644 --- a/src/coreclr/nativeaot/Runtime/startup.cpp +++ b/src/coreclr/nativeaot/Runtime/startup.cpp @@ -50,7 +50,7 @@ extern RhConfig * g_pRhConfig; #if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64) // This field is inspected from the generated code to determine what intrinsics are available. -EXTERN_C int g_cpuFeatures; +// EXTERN_C int g_cpuFeatures; int g_cpuFeatures = 0; // This field is defined in the generated code and sets the ISA expectations. diff --git a/src/coreclr/unwinder/amd64/unwinder.cpp b/src/coreclr/unwinder/amd64/unwinder.cpp index 3383a5faf10554..ca4bb14c674f9d 100644 --- a/src/coreclr/unwinder/amd64/unwinder.cpp +++ b/src/coreclr/unwinder/amd64/unwinder.cpp @@ -205,7 +205,9 @@ BOOL DacUnwindStackFrame(CONTEXT * pContext, KNONVOLATILE_CONTEXT_POINTERS* pCon if (res && pContextPointers) { - for (int i = 0; i < 32; i++) + // TODO APX: this function restores the callee saved registers. + // As of now, this does not need to restore APX EGPRs. 
+ for (int i = 0; i < 16; i++) { *(&pContextPointers->Rax + i) = &pContext->Rax + i; } diff --git a/src/coreclr/vm/amd64/cgenamd64.cpp b/src/coreclr/vm/amd64/cgenamd64.cpp index d72288329f1532..65d9893c1f5413 100644 --- a/src/coreclr/vm/amd64/cgenamd64.cpp +++ b/src/coreclr/vm/amd64/cgenamd64.cpp @@ -233,8 +233,11 @@ void ResumableFrame::UpdateRegDisplay_Impl(const PREGDISPLAY pRD, bool updateFlo pRD->pCurrentContextPointers->R15 = &m_Regs->R15; #if defined(TARGET_UNIX) - for (int i = 0; i < 16; i++) - pRD->volatileCurrContextPointers.R[i] = &m_Regs->R[i]; + if (IsAPXSupported()) + { + for (int i = 0; i < 16; i++) + pRD->volatileCurrContextPointers.R[i] = &m_Regs->R[i]; + } #endif // TARGET_UNIX pRD->IsCallerContextValid = FALSE; diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index 03aaed2aa9515f..3e302f9922c0ff 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -42,6 +42,9 @@ #include "perfmap.h" #endif +// cpufeatures for VM +int cpuFeatures = 0; + // Default number of jump stubs in a jump stub block #define DEFAULT_JUMPSTUBS_PER_BLOCK 32 @@ -1178,7 +1181,7 @@ void EEJitManager::SetCpuInfo() CORJIT_FLAGS CPUCompileFlags; - int cpuFeatures = minipal_getcpufeatures(); + cpuFeatures = minipal_getcpufeatures(); // Get the maximum bitwidth of Vector, rounding down to the nearest multiple of 128-bits uint32_t maxVectorTBitWidth = (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_MaxVectorTBitWidth) / 128) * 128; diff --git a/src/coreclr/vm/codeman.h b/src/coreclr/vm/codeman.h index 13e22174d99217..97acb3c9f41a83 100644 --- a/src/coreclr/vm/codeman.h +++ b/src/coreclr/vm/codeman.h @@ -91,6 +91,17 @@ typedef struct } EH_CLAUSE_ENUMERATOR; class EECodeInfo; +// Cache the cpufeatures for use in other parts of VM. 
+// This is mainly added here to use the variable in GC +// APX support checks +extern int cpuFeatures; +#if defined(TARGET_AMD64) +inline bool IsAPXSupported() +{ + return (cpuFeatures & XArchIntrinsicConstants_Apx); +} +#endif // TARGET_AMD64 + #define ROUND_DOWN_TO_PAGE(x) ( (size_t) (x) & ~((size_t)GetOsPageSize()-1)) #define ROUND_UP_TO_PAGE(x) (((size_t) (x) + (GetOsPageSize()-1)) & ~((size_t)GetOsPageSize()-1)) @@ -2201,13 +2212,6 @@ class EEJitManager final : public EECodeGenManager return m_CPUCompileFlags; } -#if defined(TARGET_AMD64) - inline bool IsAPXSupported() - { - return m_CPUCompileFlags.IsSet(InstructionSet_APX); - } -#endif // TARGET_AMD64 - private : Crst m_JitLoadLock; diff --git a/src/coreclr/vm/gcinfodecoder.cpp b/src/coreclr/vm/gcinfodecoder.cpp index ecb99472cc3591..2289b34dacdd9e 100644 --- a/src/coreclr/vm/gcinfodecoder.cpp +++ b/src/coreclr/vm/gcinfodecoder.cpp @@ -1488,11 +1488,7 @@ template OBJECTREF* TGcInfoDecoder::Ge PREGDISPLAY pRD ) { -#if defined(TARGET_UNIX) - _ASSERTE(regNum >= 0 && regNum <= 32); -#else // TARGET_UNIX - _ASSERTE(regNum >= 0 && regNum <= 16); -#endif // TARGET_UNIX + _ASSERTE(regNum >= 0 && (regNum <= 16 || (IsAPXSupported() && regNum <=32))); _ASSERTE(regNum != 4); // rsp #ifdef FEATURE_NATIVEAOT @@ -1502,8 +1498,9 @@ template OBJECTREF* TGcInfoDecoder::Ge // The fields of KNONVOLATILE_CONTEXT_POINTERS are in the same order as // the processor encoding numbers. ULONGLONG **ppRax = &pRD->pCurrentContextPointers->Rax; - if(ExecutionManager::GetEEJitManager()->IsAPXSupported() && regNum >= 16) + if(regNum >= 16) { + assert(IsAPXSupported()); ppRax = &pRD->volatileCurrContextPointers.R16; return (OBJECTREF*)*(ppRax + regNum - 16); }