From c2b6454ab5158faeb7135c1a021b0e9430b828d7 Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Wed, 30 Jul 2025 11:04:40 +0900 Subject: [PATCH 01/33] [CLRINTRP] Port to ARM --- src/coreclr/clr.featuredefines.props | 2 +- src/coreclr/clrfeatures.cmake | 6 +- src/coreclr/vm/callstubgenerator.cpp | 164 +++++++++++++++++++++++++++ 3 files changed, 168 insertions(+), 4 deletions(-) diff --git a/src/coreclr/clr.featuredefines.props b/src/coreclr/clr.featuredefines.props index ccd03ad67cd5c2..adb46fb21f809a 100644 --- a/src/coreclr/clr.featuredefines.props +++ b/src/coreclr/clr.featuredefines.props @@ -36,7 +36,7 @@ true - + true diff --git a/src/coreclr/clrfeatures.cmake b/src/coreclr/clrfeatures.cmake index 0327aefce005d6..8a4f2c31f010a7 100644 --- a/src/coreclr/clrfeatures.cmake +++ b/src/coreclr/clrfeatures.cmake @@ -44,11 +44,11 @@ if(NOT DEFINED FEATURE_INTERPRETER) set(FEATURE_PORTABLE_ENTRYPOINTS 1) set(FEATURE_PORTABLE_HELPERS 1) else() - if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64) + if(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_ARM) set(FEATURE_INTERPRETER $,1,0>) - else(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64) + else(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_ARM) set(FEATURE_INTERPRETER 0) - endif(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64) + endif(CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_ARM) endif() endif(NOT DEFINED FEATURE_INTERPRETER) diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp index 9e3f7ab503d0ca..50d88d0efff0e4 100644 --- a/src/coreclr/vm/callstubgenerator.cpp +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -979,6 +979,160 @@ PCODE FPRegsRoutines[] = #endif // TARGET_ARM64 +#ifdef TARGET_ARM + +extern "C" void Load_X0(); +extern "C" void Load_X0_X1(); +extern "C" void Load_X0_X1_X2(); +extern "C" void Load_X0_X1_X2_X3(); +extern 
"C" void Load_X1(); +extern "C" void Load_X1_X2(); +extern "C" void Load_X1_X2_X3(); +extern "C" void Load_X2(); +extern "C" void Load_X2_X3(); +extern "C" void Load_X3(); + +extern "C" void Store_X0(); +extern "C" void Store_X0_X1(); +extern "C" void Store_X0_X1_X2(); +extern "C" void Store_X0_X1_X2_X3(); +extern "C" void Store_X1(); +extern "C" void Store_X1_X2(); +extern "C" void Store_X1_X2_X3(); +extern "C" void Store_X2(); +extern "C" void Store_X2_X3(); +extern "C" void Store_X3(); + +extern "C" void Load_Ref_X0(); +extern "C" void Load_Ref_X1(); +extern "C" void Load_Ref_X2(); +extern "C" void Load_Ref_X3(); + +extern "C" void Store_Ref_X0(); +extern "C" void Store_Ref_X1(); +extern "C" void Store_Ref_X2(); +extern "C" void Store_Ref_X3(); + +PCODE GPRegsRoutines[] = +{ + (PCODE)Load_X0, // 00 + (PCODE)Load_X0_X1, // 01 + (PCODE)Load_X0_X1_X2, // 02 + (PCODE)Load_X0_X1_X2_X3, // 03 + (PCODE)0, // 04 + (PCODE)Load_X1, // 05 + (PCODE)Load_X1_X2, // 06 + (PCODE)Load_X1_X2_X3, // 07 + (PCODE)0, // 08 + (PCODE)0, // 09 + (PCODE)Load_X2, // 10 + (PCODE)Load_X2_X3, // 11 + (PCODE)0, // 12 + (PCODE)0, // 13 + (PCODE)0, // 14 + (PCODE)Load_X3, // 15 +}; + +PCODE GPRegsStoreRoutines[] = +{ + (PCODE)Store_X0, // 00 + (PCODE)Store_X0_X1, // 01 + (PCODE)Store_X0_X1_X2, // 02 + (PCODE)Store_X0_X1_X2_X3, // 03 + (PCODE)0, // 04 + (PCODE)Store_X1, // 05 + (PCODE)Store_X1_X2, // 06 + (PCODE)Store_X1_X2_X3, // 07 + (PCODE)0, // 08 + (PCODE)0, // 09 + (PCODE)Store_X2, // 10 + (PCODE)Store_X2_X3, // 11 + (PCODE)0, // 12 + (PCODE)0, // 13 + (PCODE)0, // 14 + (PCODE)Store_X3, // 15 +}; + +PCODE GPRegsRefRoutines[] = +{ + (PCODE)Load_Ref_X0, // 0 + (PCODE)Load_Ref_X1, // 1 + (PCODE)Load_Ref_X2, // 2 + (PCODE)Load_Ref_X3, // 3 +}; + +PCODE GPRegsRefStoreRoutines[] = +{ + (PCODE)Store_Ref_X0, // 0 + (PCODE)Store_Ref_X1, // 1 + (PCODE)Store_Ref_X2, // 2 + (PCODE)Store_Ref_X3, // 3 +}; + +extern "C" void Load_F0(); +extern "C" void Load_F0_F1(); +extern "C" void Load_F0_F1_F2(); 
+extern "C" void Load_F0_F1_F2_F3(); +extern "C" void Load_F1(); +extern "C" void Load_F1_F2(); +extern "C" void Load_F1_F2_F3(); +extern "C" void Load_F2(); +extern "C" void Load_F2_F3(); +extern "C" void Load_F3(); + +extern "C" void Store_F0(); +extern "C" void Store_F0_F1(); +extern "C" void Store_F0_F1_F2(); +extern "C" void Store_F0_F1_F2_F3(); +extern "C" void Store_F1(); +extern "C" void Store_F1_F2(); +extern "C" void Store_F1_F2_F3(); +extern "C" void Store_F2(); +extern "C" void Store_F2_F3(); +extern "C" void Store_F3(); + +PCODE FPRegsStoreRoutines[] = +{ + (PCODE)Store_F0, // 00 + (PCODE)Store_F0_F1, // 01 + (PCODE)Store_F0_F1_F2, // 02 + (PCODE)Store_F0_F1_F2_F3, // 03 + (PCODE)0, // 04 + (PCODE)Store_F1, // 05 + (PCODE)Store_F1_F2, // 06 + (PCODE)Store_F1_F2_F3, // 07 + (PCODE)0, // 08 + (PCODE)0, // 09 + (PCODE)Store_F2, // 10 + (PCODE)Store_F2_F3, // 11 + (PCODE)0, // 12 + (PCODE)0, // 13 + (PCODE)0, // 14 + (PCODE)Store_F3, // 15 +}; + +PCODE FPRegsRoutines[] = +{ + (PCODE)Load_F0, // 00 + (PCODE)Load_F0_F1, // 01 + (PCODE)Load_F0_F1_F2, // 02 + (PCODE)Load_F0_F1_F2_F3, // 03 + (PCODE)0, // 04 + (PCODE)Load_F1, // 05 + (PCODE)Load_F1_F2, // 06 + (PCODE)Load_F1_F2_F3, // 07 + (PCODE)0, // 08 + (PCODE)0, // 09 + (PCODE)Load_F2, // 10 + (PCODE)Load_F2_F3, // 11 + (PCODE)0, // 12 + (PCODE)0, // 13 + (PCODE)0, // 14 + (PCODE)Load_F3, // 15 +}; + +#endif // TARGET_ARM + #define LOG_COMPUTE_CALL_STUB 0 PCODE CallStubGenerator::GetStackRoutine() @@ -1586,7 +1740,11 @@ void CallStubGenerator::ProcessArgument(ArgIterator *pArgIt, ArgLocDesc& argLocD m_r1 = argLocDesc.m_idxGenReg; m_r2 = m_r1 + argLocDesc.m_cGenReg - 1; } +#ifdef ENREGISTERED_PARAMTYPE_MAXSIZE else if (argLocDesc.m_idxGenReg == m_r2 + 1 && (!pArgIt || !pArgIt->IsArgPassedByRef())) +#else + else if (argLocDesc.m_idxGenReg == m_r2 + 1) +#endif // ENREGISTERED_PARAMTYPE_MAXSIZE { // Extend an existing range, but only if the argument is not passed by reference. 
// Arguments passed by reference are handled separately, because the interpreter stores the value types on its stack by value. @@ -1637,7 +1795,11 @@ void CallStubGenerator::ProcessArgument(ArgIterator *pArgIt, ArgLocDesc& argLocD m_s1 = argLocDesc.m_byteStackIndex; m_s2 = m_s1 + argLocDesc.m_byteStackSize - 1; } +#ifdef ENREGISTERED_PARAMTYPE_MAXSIZE else if ((argLocDesc.m_byteStackIndex == m_s2 + 1) && (argLocDesc.m_byteStackSize >= 8) && (!pArgIt || !pArgIt->IsArgPassedByRef())) +#else + else if ((argLocDesc.m_byteStackIndex == m_s2 + 1) && (argLocDesc.m_byteStackSize >= 8)) +#endif // ENREGISTERED_PARAMTYPE_MAXSIZE { // Extend an existing range, but only if the argument is at least pointer size large. // The only case when this is not true is on Apple ARM64 OSes where primitive type smaller @@ -1683,6 +1845,7 @@ void CallStubGenerator::ProcessArgument(ArgIterator *pArgIt, ArgLocDesc& argLocD // Arguments passed by reference are handled separately, because the interpreter stores the value types on its stack by value. // So the argument loading routine needs to load the address of the argument. To avoid explosion of number of the routines, // we always process single argument passed by reference using single routine. 
+#ifdef ENREGISTERED_PARAMTYPE_MAXSIZE if (pArgIt != NULL && pArgIt->IsArgPassedByRef()) { int unalignedArgSize = pArgIt->GetArgSize(); @@ -1710,6 +1873,7 @@ void CallStubGenerator::ProcessArgument(ArgIterator *pArgIt, ArgLocDesc& argLocD m_s1 = NoRange; } } +#endif // ENREGISTERED_PARAMTYPE_MAXSIZE #endif // UNIX_AMD64_ABI } From ad24b45b25844e74fcc577d54d850d1f43320e82 Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Thu, 11 Sep 2025 18:14:34 +0900 Subject: [PATCH 02/33] [CLRINTRP] Make dummy asssembly functions --- src/coreclr/vm/arm/asmhelpers.S | 166 +++++++++++++++++++++++++++ src/coreclr/vm/callstubgenerator.cpp | 7 ++ 2 files changed, 173 insertions(+) diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index 1e485992c09ec9..e9b16a6da1d789 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -897,3 +897,169 @@ LEAF_ENTRY ThisPtrRetBufPrecodeWorker, _TEXT eor r0, r0, r1 EPILOG_BRANCH_REG r12 LEAF_END ThisPtrRetBufPrecodeWorker, _TEXT + +LEAF_ENTRY Load_X0 + EMIT_BREAKPOINT // TODO +LEAF_END Load_X0 + +LEAF_ENTRY Load_X0_X1 + EMIT_BREAKPOINT // TODO +LEAF_END Load_X0_X1 + +LEAF_ENTRY Load_X0_X1_X2 + EMIT_BREAKPOINT // TODO +LEAF_END Load_X0_X1_X2 + +LEAF_ENTRY Load_X0_X1_X2_X3 + EMIT_BREAKPOINT // TODO +LEAF_END Load_X0_X1_X2_X3 + +LEAF_ENTRY Load_X1 + EMIT_BREAKPOINT // TODO +LEAF_END Load_X1 + +LEAF_ENTRY Load_X1_X2 + EMIT_BREAKPOINT // TODO +LEAF_END Load_X1_X2 + +LEAF_ENTRY Load_X1_X2_X3 + EMIT_BREAKPOINT // TODO +LEAF_END Load_X1_X2_X3 + +LEAF_ENTRY Load_X2 + EMIT_BREAKPOINT // TODO +LEAF_END Load_X2 + +LEAF_ENTRY Load_X2_X3 + EMIT_BREAKPOINT // TODO +LEAF_END Load_X2_X3 + +LEAF_ENTRY Load_X3 + EMIT_BREAKPOINT // TODO +LEAF_END Load_X3 + +LEAF_ENTRY Store_X0 + EMIT_BREAKPOINT // TODO +LEAF_END Store_X0 + +LEAF_ENTRY Store_X0_X1 + EMIT_BREAKPOINT // TODO +LEAF_END Store_X0_X1 + +LEAF_ENTRY Store_X0_X1_X2 + EMIT_BREAKPOINT // TODO +LEAF_END Store_X0_X1_X2 + +LEAF_ENTRY Store_X0_X1_X2_X3 + 
EMIT_BREAKPOINT // TODO +LEAF_END Store_X0_X1_X2_X3 + +LEAF_ENTRY Store_X1 + EMIT_BREAKPOINT // TODO +LEAF_END Store_X1 + +LEAF_ENTRY Store_X1_X2 + EMIT_BREAKPOINT // TODO +LEAF_END Store_X1_X2 + +LEAF_ENTRY Store_X1_X2_X3 + EMIT_BREAKPOINT // TODO +LEAF_END Store_X1_X2_X3 + +LEAF_ENTRY Store_X2 + EMIT_BREAKPOINT // TODO +LEAF_END Store_X2 + +LEAF_ENTRY Store_X2_X3 + EMIT_BREAKPOINT // TODO +LEAF_END Store_X2_X3 + +LEAF_ENTRY Store_X3 + EMIT_BREAKPOINT // TODO +LEAF_END Store_X3 + +LEAF_ENTRY Load_Ref_X0 + EMIT_BREAKPOINT // TODO +LEAF_END Load_Ref_X0 + +LEAF_ENTRY Load_Ref_X1 + EMIT_BREAKPOINT // TODO +LEAF_END Load_Ref_X1 + +LEAF_ENTRY Load_Ref_X2 + EMIT_BREAKPOINT // TODO +LEAF_END Load_Ref_X2 + +LEAF_ENTRY Load_Ref_X3 + EMIT_BREAKPOINT // TODO +LEAF_END Load_Ref_X3 + +LEAF_ENTRY Store_Ref_X0 + EMIT_BREAKPOINT // TODO +LEAF_END Store_Ref_X0 + +LEAF_ENTRY Store_Ref_X1 + EMIT_BREAKPOINT // TODO +LEAF_END Store_Ref_X1 + +LEAF_ENTRY Store_Ref_X2 + EMIT_BREAKPOINT // TODO +LEAF_END Store_Ref_X2 + +LEAF_ENTRY Store_Ref_X3 + EMIT_BREAKPOINT // TODO +LEAF_END Store_Ref_X3 + +LEAF_ENTRY Load_Stack + EMIT_BREAKPOINT // TODO +LEAF_END Load_Stack + +LEAF_ENTRY Store_Stack + EMIT_BREAKPOINT // TODO +LEAF_END Store_Stack + +LEAF_ENTRY Load_Stack_Ref + EMIT_BREAKPOINT // TODO +LEAF_END Load_Stack_Ref + +LEAF_ENTRY Store_Stack_Ref + EMIT_BREAKPOINT // TODO +LEAF_END Store_Stack_Ref + +NESTED_ENTRY CallJittedMethodRetVoid, _TEXT, NoHandler + EMIT_BREAKPOINT // TODO +NESTED_END CallJittedMethodRetVoid, _TEXT + +NESTED_ENTRY CallJittedMethodRetI8, _TEXT, NoHandler + EMIT_BREAKPOINT // TODO +NESTED_END CallJittedMethodRetI8, _TEXT + +NESTED_ENTRY CallJittedMethodRetDouble, _TEXT, NoHandler + EMIT_BREAKPOINT // TODO +NESTED_END CallJittedMethodRetDouble, _TEXT + +NESTED_ENTRY CallJittedMethodRetBuff, _TEXT, NoHandler + EMIT_BREAKPOINT // TODO +NESTED_END CallJittedMethodRetBuff, _TEXT + +NESTED_ENTRY InterpreterStub, _TEXT, NoHandler + EMIT_BREAKPOINT // TODO +NESTED_END 
InterpreterStub, _TEXT + +NESTED_ENTRY InterpreterStubRetVoid, _TEXT, NoHandler + EMIT_BREAKPOINT // TODO +NESTED_END InterpreterStubRetVoid, _TEXT + +NESTED_ENTRY InterpreterStubRetI8, _TEXT, NoHandler + EMIT_BREAKPOINT // TODO +NESTED_END InterpreterStubRetI8, _TEXT + +NESTED_ENTRY InterpreterStubRetDouble, _TEXT, NoHandler + EMIT_BREAKPOINT // TODO +NESTED_END InterpreterStubRetDouble, _TEXT + +NESTED_ENTRY InterpreterStubRetBuff, _TEXT, NoHandler + EMIT_BREAKPOINT // TODO +NESTED_END InterpreterStubRetBuff, _TEXT + + diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp index 50d88d0efff0e4..4cf36594c49e53 100644 --- a/src/coreclr/vm/callstubgenerator.cpp +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -1069,6 +1069,7 @@ PCODE GPRegsRefStoreRoutines[] = (PCODE)Store_Ref_X3, // 3 }; +#if 0 extern "C" void Load_F0(); extern "C" void Load_F0_F1(); extern "C" void Load_F0_F1_F2(); @@ -1130,6 +1131,7 @@ PCODE FPRegsRoutines[] = (PCODE)0, // 14 (PCODE)Load_F3, // 15 }; +#endif #endif // TARGET_ARM @@ -1204,7 +1206,12 @@ PCODE CallStubGenerator::GetFPRegRangeRoutine(int x1, int x2) printf("GetFPRegRangeRoutine %d %d\n", x1, x2); #endif int index = x1 * NUM_FLOAT_ARGUMENT_REGISTERS + x2; +#ifdef TARGET_ARM + _ASSERTE(!"Not supported FP reg in ARMEL"); + return NULL; +#else return m_interpreterToNative ? 
FPRegsRoutines[index] : FPRegsStoreRoutines[index]; +#endif } extern "C" void CallJittedMethodRetVoid(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); From 04b980f91d5374eaadbc747caffc35531d2281a4 Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Tue, 16 Sep 2025 16:11:52 +0900 Subject: [PATCH 03/33] Disable Float --- src/coreclr/vm/callstubgenerator.cpp | 64 ---------------------------- 1 file changed, 64 deletions(-) diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp index 4cf36594c49e53..e7499b47341e84 100644 --- a/src/coreclr/vm/callstubgenerator.cpp +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -1069,70 +1069,6 @@ PCODE GPRegsRefStoreRoutines[] = (PCODE)Store_Ref_X3, // 3 }; -#if 0 -extern "C" void Load_F0(); -extern "C" void Load_F0_F1(); -extern "C" void Load_F0_F1_F2(); -extern "C" void Load_F0_F1_F2_F3(); -extern "C" void Load_F1(); -extern "C" void Load_F1_F2(); -extern "C" void Load_F1_F2_F3(); -extern "C" void Load_F2(); -extern "C" void Load_F2_F3(); -extern "C" void Load_F3(); - -extern "C" void Store_F0(); -extern "C" void Store_F0_F1(); -extern "C" void Store_F0_F1_F2(); -extern "C" void Store_F0_F1_F2_F3(); -extern "C" void Store_F1(); -extern "C" void Store_F1_F2(); -extern "C" void Store_F1_F2_F3(); -extern "C" void Store_F2(); -extern "C" void Store_F2_F3(); -extern "C" void Store_F3(); - -PCODE FPRegsStoreRoutines[] = -{ - (PCODE)Store_F0, // 00 - (PCODE)Store_F0_F1, // 01 - (PCODE)Store_F0_F1_F2, // 02 - (PCODE)Store_F0_F1_F2_F3, // 03 - (PCODE)0, // 04 - (PCODE)Store_F1, // 05 - (PCODE)Store_F1_F2, // 06 - (PCODE)Store_F1_F2_F3, // 07 - (PCODE)0, // 08 - (PCODE)0, // 09 - (PCODE)Store_F2, // 10 - (PCODE)Store_F2_F3, // 11 - (PCODE)0, // 12 - (PCODE)0, // 13 - (PCODE)0, // 14 - (PCODE)Store_F3, // 15 -}; - -PCODE FPRegsRoutines[] = -{ - (PCODE)Load_F0, // 00 - (PCODE)Load_F0_F1, // 01 - (PCODE)Load_F0_F1_F2, // 02 - (PCODE)Load_F0_F1_F2_F3, // 03 - (PCODE)0, // 04 - (PCODE)Load_F1, // 05 
- (PCODE)Load_F1_F2, // 06 - (PCODE)Load_F1_F2_F3, // 07 - (PCODE)0, // 08 - (PCODE)0, // 09 - (PCODE)Load_F2, // 10 - (PCODE)Load_F2_F3, // 11 - (PCODE)0, // 12 - (PCODE)0, // 13 - (PCODE)0, // 14 - (PCODE)Load_F3, // 15 -}; -#endif - #endif // TARGET_ARM #define LOG_COMPUTE_CALL_STUB 0 From 8cd71fa03b47bf652533c34fce3b554eb7074873 Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Thu, 18 Sep 2025 20:21:21 +0900 Subject: [PATCH 04/33] [INTRP] Print HelloWorld Done However, it crashes right after HellWorld print. --- src/coreclr/pal/inc/unixasmmacrosarm.inc | 21 ++++++++ src/coreclr/vm/arm/asmconstants.h | 31 ++++++++++++ src/coreclr/vm/arm/asmhelpers.S | 62 ++++++++++++++++++++++-- src/coreclr/vm/interpexec.cpp | 2 + 4 files changed, 111 insertions(+), 5 deletions(-) diff --git a/src/coreclr/pal/inc/unixasmmacrosarm.inc b/src/coreclr/pal/inc/unixasmmacrosarm.inc index 54a6f7d4dc3b19..0bbe1f12f4e234 100644 --- a/src/coreclr/pal/inc/unixasmmacrosarm.inc +++ b/src/coreclr/pal/inc/unixasmmacrosarm.inc @@ -318,3 +318,24 @@ C_FUNC(\Name): movw \DestReg, #((\Constant) & 0xFFFF) movt \DestReg, #((\Constant) >> 16) .endm + + +// thumb with PIC version +.macro INLINE_GET_TLS_VAR var + ldr r0, 2f +1: + add r0, pc, r0 + bl __tls_get_addr + b 3f + + // Inline data + // LLVM assembler has no concept of subsections and this is not expressible as + // cross-section relocation. 
+ .p2align 2 +2: + .extern \var + .type \var, tls_object + .long \var(TLSGD) + (2b - 1b - 4) +3: +.endm + diff --git a/src/coreclr/vm/arm/asmconstants.h b/src/coreclr/vm/arm/asmconstants.h index e2e52c399fde27..85c36e600a1d9f 100644 --- a/src/coreclr/vm/arm/asmconstants.h +++ b/src/coreclr/vm/arm/asmconstants.h @@ -73,6 +73,8 @@ ASMCONSTANTS_C_ASSERT(ASM_MIN_OBJECT_SIZE == MIN_OBJECT_SIZE); #define MethodTable__enum_flag_ContainsGCPointers 0x01000000 ASMCONSTANTS_C_ASSERT(MethodTable__enum_flag_ContainsGCPointers == MethodTable::enum_flag_ContainsGCPointers); +#define METHODDESC_REGISTER r12 + #define SIZEOF__MethodTable DBG_FRE(0x2c, 0x28) ASMCONSTANTS_C_ASSERT(SIZEOF__MethodTable == sizeof(MethodTable)); @@ -198,6 +200,35 @@ ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForMethod == offsetof(CallCoun ASMCONSTANTS_C_ASSERT(CallCountingStubData__TargetForThresholdReached == offsetof(CallCountingStubData, TargetForThresholdReached)) #endif // FEATURE_TIERED_COMPILATION +#define OFFSETOF__ThreadLocalInfo__m_pThread 0 +ASMCONSTANTS_C_ASSERT(OFFSETOF__ThreadLocalInfo__m_pThread == offsetof(ThreadLocalInfo, m_pThread)) + +#ifdef FEATURE_INTERPRETER +#ifdef _DEBUG +#define OFFSETOF__InterpMethod__pCallStub 0x14 +#else +#define OFFSETOF__InterpMethod__pCallStub 0x0 +#endif +ASMCONSTANTS_C_ASSERT(OFFSETOF__InterpMethod__pCallStub == offsetof(InterpMethod, pCallStub)) + +#ifdef TARGET_UNIX +#define OFFSETOF__Thread__m_pInterpThreadContext 0x660 +#else // TARGET_UNIX +#define OFFSETOF__Thread__m_pInterpThreadContext 0x0 +#endif // TARGET_UNIX +ASMCONSTANTS_C_ASSERT(OFFSETOF__Thread__m_pInterpThreadContext == offsetof(Thread, m_pInterpThreadContext)) + +#define OFFSETOF__InterpThreadContext__pStackPointer 0x8 +ASMCONSTANTS_C_ASSERT(OFFSETOF__InterpThreadContext__pStackPointer == offsetof(InterpThreadContext, pStackPointer)) + +#define OFFSETOF__CallStubHeader__Routines 0xc +ASMCONSTANTS_C_ASSERT(OFFSETOF__CallStubHeader__Routines == offsetof(CallStubHeader, Routines)) + 
+#define SIZEOF__TransitionBlock 0x34 +ASMCONSTANTS_C_ASSERT(SIZEOF__TransitionBlock == sizeof(TransitionBlock)) + +#endif // FEATURE_INTERPRETER + #ifdef PROFILING_SUPPORTED #define PROFILE_ENTER 0x1 #define PROFILE_LEAVE 0x2 diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index e9b16a6da1d789..9e152428416d3e 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -899,7 +899,9 @@ LEAF_ENTRY ThisPtrRetBufPrecodeWorker, _TEXT LEAF_END ThisPtrRetBufPrecodeWorker, _TEXT LEAF_ENTRY Load_X0 - EMIT_BREAKPOINT // TODO + ldr r0, [r7], #4 + ldr r5, [r6], #4 + EPILOG_BRANCH_REG r5 LEAF_END Load_X0 LEAF_ENTRY Load_X0_X1 @@ -939,7 +941,9 @@ LEAF_ENTRY Load_X3 LEAF_END Load_X3 LEAF_ENTRY Store_X0 - EMIT_BREAKPOINT // TODO + str r0, [r7], #4 + ldr r5, [r6], #4 + EPILOG_BRANCH_REG r5 LEAF_END Store_X0 LEAF_ENTRY Store_X0_X1 @@ -1027,7 +1031,15 @@ LEAF_ENTRY Store_Stack_Ref LEAF_END Store_Stack_Ref NESTED_ENTRY CallJittedMethodRetVoid, _TEXT, NoHandler - EMIT_BREAKPOINT // TODO + PROLOG_PUSH "{lr}" + mov r8, r3 + sub sp, sp, r3 + mov r6, r0 + mov r7, r1 + ldr r5, [r6], #4 + blx r5 + add sp, sp, r8 + EPILOG_POP "{pc}" NESTED_END CallJittedMethodRetVoid, _TEXT NESTED_ENTRY CallJittedMethodRetI8, _TEXT, NoHandler @@ -1043,11 +1055,51 @@ NESTED_ENTRY CallJittedMethodRetBuff, _TEXT, NoHandler NESTED_END CallJittedMethodRetBuff, _TEXT NESTED_ENTRY InterpreterStub, _TEXT, NoHandler - EMIT_BREAKPOINT // TODO + PROLOG_WITH_TRANSITION_BLOCK + + // IR bytecode address + mov r9, METHODDESC_REGISTER // InterpMethod + + PROLOG_PUSH "{r0-r3}" + + INLINE_GET_TLS_VAR C_FUNC(t_CurrentThreadInfo) + mov r10, r0 + + ldr r10, [r10, #OFFSETOF__ThreadLocalInfo__m_pThread] + + ldr r5, [r10, #OFFSETOF__Thread__m_pInterpThreadContext] + cbnz r5, LOCAL_LABEL(HaveInterpThreadContext) + + mov r0, r10 + bl C_FUNC(_ZN6Thread22GetInterpThreadContextEv) // Thread::GetInterpThreadContext + mov r5, r0 + +LOCAL_LABEL(HaveInterpThreadContext): + + # 
RESTORE_ARGUMENT_REGISTERS sp, __PWTB_ArgumentRegisters + EPILOG_POP "{r0-r3}" + # RESTORE_FLOAT_ARGUMENT_REGISTERS sp, __PWTB_FloatArgumentRegisters + + ldr r7, [r9] // InterpMethod* + ldr r7, [r7, #OFFSETOF__InterpMethod__pCallStub] + add r6, r7, #OFFSETOF__CallStubHeader__Routines + ldr r7, [r5, #OFFSETOF__InterpThreadContext__pStackPointer] // HERE + ldr r5, [r6] // InterpThreadContext + #EMIT_BREAKPOINT // TODO + blx r5 + + EPILOG_WITH_TRANSITION_BLOCK_RETURN + NESTED_END InterpreterStub, _TEXT NESTED_ENTRY InterpreterStubRetVoid, _TEXT, NoHandler - EMIT_BREAKPOINT // TODO + PROLOG_PUSH {lr} + add r0, sp, #__PWTB_TransitionBlock + 4 + mov r1, r9 + mov r2, 0 + bl C_FUNC(ExecuteInterpretedMethod) + EPILOG_POP {pc} + NESTED_END InterpreterStubRetVoid, _TEXT NESTED_ENTRY InterpreterStubRetI8, _TEXT, NoHandler diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp index d6cc958f06341e..a9ebd14dd10cea 100644 --- a/src/coreclr/vm/interpexec.cpp +++ b/src/coreclr/vm/interpexec.cpp @@ -720,6 +720,7 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr } CONTRACTL_END; + // fprintf(stderr, "[CLAMP] %s %d %p %p %p %p\n", __PRETTY_FUNCTION__, __LINE__, pInterpreterFrame, pFrame, pThreadContext, pExceptionClauseArgs); #if defined(HOST_AMD64) && defined(HOST_WINDOWS) pInterpreterFrame->SetInterpExecMethodSSP((TADDR)_rdsspq()); #endif // HOST_AMD64 && HOST_WINDOWS @@ -776,6 +777,7 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr // and we can save the IP to the frame at the suspension time. // It will be useful for testing e.g. the debug info at various locations in the current method, so let's // keep it for such purposes until we don't need it anymore. 
+ //fprintf(stderr, "[CLAMP] %s %d %p 0x%x\n", __PRETTY_FUNCTION__, __LINE__, ip, *ip); pFrame->ip = (int32_t*)ip; switch (*ip) From 55560c4944dacf356c1b6eca9b74773acd964780 Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Thu, 18 Sep 2025 20:44:37 +0900 Subject: [PATCH 05/33] [INTRP] HelloWorld Done --- src/coreclr/vm/arm/asmhelpers.S | 8 ++++---- src/coreclr/vm/interpexec.cpp | 2 -- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index 9e152428416d3e..48c4cd8b56635d 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -1031,15 +1031,15 @@ LEAF_ENTRY Store_Stack_Ref LEAF_END Store_Stack_Ref NESTED_ENTRY CallJittedMethodRetVoid, _TEXT, NoHandler - PROLOG_PUSH "{lr}" - mov r8, r3 + PROLOG_PUSH "{r5-r8,lr}" + mov r8, sp sub sp, sp, r3 mov r6, r0 mov r7, r1 ldr r5, [r6], #4 blx r5 - add sp, sp, r8 - EPILOG_POP "{pc}" + mov sp, r8 + EPILOG_POP "{r5-r8,pc}" NESTED_END CallJittedMethodRetVoid, _TEXT NESTED_ENTRY CallJittedMethodRetI8, _TEXT, NoHandler diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp index a9ebd14dd10cea..d6cc958f06341e 100644 --- a/src/coreclr/vm/interpexec.cpp +++ b/src/coreclr/vm/interpexec.cpp @@ -720,7 +720,6 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr } CONTRACTL_END; - // fprintf(stderr, "[CLAMP] %s %d %p %p %p %p\n", __PRETTY_FUNCTION__, __LINE__, pInterpreterFrame, pFrame, pThreadContext, pExceptionClauseArgs); #if defined(HOST_AMD64) && defined(HOST_WINDOWS) pInterpreterFrame->SetInterpExecMethodSSP((TADDR)_rdsspq()); #endif // HOST_AMD64 && HOST_WINDOWS @@ -777,7 +776,6 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr // and we can save the IP to the frame at the suspension time. // It will be useful for testing e.g. 
the debug info at various locations in the current method, so let's // keep it for such purposes until we don't need it anymore. - //fprintf(stderr, "[CLAMP] %s %d %p 0x%x\n", __PRETTY_FUNCTION__, __LINE__, ip, *ip); pFrame->ip = (int32_t*)ip; switch (*ip) From 7df0312a59a7bcc49761eaea308985ac593dfbda Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Mon, 22 Sep 2025 14:11:16 +0900 Subject: [PATCH 06/33] [INTRP] Update X to R for arm register --- src/coreclr/vm/arm/asmhelpers.S | 112 ++++++++++++------------- src/coreclr/vm/callstubgenerator.cpp | 118 +++++++++++++-------------- 2 files changed, 115 insertions(+), 115 deletions(-) diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index 48c4cd8b56635d..0eb8e933915d6c 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -898,121 +898,121 @@ LEAF_ENTRY ThisPtrRetBufPrecodeWorker, _TEXT EPILOG_BRANCH_REG r12 LEAF_END ThisPtrRetBufPrecodeWorker, _TEXT -LEAF_ENTRY Load_X0 +LEAF_ENTRY Load_R0 ldr r0, [r7], #4 ldr r5, [r6], #4 EPILOG_BRANCH_REG r5 -LEAF_END Load_X0 +LEAF_END Load_R0 -LEAF_ENTRY Load_X0_X1 +LEAF_ENTRY Load_R0_R1 EMIT_BREAKPOINT // TODO -LEAF_END Load_X0_X1 +LEAF_END Load_R0_R1 -LEAF_ENTRY Load_X0_X1_X2 +LEAF_ENTRY Load_R0_R1_R2 EMIT_BREAKPOINT // TODO -LEAF_END Load_X0_X1_X2 +LEAF_END Load_R0_R1_R2 -LEAF_ENTRY Load_X0_X1_X2_X3 +LEAF_ENTRY Load_R0_R1_R2_R3 EMIT_BREAKPOINT // TODO -LEAF_END Load_X0_X1_X2_X3 +LEAF_END Load_R0_R1_R2_R3 -LEAF_ENTRY Load_X1 +LEAF_ENTRY Load_R1 EMIT_BREAKPOINT // TODO -LEAF_END Load_X1 +LEAF_END Load_R1 -LEAF_ENTRY Load_X1_X2 +LEAF_ENTRY Load_R1_R2 EMIT_BREAKPOINT // TODO -LEAF_END Load_X1_X2 +LEAF_END Load_R1_R2 -LEAF_ENTRY Load_X1_X2_X3 +LEAF_ENTRY Load_R1_R2_R3 EMIT_BREAKPOINT // TODO -LEAF_END Load_X1_X2_X3 +LEAF_END Load_R1_R2_R3 -LEAF_ENTRY Load_X2 +LEAF_ENTRY Load_R2 EMIT_BREAKPOINT // TODO -LEAF_END Load_X2 +LEAF_END Load_R2 -LEAF_ENTRY Load_X2_X3 +LEAF_ENTRY Load_R2_R3 EMIT_BREAKPOINT // TODO -LEAF_END 
Load_X2_X3 +LEAF_END Load_R2_R3 -LEAF_ENTRY Load_X3 +LEAF_ENTRY Load_R3 EMIT_BREAKPOINT // TODO -LEAF_END Load_X3 +LEAF_END Load_R3 -LEAF_ENTRY Store_X0 +LEAF_ENTRY Store_R0 str r0, [r7], #4 ldr r5, [r6], #4 EPILOG_BRANCH_REG r5 -LEAF_END Store_X0 +LEAF_END Store_R0 -LEAF_ENTRY Store_X0_X1 +LEAF_ENTRY Store_R0_R1 EMIT_BREAKPOINT // TODO -LEAF_END Store_X0_X1 +LEAF_END Store_R0_R1 -LEAF_ENTRY Store_X0_X1_X2 +LEAF_ENTRY Store_R0_R1_R2 EMIT_BREAKPOINT // TODO -LEAF_END Store_X0_X1_X2 +LEAF_END Store_R0_R1_R2 -LEAF_ENTRY Store_X0_X1_X2_X3 +LEAF_ENTRY Store_R0_R1_R2_R3 EMIT_BREAKPOINT // TODO -LEAF_END Store_X0_X1_X2_X3 +LEAF_END Store_R0_R1_R2_R3 -LEAF_ENTRY Store_X1 +LEAF_ENTRY Store_R1 EMIT_BREAKPOINT // TODO -LEAF_END Store_X1 +LEAF_END Store_R1 -LEAF_ENTRY Store_X1_X2 +LEAF_ENTRY Store_R1_R2 EMIT_BREAKPOINT // TODO -LEAF_END Store_X1_X2 +LEAF_END Store_R1_R2 -LEAF_ENTRY Store_X1_X2_X3 +LEAF_ENTRY Store_R1_R2_R3 EMIT_BREAKPOINT // TODO -LEAF_END Store_X1_X2_X3 +LEAF_END Store_R1_R2_R3 -LEAF_ENTRY Store_X2 +LEAF_ENTRY Store_R2 EMIT_BREAKPOINT // TODO -LEAF_END Store_X2 +LEAF_END Store_R2 -LEAF_ENTRY Store_X2_X3 +LEAF_ENTRY Store_R2_R3 EMIT_BREAKPOINT // TODO -LEAF_END Store_X2_X3 +LEAF_END Store_R2_R3 -LEAF_ENTRY Store_X3 +LEAF_ENTRY Store_R3 EMIT_BREAKPOINT // TODO -LEAF_END Store_X3 +LEAF_END Store_R3 -LEAF_ENTRY Load_Ref_X0 +LEAF_ENTRY Load_Ref_R0 EMIT_BREAKPOINT // TODO -LEAF_END Load_Ref_X0 +LEAF_END Load_Ref_R0 -LEAF_ENTRY Load_Ref_X1 +LEAF_ENTRY Load_Ref_R1 EMIT_BREAKPOINT // TODO -LEAF_END Load_Ref_X1 +LEAF_END Load_Ref_R1 -LEAF_ENTRY Load_Ref_X2 +LEAF_ENTRY Load_Ref_R2 EMIT_BREAKPOINT // TODO -LEAF_END Load_Ref_X2 +LEAF_END Load_Ref_R2 -LEAF_ENTRY Load_Ref_X3 +LEAF_ENTRY Load_Ref_R3 EMIT_BREAKPOINT // TODO -LEAF_END Load_Ref_X3 +LEAF_END Load_Ref_R3 -LEAF_ENTRY Store_Ref_X0 +LEAF_ENTRY Store_Ref_R0 EMIT_BREAKPOINT // TODO -LEAF_END Store_Ref_X0 +LEAF_END Store_Ref_R0 -LEAF_ENTRY Store_Ref_X1 +LEAF_ENTRY Store_Ref_R1 EMIT_BREAKPOINT // TODO -LEAF_END 
Store_Ref_X1 +LEAF_END Store_Ref_R1 -LEAF_ENTRY Store_Ref_X2 +LEAF_ENTRY Store_Ref_R2 EMIT_BREAKPOINT // TODO -LEAF_END Store_Ref_X2 +LEAF_END Store_Ref_R2 -LEAF_ENTRY Store_Ref_X3 +LEAF_ENTRY Store_Ref_R3 EMIT_BREAKPOINT // TODO -LEAF_END Store_Ref_X3 +LEAF_END Store_Ref_R3 LEAF_ENTRY Load_Stack EMIT_BREAKPOINT // TODO diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp index e7499b47341e84..a73530dba8138d 100644 --- a/src/coreclr/vm/callstubgenerator.cpp +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -981,92 +981,92 @@ PCODE FPRegsRoutines[] = #ifdef TARGET_ARM -extern "C" void Load_X0(); -extern "C" void Load_X0_X1(); -extern "C" void Load_X0_X1_X2(); -extern "C" void Load_X0_X1_X2_X3(); -extern "C" void Load_X1(); -extern "C" void Load_X1_X2(); -extern "C" void Load_X1_X2_X3(); -extern "C" void Load_X2(); -extern "C" void Load_X2_X3(); -extern "C" void Load_X3(); - -extern "C" void Store_X0(); -extern "C" void Store_X0_X1(); -extern "C" void Store_X0_X1_X2(); -extern "C" void Store_X0_X1_X2_X3(); -extern "C" void Store_X1(); -extern "C" void Store_X1_X2(); -extern "C" void Store_X1_X2_X3(); -extern "C" void Store_X2(); -extern "C" void Store_X2_X3(); -extern "C" void Store_X3(); - -extern "C" void Load_Ref_X0(); -extern "C" void Load_Ref_X1(); -extern "C" void Load_Ref_X2(); -extern "C" void Load_Ref_X3(); - -extern "C" void Store_Ref_X0(); -extern "C" void Store_Ref_X1(); -extern "C" void Store_Ref_X2(); -extern "C" void Store_Ref_X3(); +extern "C" void Load_R0(); +extern "C" void Load_R0_R1(); +extern "C" void Load_R0_R1_R2(); +extern "C" void Load_R0_R1_R2_R3(); +extern "C" void Load_R1(); +extern "C" void Load_R1_R2(); +extern "C" void Load_R1_R2_R3(); +extern "C" void Load_R2(); +extern "C" void Load_R2_R3(); +extern "C" void Load_R3(); + +extern "C" void Store_R0(); +extern "C" void Store_R0_R1(); +extern "C" void Store_R0_R1_R2(); +extern "C" void Store_R0_R1_R2_R3(); +extern "C" void Store_R1(); +extern "C" void 
Store_R1_R2(); +extern "C" void Store_R1_R2_R3(); +extern "C" void Store_R2(); +extern "C" void Store_R2_R3(); +extern "C" void Store_R3(); + +extern "C" void Load_Ref_R0(); +extern "C" void Load_Ref_R1(); +extern "C" void Load_Ref_R2(); +extern "C" void Load_Ref_R3(); + +extern "C" void Store_Ref_R0(); +extern "C" void Store_Ref_R1(); +extern "C" void Store_Ref_R2(); +extern "C" void Store_Ref_R3(); PCODE GPRegsRoutines[] = { - (PCODE)Load_X0, // 00 - (PCODE)Load_X0_X1, // 01 - (PCODE)Load_X0_X1_X2, // 02 - (PCODE)Load_X0_X1_X2_X3, // 03 + (PCODE)Load_R0, // 00 + (PCODE)Load_R0_R1, // 01 + (PCODE)Load_R0_R1_R2, // 02 + (PCODE)Load_R0_R1_R2_R3, // 03 (PCODE)0, // 04 - (PCODE)Load_X1, // 05 - (PCODE)Load_X1_X2, // 06 - (PCODE)Load_X1_X2_X3, // 07 + (PCODE)Load_R1, // 05 + (PCODE)Load_R1_R2, // 06 + (PCODE)Load_R1_R2_R3, // 07 (PCODE)0, // 08 (PCODE)0, // 09 - (PCODE)Load_X2, // 10 - (PCODE)Load_X2_X3, // 11 + (PCODE)Load_R2, // 10 + (PCODE)Load_R2_R3, // 11 (PCODE)0, // 12 (PCODE)0, // 13 (PCODE)0, // 14 - (PCODE)Load_X3, // 15 + (PCODE)Load_R3, // 15 }; PCODE GPRegsStoreRoutines[] = { - (PCODE)Store_X0, // 00 - (PCODE)Store_X0_X1, // 01 - (PCODE)Store_X0_X1_X2, // 02 - (PCODE)Store_X0_X1_X2_X3, // 03 + (PCODE)Store_R0, // 00 + (PCODE)Store_R0_R1, // 01 + (PCODE)Store_R0_R1_R2, // 02 + (PCODE)Store_R0_R1_R2_R3, // 03 (PCODE)0, // 04 - (PCODE)Store_X1, // 05 - (PCODE)Store_X1_X2, // 06 - (PCODE)Store_X1_X2_X3, // 07 + (PCODE)Store_R1, // 05 + (PCODE)Store_R1_R2, // 06 + (PCODE)Store_R1_R2_R3, // 07 (PCODE)0, // 08 (PCODE)0, // 09 - (PCODE)Store_X2, // 10 - (PCODE)Store_X2_X3, // 11 + (PCODE)Store_R2, // 10 + (PCODE)Store_R2_R3, // 11 (PCODE)0, // 12 (PCODE)0, // 13 (PCODE)0, // 14 - (PCODE)Store_X3, // 15 + (PCODE)Store_R3, // 15 }; PCODE GPRegsRefRoutines[] = { - (PCODE)Load_Ref_X0, // 0 - (PCODE)Load_Ref_X1, // 1 - (PCODE)Load_Ref_X2, // 2 - (PCODE)Load_Ref_X3, // 3 + (PCODE)Load_Ref_R0, // 0 + (PCODE)Load_Ref_R1, // 1 + (PCODE)Load_Ref_R2, // 2 + 
(PCODE)Load_Ref_R3, // 3 }; PCODE GPRegsRefStoreRoutines[] = { - (PCODE)Store_Ref_X0, // 0 - (PCODE)Store_Ref_X1, // 1 - (PCODE)Store_Ref_X2, // 2 - (PCODE)Store_Ref_X3, // 3 + (PCODE)Store_Ref_R0, // 0 + (PCODE)Store_Ref_R1, // 1 + (PCODE)Store_Ref_R2, // 2 + (PCODE)Store_Ref_R3, // 3 }; #endif // TARGET_ARM From 5b3ef8e1def71bd60ba6fa44cc0771e0c35d16ed Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Tue, 23 Sep 2025 17:25:40 +0900 Subject: [PATCH 07/33] [INTRP] Call methods with 2 args and return value --- .../interpreter/inc/interpretershared.h | 4 ++++ src/coreclr/vm/arm/asmhelpers.S | 24 ++++++++++++++++--- src/coreclr/vm/interpexec.cpp | 2 +- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/src/coreclr/interpreter/inc/interpretershared.h b/src/coreclr/interpreter/inc/interpretershared.h index d9e79f3bffea57..5e6758cf8c9392 100644 --- a/src/coreclr/interpreter/inc/interpretershared.h +++ b/src/coreclr/interpreter/inc/interpretershared.h @@ -14,7 +14,11 @@ #define INTERP_API __attribute__ ((visibility ("default"))) #endif // _MSC_VER +#ifdef TARGET_64BIT #define INTERP_STACK_SLOT_SIZE 8 // Alignment of each var offset on the interpreter stack +#else // !TARGET_64BIT +#define INTERP_STACK_SLOT_SIZE 4 // Alignment of each var offset on the interpreter stack +#endif #define INTERP_STACK_ALIGNMENT 16 // Alignment of interpreter stack at the start of a frame struct InterpHelperData { diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index 0eb8e933915d6c..371cce5b01c135 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -905,7 +905,10 @@ LEAF_ENTRY Load_R0 LEAF_END Load_R0 LEAF_ENTRY Load_R0_R1 - EMIT_BREAKPOINT // TODO + ldr r0, [r7], #4 + ldr r1, [r7], #4 + ldr r5, [r6], #4 + EPILOG_BRANCH_REG r5 LEAF_END Load_R0_R1 LEAF_ENTRY Load_R0_R1_R2 @@ -1033,6 +1036,7 @@ LEAF_END Store_Stack_Ref NESTED_ENTRY CallJittedMethodRetVoid, _TEXT, NoHandler PROLOG_PUSH "{r5-r8,lr}" mov r8, sp + sub 
sp, sp, 12 sub sp, sp, r3 mov r6, r0 mov r7, r1 @@ -1043,7 +1047,19 @@ NESTED_ENTRY CallJittedMethodRetVoid, _TEXT, NoHandler NESTED_END CallJittedMethodRetVoid, _TEXT NESTED_ENTRY CallJittedMethodRetI8, _TEXT, NoHandler - EMIT_BREAKPOINT // TODO + PROLOG_PUSH "{r5-r8,lr}" + mov r8, sp + sub sp, sp, 12 + str r2, [sp] + sub sp, sp, r3 + mov r6, r0 + mov r7, r1 + ldr r5, [r6], #4 + blx r5 + ldr r2, [sp] + str r0, [r2] + mov sp, r8 + EPILOG_POP "{r5-r8,pc}" NESTED_END CallJittedMethodRetI8, _TEXT NESTED_ENTRY CallJittedMethodRetDouble, _TEXT, NoHandler @@ -1094,10 +1110,12 @@ NESTED_END InterpreterStub, _TEXT NESTED_ENTRY InterpreterStubRetVoid, _TEXT, NoHandler PROLOG_PUSH {lr} - add r0, sp, #__PWTB_TransitionBlock + 4 + sub sp, sp, 12 + add r0, sp, #__PWTB_TransitionBlock + 16 mov r1, r9 mov r2, 0 bl C_FUNC(ExecuteInterpretedMethod) + add sp, sp, 12 EPILOG_POP {pc} NESTED_END InterpreterStubRetVoid, _TEXT diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp index d6cc958f06341e..4c45cb933bc275 100644 --- a/src/coreclr/vm/interpexec.cpp +++ b/src/coreclr/vm/interpexec.cpp @@ -344,7 +344,7 @@ typedef void (*HELPER_FTN_V_PP)(void*, void*); InterpThreadContext::InterpThreadContext() { // FIXME VirtualAlloc/mmap with INTERP_STACK_ALIGNMENT alignment - pStackStart = pStackPointer = (int8_t*)malloc(INTERP_STACK_SIZE); + pStackStart = pStackPointer = (int8_t*)aligned_alloc(INTERP_STACK_ALIGNMENT, INTERP_STACK_SIZE); pStackEnd = pStackStart + INTERP_STACK_SIZE; } From 02320b03657514e0a3c708d1c1bfc3e0de8b77e3 Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Tue, 23 Sep 2025 18:31:06 +0900 Subject: [PATCH 08/33] [INTRP] Implement Args Load and Store Assemblies --- src/coreclr/vm/arm/asmhelpers.S | 91 ++++++++++++++------------------- 1 file changed, 38 insertions(+), 53 deletions(-) diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index 371cce5b01c135..2456f39549ce14 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ 
b/src/coreclr/vm/arm/asmhelpers.S @@ -906,43 +906,34 @@ LEAF_END Load_R0 LEAF_ENTRY Load_R0_R1 ldr r0, [r7], #4 +ALTERNATE_ENTRY Load_R1 ldr r1, [r7], #4 ldr r5, [r6], #4 EPILOG_BRANCH_REG r5 LEAF_END Load_R0_R1 LEAF_ENTRY Load_R0_R1_R2 - EMIT_BREAKPOINT // TODO + ldr r0, [r7], #4 +ALTERNATE_ENTRY Load_R1_R2 + ldr r1, [r7], #4 +ALTERNATE_ENTRY Load_R2 + ldr r2, [r7], #4 + ldr r5, [r6], #4 + EPILOG_BRANCH_REG r5 LEAF_END Load_R0_R1_R2 LEAF_ENTRY Load_R0_R1_R2_R3 - EMIT_BREAKPOINT // TODO + ldr r0, [r7], #4 +ALTERNATE_ENTRY Load_R1_R2_R3 + ldr r1, [r7], #4 +ALTERNATE_ENTRY Load_R2_R3 + ldr r2, [r7], #4 +ALTERNATE_ENTRY Load_R3 + ldr r3, [r7], #4 + ldr r5, [r6], #4 + EPILOG_BRANCH_REG r5 LEAF_END Load_R0_R1_R2_R3 -LEAF_ENTRY Load_R1 - EMIT_BREAKPOINT // TODO -LEAF_END Load_R1 - -LEAF_ENTRY Load_R1_R2 - EMIT_BREAKPOINT // TODO -LEAF_END Load_R1_R2 - -LEAF_ENTRY Load_R1_R2_R3 - EMIT_BREAKPOINT // TODO -LEAF_END Load_R1_R2_R3 - -LEAF_ENTRY Load_R2 - EMIT_BREAKPOINT // TODO -LEAF_END Load_R2 - -LEAF_ENTRY Load_R2_R3 - EMIT_BREAKPOINT // TODO -LEAF_END Load_R2_R3 - -LEAF_ENTRY Load_R3 - EMIT_BREAKPOINT // TODO -LEAF_END Load_R3 - LEAF_ENTRY Store_R0 str r0, [r7], #4 ldr r5, [r6], #4 @@ -950,41 +941,35 @@ LEAF_ENTRY Store_R0 LEAF_END Store_R0 LEAF_ENTRY Store_R0_R1 - EMIT_BREAKPOINT // TODO + str r0, [r7], #4 +ALTERNATE_ENTRY Store_R1 + str r1, [r7], #4 + ldr r5, [r6], #4 + EPILOG_BRANCH_REG r5 LEAF_END Store_R0_R1 LEAF_ENTRY Store_R0_R1_R2 - EMIT_BREAKPOINT // TODO + str r0, [r7], #4 +ALTERNATE_ENTRY Store_R1_R2 + str r1, [r7], #4 +ALTERNATE_ENTRY Store_R2 + str r2, [r7], #4 + ldr r5, [r6], #4 + EPILOG_BRANCH_REG r5 LEAF_END Store_R0_R1_R2 LEAF_ENTRY Store_R0_R1_R2_R3 - EMIT_BREAKPOINT // TODO + str r0, [r7], #4 +ALTERNATE_ENTRY Store_R1_R2_R3 + str r1, [r7], #4 +ALTERNATE_ENTRY Store_R2_R3 + str r2, [r7], #4 +ALTERNATE_ENTRY Store_R3 + str r3, [r7], #4 + ldr r5, [r6], #4 + EPILOG_BRANCH_REG r5 LEAF_END Store_R0_R1_R2_R3 -LEAF_ENTRY Store_R1 - EMIT_BREAKPOINT // TODO 
-LEAF_END Store_R1 - -LEAF_ENTRY Store_R1_R2 - EMIT_BREAKPOINT // TODO -LEAF_END Store_R1_R2 - -LEAF_ENTRY Store_R1_R2_R3 - EMIT_BREAKPOINT // TODO -LEAF_END Store_R1_R2_R3 - -LEAF_ENTRY Store_R2 - EMIT_BREAKPOINT // TODO -LEAF_END Store_R2 - -LEAF_ENTRY Store_R2_R3 - EMIT_BREAKPOINT // TODO -LEAF_END Store_R2_R3 - -LEAF_ENTRY Store_R3 - EMIT_BREAKPOINT // TODO -LEAF_END Store_R3 - LEAF_ENTRY Load_Ref_R0 EMIT_BREAKPOINT // TODO LEAF_END Load_Ref_R0 From cb816e69c268c5c887cfb8c3444a4797d8f9848c Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Tue, 23 Sep 2025 20:15:54 +0900 Subject: [PATCH 09/33] [INTRP] Add Load_Ref and Store_Ref --- src/coreclr/vm/arm/asmhelpers.S | 83 +++++++++++++++++++++++---------- 1 file changed, 58 insertions(+), 25 deletions(-) diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index 2456f39549ce14..608e531222616f 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -970,37 +970,71 @@ ALTERNATE_ENTRY Store_R3 EPILOG_BRANCH_REG r5 LEAF_END Store_R0_R1_R2_R3 -LEAF_ENTRY Load_Ref_R0 - EMIT_BREAKPOINT // TODO -LEAF_END Load_Ref_R0 +.macro Load_Ref argReg -LEAF_ENTRY Load_Ref_R1 - EMIT_BREAKPOINT // TODO -LEAF_END Load_Ref_R1 +LEAF_ENTRY Load_Ref_\argReg + mov \argReg, r7 + ldr r5, [r6], #4 + add r7, r7, r5 + ldr r5, [r6], #4 + EPILOG_BRANCH_REG r5 +LEAF_END Load_Ref_\argReg -LEAF_ENTRY Load_Ref_R2 - EMIT_BREAKPOINT // TODO -LEAF_END Load_Ref_R2 +.endm -LEAF_ENTRY Load_Ref_R3 - EMIT_BREAKPOINT // TODO -LEAF_END Load_Ref_R3 +Load_Ref R0 +Load_Ref R1 +Load_Ref R2 +Load_Ref R3 + +.macro Copy_Ref argReg + cmp r5, #8 + blt LOCAL_LABEL(CopyBy4\argReg) +LOCAL_LABEL(RefCopyLoop8\argReg): + ldr r9, [\argReg], #4 + ldr r10, [\argReg], #4 + str r9, [r7], #4 + str r10, [r7], #4 + subs r5, r5, #8 + bgt LOCAL_LABEL(RefCopyLoop8\argReg) + beq LOCAL_LABEL(RefCopyDone\argReg) + add r5, r5, #8 +LOCAL_LABEL(CopyBy4\argReg): + cmp r5, #4 + blt LOCAL_LABEL(RefCopyLoop1\argReg) 
+LOCAL_LABEL(RefCopyLoop4\argReg): + ldr r9, [\argReg], #4 + str r9, [r7], #4 + subs r5, r5, #4 + bgt LOCAL_LABEL(RefCopyLoop4\argReg) + beq LOCAL_LABEL(RefCopyDone\argReg) + add r5, r5, #4 +LOCAL_LABEL(RefCopyLoop1\argReg): + ldrb r9, [\argReg], #1 + strb r9, [r7], #1 + subs r5, r5, #1 + bne LOCAL_LABEL(RefCopyLoop1\argReg) +LOCAL_LABEL(RefCopyDone\argReg): + // Align r7 to the stack slot size + add r7, r7, 7 + and r7, r7, 0xfffffff8 +.endm -LEAF_ENTRY Store_Ref_R0 - EMIT_BREAKPOINT // TODO -LEAF_END Store_Ref_R0 +.macro Store_Ref argReg -LEAF_ENTRY Store_Ref_R1 - EMIT_BREAKPOINT // TODO -LEAF_END Store_Ref_R1 +LEAF_ENTRY Store_Ref_\argReg + ldr r5, [r6], #4 // size of the value type + // Copy_Ref \argReg + ldr r5, [r6], #4 + EPILOG_BRANCH_REG r5 +LEAF_END Store_Ref_\argReg -LEAF_ENTRY Store_Ref_R2 - EMIT_BREAKPOINT // TODO -LEAF_END Store_Ref_R2 +.endm -LEAF_ENTRY Store_Ref_R3 - EMIT_BREAKPOINT // TODO -LEAF_END Store_Ref_R3 +Store_Ref R0 +Store_Ref R1 +Store_Ref R2 +Store_Ref R3 LEAF_ENTRY Load_Stack EMIT_BREAKPOINT // TODO @@ -1086,7 +1120,6 @@ LOCAL_LABEL(HaveInterpThreadContext): add r6, r7, #OFFSETOF__CallStubHeader__Routines ldr r7, [r5, #OFFSETOF__InterpThreadContext__pStackPointer] // HERE ldr r5, [r6] // InterpThreadContext - #EMIT_BREAKPOINT // TODO blx r5 EPILOG_WITH_TRANSITION_BLOCK_RETURN From 0cc0032c1c8c227614883bd55fed950014adc3d2 Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Tue, 23 Sep 2025 20:21:47 +0900 Subject: [PATCH 10/33] [INTRP] Return I8 --- src/coreclr/vm/arm/asmhelpers.S | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index 608e531222616f..700b8b1043faed 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -1139,7 +1139,15 @@ NESTED_ENTRY InterpreterStubRetVoid, _TEXT, NoHandler NESTED_END InterpreterStubRetVoid, _TEXT NESTED_ENTRY InterpreterStubRetI8, _TEXT, NoHandler - EMIT_BREAKPOINT // TODO + 
PROLOG_PUSH {lr} + sub sp, sp, 12 + add r0, sp, #__PWTB_TransitionBlock + 16 + mov r1, r9 + mov r2, 0 + bl C_FUNC(ExecuteInterpretedMethod) + ldr r0, [r0] + add sp, sp, 12 + EPILOG_POP {pc} NESTED_END InterpreterStubRetI8, _TEXT NESTED_ENTRY InterpreterStubRetDouble, _TEXT, NoHandler From a09723da039ee3e37edd6fa5a15a8f028ac5046a Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Tue, 23 Sep 2025 20:35:54 +0900 Subject: [PATCH 11/33] [INTRP] Update I8 to I4 for ARM32 --- src/coreclr/vm/arm/asmhelpers.S | 12 ++++++++++-- src/coreclr/vm/callstubgenerator.cpp | 22 ++++++++++++++++++++-- src/coreclr/vm/callstubgenerator.h | 1 + 3 files changed, 31 insertions(+), 4 deletions(-) diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index 700b8b1043faed..2234473e533696 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -1065,7 +1065,7 @@ NESTED_ENTRY CallJittedMethodRetVoid, _TEXT, NoHandler EPILOG_POP "{r5-r8,pc}" NESTED_END CallJittedMethodRetVoid, _TEXT -NESTED_ENTRY CallJittedMethodRetI8, _TEXT, NoHandler +NESTED_ENTRY CallJittedMethodRetI4, _TEXT, NoHandler PROLOG_PUSH "{r5-r8,lr}" mov r8, sp sub sp, sp, 12 @@ -1079,6 +1079,10 @@ NESTED_ENTRY CallJittedMethodRetI8, _TEXT, NoHandler str r0, [r2] mov sp, r8 EPILOG_POP "{r5-r8,pc}" +NESTED_END CallJittedMethodRetI4, _TEXT + +NESTED_ENTRY CallJittedMethodRetI8, _TEXT, NoHandler + EMIT_BREAKPOINT // TODO NESTED_END CallJittedMethodRetI8, _TEXT NESTED_ENTRY CallJittedMethodRetDouble, _TEXT, NoHandler @@ -1138,7 +1142,7 @@ NESTED_ENTRY InterpreterStubRetVoid, _TEXT, NoHandler NESTED_END InterpreterStubRetVoid, _TEXT -NESTED_ENTRY InterpreterStubRetI8, _TEXT, NoHandler +NESTED_ENTRY InterpreterStubRetI4, _TEXT, NoHandler PROLOG_PUSH {lr} sub sp, sp, 12 add r0, sp, #__PWTB_TransitionBlock + 16 @@ -1148,6 +1152,10 @@ NESTED_ENTRY InterpreterStubRetI8, _TEXT, NoHandler ldr r0, [r0] add sp, sp, 12 EPILOG_POP {pc} +NESTED_END InterpreterStubRetI4, _TEXT + +NESTED_ENTRY 
InterpreterStubRetI8, _TEXT, NoHandler + EMIT_BREAKPOINT // TODO NESTED_END InterpreterStubRetI8, _TEXT NESTED_ENTRY InterpreterStubRetDouble, _TEXT, NoHandler diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp index a73530dba8138d..98e256f069afe7 100644 --- a/src/coreclr/vm/callstubgenerator.cpp +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -1153,9 +1153,15 @@ PCODE CallStubGenerator::GetFPRegRangeRoutine(int x1, int x2) extern "C" void CallJittedMethodRetVoid(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); extern "C" void CallJittedMethodRetDouble(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); extern "C" void CallJittedMethodRetI8(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +#ifndef TARGET_64BIT +extern "C" void CallJittedMethodRetI4(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +#endif // !TARGET_64BIT extern "C" void InterpreterStubRetVoid(); extern "C" void InterpreterStubRetDouble(); extern "C" void InterpreterStubRetI8(); +#ifndef TARGET_64BIT +extern "C" void InterpreterStubRetI4(); +#endif // !TARGET_64BIT #if defined(TARGET_WINDOWS) && defined(TARGET_AMD64) extern "C" void CallJittedMethodRetBuffRCX(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); @@ -1219,6 +1225,10 @@ CallStubHeader::InvokeFunctionPtr CallStubGenerator::GetInvokeFunctionPtr(CallSt INVOKE_FUNCTION_PTR(CallJittedMethodRetDouble); case ReturnTypeI8: INVOKE_FUNCTION_PTR(CallJittedMethodRetI8); +#ifndef TARGET_64BIT + case ReturnTypeI4: + INVOKE_FUNCTION_PTR(CallJittedMethodRetI4); +#endif // !TARGET_64BIT #if defined(TARGET_WINDOWS) && defined(TARGET_AMD64) case ReturnTypeBuffArg1: INVOKE_FUNCTION_PTR(CallJittedMethodRetBuffRCX); @@ -1284,6 +1294,10 @@ PCODE CallStubGenerator::GetInterpreterReturnTypeHandler(CallStubGenerator::Retu RETURN_TYPE_HANDLER(InterpreterStubRetDouble); case ReturnTypeI8: RETURN_TYPE_HANDLER(InterpreterStubRetI8); +#ifndef TARGET_64BIT + 
case ReturnTypeI4: + RETURN_TYPE_HANDLER(InterpreterStubRetI4); +#endif // !TARGET_64BIT #if defined(TARGET_WINDOWS) && defined(TARGET_AMD64) case ReturnTypeBuffArg1: RETURN_TYPE_HANDLER(InterpreterStubRetBuffRCX); @@ -1852,8 +1866,6 @@ CallStubGenerator::ReturnType CallStubGenerator::GetReturnType(ArgIterator *pArg case ELEMENT_TYPE_U2: case ELEMENT_TYPE_I4: case ELEMENT_TYPE_U4: - case ELEMENT_TYPE_I8: - case ELEMENT_TYPE_U8: case ELEMENT_TYPE_I: case ELEMENT_TYPE_U: case ELEMENT_TYPE_CLASS: @@ -1865,6 +1877,12 @@ CallStubGenerator::ReturnType CallStubGenerator::GetReturnType(ArgIterator *pArg case ELEMENT_TYPE_ARRAY: case ELEMENT_TYPE_SZARRAY: case ELEMENT_TYPE_FNPTR: +#ifndef TARGET_64BIT + return ReturnTypeI4; + break; +#endif + case ELEMENT_TYPE_I8: + case ELEMENT_TYPE_U8: return ReturnTypeI8; break; case ELEMENT_TYPE_R4: diff --git a/src/coreclr/vm/callstubgenerator.h b/src/coreclr/vm/callstubgenerator.h index 7fb9eb6c5f86cb..9d73aa54c92010 100644 --- a/src/coreclr/vm/callstubgenerator.h +++ b/src/coreclr/vm/callstubgenerator.h @@ -68,6 +68,7 @@ class CallStubGenerator enum ReturnType { ReturnTypeVoid, + ReturnTypeI4, ReturnTypeI8, ReturnTypeDouble, #if defined(TARGET_WINDOWS) && defined(TARGET_AMD64) From 07d975a068c72b7e15bca8438fde8f0a944b917b Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Wed, 24 Sep 2025 16:50:43 +0900 Subject: [PATCH 12/33] [INTRP] Call to Method with LONG args --- src/coreclr/interpreter/compiler.cpp | 17 +++++++++++++++++ src/coreclr/vm/arm/asmhelpers.S | 13 +++++++++++-- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/coreclr/interpreter/compiler.cpp b/src/coreclr/interpreter/compiler.cpp index 851580db7fafb8..2ba06d61d5cfe7 100644 --- a/src/coreclr/interpreter/compiler.cpp +++ b/src/coreclr/interpreter/compiler.cpp @@ -684,6 +684,12 @@ void InterpCompiler::PushStackType(StackType stackType, CORINFO_CLASS_HANDLE cls int size = m_compHnd->getClassSize(clsHnd); PushTypeExplicit(stackType, clsHnd, size); } 
+#ifndef TARGET_64BIT + else if (stackType == StackTypeI8) + { + PushTypeExplicit(stackType, clsHnd, INTERP_STACK_SLOT_SIZE * 2); + } +#endif // !TARGET_64BIT else { // We don't really care about the exact size for non-valuetypes @@ -1771,7 +1777,11 @@ void InterpCompiler::EmitConv(StackInfo *sp, StackType type, InterpOpcode convOp InterpInst *newInst = AddIns(convOp); newInst->SetSVar(sp->var); +#ifndef TARGET_64BIT + int32_t var = CreateVarExplicit(g_interpTypeFromStackType[type], NULL, type == StackTypeI8 ? INTERP_STACK_SLOT_SIZE * 2 : INTERP_STACK_SLOT_SIZE); +#else // TARGET_64BIT int32_t var = CreateVarExplicit(g_interpTypeFromStackType[type], NULL, INTERP_STACK_SLOT_SIZE); +#endif // !TARGET_64BIT new (sp) StackInfo(type, NULL, var); newInst->SetDVar(var); @@ -1836,6 +1846,13 @@ int32_t InterpCompiler::GetInterpTypeStackSize(CORINFO_CLASS_HANDLE clsHnd, Inte if (align < INTERP_STACK_SLOT_SIZE) align = INTERP_STACK_SLOT_SIZE; } +#ifndef TARGET_64BIT + else if (interpType == InterpTypeI8) + { + size = INTERP_STACK_SLOT_SIZE * 2; // not really + align = INTERP_STACK_SLOT_SIZE * 2; + } +#endif // !TARGET_64BIT else { size = INTERP_STACK_SLOT_SIZE; // not really diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index 2234473e533696..f853a0a284ba8c 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -1139,7 +1139,6 @@ NESTED_ENTRY InterpreterStubRetVoid, _TEXT, NoHandler bl C_FUNC(ExecuteInterpretedMethod) add sp, sp, 12 EPILOG_POP {pc} - NESTED_END InterpreterStubRetVoid, _TEXT NESTED_ENTRY InterpreterStubRetI4, _TEXT, NoHandler @@ -1155,7 +1154,17 @@ NESTED_ENTRY InterpreterStubRetI4, _TEXT, NoHandler NESTED_END InterpreterStubRetI4, _TEXT NESTED_ENTRY InterpreterStubRetI8, _TEXT, NoHandler - EMIT_BREAKPOINT // TODO + PROLOG_PUSH {lr} + sub sp, sp, 12 + add r0, sp, #__PWTB_TransitionBlock + 16 + mov r1, r9 + mov r2, 0 + bl C_FUNC(ExecuteInterpretedMethod) + ldr r2, [r0] + ldr r1, [r0, 4] + mov r0, r2 
+ add sp, sp, 12 + EPILOG_POP {pc} NESTED_END InterpreterStubRetI8, _TEXT NESTED_ENTRY InterpreterStubRetDouble, _TEXT, NoHandler From 9875d8b3e22a6699d95467204666aba804d3ebda Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Thu, 25 Sep 2025 12:59:12 +0900 Subject: [PATCH 13/33] [INTRP] Initial Support for Float and Double --- src/coreclr/interpreter/compiler.cpp | 6 +++--- src/coreclr/vm/arm/asmhelpers.S | 31 +++++++++++++++++++++++++++- src/coreclr/vm/callstubgenerator.cpp | 10 +++++++++ src/coreclr/vm/callstubgenerator.h | 5 ++++- 4 files changed, 47 insertions(+), 5 deletions(-) diff --git a/src/coreclr/interpreter/compiler.cpp b/src/coreclr/interpreter/compiler.cpp index 2ba06d61d5cfe7..e1565b4d5672b9 100644 --- a/src/coreclr/interpreter/compiler.cpp +++ b/src/coreclr/interpreter/compiler.cpp @@ -685,7 +685,7 @@ void InterpCompiler::PushStackType(StackType stackType, CORINFO_CLASS_HANDLE cls PushTypeExplicit(stackType, clsHnd, size); } #ifndef TARGET_64BIT - else if (stackType == StackTypeI8) + else if (stackType == StackTypeI8 || stackType == StackTypeR8) { PushTypeExplicit(stackType, clsHnd, INTERP_STACK_SLOT_SIZE * 2); } @@ -1778,7 +1778,7 @@ void InterpCompiler::EmitConv(StackInfo *sp, StackType type, InterpOpcode convOp newInst->SetSVar(sp->var); #ifndef TARGET_64BIT - int32_t var = CreateVarExplicit(g_interpTypeFromStackType[type], NULL, type == StackTypeI8 ? INTERP_STACK_SLOT_SIZE * 2 : INTERP_STACK_SLOT_SIZE); + int32_t var = CreateVarExplicit(g_interpTypeFromStackType[type], NULL, (type == StackTypeI8 || type == StackTypeR8) ? 
INTERP_STACK_SLOT_SIZE * 2 : INTERP_STACK_SLOT_SIZE); #else // TARGET_64BIT int32_t var = CreateVarExplicit(g_interpTypeFromStackType[type], NULL, INTERP_STACK_SLOT_SIZE); #endif // !TARGET_64BIT @@ -1847,7 +1847,7 @@ int32_t InterpCompiler::GetInterpTypeStackSize(CORINFO_CLASS_HANDLE clsHnd, Inte align = INTERP_STACK_SLOT_SIZE; } #ifndef TARGET_64BIT - else if (interpType == InterpTypeI8) + else if (interpType == InterpTypeI8 || interpType == InterpTypeR8) { size = INTERP_STACK_SLOT_SIZE * 2; // not really align = INTERP_STACK_SLOT_SIZE * 2; diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index f853a0a284ba8c..acd11486830ccd 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -970,6 +970,7 @@ ALTERNATE_ENTRY Store_R3 EPILOG_BRANCH_REG r5 LEAF_END Store_R0_R1_R2_R3 +// TODO: *ref is not tested yet. .macro Load_Ref argReg LEAF_ENTRY Load_Ref_\argReg @@ -1085,6 +1086,10 @@ NESTED_ENTRY CallJittedMethodRetI8, _TEXT, NoHandler EMIT_BREAKPOINT // TODO NESTED_END CallJittedMethodRetI8, _TEXT +NESTED_ENTRY CallJittedMethodRetFloat, _TEXT, NoHandler + EMIT_BREAKPOINT // TODO +NESTED_END CallJittedMethodRetFloat, _TEXT + NESTED_ENTRY CallJittedMethodRetDouble, _TEXT, NoHandler EMIT_BREAKPOINT // TODO NESTED_END CallJittedMethodRetDouble, _TEXT @@ -1167,8 +1172,32 @@ NESTED_ENTRY InterpreterStubRetI8, _TEXT, NoHandler EPILOG_POP {pc} NESTED_END InterpreterStubRetI8, _TEXT +// This is for arm softfp. It is the same to InterpreterStubRetI4. +NESTED_ENTRY InterpreterStubRetFloat, _TEXT, NoHandler + PROLOG_PUSH {lr} + sub sp, sp, 12 + add r0, sp, #__PWTB_TransitionBlock + 16 + mov r1, r9 + mov r2, 0 + bl C_FUNC(ExecuteInterpretedMethod) + ldr r0, [r0] + add sp, sp, 12 + EPILOG_POP {pc} +NESTED_END InterpreterStubRetDouble, _TEXT + +// This is for arm softfp. It is the same to InterpreterStubRetI8. 
NESTED_ENTRY InterpreterStubRetDouble, _TEXT, NoHandler - EMIT_BREAKPOINT // TODO + PROLOG_PUSH {lr} + sub sp, sp, 12 + add r0, sp, #__PWTB_TransitionBlock + 16 + mov r1, r9 + mov r2, 0 + bl C_FUNC(ExecuteInterpretedMethod) + ldr r2, [r0] + ldr r1, [r0, 4] + mov r0, r2 + add sp, sp, 12 + EPILOG_POP {pc} NESTED_END InterpreterStubRetDouble, _TEXT NESTED_ENTRY InterpreterStubRetBuff, _TEXT, NoHandler diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp index 98e256f069afe7..947e8349e2608c 100644 --- a/src/coreclr/vm/callstubgenerator.cpp +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -1155,12 +1155,14 @@ extern "C" void CallJittedMethodRetDouble(PCODE *routines, int8_t*pArgs, int8_t* extern "C" void CallJittedMethodRetI8(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); #ifndef TARGET_64BIT extern "C" void CallJittedMethodRetI4(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +extern "C" void CallJittedMethodRetFloat(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); #endif // !TARGET_64BIT extern "C" void InterpreterStubRetVoid(); extern "C" void InterpreterStubRetDouble(); extern "C" void InterpreterStubRetI8(); #ifndef TARGET_64BIT extern "C" void InterpreterStubRetI4(); +extern "C" void InterpreterStubRetFloat(); #endif // !TARGET_64BIT #if defined(TARGET_WINDOWS) && defined(TARGET_AMD64) @@ -1228,6 +1230,8 @@ CallStubHeader::InvokeFunctionPtr CallStubGenerator::GetInvokeFunctionPtr(CallSt #ifndef TARGET_64BIT case ReturnTypeI4: INVOKE_FUNCTION_PTR(CallJittedMethodRetI4); + case ReturnTypeFloat: + INVOKE_FUNCTION_PTR(CallJittedMethodRetFloat); #endif // !TARGET_64BIT #if defined(TARGET_WINDOWS) && defined(TARGET_AMD64) case ReturnTypeBuffArg1: @@ -1297,6 +1301,8 @@ PCODE CallStubGenerator::GetInterpreterReturnTypeHandler(CallStubGenerator::Retu #ifndef TARGET_64BIT case ReturnTypeI4: RETURN_TYPE_HANDLER(InterpreterStubRetI4); + case ReturnTypeFloat: + 
RETURN_TYPE_HANDLER(InterpreterStubRetFloat); #endif // !TARGET_64BIT #if defined(TARGET_WINDOWS) && defined(TARGET_AMD64) case ReturnTypeBuffArg1: @@ -1886,6 +1892,10 @@ CallStubGenerator::ReturnType CallStubGenerator::GetReturnType(ArgIterator *pArg return ReturnTypeI8; break; case ELEMENT_TYPE_R4: +#ifndef TARGET_64BIT + return ReturnTypeFloat; + break; +#endif case ELEMENT_TYPE_R8: return ReturnTypeDouble; break; diff --git a/src/coreclr/vm/callstubgenerator.h b/src/coreclr/vm/callstubgenerator.h index 9d73aa54c92010..7c644c78697b6a 100644 --- a/src/coreclr/vm/callstubgenerator.h +++ b/src/coreclr/vm/callstubgenerator.h @@ -68,9 +68,12 @@ class CallStubGenerator enum ReturnType { ReturnTypeVoid, - ReturnTypeI4, ReturnTypeI8, ReturnTypeDouble, +#ifndef TARGET_64BIT + ReturnTypeI4, + ReturnTypeFloat, +#endif // TARGET_64BIT #if defined(TARGET_WINDOWS) && defined(TARGET_AMD64) ReturnTypeBuffArg1, ReturnTypeBuffArg2, From 4d7d19f763111f2d36a8c16809940b1601346e2b Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Thu, 25 Sep 2025 15:15:32 +0900 Subject: [PATCH 14/33] [INTRP] Update for ARM SOFTFP --- src/coreclr/vm/arm/asmhelpers.S | 39 ++++++++++++---------------- src/coreclr/vm/callstubgenerator.cpp | 16 +++++++++--- 2 files changed, 28 insertions(+), 27 deletions(-) diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index acd11486830ccd..8f34ec645221ea 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -1083,7 +1083,20 @@ NESTED_ENTRY CallJittedMethodRetI4, _TEXT, NoHandler NESTED_END CallJittedMethodRetI4, _TEXT NESTED_ENTRY CallJittedMethodRetI8, _TEXT, NoHandler - EMIT_BREAKPOINT // TODO + PROLOG_PUSH "{r5-r8,lr}" + mov r8, sp + sub sp, sp, 12 + str r2, [sp] + sub sp, sp, r3 + mov r6, r0 + mov r7, r1 + ldr r5, [r6], #4 + blx r5 + ldr r2, [sp] + str r0, [r2] + str r1, [r2, 4] + mov sp, r8 + EPILOG_POP "{r5-r8,pc}" NESTED_END CallJittedMethodRetI8, _TEXT NESTED_ENTRY CallJittedMethodRetFloat, _TEXT, 
NoHandler @@ -1172,32 +1185,12 @@ NESTED_ENTRY InterpreterStubRetI8, _TEXT, NoHandler EPILOG_POP {pc} NESTED_END InterpreterStubRetI8, _TEXT -// This is for arm softfp. It is the same to InterpreterStubRetI4. NESTED_ENTRY InterpreterStubRetFloat, _TEXT, NoHandler - PROLOG_PUSH {lr} - sub sp, sp, 12 - add r0, sp, #__PWTB_TransitionBlock + 16 - mov r1, r9 - mov r2, 0 - bl C_FUNC(ExecuteInterpretedMethod) - ldr r0, [r0] - add sp, sp, 12 - EPILOG_POP {pc} + EMIT_BREAKPOINT // TODO NESTED_END InterpreterStubRetDouble, _TEXT -// This is for arm softfp. It is the same to InterpreterStubRetI8. NESTED_ENTRY InterpreterStubRetDouble, _TEXT, NoHandler - PROLOG_PUSH {lr} - sub sp, sp, 12 - add r0, sp, #__PWTB_TransitionBlock + 16 - mov r1, r9 - mov r2, 0 - bl C_FUNC(ExecuteInterpretedMethod) - ldr r2, [r0] - ldr r1, [r0, 4] - mov r0, r2 - add sp, sp, 12 - EPILOG_POP {pc} + EMIT_BREAKPOINT // TODO NESTED_END InterpreterStubRetDouble, _TEXT NESTED_ENTRY InterpreterStubRetBuff, _TEXT, NoHandler diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp index 947e8349e2608c..37fb4414e15cb7 100644 --- a/src/coreclr/vm/callstubgenerator.cpp +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -1224,14 +1224,18 @@ CallStubHeader::InvokeFunctionPtr CallStubGenerator::GetInvokeFunctionPtr(CallSt case ReturnTypeVoid: INVOKE_FUNCTION_PTR(CallJittedMethodRetVoid); case ReturnTypeDouble: +#ifndef ARM_SOFTFP INVOKE_FUNCTION_PTR(CallJittedMethodRetDouble); +#endif // !ARM_SOFTFP case ReturnTypeI8: INVOKE_FUNCTION_PTR(CallJittedMethodRetI8); #ifndef TARGET_64BIT - case ReturnTypeI4: - INVOKE_FUNCTION_PTR(CallJittedMethodRetI4); case ReturnTypeFloat: +#ifndef ARM_SOFTFP INVOKE_FUNCTION_PTR(CallJittedMethodRetFloat); +#endif // !ARM_SOFTFP + case ReturnTypeI4: + INVOKE_FUNCTION_PTR(CallJittedMethodRetI4); #endif // !TARGET_64BIT #if defined(TARGET_WINDOWS) && defined(TARGET_AMD64) case ReturnTypeBuffArg1: @@ -1295,14 +1299,18 @@ PCODE 
CallStubGenerator::GetInterpreterReturnTypeHandler(CallStubGenerator::Retu case ReturnTypeVoid: RETURN_TYPE_HANDLER(InterpreterStubRetVoid); case ReturnTypeDouble: +#ifndef ARM_SOFTFP RETURN_TYPE_HANDLER(InterpreterStubRetDouble); +#endif // !ARM_SOFTFP case ReturnTypeI8: RETURN_TYPE_HANDLER(InterpreterStubRetI8); #ifndef TARGET_64BIT - case ReturnTypeI4: - RETURN_TYPE_HANDLER(InterpreterStubRetI4); case ReturnTypeFloat: +#ifndef ARM_SOFTFP RETURN_TYPE_HANDLER(InterpreterStubRetFloat); +#endif // !ARM_SOFTFP + case ReturnTypeI4: + RETURN_TYPE_HANDLER(InterpreterStubRetI4); #endif // !TARGET_64BIT #if defined(TARGET_WINDOWS) && defined(TARGET_AMD64) case ReturnTypeBuffArg1: From ab7eca6cde8515fac5c632906e73f47b06309766 Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Thu, 25 Sep 2025 15:48:00 +0900 Subject: [PATCH 15/33] [INTRP] Update all assemblies for arm softfp Need to test the implementations --- src/coreclr/vm/arm/asmhelpers.S | 54 ++++++++++++++++++++++++++++++--- 1 file changed, 49 insertions(+), 5 deletions(-) diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index 8f34ec645221ea..a81c754e45e440 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -1038,19 +1038,53 @@ Store_Ref R2 Store_Ref R3 LEAF_ENTRY Load_Stack - EMIT_BREAKPOINT // TODO + ldr r8, [r6], #4 // SP offset + ldr r9, [r6], #4 // number of stack slots + add r8, sp, r8 +LOCAL_LABEL(CopyLoop): + ldr r5, [r7], #4 + str r5, [r8], #4 + subs r9, r9, #4 + bne LOCAL_LABEL(CopyLoop) + ldr r5, [r6], #4 + EPILOG_BRANCH_REG r5 LEAF_END Load_Stack LEAF_ENTRY Store_Stack - EMIT_BREAKPOINT // TODO + ldr r8, [r6], #4 // SP offset + ldr r9, [r6], #4 // number of stack slots + add r8, sp, r8 + add r8, r8, #__PWTB_TransitionBlock + SIZEOF__TransitionBlock +LOCAL_LABEL(StoreCopyLoop): + ldr r5, [r8], #4 + str r5, [r7], #4 + subs r9, r9, #4 + bne LOCAL_LABEL(StoreCopyLoop) + ldr r5, [r6], #4 + EPILOG_BRANCH_REG r5 LEAF_END Store_Stack LEAF_ENTRY
Load_Stack_Ref - EMIT_BREAKPOINT // TODO + ldr r8, [r6], #4 // SP offset + ldr r9, [r6], #4 // size of the value type + add r8, sp,r8 + str r7, [r8] + add r7, r7, r9 + // Align r7 to the stack slot size + add r7, r7, 3 + and r7, r7, 0xfffffffc + ldr r5, [r6], #8 + EPILOG_BRANCH_REG r5 LEAF_END Load_Stack_Ref LEAF_ENTRY Store_Stack_Ref - EMIT_BREAKPOINT // TODO + ldr r8, [r6], #4 // SP offset + ldr r9, [r6], #4 // size of the value type + add r8, sp, r8 + ldr r8, [r8, #__PWTB_TransitionBlock + SIZEOF__TransitionBlock] + Copy_Ref r8 + ldr r5, [r6], #4 + EPILOG_BRANCH_REG r5 LEAF_END Store_Stack_Ref NESTED_ENTRY CallJittedMethodRetVoid, _TEXT, NoHandler @@ -1108,7 +1142,17 @@ NESTED_ENTRY CallJittedMethodRetDouble, _TEXT, NoHandler NESTED_END CallJittedMethodRetDouble, _TEXT NESTED_ENTRY CallJittedMethodRetBuff, _TEXT, NoHandler - EMIT_BREAKPOINT // TODO + PROLOG_PUSH "{r5-r9,lr}" + mov r9, sp + sub sp, sp, 8 + sub sp, sp, r3 + mov r6, r0 + mov r7, r1 + mov r8, r2 + ldr r5, [r6], #4 + blx r5 + mov sp, r9 + EPILOG_POP "{r5-r9,lr}" NESTED_END CallJittedMethodRetBuff, _TEXT NESTED_ENTRY InterpreterStub, _TEXT, NoHandler From 1cf65b78ff40f6697fdfbbe80537a08918a8f5a0 Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Thu, 25 Sep 2025 19:10:30 +0900 Subject: [PATCH 16/33] [INTRP] Can Pass Arguments using Stack --- src/coreclr/vm/arm/asmhelpers.S | 92 ++++++++++++---------------- src/coreclr/vm/callstubgenerator.cpp | 26 +++++++- 2 files changed, 63 insertions(+), 55 deletions(-) diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index a81c754e45e440..78025073c09eb9 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -993,9 +993,9 @@ Load_Ref R3 blt LOCAL_LABEL(CopyBy4\argReg) LOCAL_LABEL(RefCopyLoop8\argReg): ldr r9, [\argReg], #4 - ldr r10, [\argReg], #4 + ldr r4, [\argReg], #4 str r9, [r7], #4 - str r10, [r7], #4 + str r4, [r7], #4 subs r5, r5, #8 bgt LOCAL_LABEL(RefCopyLoop8\argReg) beq 
LOCAL_LABEL(RefCopyDone\argReg) @@ -1025,7 +1025,7 @@ LOCAL_LABEL(RefCopyDone\argReg): LEAF_ENTRY Store_Ref_\argReg ldr r5, [r6], #4 // size of the value type - // Copy_Ref \argReg + Copy_Ref \argReg ldr r5, [r6], #4 EPILOG_BRANCH_REG r5 LEAF_END Store_Ref_\argReg @@ -1051,14 +1051,14 @@ LOCAL_LABEL(CopyLoop): LEAF_END Load_Stack LEAF_ENTRY Store_Stack - ldr r8, [r6], #4 // SP offset - ldr r9, [r6], #4 // number of stack slots - add r8, sp, r8 - add r8, r8, #__PWTB_TransitionBlock + SIZEOF__TransitionBlock + ldr r9, [r6], #4 // SP offset + ldr r10, [r6], #4 // number of stack slots + add r9, sp, r9 + add r9, r9, #__PWTB_TransitionBlock + SIZEOF__TransitionBlock LOCAL_LABEL(StoreCopyLoop): - ldr r5, [r8], #4 + ldr r5, [r9], #4 str r5, [r7], #4 - subs r9, r9, #4 + subs r10, r10, #4 bne LOCAL_LABEL(StoreCopyLoop) ldr r5, [r6], #4 EPILOG_BRANCH_REG r5 @@ -1079,7 +1079,7 @@ LEAF_END Load_Stack_Ref LEAF_ENTRY Store_Stack_Ref ldr r8, [r6], #4 // SP offset - ldr r9, [r6], #4 // size of the value type + ldr r5, [r6], #4 // size of the value type add r8, sp, r8 ldr r8, [r8, #__PWTB_TransitionBlock + SIZEOF__TransitionBlock] Copy_Ref r8 @@ -1088,49 +1088,47 @@ LEAF_ENTRY Store_Stack_Ref LEAF_END Store_Stack_Ref NESTED_ENTRY CallJittedMethodRetVoid, _TEXT, NoHandler - PROLOG_PUSH "{r5-r8,lr}" - mov r8, sp - sub sp, sp, 12 + PROLOG_PUSH "{r4-r9,lr}" + mov r4, sp + sub sp, sp, 4 sub sp, sp, r3 mov r6, r0 mov r7, r1 ldr r5, [r6], #4 blx r5 - mov sp, r8 - EPILOG_POP "{r5-r8,pc}" + mov sp, r4 + EPILOG_POP "{r4-r9,pc}" NESTED_END CallJittedMethodRetVoid, _TEXT NESTED_ENTRY CallJittedMethodRetI4, _TEXT, NoHandler - PROLOG_PUSH "{r5-r8,lr}" - mov r8, sp - sub sp, sp, 12 - str r2, [sp] + PROLOG_PUSH "{r4-r9,lr}" + push { r2 } + mov r4, sp sub sp, sp, r3 mov r6, r0 mov r7, r1 ldr r5, [r6], #4 blx r5 - ldr r2, [sp] + mov sp, r4 + pop { r2 } str r0, [r2] - mov sp, r8 - EPILOG_POP "{r5-r8,pc}" + EPILOG_POP "{r4-r9,pc}" NESTED_END CallJittedMethodRetI4, _TEXT NESTED_ENTRY 
CallJittedMethodRetI8, _TEXT, NoHandler - PROLOG_PUSH "{r5-r8,lr}" - mov r8, sp - sub sp, sp, 12 - str r2, [sp] + PROLOG_PUSH "{r4-r9,lr}" + push { r2 } + mov r4, sp sub sp, sp, r3 mov r6, r0 mov r7, r1 ldr r5, [r6], #4 blx r5 - ldr r2, [sp] + mov sp, r4 + pop { r2 } str r0, [r2] str r1, [r2, 4] - mov sp, r8 - EPILOG_POP "{r5-r8,pc}" + EPILOG_POP "{r4-r9,pc}" NESTED_END CallJittedMethodRetI8, _TEXT NESTED_ENTRY CallJittedMethodRetFloat, _TEXT, NoHandler @@ -1142,50 +1140,40 @@ NESTED_ENTRY CallJittedMethodRetDouble, _TEXT, NoHandler NESTED_END CallJittedMethodRetDouble, _TEXT NESTED_ENTRY CallJittedMethodRetBuff, _TEXT, NoHandler - PROLOG_PUSH "{r5-r9,lr}" - mov r9, sp - sub sp, sp, 8 - sub sp, sp, r3 - mov r6, r0 - mov r7, r1 - mov r8, r2 - ldr r5, [r6], #4 - blx r5 - mov sp, r9 - EPILOG_POP "{r5-r9,lr}" + EMIT_BREAKPOINT // TODO NESTED_END CallJittedMethodRetBuff, _TEXT NESTED_ENTRY InterpreterStub, _TEXT, NoHandler PROLOG_WITH_TRANSITION_BLOCK // IR bytecode address - mov r9, METHODDESC_REGISTER // InterpMethod + mov r4, METHODDESC_REGISTER // InterpMethod - PROLOG_PUSH "{r0-r3}" + PROLOG_PUSH "{r0-r4}" INLINE_GET_TLS_VAR C_FUNC(t_CurrentThreadInfo) - mov r10, r0 + mov r6, r0 - ldr r10, [r10, #OFFSETOF__ThreadLocalInfo__m_pThread] + ldr r6, [r6, #OFFSETOF__ThreadLocalInfo__m_pThread] - ldr r5, [r10, #OFFSETOF__Thread__m_pInterpThreadContext] + ldr r5, [r6, #OFFSETOF__Thread__m_pInterpThreadContext] cbnz r5, LOCAL_LABEL(HaveInterpThreadContext) - mov r0, r10 + mov r0, r6 bl C_FUNC(_ZN6Thread22GetInterpThreadContextEv) // Thread::GetInterpThreadContext mov r5, r0 LOCAL_LABEL(HaveInterpThreadContext): # RESTORE_ARGUMENT_REGISTERS sp, __PWTB_ArgumentRegisters - EPILOG_POP "{r0-r3}" + EPILOG_POP "{r0-r4}" # RESTORE_FLOAT_ARGUMENT_REGISTERS sp, __PWTB_FloatArgumentRegisters - ldr r7, [r9] // InterpMethod* + ldr r7, [r4] // InterpMethod* ldr r7, [r7, #OFFSETOF__InterpMethod__pCallStub] add r6, r7, #OFFSETOF__CallStubHeader__Routines ldr r7, [r5, 
#OFFSETOF__InterpThreadContext__pStackPointer] // HERE - ldr r5, [r6] // InterpThreadContext + ldr r5, [r6], 4 // InterpThreadContext blx r5 EPILOG_WITH_TRANSITION_BLOCK_RETURN @@ -1196,7 +1184,7 @@ NESTED_ENTRY InterpreterStubRetVoid, _TEXT, NoHandler PROLOG_PUSH {lr} sub sp, sp, 12 add r0, sp, #__PWTB_TransitionBlock + 16 - mov r1, r9 + mov r1, r4 mov r2, 0 bl C_FUNC(ExecuteInterpretedMethod) add sp, sp, 12 @@ -1207,7 +1195,7 @@ NESTED_ENTRY InterpreterStubRetI4, _TEXT, NoHandler PROLOG_PUSH {lr} sub sp, sp, 12 add r0, sp, #__PWTB_TransitionBlock + 16 - mov r1, r9 + mov r1, r4 mov r2, 0 bl C_FUNC(ExecuteInterpretedMethod) ldr r0, [r0] @@ -1219,7 +1207,7 @@ NESTED_ENTRY InterpreterStubRetI8, _TEXT, NoHandler PROLOG_PUSH {lr} sub sp, sp, 12 add r0, sp, #__PWTB_TransitionBlock + 16 - mov r1, r9 + mov r1, r4 mov r2, 0 bl C_FUNC(ExecuteInterpretedMethod) ldr r2, [r0] diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp index 37fb4414e15cb7..eefdb5a7a8c49f 100644 --- a/src/coreclr/vm/callstubgenerator.cpp +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -1653,7 +1653,12 @@ void CallStubGenerator::ComputeCallStub(MetaSig &sig, PCODE *pRoutines) else if (m_s1 != NoRange) { pRoutines[m_routineIndex++] = GetStackRoutine(); +#ifdef TARGET_64BIT pRoutines[m_routineIndex++] = ((int64_t)(m_s2 - m_s1 + 1) << 32) | m_s1; +#else // !TARGET_64BIT + pRoutines[m_routineIndex++] = m_s1; + pRoutines[m_routineIndex++] = m_s2 - m_s1 + 1; +#endif // TARGET_64BIT } ReturnType returnType = GetReturnType(&argIt); @@ -1696,7 +1701,12 @@ void CallStubGenerator::ProcessArgument(ArgIterator *pArgIt, ArgLocDesc& argLocD // No stack argument is used to pass the current argument, but we already have a range of stack arguments, // store the routine for the range pRoutines[m_routineIndex++] = GetStackRoutine(); +#ifdef TARGET_64BIT pRoutines[m_routineIndex++] = ((int64_t)(m_s2 - m_s1 + 1) << 32) | m_s1; +#else // !TARGET_64BIT + pRoutines[m_routineIndex++] = m_s1; + 
pRoutines[m_routineIndex++] = m_s2 - m_s1 + 1; +#endif // TARGET_64BIT m_s1 = NoRange; } @@ -1767,9 +1777,9 @@ void CallStubGenerator::ProcessArgument(ArgIterator *pArgIt, ArgLocDesc& argLocD m_s2 = m_s1 + argLocDesc.m_byteStackSize - 1; } #ifdef ENREGISTERED_PARAMTYPE_MAXSIZE - else if ((argLocDesc.m_byteStackIndex == m_s2 + 1) && (argLocDesc.m_byteStackSize >= 8) && (!pArgIt || !pArgIt->IsArgPassedByRef())) + else if ((argLocDesc.m_byteStackIndex == m_s2 + 1) && (argLocDesc.m_byteStackSize >= TARGET_POINTER_SIZE) && (!pArgIt || !pArgIt->IsArgPassedByRef())) #else - else if ((argLocDesc.m_byteStackIndex == m_s2 + 1) && (argLocDesc.m_byteStackSize >= 8)) + else if ((argLocDesc.m_byteStackIndex == m_s2 + 1) && (argLocDesc.m_byteStackSize >= TARGET_POINTER_SIZE)) #endif // ENREGISTERED_PARAMTYPE_MAXSIZE { // Extend an existing range, but only if the argument is at least pointer size large. @@ -1782,7 +1792,12 @@ void CallStubGenerator::ProcessArgument(ArgIterator *pArgIt, ArgLocDesc& argLocD { // Discontinuous range - store a routine for the current and start a new one pRoutines[m_routineIndex++] = GetStackRoutine(); +#ifdef TARGET_64BIT pRoutines[m_routineIndex++] = ((int64_t)(m_s2 - m_s1 + 1) << 32) | m_s1; +#else // !TARGET_64BIT + pRoutines[m_routineIndex++] = m_s1; + pRoutines[m_routineIndex++] = m_s2 - m_s1 + 1; +#endif // TARGET_64BIT m_s1 = argLocDesc.m_byteStackIndex; m_s2 = m_s1 + argLocDesc.m_byteStackSize - 1; } @@ -1827,7 +1842,7 @@ void CallStubGenerator::ProcessArgument(ArgIterator *pArgIt, ArgLocDesc& argLocD // automatically do alignment of the stack offset themselves when updating the stack offset, // and if we were to pass them aligned sizes they would potentially read bytes past the end of the VT. int alignedArgSize = m_interpreterToNative - ? ALIGN_UP(unalignedArgSize, 8) + ? 
ALIGN_UP(unalignedArgSize, TARGET_POINTER_SIZE) : unalignedArgSize; if (argLocDesc.m_cGenReg == 1) @@ -1840,7 +1855,12 @@ void CallStubGenerator::ProcessArgument(ArgIterator *pArgIt, ArgLocDesc& argLocD { _ASSERTE(argLocDesc.m_byteStackIndex != -1); pRoutines[m_routineIndex++] = GetStackRefRoutine(); +#ifdef TARGET_64BIT pRoutines[m_routineIndex++] = ((int64_t)alignedArgSize << 32) | argLocDesc.m_byteStackIndex; +#else // !TARGET_64BIT + pRoutines[m_routineIndex++] = argLocDesc.m_byteStackIndex; + pRoutines[m_routineIndex++] = alignedArgSize; +#endif // TARGET_64BIT m_s1 = NoRange; } } From aabd45ba9d3304a9e9263d5fd593fad8f6d290cd Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Fri, 26 Sep 2025 13:10:42 +0900 Subject: [PATCH 17/33] [INTRP] Remove Redundant Codes --- src/coreclr/vm/arm/asmhelpers.S | 90 ---------------------------- src/coreclr/vm/callstubgenerator.cpp | 39 ++---------- src/coreclr/vm/callstubgenerator.h | 4 +- 3 files changed, 6 insertions(+), 127 deletions(-) diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index 78025073c09eb9..6a3ede90ce6640 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -970,73 +970,6 @@ ALTERNATE_ENTRY Store_R3 EPILOG_BRANCH_REG r5 LEAF_END Store_R0_R1_R2_R3 -// TODO: *ref is not tested yet. 
-.macro Load_Ref argReg - -LEAF_ENTRY Load_Ref_\argReg - mov \argReg, r7 - ldr r5, [r6], #4 - add r7, r7, r5 - ldr r5, [r6], #4 - EPILOG_BRANCH_REG r5 -LEAF_END Load_Ref_\argReg - -.endm - -Load_Ref R0 -Load_Ref R1 -Load_Ref R2 -Load_Ref R3 - -.macro Copy_Ref argReg - cmp r5, #8 - blt LOCAL_LABEL(CopyBy4\argReg) -LOCAL_LABEL(RefCopyLoop8\argReg): - ldr r9, [\argReg], #4 - ldr r4, [\argReg], #4 - str r9, [r7], #4 - str r4, [r7], #4 - subs r5, r5, #8 - bgt LOCAL_LABEL(RefCopyLoop8\argReg) - beq LOCAL_LABEL(RefCopyDone\argReg) - add r5, r5, #8 -LOCAL_LABEL(CopyBy4\argReg): - cmp r5, #4 - blt LOCAL_LABEL(RefCopyLoop1\argReg) -LOCAL_LABEL(RefCopyLoop4\argReg): - ldr r9, [\argReg], #4 - str r9, [r7], #4 - subs r5, r5, #4 - bgt LOCAL_LABEL(RefCopyLoop4\argReg) - beq LOCAL_LABEL(RefCopyDone\argReg) - add r5, r5, #4 -LOCAL_LABEL(RefCopyLoop1\argReg): - ldrb r9, [\argReg], #1 - strb r9, [r7], #1 - subs r5, r5, #1 - bne LOCAL_LABEL(RefCopyLoop1\argReg) -LOCAL_LABEL(RefCopyDone\argReg): - // Align r7 to the stack slot size - add r7, r7, 7 - and r7, r7, 0xfffffff8 -.endm - -.macro Store_Ref argReg - -LEAF_ENTRY Store_Ref_\argReg - ldr r5, [r6], #4 // size of the value type - Copy_Ref \argReg - ldr r5, [r6], #4 - EPILOG_BRANCH_REG r5 -LEAF_END Store_Ref_\argReg - -.endm - -Store_Ref R0 -Store_Ref R1 -Store_Ref R2 -Store_Ref R3 - LEAF_ENTRY Load_Stack ldr r8, [r6], #4 // SP offset ldr r9, [r6], #4 // number of stack slots @@ -1064,29 +997,6 @@ LOCAL_LABEL(StoreCopyLoop): EPILOG_BRANCH_REG r5 LEAF_END Store_Stack -LEAF_ENTRY Load_Stack_Ref - ldr r8, [r6], #4 // SP offset - ldr r9, [r6], #4 // size of the value type - add r8, sp,r8 - str r7, [r8] - add r7, r7, r9 - // Align r7 to the stack slot size - add r7, r7, 3 - and r7, r7, 0xfffffffc - ldr r5, [r6], #8 - EPILOG_BRANCH_REG r5 -LEAF_END Load_Stack_Ref - -LEAF_ENTRY Store_Stack_Ref - ldr r8, [r6], #4 // SP offset - ldr r5, [r6], #4 // size of the value type - add r8, sp, r8 - ldr r8, [r8, #__PWTB_TransitionBlock + 
SIZEOF__TransitionBlock] - Copy_Ref r8 - ldr r5, [r6], #4 - EPILOG_BRANCH_REG r5 -LEAF_END Store_Stack_Ref - NESTED_ENTRY CallJittedMethodRetVoid, _TEXT, NoHandler PROLOG_PUSH "{r4-r9,lr}" mov r4, sp diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp index eefdb5a7a8c49f..ad93eeb2106f79 100644 --- a/src/coreclr/vm/callstubgenerator.cpp +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -19,10 +19,10 @@ extern "C" void Store_Stack_2B(); extern "C" void Store_Stack_4B(); #endif // TARGET_APPLE && TARGET_ARM64 -#ifndef UNIX_AMD64_ABI +#if !defined(UNIX_AMD64_ABI) && defined(ENREGISTERED_PARAMTYPE_MAXSIZE) extern "C" void Load_Stack_Ref(); extern "C" void Store_Stack_Ref(); -#endif // !UNIX_AMD64_ABI +#endif // !UNIX_AMD64_ABI && ENREGISTERED_PARAMTYPE_MAXSIZE #ifdef TARGET_AMD64 @@ -1003,16 +1003,6 @@ extern "C" void Store_R2(); extern "C" void Store_R2_R3(); extern "C" void Store_R3(); -extern "C" void Load_Ref_R0(); -extern "C" void Load_Ref_R1(); -extern "C" void Load_Ref_R2(); -extern "C" void Load_Ref_R3(); - -extern "C" void Store_Ref_R0(); -extern "C" void Store_Ref_R1(); -extern "C" void Store_Ref_R2(); -extern "C" void Store_Ref_R3(); - PCODE GPRegsRoutines[] = { (PCODE)Load_R0, // 00 @@ -1053,22 +1043,6 @@ PCODE GPRegsStoreRoutines[] = (PCODE)Store_R3, // 15 }; -PCODE GPRegsRefRoutines[] = -{ - (PCODE)Load_Ref_R0, // 0 - (PCODE)Load_Ref_R1, // 1 - (PCODE)Load_Ref_R2, // 2 - (PCODE)Load_Ref_R3, // 3 -}; - -PCODE GPRegsRefStoreRoutines[] = -{ - (PCODE)Store_Ref_R0, // 0 - (PCODE)Store_Ref_R1, // 1 - (PCODE)Store_Ref_R2, // 2 - (PCODE)Store_Ref_R3, // 3 -}; - #endif // TARGET_ARM #define LOG_COMPUTE_CALL_STUB 0 @@ -1117,7 +1091,7 @@ PCODE CallStubGenerator::GetGPRegRangeRoutine(int r1, int r2) return m_interpreterToNative ? 
GPRegsRoutines[index] : GPRegsStoreRoutines[index]; } -#ifndef UNIX_AMD64_ABI +#if !defined(UNIX_AMD64_ABI) && defined(ENREGISTERED_PARAMTYPE_MAXSIZE) PCODE CallStubGenerator::GetGPRegRefRoutine(int r) { #if LOG_COMPUTE_CALL_STUB @@ -1134,7 +1108,7 @@ PCODE CallStubGenerator::GetStackRefRoutine() return m_interpreterToNative ? (PCODE)Load_Stack_Ref : (PCODE)Store_Stack_Ref; } -#endif // UNIX_AMD64_ABI +#endif // !UNIX_AMD64_ABI && ENREGISTERED_PARAMTYPE_MAXSIZE PCODE CallStubGenerator::GetFPRegRangeRoutine(int x1, int x2) { @@ -1855,12 +1829,7 @@ void CallStubGenerator::ProcessArgument(ArgIterator *pArgIt, ArgLocDesc& argLocD { _ASSERTE(argLocDesc.m_byteStackIndex != -1); pRoutines[m_routineIndex++] = GetStackRefRoutine(); -#ifdef TARGET_64BIT pRoutines[m_routineIndex++] = ((int64_t)alignedArgSize << 32) | argLocDesc.m_byteStackIndex; -#else // !TARGET_64BIT - pRoutines[m_routineIndex++] = argLocDesc.m_byteStackIndex; - pRoutines[m_routineIndex++] = alignedArgSize; -#endif // TARGET_64BIT m_s1 = NoRange; } } diff --git a/src/coreclr/vm/callstubgenerator.h b/src/coreclr/vm/callstubgenerator.h index 7c644c78697b6a..b97d8c14c6aa45 100644 --- a/src/coreclr/vm/callstubgenerator.h +++ b/src/coreclr/vm/callstubgenerator.h @@ -120,10 +120,10 @@ class CallStubGenerator CallStubHeader::InvokeFunctionPtr m_pInvokeFunction = NULL; bool m_interpreterToNative = false; -#ifndef UNIX_AMD64_ABI +#if !defined(UNIX_AMD64_ABI) && defined(ENREGISTERED_PARAMTYPE_MAXSIZE) PCODE GetGPRegRefRoutine(int r); PCODE GetStackRefRoutine(); -#endif // !UNIX_AMD64_ABI +#endif // !UNIX_AMD64_ABI && ENREGISTERED_PARAMTYPE_MAXSIZE PCODE GetStackRoutine(); #if defined(TARGET_APPLE) && defined(TARGET_ARM64) PCODE GetStackRoutine_1B(); From e6a79a090ff782435a7dfc0c5f6a0db6d21cf385 Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Fri, 26 Sep 2025 14:34:22 +0900 Subject: [PATCH 18/33] [INTRP] Support Struct --- src/coreclr/vm/callstubgenerator.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) 
diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp index ad93eeb2106f79..5f5fc6ec8e1f65 100644 --- a/src/coreclr/vm/callstubgenerator.cpp +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -2020,6 +2020,20 @@ CallStubGenerator::ReturnType CallStubGenerator::GetReturnType(ArgIterator *pArg break; } } +#elif TARGET_ARM + switch (thReturnValueType.GetSize()) + { + case 1: + case 2: + case 4: + return ReturnTypeI4; + break; + case 8: + return ReturnTypeI8; + default: + _ASSERTE(!"The return types should be <= 8 bytes in size"); + break; + } #else _ASSERTE(!"Struct returns by value are not supported yet"); #endif From a5e1bd5103cb21de81a26ef2eaf846e480392151 Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Fri, 26 Sep 2025 17:51:21 +0900 Subject: [PATCH 19/33] [INTRP] Support RetBuff --- src/coreclr/pal/inc/unixasmmacrosarm.inc | 1 + src/coreclr/vm/arm/asmhelpers.S | 56 +++++++++++++++++++++--- src/coreclr/vm/callstubgenerator.cpp | 21 +++++++-- src/coreclr/vm/callstubgenerator.h | 2 +- 4 files changed, 70 insertions(+), 10 deletions(-) diff --git a/src/coreclr/pal/inc/unixasmmacrosarm.inc b/src/coreclr/pal/inc/unixasmmacrosarm.inc index 0bbe1f12f4e234..205a2fc0664a7b 100644 --- a/src/coreclr/pal/inc/unixasmmacrosarm.inc +++ b/src/coreclr/pal/inc/unixasmmacrosarm.inc @@ -172,6 +172,7 @@ C_FUNC(\Name): .endif __PWTB_StackAlloc = __PWTB_TransitionBlock + __PWTB_ArgumentRegisters = __PWTB_StackAlloc + 36 .ifnc \pushArgRegs, DoNotPushArgRegs PUSH_ARGUMENT_REGISTERS diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index 6a3ede90ce6640..28c580860c12bf 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -1049,9 +1049,34 @@ NESTED_ENTRY CallJittedMethodRetDouble, _TEXT, NoHandler EMIT_BREAKPOINT // TODO NESTED_END CallJittedMethodRetDouble, _TEXT -NESTED_ENTRY CallJittedMethodRetBuff, _TEXT, NoHandler - EMIT_BREAKPOINT // TODO -NESTED_END CallJittedMethodRetBuff, _TEXT 
+NESTED_ENTRY CallJittedMethodRetBuffR0, _TEXT, NoHandler + PROLOG_PUSH "{r4-r9,lr}" + mov r4, sp + sub sp, sp, 4 + sub sp, sp, r3 + mov r6, r0 + mov r7, r1 + ldr r5, [r6], #4 + mov r0, r2 + blx r5 + mov sp, r4 + EPILOG_POP "{r4-r9,pc}" +NESTED_END CallJittedMethodRetBuffR0, _TEXT + +NESTED_ENTRY CallJittedMethodRetBuffR1, _TEXT, NoHandler + PROLOG_PUSH "{r4-r9,lr}" + mov r4, sp + sub sp, sp, 4 + sub sp, sp, r3 + mov r6, r0 + mov r7, r1 + ldr r5, [r6], #4 + mov r1, r2 + blx r5 + mov sp, r4 + EPILOG_POP "{r4-r9,pc}" +NESTED_END CallJittedMethodRetBuffR1, _TEXT + NESTED_ENTRY InterpreterStub, _TEXT, NoHandler PROLOG_WITH_TRANSITION_BLOCK @@ -1135,8 +1160,27 @@ NESTED_ENTRY InterpreterStubRetDouble, _TEXT, NoHandler EMIT_BREAKPOINT // TODO NESTED_END InterpreterStubRetDouble, _TEXT -NESTED_ENTRY InterpreterStubRetBuff, _TEXT, NoHandler - EMIT_BREAKPOINT // TODO -NESTED_END InterpreterStubRetBuff, _TEXT +NESTED_ENTRY InterpreterStubRetBuffR0, _TEXT, NoHandler + PROLOG_PUSH {lr} + sub sp, sp, 12 + add r0, sp, #__PWTB_TransitionBlock + 16 + mov r1, r4 + ldr r2, [sp, #__PWTB_ArgumentRegisters + 16] + bl C_FUNC(ExecuteInterpretedMethod) + ldr r0, [r0] + add sp, sp, 12 + EPILOG_POP {pc} +NESTED_END InterpreterStubRetBuffR0, _TEXT +NESTED_ENTRY InterpreterStubRetBuffR1, _TEXT, NoHandler + PROLOG_PUSH {lr} + sub sp, sp, 12 + add r0, sp, #__PWTB_TransitionBlock + 16 + mov r1, r4 + ldr r2, [sp, #__PWTB_ArgumentRegisters + 20] + bl C_FUNC(ExecuteInterpretedMethod) + ldr r1, [r0] + add sp, sp, 12 + EPILOG_POP {pc} +NESTED_END InterpreterStubRetBuffR1, _TEXT diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp index 5f5fc6ec8e1f65..6d43cde6abcd13 100644 --- a/src/coreclr/vm/callstubgenerator.cpp +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -1144,6 +1144,11 @@ extern "C" void CallJittedMethodRetBuffRCX(PCODE *routines, int8_t*pArgs, int8_t extern "C" void CallJittedMethodRetBuffRDX(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); 
extern "C" void InterpreterStubRetBuffRCX(); extern "C" void InterpreterStubRetBuffRDX(); +#elif defined(TARGET_ARM) // TARGET_ARM +extern "C" void CallJittedMethodRetBuffR0(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +extern "C" void CallJittedMethodRetBuffR1(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); +extern "C" void InterpreterStubRetBuffR0(); +extern "C" void InterpreterStubRetBuffR1(); #else // TARGET_WINDOWS && TARGET_AMD64 extern "C" void CallJittedMethodRetBuff(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); extern "C" void InterpreterStubRetBuff(); @@ -1216,6 +1221,11 @@ CallStubHeader::InvokeFunctionPtr CallStubGenerator::GetInvokeFunctionPtr(CallSt INVOKE_FUNCTION_PTR(CallJittedMethodRetBuffRCX); case ReturnTypeBuffArg2: INVOKE_FUNCTION_PTR(CallJittedMethodRetBuffRDX); +#elif defined(TARGET_ARM) + case ReturnTypeBuffArg1: + INVOKE_FUNCTION_PTR(CallJittedMethodRetBuffR0); + case ReturnTypeBuffArg2: + INVOKE_FUNCTION_PTR(CallJittedMethodRetBuffR1); #else // TARGET_WINDOWS && TARGET_AMD64 case ReturnTypeBuff: INVOKE_FUNCTION_PTR(CallJittedMethodRetBuff); @@ -1291,6 +1301,11 @@ PCODE CallStubGenerator::GetInterpreterReturnTypeHandler(CallStubGenerator::Retu RETURN_TYPE_HANDLER(InterpreterStubRetBuffRCX); case ReturnTypeBuffArg2: RETURN_TYPE_HANDLER(InterpreterStubRetBuffRDX); +#elif defined(TARGET_ARM) + case ReturnTypeBuffArg1: + RETURN_TYPE_HANDLER(InterpreterStubRetBuffR0); + case ReturnTypeBuffArg2: + RETURN_TYPE_HANDLER(InterpreterStubRetBuffR1); #else // TARGET_WINDOWS && TARGET_AMD64 case ReturnTypeBuff: RETURN_TYPE_HANDLER(InterpreterStubRetBuff); @@ -1620,11 +1635,11 @@ void CallStubGenerator::ComputeCallStub(MetaSig &sig, PCODE *pRoutines) { pRoutines[m_routineIndex++] = GetGPRegRangeRoutine(m_r1, m_r2); } - else if (m_x1 != NoRange) + if (m_x1 != NoRange) { pRoutines[m_routineIndex++] = GetFPRegRangeRoutine(m_x1, m_x2); } - else if (m_s1 != NoRange) + if (m_s1 != NoRange) { 
pRoutines[m_routineIndex++] = GetStackRoutine(); #ifdef TARGET_64BIT @@ -1841,7 +1856,7 @@ CallStubGenerator::ReturnType CallStubGenerator::GetReturnType(ArgIterator *pArg { if (pArgIt->HasRetBuffArg()) { -#if defined(TARGET_WINDOWS) && defined(TARGET_AMD64) +#if (defined(TARGET_WINDOWS) && defined(TARGET_AMD64)) || defined(TARGET_ARM) if (pArgIt->HasThis()) { return ReturnTypeBuffArg2; diff --git a/src/coreclr/vm/callstubgenerator.h b/src/coreclr/vm/callstubgenerator.h index b97d8c14c6aa45..5eb2d3a55bd665 100644 --- a/src/coreclr/vm/callstubgenerator.h +++ b/src/coreclr/vm/callstubgenerator.h @@ -74,7 +74,7 @@ class CallStubGenerator ReturnTypeI4, ReturnTypeFloat, #endif // TARGET_64BIT -#if defined(TARGET_WINDOWS) && defined(TARGET_AMD64) +#if (defined(TARGET_WINDOWS) && defined(TARGET_AMD64)) || defined(TARGET_ARM) ReturnTypeBuffArg1, ReturnTypeBuffArg2, #else From b3ba6ec68f377f36b7c8dcfda40acf72a1647a93 Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Mon, 13 Oct 2025 17:45:27 +0900 Subject: [PATCH 20/33] [DO NOT MERGE] For Test --- src/coreclr/interpreter/eeinterp.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/interpreter/eeinterp.cpp b/src/coreclr/interpreter/eeinterp.cpp index 1762477c0fc2fc..acc667ca37ed9d 100644 --- a/src/coreclr/interpreter/eeinterp.cpp +++ b/src/coreclr/interpreter/eeinterp.cpp @@ -50,8 +50,8 @@ CorJitResult CILInterp::compileMethod(ICorJitInfo* compHnd, bool doInterpret = false; - if ((g_interpModule != NULL) && (methodInfo->scope == g_interpModule)) - doInterpret = true; + // if ((g_interpModule != NULL) && (methodInfo->scope == g_interpModule)) + // doInterpret = true; { switch (InterpConfig.InterpMode()) From 30e25e0c4e0cbef9d4e9f64331718a5722c47c92 Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Tue, 14 Oct 2025 15:26:52 +0900 Subject: [PATCH 21/33] [INTRP] Fix release build errors --- src/coreclr/vm/arm/asmconstants.h | 2 +- src/coreclr/vm/arm/asmhelpers.S | 2 ++ 
src/coreclr/vm/callstubgenerator.cpp | 2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/coreclr/vm/arm/asmconstants.h b/src/coreclr/vm/arm/asmconstants.h index 85c36e600a1d9f..077f561cbc2831 100644 --- a/src/coreclr/vm/arm/asmconstants.h +++ b/src/coreclr/vm/arm/asmconstants.h @@ -207,7 +207,7 @@ ASMCONSTANTS_C_ASSERT(OFFSETOF__ThreadLocalInfo__m_pThread == offsetof(ThreadLoc #ifdef _DEBUG #define OFFSETOF__InterpMethod__pCallStub 0x14 #else -#define OFFSETOF__InterpMethod__pCallStub 0x0 +#define OFFSETOF__InterpMethod__pCallStub 0x10 #endif ASMCONSTANTS_C_ASSERT(OFFSETOF__InterpMethod__pCallStub == offsetof(InterpMethod, pCallStub)) diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index 28c580860c12bf..6459813b74c41c 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -898,6 +898,7 @@ LEAF_ENTRY ThisPtrRetBufPrecodeWorker, _TEXT EPILOG_BRANCH_REG r12 LEAF_END ThisPtrRetBufPrecodeWorker, _TEXT +#ifdef FEATURE_INTERPRETER LEAF_ENTRY Load_R0 ldr r0, [r7], #4 ldr r5, [r6], #4 @@ -1184,3 +1185,4 @@ NESTED_ENTRY InterpreterStubRetBuffR1, _TEXT, NoHandler EPILOG_POP {pc} NESTED_END InterpreterStubRetBuffR1, _TEXT +#endif // FEATURE_INTERPRETER diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp index 6d43cde6abcd13..70734ec6807de0 100644 --- a/src/coreclr/vm/callstubgenerator.cpp +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -1117,7 +1117,7 @@ PCODE CallStubGenerator::GetFPRegRangeRoutine(int x1, int x2) #endif int index = x1 * NUM_FLOAT_ARGUMENT_REGISTERS + x2; #ifdef TARGET_ARM - _ASSERTE(!"Not supported FP reg in ARMEL"); + _ASSERTE(!"Not support FP reg yet"); return NULL; #else return m_interpreterToNative ? 
FPRegsRoutines[index] : FPRegsStoreRoutines[index]; From 82769b45da85b77172fe0487341b9e0dfc4392e8 Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Tue, 14 Oct 2025 15:34:28 +0900 Subject: [PATCH 22/33] Revert "[DO NOT MERGE] For Test" This reverts commit b3ba6ec68f377f36b7c8dcfda40acf72a1647a93. --- src/coreclr/interpreter/eeinterp.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/interpreter/eeinterp.cpp b/src/coreclr/interpreter/eeinterp.cpp index acc667ca37ed9d..1762477c0fc2fc 100644 --- a/src/coreclr/interpreter/eeinterp.cpp +++ b/src/coreclr/interpreter/eeinterp.cpp @@ -50,8 +50,8 @@ CorJitResult CILInterp::compileMethod(ICorJitInfo* compHnd, bool doInterpret = false; - // if ((g_interpModule != NULL) && (methodInfo->scope == g_interpModule)) - // doInterpret = true; + if ((g_interpModule != NULL) && (methodInfo->scope == g_interpModule)) + doInterpret = true; { switch (InterpConfig.InterpMode()) From f4024539598c3cd655e3b163f779ebccd6afaff1 Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Tue, 14 Oct 2025 16:30:10 +0900 Subject: [PATCH 23/33] Workarounds for errors --- src/coreclr/vm/callstubgenerator.cpp | 2 +- src/coreclr/vm/interpexec.cpp | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp index 70734ec6807de0..ac8c72c4f9daac 100644 --- a/src/coreclr/vm/callstubgenerator.cpp +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -1118,7 +1118,7 @@ PCODE CallStubGenerator::GetFPRegRangeRoutine(int x1, int x2) int index = x1 * NUM_FLOAT_ARGUMENT_REGISTERS + x2; #ifdef TARGET_ARM _ASSERTE(!"Not support FP reg yet"); - return NULL; + return 0; #else return m_interpreterToNative ? 
FPRegsRoutines[index] : FPRegsStoreRoutines[index]; #endif diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp index 4c45cb933bc275..cef7b384923a42 100644 --- a/src/coreclr/vm/interpexec.cpp +++ b/src/coreclr/vm/interpexec.cpp @@ -344,7 +344,11 @@ typedef void (*HELPER_FTN_V_PP)(void*, void*); InterpThreadContext::InterpThreadContext() { // FIXME VirtualAlloc/mmap with INTERP_STACK_ALIGNMENT alignment +#ifdef TARGET_UNIX pStackStart = pStackPointer = (int8_t*)aligned_alloc(INTERP_STACK_ALIGNMENT, INTERP_STACK_SIZE); +#else // !TARGET_UNIX + pStackStart = pStackPointer = (int8_t*)malloc(INTERP_STACK_SIZE); +#endif // TARGET_UNIX pStackEnd = pStackStart + INTERP_STACK_SIZE; } @@ -2594,10 +2598,10 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr break; } } - + OBJECTREF targetMethodObj = (*delegateObj)->GetTarget(); LOCAL_VAR(callArgsOffset, OBJECTREF) = targetMethodObj; - + if ((targetMethod = NonVirtualEntry2MethodDesc(targetAddress)) != NULL) { // In this case targetMethod holds a pointer to the MethodDesc that will be called by using targetMethodObj as From 4d48c033cc39aea0d93a14a3e57ce5a87cb6de7d Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Tue, 14 Oct 2025 18:05:48 +0900 Subject: [PATCH 24/33] Revert INTERP_STACK_SLOT_SIZE --- src/coreclr/interpreter/inc/interpretershared.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/coreclr/interpreter/inc/interpretershared.h b/src/coreclr/interpreter/inc/interpretershared.h index 5e6758cf8c9392..d9e79f3bffea57 100644 --- a/src/coreclr/interpreter/inc/interpretershared.h +++ b/src/coreclr/interpreter/inc/interpretershared.h @@ -14,11 +14,7 @@ #define INTERP_API __attribute__ ((visibility ("default"))) #endif // _MSC_VER -#ifdef TARGET_64BIT #define INTERP_STACK_SLOT_SIZE 8 // Alignment of each var offset on the interpreter stack -#else // !TARGET_64BIT -#define INTERP_STACK_SLOT_SIZE 4 // Alignment of each var offset on the interpreter stack -#endif 
#define INTERP_STACK_ALIGNMENT 16 // Alignment of interpreter stack at the start of a frame struct InterpHelperData { From 1607eb23d753e09e6a0c411f9420493270939bfb Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Wed, 15 Oct 2025 17:53:00 +0900 Subject: [PATCH 25/33] Revert "Revert INTERP_STACK_SLOT_SIZE" This reverts commit 4d48c033cc39aea0d93a14a3e57ce5a87cb6de7d. --- src/coreclr/interpreter/inc/interpretershared.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coreclr/interpreter/inc/interpretershared.h b/src/coreclr/interpreter/inc/interpretershared.h index d9e79f3bffea57..5e6758cf8c9392 100644 --- a/src/coreclr/interpreter/inc/interpretershared.h +++ b/src/coreclr/interpreter/inc/interpretershared.h @@ -14,7 +14,11 @@ #define INTERP_API __attribute__ ((visibility ("default"))) #endif // _MSC_VER +#ifdef TARGET_64BIT #define INTERP_STACK_SLOT_SIZE 8 // Alignment of each var offset on the interpreter stack +#else // !TARGET_64BIT +#define INTERP_STACK_SLOT_SIZE 4 // Alignment of each var offset on the interpreter stack +#endif #define INTERP_STACK_ALIGNMENT 16 // Alignment of interpreter stack at the start of a frame struct InterpHelperData { From 8280ee2c5891c25ac47babdda9753f4ea2f3d987 Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Wed, 15 Oct 2025 19:53:38 +0900 Subject: [PATCH 26/33] Update StackVal for 32 bit target --- src/coreclr/vm/interpexec.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/coreclr/vm/interpexec.h b/src/coreclr/vm/interpexec.h index d03f3b236943f7..dbbf0981dbdf57 100644 --- a/src/coreclr/vm/interpexec.h +++ b/src/coreclr/vm/interpexec.h @@ -15,10 +15,14 @@ struct StackVal union { int32_t i; +#ifdef TARGET_64BIT int64_t l; +#endif // TARGET_64BIT size_t s; float f; +#ifdef TARGET_64BIT double d; +#endif // TARGET_64BIT void *o; void *p; } data; From b93b7cdfd46751ab4d60f284414fc08b2849fb51 Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Mon, 20 Oct 2025 16:09:38 +0900 Subject: [PATCH 27/33] Fix align for 
8 bytes and value type --- src/coreclr/interpreter/compileropt.cpp | 7 +++++++ src/coreclr/vm/arm/asmhelpers.S | 6 ++++++ src/coreclr/vm/arm/cgencpu.h | 2 +- src/coreclr/vm/callstubgenerator.cpp | 24 +++++++++++++++++++++++- 4 files changed, 37 insertions(+), 2 deletions(-) diff --git a/src/coreclr/interpreter/compileropt.cpp b/src/coreclr/interpreter/compileropt.cpp index fa7d297802d71d..f680ba74e4a1c1 100644 --- a/src/coreclr/interpreter/compileropt.cpp +++ b/src/coreclr/interpreter/compileropt.cpp @@ -14,6 +14,13 @@ int32_t InterpCompiler::AllocVarOffset(int var, int32_t *pPos) size_t align = INTERP_STACK_SLOT_SIZE; +#ifdef TARGET_ARM + if(m_pVars[var].interpType == InterpTypeI8 || m_pVars[var].interpType == InterpTypeR8) + { + align = INTERP_STACK_SLOT_SIZE * 2; + } + else +#endif // TARGET_ARM if (size > INTERP_STACK_SLOT_SIZE) { assert(m_pVars[var].interpType == InterpTypeVT); diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index 6459813b74c41c..fd8f876ca0ccec 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -971,6 +971,12 @@ ALTERNATE_ENTRY Store_R3 EPILOG_BRANCH_REG r5 LEAF_END Store_R0_R1_R2_R3 +LEAF_ENTRY InjectInterpStackAlign + add r7, r7, #4 + ldr r5, [r6], #4 + EPILOG_BRANCH_REG r5 +LEAF_END InjectInterpStackAlign + LEAF_ENTRY Load_Stack ldr r8, [r6], #4 // SP offset ldr r9, [r6], #4 // number of stack slots diff --git a/src/coreclr/vm/arm/cgencpu.h b/src/coreclr/vm/arm/cgencpu.h index 1b019a0f2c5153..c786a3c88bc6f0 100644 --- a/src/coreclr/vm/arm/cgencpu.h +++ b/src/coreclr/vm/arm/cgencpu.h @@ -62,7 +62,7 @@ EXTERN_C void checkStack(void); #define COMMETHOD_PREPAD 12 // # extra bytes to allocate in addition to sizeof(ComCallMethodDesc) -#define STACK_ALIGN_SIZE 4 +#define STACK_ALIGN_SIZE 8 #define JUMP_ALLOCATE_SIZE 8 // # bytes to allocate for a jump instruction #define BACK_TO_BACK_JUMP_ALLOCATE_SIZE 8 // # bytes to allocate for a back to back jump instruction diff --git 
a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp index 832a042e3077b3..5d6cd41a1b1420 100644 --- a/src/coreclr/vm/callstubgenerator.cpp +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -1602,7 +1602,29 @@ void CallStubGenerator::ComputeCallStub(MetaSig &sig, PCODE *pRoutines) // Each entry on the interpreter stack is always aligned to at least 8 bytes, but some arguments are 16 byte aligned TypeHandle thArgTypeHandle; - if ((argIt.GetArgType(&thArgTypeHandle) == ELEMENT_TYPE_VALUETYPE) && thArgTypeHandle.GetSize() > 8) + CorElementType corType = argIt.GetArgType(&thArgTypeHandle); +#ifdef TARGET_ARM + if (corType == ELEMENT_TYPE_I8 || corType == ELEMENT_TYPE_U8 || corType == ELEMENT_TYPE_R8) + { + unsigned align = INTERP_STACK_SLOT_SIZE * 2; + if (interpreterStackOffset != ALIGN_UP(interpreterStackOffset, align)) + { + TerminateCurrentRoutineIfNotOfNewType(RoutineType::None, pRoutines); + + interpreterStackOffset += INTERP_STACK_SLOT_SIZE; + pRoutines[m_routineIndex++] = (PCODE)InjectInterpStackAlign; +#if LOG_COMPUTE_CALL_STUB + printf("Inject stack align argument\n"); +#endif + } + + assert(interpreterStackOffset == ALIGN_UP(interpreterStackOffset, align)); + + interpStackSlotSize = INTERP_STACK_SLOT_SIZE * 2; + } + else +#endif + if ((corType == ELEMENT_TYPE_VALUETYPE) && thArgTypeHandle.GetSize() > INTERP_STACK_SLOT_SIZE) { unsigned align = CEEInfo::getClassAlignmentRequirementStatic(thArgTypeHandle); if (align < INTERP_STACK_SLOT_SIZE) From a9c0d6e36c368b1805bcaa361a7fd91a67a6a4d3 Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Mon, 20 Oct 2025 18:38:16 +0900 Subject: [PATCH 28/33] 8 bytes stack size for WASM --- src/coreclr/interpreter/inc/interpretershared.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/interpreter/inc/interpretershared.h b/src/coreclr/interpreter/inc/interpretershared.h index 5e6758cf8c9392..7f21f5159a8ce7 100644 --- a/src/coreclr/interpreter/inc/interpretershared.h +++ 
b/src/coreclr/interpreter/inc/interpretershared.h @@ -14,11 +14,11 @@ #define INTERP_API __attribute__ ((visibility ("default"))) #endif // _MSC_VER -#ifdef TARGET_64BIT +#if defined(TARGET_64BIT) || defined(TARGET_WASM) #define INTERP_STACK_SLOT_SIZE 8 // Alignment of each var offset on the interpreter stack -#else // !TARGET_64BIT +#else // !TARGET_64BIT && !TARGET_WASM #define INTERP_STACK_SLOT_SIZE 4 // Alignment of each var offset on the interpreter stack -#endif +#endif // TARGET_64BIT || TARGET_WASM #define INTERP_STACK_ALIGNMENT 16 // Alignment of interpreter stack at the start of a frame struct InterpHelperData { From 705cf2eee7126c2df2a74ecbd1477f92879db8b3 Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Wed, 22 Oct 2025 17:59:14 +0900 Subject: [PATCH 29/33] Change interpreter stack size to 8 bytes - Handle copying args / ret value between interpreter stack and native stack - No Range Expansion for value (>= 8 bytes) - Terminate current routines and add a routine for the value --- src/coreclr/interpreter/compiler.cpp | 21 +-- src/coreclr/interpreter/compileropt.cpp | 7 - .../interpreter/inc/interpretershared.h | 4 - src/coreclr/vm/arm/asmhelpers.S | 107 ++++++++++++---- src/coreclr/vm/callstubgenerator.cpp | 121 +++++++++++++++--- src/coreclr/vm/callstubgenerator.h | 4 + src/coreclr/vm/interpexec.h | 4 - 7 files changed, 190 insertions(+), 78 deletions(-) diff --git a/src/coreclr/interpreter/compiler.cpp b/src/coreclr/interpreter/compiler.cpp index 7d717aab399de3..02e02f734e30ce 100644 --- a/src/coreclr/interpreter/compiler.cpp +++ b/src/coreclr/interpreter/compiler.cpp @@ -684,12 +684,6 @@ void InterpCompiler::PushStackType(StackType stackType, CORINFO_CLASS_HANDLE cls int size = m_compHnd->getClassSize(clsHnd); PushTypeExplicit(stackType, clsHnd, size); } -#ifndef TARGET_64BIT - else if (stackType == StackTypeI8 || stackType == StackTypeR8) - { - PushTypeExplicit(stackType, clsHnd, INTERP_STACK_SLOT_SIZE * 2); - } -#endif // !TARGET_64BIT else { // 
We don't really care about the exact size for non-valuetypes @@ -1807,11 +1801,7 @@ void InterpCompiler::EmitConv(StackInfo *sp, StackType type, InterpOpcode convOp InterpInst *newInst = AddIns(convOp); newInst->SetSVar(sp->var); -#ifndef TARGET_64BIT - int32_t var = CreateVarExplicit(g_interpTypeFromStackType[type], NULL, (type == StackTypeI8 || type == StackTypeR8) ? INTERP_STACK_SLOT_SIZE * 2 : INTERP_STACK_SLOT_SIZE); -#else // TARGET_64BIT int32_t var = CreateVarExplicit(g_interpTypeFromStackType[type], NULL, INTERP_STACK_SLOT_SIZE); -#endif // !TARGET_64BIT new (sp) StackInfo(type, NULL, var); newInst->SetDVar(var); @@ -1876,20 +1866,13 @@ int32_t InterpCompiler::GetInterpTypeStackSize(CORINFO_CLASS_HANDLE clsHnd, Inte if (align < INTERP_STACK_SLOT_SIZE) align = INTERP_STACK_SLOT_SIZE; - // We do not align beyond the stack alignment - // (This is relevant for structs with very high alignment requirements, + // We do not align beyond the stack alignment + // (This is relevant for structs with very high alignment requirements, // where we align within struct layout, but the structs are not actually // aligned on the stack) if (align > INTERP_STACK_ALIGNMENT) align = INTERP_STACK_ALIGNMENT; } -#ifndef TARGET_64BIT - else if (interpType == InterpTypeI8 || interpType == InterpTypeR8) - { - size = INTERP_STACK_SLOT_SIZE * 2; // not really - align = INTERP_STACK_SLOT_SIZE * 2; - } -#endif // !TARGET_64BIT else { size = INTERP_STACK_SLOT_SIZE; // not really diff --git a/src/coreclr/interpreter/compileropt.cpp b/src/coreclr/interpreter/compileropt.cpp index f680ba74e4a1c1..fa7d297802d71d 100644 --- a/src/coreclr/interpreter/compileropt.cpp +++ b/src/coreclr/interpreter/compileropt.cpp @@ -14,13 +14,6 @@ int32_t InterpCompiler::AllocVarOffset(int var, int32_t *pPos) size_t align = INTERP_STACK_SLOT_SIZE; -#ifdef TARGET_ARM - if(m_pVars[var].interpType == InterpTypeI8 || m_pVars[var].interpType == InterpTypeR8) - { - align = INTERP_STACK_SLOT_SIZE * 2; - } - else 
-#endif // TARGET_ARM if (size > INTERP_STACK_SLOT_SIZE) { assert(m_pVars[var].interpType == InterpTypeVT); diff --git a/src/coreclr/interpreter/inc/interpretershared.h b/src/coreclr/interpreter/inc/interpretershared.h index 7f21f5159a8ce7..d9e79f3bffea57 100644 --- a/src/coreclr/interpreter/inc/interpretershared.h +++ b/src/coreclr/interpreter/inc/interpretershared.h @@ -14,11 +14,7 @@ #define INTERP_API __attribute__ ((visibility ("default"))) #endif // _MSC_VER -#if defined(TARGET_64BIT) || defined(TARGET_WASM) #define INTERP_STACK_SLOT_SIZE 8 // Alignment of each var offset on the interpreter stack -#else // !TARGET_64BIT && !TARGET_WASM -#define INTERP_STACK_SLOT_SIZE 4 // Alignment of each var offset on the interpreter stack -#endif // TARGET_64BIT || TARGET_WASM #define INTERP_STACK_ALIGNMENT 16 // Alignment of interpreter stack at the start of a frame struct InterpHelperData { diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index fd8f876ca0ccec..e4c7c493ac5595 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -900,77 +900,111 @@ LEAF_END ThisPtrRetBufPrecodeWorker, _TEXT #ifdef FEATURE_INTERPRETER LEAF_ENTRY Load_R0 - ldr r0, [r7], #4 + ldr r0, [r7], #8 ldr r5, [r6], #4 EPILOG_BRANCH_REG r5 LEAF_END Load_R0 LEAF_ENTRY Load_R0_R1 - ldr r0, [r7], #4 + ldr r0, [r7], #8 ALTERNATE_ENTRY Load_R1 - ldr r1, [r7], #4 + ldr r1, [r7], #8 ldr r5, [r6], #4 EPILOG_BRANCH_REG r5 LEAF_END Load_R0_R1 LEAF_ENTRY Load_R0_R1_R2 - ldr r0, [r7], #4 + ldr r0, [r7], #8 ALTERNATE_ENTRY Load_R1_R2 - ldr r1, [r7], #4 + ldr r1, [r7], #8 ALTERNATE_ENTRY Load_R2 - ldr r2, [r7], #4 + ldr r2, [r7], #8 ldr r5, [r6], #4 EPILOG_BRANCH_REG r5 LEAF_END Load_R0_R1_R2 LEAF_ENTRY Load_R0_R1_R2_R3 - ldr r0, [r7], #4 + ldr r0, [r7], #8 ALTERNATE_ENTRY Load_R1_R2_R3 - ldr r1, [r7], #4 + ldr r1, [r7], #8 ALTERNATE_ENTRY Load_R2_R3 - ldr r2, [r7], #4 + ldr r2, [r7], #8 ALTERNATE_ENTRY Load_R3 - ldr r3, [r7], #4 + ldr r3, [r7], #8 ldr 
r5, [r6], #4 EPILOG_BRANCH_REG r5 LEAF_END Load_R0_R1_R2_R3 +LEAF_ENTRY Load_R0_R1_4B + ldr r0, [r7], #4 + ldr r1, [r7], #4 + ldr r5, [r6], #4 + EPILOG_BRANCH_REG r5 +LEAF_END Load_R0_R1_4B + +LEAF_ENTRY Load_R0_R1_R2_R3_4B + ldr r0, [r7], #4 + ldr r1, [r7], #4 +ALTERNATE_ENTRY Load_R2_R3_4B + ldr r2, [r7], #4 + ldr r3, [r7], #4 + ldr r5, [r6], #4 + EPILOG_BRANCH_REG r5 +LEAF_END Load_R0_R1_R2_R3_4B + LEAF_ENTRY Store_R0 - str r0, [r7], #4 + str r0, [r7], #8 ldr r5, [r6], #4 EPILOG_BRANCH_REG r5 LEAF_END Store_R0 LEAF_ENTRY Store_R0_R1 - str r0, [r7], #4 + str r0, [r7], #8 ALTERNATE_ENTRY Store_R1 - str r1, [r7], #4 + str r1, [r7], #8 ldr r5, [r6], #4 EPILOG_BRANCH_REG r5 LEAF_END Store_R0_R1 LEAF_ENTRY Store_R0_R1_R2 - str r0, [r7], #4 + str r0, [r7], #8 ALTERNATE_ENTRY Store_R1_R2 - str r1, [r7], #4 + str r1, [r7], #8 ALTERNATE_ENTRY Store_R2 - str r2, [r7], #4 + str r2, [r7], #8 ldr r5, [r6], #4 EPILOG_BRANCH_REG r5 LEAF_END Store_R0_R1_R2 LEAF_ENTRY Store_R0_R1_R2_R3 - str r0, [r7], #4 + str r0, [r7], #8 ALTERNATE_ENTRY Store_R1_R2_R3 - str r1, [r7], #4 + str r1, [r7], #8 ALTERNATE_ENTRY Store_R2_R3 - str r2, [r7], #4 + str r2, [r7], #8 ALTERNATE_ENTRY Store_R3 - str r3, [r7], #4 + str r3, [r7], #8 ldr r5, [r6], #4 EPILOG_BRANCH_REG r5 LEAF_END Store_R0_R1_R2_R3 +LEAF_ENTRY Store_R0_R1_4B + str r0, [r7], #4 + str r1, [r7], #4 + ldr r5, [r6], #4 + EPILOG_BRANCH_REG r5 +LEAF_END Store_R0_R1_4B + +LEAF_ENTRY Store_R0_R1_R2_R3_4B + str r0, [r7], #4 + str r1, [r7], #4 +ALTERNATE_ENTRY Store_R2_R3_4B + str r2, [r7], #4 + str r3, [r7], #4 + ldr r5, [r6], #4 + EPILOG_BRANCH_REG r5 +LEAF_END Store_R0_R1_R2_R3_4B + LEAF_ENTRY InjectInterpStackAlign add r7, r7, #4 ldr r5, [r6], #4 @@ -982,7 +1016,7 @@ LEAF_ENTRY Load_Stack ldr r9, [r6], #4 // number of stack slots add r8, sp, r8 LOCAL_LABEL(CopyLoop): - ldr r5, [r7], #4 + ldr r5, [r7], #8 str r5, [r8], #4 subs r9, r9, #4 bne LOCAL_LABEL(CopyLoop) @@ -990,6 +1024,19 @@ LOCAL_LABEL(CopyLoop): EPILOG_BRANCH_REG r5 LEAF_END 
Load_Stack +LEAF_ENTRY Load_Stack_4B + ldr r8, [r6], #4 // SP offset + ldr r9, [r6], #4 // number of stack slots + add r8, sp, r8 +LOCAL_LABEL(CopyLoop_4B): + ldr r5, [r7], #4 + str r5, [r8], #4 + subs r9, r9, #4 + bne LOCAL_LABEL(CopyLoop_4B) + ldr r5, [r6], #4 + EPILOG_BRANCH_REG r5 +LEAF_END Load_Stack_4B + LEAF_ENTRY Store_Stack ldr r9, [r6], #4 // SP offset ldr r10, [r6], #4 // number of stack slots @@ -997,13 +1044,27 @@ LEAF_ENTRY Store_Stack add r9, r9, #__PWTB_TransitionBlock + SIZEOF__TransitionBlock LOCAL_LABEL(StoreCopyLoop): ldr r5, [r9], #4 - str r5, [r7], #4 + str r5, [r7], #8 subs r10, r10, #4 bne LOCAL_LABEL(StoreCopyLoop) - ldr r5, [r6], #4 + ldr r5, [r6], #8 EPILOG_BRANCH_REG r5 LEAF_END Store_Stack +LEAF_ENTRY Store_Stack_4B + ldr r9, [r6], #4 // SP offset + ldr r10, [r6], #4 // number of stack slots + add r9, sp, r9 + add r9, r9, #__PWTB_TransitionBlock + SIZEOF__TransitionBlock +LOCAL_LABEL(StoreCopyLoop_4B): + ldr r5, [r9], #4 + str r5, [r7], #4 + subs r10, r10, #4 + bne LOCAL_LABEL(StoreCopyLoop_4B) + ldr r5, [r6], #4 + EPILOG_BRANCH_REG r5 +LEAF_END Store_Stack_4B + NESTED_ENTRY CallJittedMethodRetVoid, _TEXT, NoHandler PROLOG_PUSH "{r4-r9,lr}" mov r4, sp diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp index 5d6cd41a1b1420..daf3ab54509d06 100644 --- a/src/coreclr/vm/callstubgenerator.cpp +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -1004,6 +1004,15 @@ extern "C" void Store_R2(); extern "C" void Store_R2_R3(); extern "C" void Store_R3(); +extern "C" void Load_R0_R1_4B(); +extern "C" void Load_R0_R1_R2_R3_4B(); +extern "C" void Load_R2_R3_4B(); +extern "C" void Load_Stack_4B(); +extern "C" void Store_R0_R1_4B(); +extern "C" void Store_R0_R1_R2_R3_4B(); +extern "C" void Store_R2_R3_4B(); +extern "C" void Store_Stack_4B(); + PCODE GPRegsRoutines[] = { (PCODE)Load_R0, // 00 @@ -1044,6 +1053,46 @@ PCODE GPRegsStoreRoutines[] = (PCODE)Store_R3, // 15 }; +PCODE GPRegLoadRoutines_4B[] = +{ + (PCODE)0, // 00 
+ (PCODE)Load_R0_R1_4B, // 01 + (PCODE)0, // 02 + (PCODE)Load_R0_R1_R2_R3_4B, // 03 + (PCODE)0, // 04 + (PCODE)0, // 05 + (PCODE)0, // 06 + (PCODE)0, // 07 + (PCODE)0, // 08 + (PCODE)0, // 09 + (PCODE)0, // 10 + (PCODE)Load_R2_R3_4B, // 11 + (PCODE)0, // 12 + (PCODE)0, // 13 + (PCODE)0, // 14 + (PCODE)0, // 15 +}; + +PCODE GPRegStoreRoutines_4B[] = +{ + (PCODE)0, // 00 + (PCODE)Store_R0_R1_4B, // 01 + (PCODE)0, // 02 + (PCODE)Store_R0_R1_R2_R3_4B, // 03 + (PCODE)0, // 04 + (PCODE)0, // 05 + (PCODE)0, // 06 + (PCODE)0, // 07 + (PCODE)0, // 08 + (PCODE)0, // 09 + (PCODE)0, // 10 + (PCODE)Store_R2_R3_4B, // 11 + (PCODE)0, // 12 + (PCODE)0, // 13 + (PCODE)0, // 14 + (PCODE)0, // 15 +}; + #endif // TARGET_ARM #define LOG_COMPUTE_CALL_STUB 0 @@ -1125,6 +1174,25 @@ PCODE CallStubGenerator::GetFPRegRangeRoutine(int x1, int x2) #endif } +#ifdef TARGET_ARM +PCODE CallStubGenerator::GetRegRoutine_4B(int r1, int r2) +{ +#if LOG_COMPUTE_CALL_STUB + printf("GetRegRoutine_4B\n"); +#endif + int index = r1 * NUM_ARGUMENT_REGISTERS + r2; + return m_interpreterToNative ? GPRegLoadRoutines_4B[index] : GPRegStoreRoutines_4B[index]; +} + +PCODE CallStubGenerator::GetStackRoutine_4B() +{ +#if LOG_COMPUTE_CALL_STUB + printf("GetStackRoutine_4B\n"); +#endif + return m_interpreterToNative ? 
(PCODE)Load_Stack_4B : (PCODE)Store_Stack_4B; +} +#endif // TARGET_ARM + extern "C" void CallJittedMethodRetVoid(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); extern "C" void CallJittedMethodRetDouble(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); extern "C" void CallJittedMethodRetI8(PCODE *routines, int8_t*pArgs, int8_t*pRet, int totalStackSize); @@ -1603,27 +1671,6 @@ void CallStubGenerator::ComputeCallStub(MetaSig &sig, PCODE *pRoutines) // Each entry on the interpreter stack is always aligned to at least 8 bytes, but some arguments are 16 byte aligned TypeHandle thArgTypeHandle; CorElementType corType = argIt.GetArgType(&thArgTypeHandle); -#ifdef TARGET_ARM - if (corType == ELEMENT_TYPE_I8 || corType == ELEMENT_TYPE_U8 || corType == ELEMENT_TYPE_R8) - { - unsigned align = INTERP_STACK_SLOT_SIZE * 2; - if (interpreterStackOffset != ALIGN_UP(interpreterStackOffset, align)) - { - TerminateCurrentRoutineIfNotOfNewType(RoutineType::None, pRoutines); - - interpreterStackOffset += INTERP_STACK_SLOT_SIZE; - pRoutines[m_routineIndex++] = (PCODE)InjectInterpStackAlign; -#if LOG_COMPUTE_CALL_STUB - printf("Inject stack align argument\n"); -#endif - } - - assert(interpreterStackOffset == ALIGN_UP(interpreterStackOffset, align)); - - interpStackSlotSize = INTERP_STACK_SLOT_SIZE * 2; - } - else -#endif if ((corType == ELEMENT_TYPE_VALUETYPE) && thArgTypeHandle.GetSize() > INTERP_STACK_SLOT_SIZE) { unsigned align = CEEInfo::getClassAlignmentRequirementStatic(thArgTypeHandle); @@ -1753,6 +1800,20 @@ void CallStubGenerator::ProcessArgument(ArgIterator *pArgIt, ArgLocDesc& argLocD #if LOG_COMPUTE_CALL_STUB printf("m_cGenReg=%d\n", (int)argLocDesc.m_cGenReg); #endif // LOG_COMPUTE_CALL_STUB +#ifdef TARGET_ARM + TypeHandle thArgTypeHandle; + CorElementType corType = pArgIt ? 
pArgIt->GetArgType(&thArgTypeHandle) : ELEMENT_TYPE_END; + if (corType == ELEMENT_TYPE_I8 || corType == ELEMENT_TYPE_U8 || corType == ELEMENT_TYPE_R8 || (corType == ELEMENT_TYPE_VALUETYPE && thArgTypeHandle.GetSize() >= INTERP_STACK_SLOT_SIZE)) + { + if (m_r1 != NoRange) + { + pRoutines[m_routineIndex++] = GetGPRegRangeRoutine(m_r1, m_r2); + } + pRoutines[m_routineIndex++] = GetRegRoutine_4B(argLocDesc.m_idxGenReg, argLocDesc.m_idxGenReg + argLocDesc.m_cGenReg - 1); + m_r1 = NoRange; + } + else +#endif // TARGET_ARM if (m_r1 == NoRange) // No active range yet { // Start a new range @@ -1808,6 +1869,24 @@ void CallStubGenerator::ProcessArgument(ArgIterator *pArgIt, ArgLocDesc& argLocD #if LOG_COMPUTE_CALL_STUB printf("m_byteStackSize=%d\n", (int)argLocDesc.m_byteStackSize); #endif // LOG_COMPUTE_CALL_STUB +#ifdef TARGET_ARM + TypeHandle thArgTypeHandle; + CorElementType corType = pArgIt ? pArgIt->GetArgType(&thArgTypeHandle) : ELEMENT_TYPE_END; + if (corType == ELEMENT_TYPE_I8 || corType == ELEMENT_TYPE_U8 || corType == ELEMENT_TYPE_R8 || (corType == ELEMENT_TYPE_VALUETYPE && thArgTypeHandle.GetSize() >= INTERP_STACK_SLOT_SIZE)) + { + if (m_s1 != NoRange) + { + pRoutines[m_routineIndex++] = GetStackRoutine(); + pRoutines[m_routineIndex++] = m_s1; + pRoutines[m_routineIndex++] = m_s2 - m_s1 + 1; + } + pRoutines[m_routineIndex++] = GetStackRoutine_4B(); + pRoutines[m_routineIndex++] = argLocDesc.m_byteStackIndex; + pRoutines[m_routineIndex++] = argLocDesc.m_byteStackSize; + m_s1 = NoRange; + } + else +#endif // TARGET_ARM if (m_s1 == NoRange) // No active range yet { // Start a new range diff --git a/src/coreclr/vm/callstubgenerator.h b/src/coreclr/vm/callstubgenerator.h index 412f3901a5dea7..53ed5145abea6d 100644 --- a/src/coreclr/vm/callstubgenerator.h +++ b/src/coreclr/vm/callstubgenerator.h @@ -132,6 +132,10 @@ class CallStubGenerator #endif // TARGET_APPLE && TARGET_ARM64 PCODE GetFPRegRangeRoutine(int x1, int x2); PCODE GetGPRegRangeRoutine(int r1, int r2); 
+#ifdef TARGET_ARM + PCODE GetRegRoutine_4B(int r1, int r2); + PCODE GetStackRoutine_4B(); +#endif // TARGET_ARM ReturnType GetReturnType(ArgIterator *pArgIt); CallStubHeader::InvokeFunctionPtr GetInvokeFunctionPtr(ReturnType returnType); PCODE GetInterpreterReturnTypeHandler(ReturnType returnType); diff --git a/src/coreclr/vm/interpexec.h b/src/coreclr/vm/interpexec.h index dbbf0981dbdf57..d03f3b236943f7 100644 --- a/src/coreclr/vm/interpexec.h +++ b/src/coreclr/vm/interpexec.h @@ -15,14 +15,10 @@ struct StackVal union { int32_t i; -#ifdef TARGET_64BIT int64_t l; -#endif // TARGET_64BIT size_t s; float f; -#ifdef TARGET_64BIT double d; -#endif // TARGET_64BIT void *o; void *p; } data; From fe42ce95e615940476240b343e4d1849b238ccb0 Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Wed, 22 Oct 2025 20:24:46 +0900 Subject: [PATCH 30/33] Update ProcessArgument --- src/coreclr/vm/callstubgenerator.cpp | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/src/coreclr/vm/callstubgenerator.cpp b/src/coreclr/vm/callstubgenerator.cpp index daf3ab54509d06..4c6404baed9bee 100644 --- a/src/coreclr/vm/callstubgenerator.cpp +++ b/src/coreclr/vm/callstubgenerator.cpp @@ -1786,6 +1786,14 @@ void CallStubGenerator::ProcessArgument(ArgIterator *pArgIt, ArgLocDesc& argLocD LIMITED_METHOD_CONTRACT; RoutineType argType = RoutineType::None; +#ifdef TARGET_ARM + TypeHandle thArgTypeHandle; + CorElementType corType = pArgIt ? 
pArgIt->GetArgType(&thArgTypeHandle) : ELEMENT_TYPE_END; + bool splitRequired = (corType == ELEMENT_TYPE_I8 || corType == ELEMENT_TYPE_U8 || corType == ELEMENT_TYPE_R8 || (corType == ELEMENT_TYPE_VALUETYPE && thArgTypeHandle.GetSize() >= INTERP_STACK_SLOT_SIZE)); + if (splitRequired) + argType = RoutineType::None; + else +#endif // TARGET_ARM if (argLocDesc.m_cGenReg != 0) argType = RoutineType::GPReg; else if (argLocDesc.m_cFloatReg != 0) @@ -1801,16 +1809,9 @@ void CallStubGenerator::ProcessArgument(ArgIterator *pArgIt, ArgLocDesc& argLocD printf("m_cGenReg=%d\n", (int)argLocDesc.m_cGenReg); #endif // LOG_COMPUTE_CALL_STUB #ifdef TARGET_ARM - TypeHandle thArgTypeHandle; - CorElementType corType = pArgIt ? pArgIt->GetArgType(&thArgTypeHandle) : ELEMENT_TYPE_END; - if (corType == ELEMENT_TYPE_I8 || corType == ELEMENT_TYPE_U8 || corType == ELEMENT_TYPE_R8 || (corType == ELEMENT_TYPE_VALUETYPE && thArgTypeHandle.GetSize() >= INTERP_STACK_SLOT_SIZE)) + if (splitRequired) { - if (m_r1 != NoRange) - { - pRoutines[m_routineIndex++] = GetGPRegRangeRoutine(m_r1, m_r2); - } pRoutines[m_routineIndex++] = GetRegRoutine_4B(argLocDesc.m_idxGenReg, argLocDesc.m_idxGenReg + argLocDesc.m_cGenReg - 1); - m_r1 = NoRange; } else #endif // TARGET_ARM @@ -1870,20 +1871,11 @@ void CallStubGenerator::ProcessArgument(ArgIterator *pArgIt, ArgLocDesc& argLocD printf("m_byteStackSize=%d\n", (int)argLocDesc.m_byteStackSize); #endif // LOG_COMPUTE_CALL_STUB #ifdef TARGET_ARM - TypeHandle thArgTypeHandle; - CorElementType corType = pArgIt ? 
pArgIt->GetArgType(&thArgTypeHandle) : ELEMENT_TYPE_END; - if (corType == ELEMENT_TYPE_I8 || corType == ELEMENT_TYPE_U8 || corType == ELEMENT_TYPE_R8 || (corType == ELEMENT_TYPE_VALUETYPE && thArgTypeHandle.GetSize() >= INTERP_STACK_SLOT_SIZE)) + if (splitRequired) { - if (m_s1 != NoRange) - { - pRoutines[m_routineIndex++] = GetStackRoutine(); - pRoutines[m_routineIndex++] = m_s1; - pRoutines[m_routineIndex++] = m_s2 - m_s1 + 1; - } pRoutines[m_routineIndex++] = GetStackRoutine_4B(); pRoutines[m_routineIndex++] = argLocDesc.m_byteStackIndex; pRoutines[m_routineIndex++] = argLocDesc.m_byteStackSize; - m_s1 = NoRange; } else #endif // TARGET_ARM From 4e3ac8dfe35523c3ad3105fed91c2ccbd7f77f4b Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Thu, 23 Oct 2025 13:20:15 +0900 Subject: [PATCH 31/33] Fix InterpreterStub --- src/coreclr/vm/arm/asmhelpers.S | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index e4c7c493ac5595..b6c15b267923f6 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -1162,8 +1162,10 @@ NESTED_ENTRY InterpreterStub, _TEXT, NoHandler ldr r5, [r6, #OFFSETOF__Thread__m_pInterpThreadContext] cbnz r5, LOCAL_LABEL(HaveInterpThreadContext) - mov r0, r6 - bl C_FUNC(_ZN6Thread22GetInterpThreadContextEv) // Thread::GetInterpThreadContext +LOCAL_LABEL(NoManagedThreadOrCallStub): + add r0, sp, #__PWTB_TransitionBlock + 20 + mov r1, r4 + bl C_FUNC(GetInterpThreadContextWithPossiblyMissingThreadOrCallStub); mov r5, r0 LOCAL_LABEL(HaveInterpThreadContext): @@ -1174,6 +1176,8 @@ LOCAL_LABEL(HaveInterpThreadContext): ldr r7, [r4] // InterpMethod* ldr r7, [r7, #OFFSETOF__InterpMethod__pCallStub] + cmp r7, #0 + beq LOCAL_LABEL(NoManagedThreadOrCallStub) add r6, r7, #OFFSETOF__CallStubHeader__Routines ldr r7, [r5, #OFFSETOF__InterpThreadContext__pStackPointer] // HERE ldr r5, [r6], 4 // InterpThreadContext From 
f28a210e3d22efd25ad7d9376d809dbcbc4109f8 Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Wed, 29 Oct 2025 16:58:56 +0900 Subject: [PATCH 32/33] Fix errors in arm assembly code --- src/coreclr/vm/arm/asmhelpers.S | 89 +++++++++++++++++---------------- 1 file changed, 47 insertions(+), 42 deletions(-) diff --git a/src/coreclr/vm/arm/asmhelpers.S b/src/coreclr/vm/arm/asmhelpers.S index b6c15b267923f6..00bae322469abb 100644 --- a/src/coreclr/vm/arm/asmhelpers.S +++ b/src/coreclr/vm/arm/asmhelpers.S @@ -1039,74 +1039,74 @@ LEAF_END Load_Stack_4B LEAF_ENTRY Store_Stack ldr r9, [r6], #4 // SP offset - ldr r10, [r6], #4 // number of stack slots + ldr r8, [r6], #4 // number of stack slots add r9, sp, r9 add r9, r9, #__PWTB_TransitionBlock + SIZEOF__TransitionBlock LOCAL_LABEL(StoreCopyLoop): ldr r5, [r9], #4 str r5, [r7], #8 - subs r10, r10, #4 + subs r8, r8, #4 bne LOCAL_LABEL(StoreCopyLoop) - ldr r5, [r6], #8 + ldr r5, [r6], #4 EPILOG_BRANCH_REG r5 LEAF_END Store_Stack LEAF_ENTRY Store_Stack_4B ldr r9, [r6], #4 // SP offset - ldr r10, [r6], #4 // number of stack slots + ldr r8, [r6], #4 // number of stack slots add r9, sp, r9 add r9, r9, #__PWTB_TransitionBlock + SIZEOF__TransitionBlock LOCAL_LABEL(StoreCopyLoop_4B): ldr r5, [r9], #4 str r5, [r7], #4 - subs r10, r10, #4 + subs r8, r8, #4 bne LOCAL_LABEL(StoreCopyLoop_4B) ldr r5, [r6], #4 EPILOG_BRANCH_REG r5 LEAF_END Store_Stack_4B NESTED_ENTRY CallJittedMethodRetVoid, _TEXT, NoHandler - PROLOG_PUSH "{r4-r9,lr}" + PROLOG_PUSH "{r4-r10,lr}" mov r4, sp - sub sp, sp, 4 sub sp, sp, r3 mov r6, r0 mov r7, r1 ldr r5, [r6], #4 + CHECK_STACK_ALIGNMENT blx r5 mov sp, r4 - EPILOG_POP "{r4-r9,pc}" + EPILOG_POP "{r4-r10,pc}" NESTED_END CallJittedMethodRetVoid, _TEXT NESTED_ENTRY CallJittedMethodRetI4, _TEXT, NoHandler - PROLOG_PUSH "{r4-r9,lr}" - push { r2 } + PROLOG_PUSH "{r4-r10,lr}" + mov r10, r2 mov r4, sp sub sp, sp, r3 mov r6, r0 mov r7, r1 ldr r5, [r6], #4 + CHECK_STACK_ALIGNMENT blx r5 mov sp, r4 - pop { r2 } - str r0, 
[r2] - EPILOG_POP "{r4-r9,pc}" + str r0, [r10] + EPILOG_POP "{r4-r10,pc}" NESTED_END CallJittedMethodRetI4, _TEXT NESTED_ENTRY CallJittedMethodRetI8, _TEXT, NoHandler - PROLOG_PUSH "{r4-r9,lr}" - push { r2 } + PROLOG_PUSH "{r4-r10,lr}" + mov r10, r2 mov r4, sp sub sp, sp, r3 mov r6, r0 mov r7, r1 ldr r5, [r6], #4 + CHECK_STACK_ALIGNMENT blx r5 mov sp, r4 - pop { r2 } - str r0, [r2] - str r1, [r2, 4] - EPILOG_POP "{r4-r9,pc}" + str r0, [r10] + str r1, [r10, 4] + EPILOG_POP "{r4-r10,pc}" NESTED_END CallJittedMethodRetI8, _TEXT NESTED_ENTRY CallJittedMethodRetFloat, _TEXT, NoHandler @@ -1118,31 +1118,31 @@ NESTED_ENTRY CallJittedMethodRetDouble, _TEXT, NoHandler NESTED_END CallJittedMethodRetDouble, _TEXT NESTED_ENTRY CallJittedMethodRetBuffR0, _TEXT, NoHandler - PROLOG_PUSH "{r4-r9,lr}" + PROLOG_PUSH "{r4-r10,lr}" mov r4, sp - sub sp, sp, 4 sub sp, sp, r3 mov r6, r0 mov r7, r1 ldr r5, [r6], #4 mov r0, r2 + CHECK_STACK_ALIGNMENT blx r5 mov sp, r4 - EPILOG_POP "{r4-r9,pc}" + EPILOG_POP "{r4-r10,pc}" NESTED_END CallJittedMethodRetBuffR0, _TEXT NESTED_ENTRY CallJittedMethodRetBuffR1, _TEXT, NoHandler - PROLOG_PUSH "{r4-r9,lr}" + PROLOG_PUSH "{r4-r10,lr}" mov r4, sp - sub sp, sp, 4 sub sp, sp, r3 mov r6, r0 mov r7, r1 ldr r5, [r6], #4 mov r1, r2 + CHECK_STACK_ALIGNMENT blx r5 mov sp, r4 - EPILOG_POP "{r4-r9,pc}" + EPILOG_POP "{r4-r10,pc}" NESTED_END CallJittedMethodRetBuffR1, _TEXT @@ -1152,7 +1152,7 @@ NESTED_ENTRY InterpreterStub, _TEXT, NoHandler // IR bytecode address mov r4, METHODDESC_REGISTER // InterpMethod - PROLOG_PUSH "{r0-r4}" + PROLOG_PUSH "{r0-r3}" INLINE_GET_TLS_VAR C_FUNC(t_CurrentThreadInfo) mov r6, r0 @@ -1163,17 +1163,14 @@ NESTED_ENTRY InterpreterStub, _TEXT, NoHandler cbnz r5, LOCAL_LABEL(HaveInterpThreadContext) LOCAL_LABEL(NoManagedThreadOrCallStub): - add r0, sp, #__PWTB_TransitionBlock + 20 + add r0, sp, #__PWTB_TransitionBlock + 16 mov r1, r4 + CHECK_STACK_ALIGNMENT bl C_FUNC(GetInterpThreadContextWithPossiblyMissingThreadOrCallStub); mov r5, r0 
LOCAL_LABEL(HaveInterpThreadContext): - # RESTORE_ARGUMENT_REGISTERS sp, __PWTB_ArgumentRegisters - EPILOG_POP "{r0-r4}" - # RESTORE_FLOAT_ARGUMENT_REGISTERS sp, __PWTB_FloatArgumentRegisters - ldr r7, [r4] // InterpMethod* ldr r7, [r7, #OFFSETOF__InterpMethod__pCallStub] cmp r7, #0 @@ -1181,6 +1178,9 @@ LOCAL_LABEL(HaveInterpThreadContext): add r6, r7, #OFFSETOF__CallStubHeader__Routines ldr r7, [r5, #OFFSETOF__InterpThreadContext__pStackPointer] // HERE ldr r5, [r6], 4 // InterpThreadContext + + EPILOG_POP "{r0-r3}" + CHECK_STACK_ALIGNMENT blx r5 EPILOG_WITH_TRANSITION_BLOCK_RETURN @@ -1189,38 +1189,41 @@ NESTED_END InterpreterStub, _TEXT NESTED_ENTRY InterpreterStubRetVoid, _TEXT, NoHandler PROLOG_PUSH {lr} - sub sp, sp, 12 + alloc_stack 12 add r0, sp, #__PWTB_TransitionBlock + 16 mov r1, r4 mov r2, 0 + CHECK_STACK_ALIGNMENT bl C_FUNC(ExecuteInterpretedMethod) - add sp, sp, 12 + free_stack 12 EPILOG_POP {pc} NESTED_END InterpreterStubRetVoid, _TEXT NESTED_ENTRY InterpreterStubRetI4, _TEXT, NoHandler PROLOG_PUSH {lr} - sub sp, sp, 12 + alloc_stack 12 add r0, sp, #__PWTB_TransitionBlock + 16 mov r1, r4 mov r2, 0 + CHECK_STACK_ALIGNMENT bl C_FUNC(ExecuteInterpretedMethod) ldr r0, [r0] - add sp, sp, 12 + free_stack 12 EPILOG_POP {pc} NESTED_END InterpreterStubRetI4, _TEXT NESTED_ENTRY InterpreterStubRetI8, _TEXT, NoHandler PROLOG_PUSH {lr} - sub sp, sp, 12 + alloc_stack 12 add r0, sp, #__PWTB_TransitionBlock + 16 mov r1, r4 mov r2, 0 + CHECK_STACK_ALIGNMENT bl C_FUNC(ExecuteInterpretedMethod) ldr r2, [r0] ldr r1, [r0, 4] mov r0, r2 - add sp, sp, 12 + free_stack 12 EPILOG_POP {pc} NESTED_END InterpreterStubRetI8, _TEXT @@ -1234,25 +1237,27 @@ NESTED_END InterpreterStubRetDouble, _TEXT NESTED_ENTRY InterpreterStubRetBuffR0, _TEXT, NoHandler PROLOG_PUSH {lr} - sub sp, sp, 12 + alloc_stack 12 add r0, sp, #__PWTB_TransitionBlock + 16 mov r1, r4 ldr r2, [sp, #__PWTB_ArgumentRegisters + 16] + CHECK_STACK_ALIGNMENT bl C_FUNC(ExecuteInterpretedMethod) ldr r0, [r0] - add sp, 
sp, 12 + free_stack 12 EPILOG_POP {pc} NESTED_END InterpreterStubRetBuffR0, _TEXT NESTED_ENTRY InterpreterStubRetBuffR1, _TEXT, NoHandler PROLOG_PUSH {lr} - sub sp, sp, 12 + alloc_stack 12 add r0, sp, #__PWTB_TransitionBlock + 16 mov r1, r4 - ldr r2, [sp, #__PWTB_ArgumentRegisters + 20] + ldr r2, [sp, #__PWTB_ArgumentRegisters + 16] + CHECK_STACK_ALIGNMENT bl C_FUNC(ExecuteInterpretedMethod) ldr r1, [r0] - add sp, sp, 12 + free_stack 12 EPILOG_POP {pc} NESTED_END InterpreterStubRetBuffR1, _TEXT From 3819a3f9dec064911de9d4437dc19b124fafed3e Mon Sep 17 00:00:00 2001 From: Dong-Heon Jung Date: Wed, 29 Oct 2025 16:59:17 +0900 Subject: [PATCH 33/33] Fix stack value Sync stack value between interpreter frame and native stack for unwinding --- src/coreclr/vm/interpexec.cpp | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/coreclr/vm/interpexec.cpp b/src/coreclr/vm/interpexec.cpp index d3b2cbcab39f1b..5d13ed0a953fa6 100644 --- a/src/coreclr/vm/interpexec.cpp +++ b/src/coreclr/vm/interpexec.cpp @@ -2518,7 +2518,14 @@ void InterpExecMethod(InterpreterFrame *pInterpreterFrame, InterpMethodContextFr } else { +#ifdef TARGET_ARM + TADDR targetSP = pInterpreterFrame->GetInterpExecMethodSP(); + pInterpreterFrame->SetInterpExecMethodSP(targetSP - 8); // Pass two arguments via native stack for InvokeUnmanagedMethodWithTransition +#endif // TARGET_ARM InvokeUnmanagedMethodWithTransition(targetMethod, stack, pFrame, callArgsAddress, returnValueAddress, callTarget); +#ifdef TARGET_ARM + pInterpreterFrame->SetInterpExecMethodSP(targetSP); // Restore stack pointer +#endif // TARGET_ARM } break;