From c88d0797e25f92e9ac9fb146bbce4dac78c5a215 Mon Sep 17 00:00:00 2001 From: sunlijun Date: Mon, 24 Jun 2024 17:42:09 +0800 Subject: [PATCH 01/10] [LoongArch64] Add nativeaot support on LoongArch64. --- eng/Subsets.props | 2 +- src/coreclr/CMakeLists.txt | 2 +- .../src/Internal/Runtime/TransitionBlock.cs | 74 ++ src/coreclr/nativeaot/Directory.Build.props | 3 + src/coreclr/nativeaot/Runtime/CommonMacros.h | 5 + src/coreclr/nativeaot/Runtime/EHHelpers.cpp | 26 +- src/coreclr/nativeaot/Runtime/ICodeManager.h | 21 + src/coreclr/nativeaot/Runtime/MiscHelpers.cpp | 35 + src/coreclr/nativeaot/Runtime/PalRedhawk.h | 104 +++ .../nativeaot/Runtime/PalRedhawkCommon.h | 31 + .../nativeaot/Runtime/StackFrameIterator.cpp | 212 ++++- .../nativeaot/Runtime/StackFrameIterator.h | 11 + .../nativeaot/Runtime/ThunksMapping.cpp | 24 + src/coreclr/nativeaot/Runtime/gcenv.ee.cpp | 2 +- .../nativeaot/Runtime/inc/TargetPtrs.h | 2 + src/coreclr/nativeaot/Runtime/inc/rhbinder.h | 65 ++ .../nativeaot/Runtime/loongarch64/AllocFast.S | 276 ++++++ .../Runtime/loongarch64/AsmOffsetsCpu.h | 67 ++ .../Runtime/loongarch64/ExceptionHandling.S | 831 ++++++++++++++++++ .../nativeaot/Runtime/loongarch64/GcProbe.S | 198 +++++ .../loongarch64/InteropThunksHelpers.S | 52 ++ .../nativeaot/Runtime/loongarch64/MiscStubs.S | 5 + .../nativeaot/Runtime/loongarch64/PInvoke.S | 65 ++ .../Runtime/loongarch64/StubDispatch.S | 117 +++ .../Runtime/loongarch64/UniversalTransition.S | 191 ++++ .../Runtime/loongarch64/WriteBarriers.S | 354 ++++++++ src/coreclr/nativeaot/Runtime/regdisplay.h | 58 +- src/coreclr/nativeaot/Runtime/startup.cpp | 7 +- .../nativeaot/Runtime/unix/PalRedhawkInline.h | 14 +- .../nativeaot/Runtime/unix/UnixContext.cpp | 171 +++- .../nativeaot/Runtime/unix/UnixContext.h | 55 ++ .../Runtime/unix/UnixNativeCodeManager.cpp | 150 +++- .../nativeaot/Runtime/unix/UnwindHelpers.cpp | 297 +++++++ .../nativeaot/Runtime/unix/unixasmmacros.inc | 2 + .../Runtime/unix/unixasmmacrosloongarch64.inc | 328 +++++++ 
.../TypeLoader/TypeSystemContextFactory.cs | 2 + .../Target_LoongArch64/LoongArch64Emitter.cs | 8 +- .../LoongArch64ReadyToRunHelperNode.cs | 47 +- .../ILCompiler.Compiler/Compiler/JitHelper.cs | 25 +- .../ObjectWriter/CodeView/CodeViewNative.cs | 71 ++ .../CodeView/CodeViewSymbolsBuilder.cs | 6 + .../ObjectWriter/Dwarf/DwarfBuilder.cs | 6 + .../Compiler/ObjectWriter/Dwarf/DwarfCie.cs | 13 + .../Dwarf/DwarfExpressionBuilder.cs | 4 + .../Compiler/ObjectWriter/ElfNative.cs | 118 +++ .../Compiler/ObjectWriter/ElfObjectWriter.cs | 47 +- .../llvm-libunwind/src/CompactUnwinder.hpp | 4 + .../external/llvm-libunwind/src/Registers.hpp | 9 +- 48 files changed, 4165 insertions(+), 52 deletions(-) create mode 100644 src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S create mode 100644 src/coreclr/nativeaot/Runtime/loongarch64/AsmOffsetsCpu.h create mode 100644 src/coreclr/nativeaot/Runtime/loongarch64/ExceptionHandling.S create mode 100644 src/coreclr/nativeaot/Runtime/loongarch64/GcProbe.S create mode 100644 src/coreclr/nativeaot/Runtime/loongarch64/InteropThunksHelpers.S create mode 100644 src/coreclr/nativeaot/Runtime/loongarch64/MiscStubs.S create mode 100644 src/coreclr/nativeaot/Runtime/loongarch64/PInvoke.S create mode 100644 src/coreclr/nativeaot/Runtime/loongarch64/StubDispatch.S create mode 100644 src/coreclr/nativeaot/Runtime/loongarch64/UniversalTransition.S create mode 100644 src/coreclr/nativeaot/Runtime/loongarch64/WriteBarriers.S create mode 100644 src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc diff --git a/eng/Subsets.props b/eng/Subsets.props index 41f63f11bbd79..f61fae7580c4d 100644 --- a/eng/Subsets.props +++ b/eng/Subsets.props @@ -120,7 +120,7 @@ <_NativeAotSupportedOS Condition="'$(TargetOS)' == 'windows' or '$(TargetOS)' == 'linux' or '$(TargetOS)' == 'osx' or '$(TargetOS)' == 'maccatalyst' or '$(TargetOS)' == 'iossimulator' or '$(TargetOS)' == 'ios' or '$(TargetOS)' == 'tvossimulator' or '$(TargetOS)' == 'tvos' or '$(TargetOS)' == 
'freebsd'">true - <_NativeAotSupportedArch Condition="'$(TargetArchitecture)' == 'x64' or '$(TargetArchitecture)' == 'arm64' or '$(TargetArchitecture)' == 'arm' or ('$(TargetOS)' == 'windows' and '$(TargetArchitecture)' == 'x86')">true + <_NativeAotSupportedArch Condition="'$(TargetArchitecture)' == 'x64' or '$(TargetArchitecture)' == 'arm64' or '$(TargetArchitecture)' == 'arm' or '$(TargetArchitecture)' == 'loongarch64' or ('$(TargetOS)' == 'windows' and '$(TargetArchitecture)' == 'x86')">true true diff --git a/src/coreclr/CMakeLists.txt b/src/coreclr/CMakeLists.txt index 7ed0d509212cc..aed48f266ef00 100644 --- a/src/coreclr/CMakeLists.txt +++ b/src/coreclr/CMakeLists.txt @@ -147,7 +147,7 @@ add_subdirectory(tools/aot/jitinterface) if(NOT CLR_CROSS_COMPONENTS_BUILD) # NativeAOT only buildable for a subset of CoreCLR-supported configurations - if(CLR_CMAKE_HOST_ARCH_ARM64 OR CLR_CMAKE_HOST_ARCH_AMD64 OR CLR_CMAKE_HOST_ARCH_ARM OR (CLR_CMAKE_HOST_ARCH_I386 AND CLR_CMAKE_HOST_WIN32)) + if(CLR_CMAKE_HOST_ARCH_ARM64 OR CLR_CMAKE_HOST_ARCH_AMD64 OR CLR_CMAKE_HOST_ARCH_ARM OR CLR_CMAKE_HOST_ARCH_LOONGARCH64 OR (CLR_CMAKE_HOST_ARCH_I386 AND CLR_CMAKE_HOST_WIN32)) add_subdirectory(nativeaot) endif() endif(NOT CLR_CROSS_COMPONENTS_BUILD) diff --git a/src/coreclr/nativeaot/Common/src/Internal/Runtime/TransitionBlock.cs b/src/coreclr/nativeaot/Common/src/Internal/Runtime/TransitionBlock.cs index 98126202f1164..ccff78114d4e9 100644 --- a/src/coreclr/nativeaot/Common/src/Internal/Runtime/TransitionBlock.cs +++ b/src/coreclr/nativeaot/Common/src/Internal/Runtime/TransitionBlock.cs @@ -35,6 +35,12 @@ #define ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE #define ENREGISTERED_PARAMTYPE_MAXSIZE #elif TARGET_WASM +#elif TARGET_LOONGARCH64 +#define CALLDESCR_ARGREGS // CallDescrWorker has ArgumentRegister parameter +#define CALLDESCR_FPARGREGS // CallDescrWorker has FloatArgumentRegisters parameter +#define ENREGISTERED_RETURNTYPE_MAXSIZE +#define ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE 
+#define ENREGISTERED_PARAMTYPE_MAXSIZE #else #error Unknown architecture! #endif @@ -300,6 +306,60 @@ internal struct ArchitectureConstants public const int STACK_ELEM_SIZE = 4; public static int StackElemSize(int size) { return (((size) + STACK_ELEM_SIZE - 1) & ~(STACK_ELEM_SIZE - 1)); } } +#elif TARGET_LOONGARCH64 + [StructLayout(LayoutKind.Sequential)] + internal struct ReturnBlock + { + private IntPtr returnValue; + private IntPtr returnValue2; + private IntPtr returnValue3; + private IntPtr returnValue4; + } + + [StructLayout(LayoutKind.Sequential)] + internal struct ArgumentRegisters + { + private IntPtr r4; + private IntPtr r5; + private IntPtr r6; + private IntPtr r7; + private IntPtr r8; + private IntPtr r9; + private IntPtr r10; + private IntPtr r11; + public static unsafe int GetOffsetOfr11() + { + return sizeof(IntPtr) * 7; + } + } + + [StructLayout(LayoutKind.Sequential)] + internal struct FloatArgumentRegisters + { + private double f0; + private double f1; + private double f2; + private double f3; + private double f4; + private double f5; + private double f6; + private double f7; + } + + internal struct ArchitectureConstants + { + // To avoid corner case bugs, limit maximum size of the arguments with sufficient margin + public const int MAX_ARG_SIZE = 0xFFFFFF; + + public const int NUM_ARGUMENT_REGISTERS = 8; + public const int ARGUMENTREGISTERS_SIZE = NUM_ARGUMENT_REGISTERS * 8; + public const int ENREGISTERED_RETURNTYPE_MAXSIZE = 32; // bytes (four FP registers: d0,d1,d2 and d3) + public const int ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE = 16; // bytes (two int registers: x0 and x1) + public const int ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE_PRIMITIVE = 8; + public const int ENREGISTERED_PARAMTYPE_MAXSIZE = 16; // bytes (max value type size that can be passed by value) + public const int STACK_ELEM_SIZE = 8; + public static int StackElemSize(int size) { return (((size) + STACK_ELEM_SIZE - 1) & ~(STACK_ELEM_SIZE - 1)); } + } #endif // @@ -392,6 
+452,20 @@ public static unsafe int GetOffsetOfArgumentRegisters() { return sizeof(ReturnBlock); } +#elif TARGET_LOONGARCH64 + public ReturnBlock m_returnBlock; + public static unsafe int GetOffsetOfReturnValuesBlock() + { + return 0; + } + + public ArgumentRegisters m_argumentRegisters; + public static unsafe int GetOffsetOfArgumentRegisters() + { + return sizeof(ReturnBlock); + } + + public IntPtr m_alignmentPad; #else #error Portability problem #endif diff --git a/src/coreclr/nativeaot/Directory.Build.props b/src/coreclr/nativeaot/Directory.Build.props index b06c29baeb43e..c01756cfc8aba 100644 --- a/src/coreclr/nativeaot/Directory.Build.props +++ b/src/coreclr/nativeaot/Directory.Build.props @@ -89,6 +89,9 @@ TARGET_64BIT;TARGET_ARM64;$(DefineConstants) + + TARGET_64BIT;TARGET_LOONGARCH64;$(DefineConstants) + TARGET_WINDOWS;$(DefineConstants) diff --git a/src/coreclr/nativeaot/Runtime/CommonMacros.h b/src/coreclr/nativeaot/Runtime/CommonMacros.h index fe6a081bbecb9..be6556ba15351 100644 --- a/src/coreclr/nativeaot/Runtime/CommonMacros.h +++ b/src/coreclr/nativeaot/Runtime/CommonMacros.h @@ -119,6 +119,11 @@ inline bool IS_ALIGNED(T* val, uintptr_t alignment); #define LOG2_PTRSIZE 2 #define POINTER_SIZE 4 +#elif defined(HOST_LOONGARCH64) + +#define LOG2_PTRSIZE 3 +#define POINTER_SIZE 8 + #else #error Unsupported target architecture #endif diff --git a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp index 325128c4e01fc..fc5f4dcf48c9a 100644 --- a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp @@ -187,6 +187,22 @@ FCIMPL3(void, RhpCopyContextFromExInfo, void * pOSContext, int32_t cbOSContext, pContext->Sp = pPalContext->SP; pContext->Lr = pPalContext->LR; pContext->Pc = pPalContext->IP; +#elif defined(HOST_LOONGARCH64) + pContext->R4 = pPalContext->R4; + pContext->R5 = pPalContext->R5; + pContext->R23 = pPalContext->R23; + pContext->R24 = pPalContext->R24; + pContext->R25 = 
pPalContext->R25; + pContext->R26 = pPalContext->R26; + pContext->R27 = pPalContext->R27; + pContext->R28 = pPalContext->R28; + pContext->R29 = pPalContext->R29; + pContext->R30 = pPalContext->R30; + pContext->R31 = pPalContext->R31; + pContext->Fp = pPalContext->FP; + pContext->Sp = pPalContext->SP; + pContext->Ra = pPalContext->RA; + pContext->Pc = pPalContext->IP; #elif defined(HOST_WASM) // No registers, no work to do yet #else @@ -195,7 +211,7 @@ FCIMPL3(void, RhpCopyContextFromExInfo, void * pOSContext, int32_t cbOSContext, } FCIMPLEND -#if defined(HOST_AMD64) || defined(HOST_ARM) || defined(HOST_X86) || defined(HOST_ARM64) +#if defined(HOST_AMD64) || defined(HOST_ARM) || defined(HOST_X86) || defined(HOST_ARM64) || defined(HOST_LOONGARCH64) struct DISPATCHER_CONTEXT { uintptr_t ControlPc; @@ -266,7 +282,7 @@ EXTERN_C int32_t RhpPInvokeExceptionGuard() } #endif -#if defined(HOST_AMD64) || defined(HOST_ARM) || defined(HOST_X86) || defined(HOST_ARM64) || defined(HOST_WASM) +#if defined(HOST_AMD64) || defined(HOST_ARM) || defined(HOST_X86) || defined(HOST_ARM64) || defined(HOST_WASM) || defined(HOST_LOONGARCH64) FCDECL2(void, RhpThrowHwEx, int exceptionCode, TADDR faultingIP); #else FCIMPL0(void, RhpThrowHwEx) @@ -328,7 +344,7 @@ EXTERN_C CODE_LOCATION RhpCheckedAssignRefEBPAVLocation; #endif EXTERN_C CODE_LOCATION RhpByRefAssignRefAVLocation1; -#if !defined(HOST_ARM64) +#if !defined(HOST_ARM64) && !defined(HOST_LOONGARCH64) EXTERN_C CODE_LOCATION RhpByRefAssignRefAVLocation2; #endif @@ -361,7 +377,7 @@ static bool InWriteBarrierHelper(uintptr_t faultingIP) (uintptr_t)&RhpCheckedAssignRefEBPAVLocation, #endif (uintptr_t)&RhpByRefAssignRefAVLocation1, -#if !defined(HOST_ARM64) +#if !defined(HOST_ARM64) && !defined(HOST_LOONGARCH64) (uintptr_t)&RhpByRefAssignRefAVLocation2, #endif }; @@ -443,6 +459,8 @@ static uintptr_t UnwindSimpleHelperToCaller( pContext->SetSp(sp+sizeof(uintptr_t)); // pop the stack #elif defined(HOST_ARM) || defined(HOST_ARM64) uintptr_t 
adjustedFaultingIP = pContext->GetLr(); +#elif defined(HOST_LOONGARCH64) + uintptr_t adjustedFaultingIP = pContext->GetRa(); #else uintptr_t adjustedFaultingIP = 0; // initializing to make the compiler happy PORTABILITY_ASSERT("UnwindSimpleHelperToCaller"); diff --git a/src/coreclr/nativeaot/Runtime/ICodeManager.h b/src/coreclr/nativeaot/Runtime/ICodeManager.h index dfc6e9efa915a..d1dbd47e51985 100644 --- a/src/coreclr/nativeaot/Runtime/ICodeManager.h +++ b/src/coreclr/nativeaot/Runtime/ICodeManager.h @@ -65,6 +65,27 @@ inline GCRefKind TransitionFrameFlagsToReturnKind(uint64_t transFrameFlags) return returnKind; } +#elif defined(TARGET_LOONGARCH64) +// Verify that we can use bitwise shifts to convert from GCRefKind to PInvokeTransitionFrameFlags and back +C_ASSERT(PTFF_R4_IS_GCREF == ((uint64_t)GCRK_Object << 32)); +C_ASSERT(PTFF_R4_IS_BYREF == ((uint64_t)GCRK_Byref << 32)); +C_ASSERT(PTFF_R5_IS_GCREF == ((uint64_t)GCRK_Scalar_Obj << 32)); +C_ASSERT(PTFF_R5_IS_BYREF == ((uint64_t)GCRK_Scalar_Byref << 32)); + +inline uint64_t ReturnKindToTransitionFrameFlags(GCRefKind returnKind) +{ + // just need to report gc ref bits here. + // appropriate PTFF_SAVE_ bits will be added by the frame building routine. 
+ return ((uint64_t)returnKind << 32); +} + +inline GCRefKind TransitionFrameFlagsToReturnKind(uint64_t transFrameFlags) +{ + GCRefKind returnKind = (GCRefKind)((transFrameFlags & (PTFF_R4_IS_GCREF | PTFF_R4_IS_BYREF | PTFF_R5_IS_GCREF | PTFF_R5_IS_BYREF)) >> 32); + ASSERT((returnKind == GCRK_Scalar) || ((transFrameFlags & PTFF_SAVE_R4) && (transFrameFlags & PTFF_SAVE_R5))); + return returnKind; +} + #elif defined(TARGET_AMD64) // Verify that we can use bitwise shifts to convert from GCRefKind to PInvokeTransitionFrameFlags and back diff --git a/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp b/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp index c5bbcc2284277..30a9489d4b4e7 100644 --- a/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp @@ -334,6 +334,41 @@ FCIMPL1(uint8_t *, RhGetCodeTarget, uint8_t * pCodeOrg) int64_t distToTarget = ((int64_t)pCode[0] << 38) >> 36; return (uint8_t *)pCode + distToTarget; } +#elif TARGET_LOONGARCH64 + uint32_t * pCode = (uint32_t *)pCodeOrg; + // is this "addi.d $a0, $a0, 8"? + if (pCode[0] == 0x02c02084) + { + // unboxing sequence + unboxingStub = true; + pCode++; + } + // is this an indirect jump? + // pcalau12i $t7, imm20; ld.d $t7, $t7, imm12; jirl $r0, $t7, 0 + if ((pCode[0] & 0xfe000000) == 0x1a000000 && + (pCode[1] & 0xffc00000) == 0x28c00000 && + (pCode[2] & 0xfc000000) == 0x4c000000) + { + // normal import stub - dist to IAT cell is relative to (PC & ~0xfff) + // pcalau12i: imm = SignExtend(imm20:Zeros(12), 64); + int64_t distToIatCell = ((((int64_t)pCode[0] & ~0x1f) << 39) >> 32); + // ld.d: offset = SignExtend(imm12, 64); + distToIatCell += (((int64_t)pCode[1] << 42) >> 52); + uint8_t ** pIatCell = (uint8_t **)(((int64_t)pCode & ~0xfff) + distToIatCell); + return *pIatCell; + } + // is this an unboxing stub followed by a relative jump? 
+ // pcalau12i $r21, imm20; jirl $r0, $r21, imm16 + else if (unboxingStub && + (pCode[0] & 0xfe00001f) == 0x1a000015 && + (pCode[1] & 0xfc0003ff) == 0x4c0002a0) + { + // relative jump - dist is relative to the instruction + // offset = SignExtend(immhi10:immlo16:'00', 64); + int64_t distToTarget = ((((int64_t)pCode[0] & ~0x1f) << 39) >> 32); + distToTarget += ((((int64_t)pCode[1] & ~0x3ff) << 38) >> 46); + return (uint8_t *)(((int64_t)pCode & ~0xfff) + distToTarget); + } #else UNREFERENCED_PARAMETER(unboxingStub); PORTABILITY_ASSERT("RhGetCodeTarget"); diff --git a/src/coreclr/nativeaot/Runtime/PalRedhawk.h b/src/coreclr/nativeaot/Runtime/PalRedhawk.h index 9257324bd1589..ef96825c15e37 100644 --- a/src/coreclr/nativeaot/Runtime/PalRedhawk.h +++ b/src/coreclr/nativeaot/Runtime/PalRedhawk.h @@ -448,6 +448,110 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT { } } CONTEXT, *PCONTEXT; +#elif defined(HOST_LOONGARCH64) + +#define CONTEXT_LOONGARCH64 0x00400000L + +#define CONTEXT_CONTROL (CONTEXT_LOONGARCH64 | 0x1L) +#define CONTEXT_INTEGER (CONTEXT_LOONGARCH64 | 0x2L) + +// Specify the number of breakpoints and watchpoints that the OS +// will track. Architecturally, LOONGARCH64 supports up to 16. In practice, +// however, almost no one implements more than 4 of each. + +#define LOONGARCH64_MAX_BREAKPOINTS 8 +#define LOONGARCH64_MAX_WATCHPOINTS 2 + +typedef struct _NEON128 { + uint64_t Low; + int64_t High; +} NEON128, *PNEON128; + +typedef struct DECLSPEC_ALIGN(16) _CONTEXT { + // + // Control flags. 
+ // + uint32_t ContextFlags; + + // + // Integer registers + // + uint32_t Csr; // NZVF + DAIF + CurrentEL + SPSel + union { + struct { + uint64_t R0; + uint64_t R2; + uint64_t R4; + uint64_t R5; + uint64_t R6; + uint64_t R7; + uint64_t R8; + uint64_t R9; + uint64_t R10; + uint64_t R11; + uint64_t R12; + uint64_t R13; + uint64_t R14; + uint64_t R15; + uint64_t R16; + uint64_t R17; + uint64_t R18; + uint64_t R19; + uint64_t R20; + uint64_t R21; + uint64_t R23; + uint64_t R24; + uint64_t R25; + uint64_t R26; + uint64_t R27; + uint64_t R28; + uint64_t R29; + uint64_t R30; + uint64_t R31; +#pragma warning(push) +#pragma warning(disable:4201) // nameless struct + }; + uint64_t R[29]; + }; +#pragma warning(pop) + uint64_t Fp; // R22 + uint64_t Ra; // R1 + uint64_t Sp; // R3 + uint64_t Pc; + + // + // Floating Point/NEON Registers + // + NEON128 V[32]; + uint32_t Fpcr; + uint32_t Fpsr; + + // + // Debug registers + // + uint32_t Bcr[LOONGARCH64_MAX_BREAKPOINTS]; + uint64_t Bvr[LOONGARCH64_MAX_BREAKPOINTS]; + uint32_t Wcr[LOONGARCH64_MAX_WATCHPOINTS]; + uint64_t Wvr[LOONGARCH64_MAX_WATCHPOINTS]; + + void SetIp(uintptr_t ip) { Pc = ip; } + void SetArg0Reg(uintptr_t val) { R4 = val; } + void SetArg1Reg(uintptr_t val) { R5 = val; } + uintptr_t GetIp() { return Pc; } + uintptr_t GetRa() { return Ra; } + uintptr_t GetSp() { return Sp; } + + template + void ForEachPossibleObjectRef(F lambda) + { + for (uint64_t* pReg = &R0; pReg <= &R31; pReg++) + lambda((size_t*)pReg); + + // Ra can be used as a scratch register + lambda((size_t*)&Ra); + } +} CONTEXT, *PCONTEXT; + #elif defined(HOST_WASM) typedef struct DECLSPEC_ALIGN(8) _CONTEXT { diff --git a/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h b/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h index 29896b7f53ba8..d47a12c39fed8 100644 --- a/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h +++ b/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h @@ -91,6 +91,37 @@ struct PAL_LIMITED_CONTEXT uintptr_t GetLr() const { return LR; } 
void SetIp(uintptr_t ip) { IP = ip; } void SetSp(uintptr_t sp) { SP = sp; } +#elif defined(TARGET_LOONGARCH64) + uintptr_t FP; + uintptr_t RA; + + uintptr_t R4; + uintptr_t R5; + uintptr_t R23; + uintptr_t R24; + uintptr_t R25; + uintptr_t R26; + uintptr_t R27; + uintptr_t R28; + uintptr_t R29; + uintptr_t R30; + uintptr_t R31; + uintptr_t R2; + + uintptr_t SP; + uintptr_t IP; + + uint64_t F[16 - 8]; // Only the bottom 64-bit value of the V registers V8..V15 needs to be preserved + // (V0-V7 and V16-V31 are not preserved according to the ABI spec). + + + uintptr_t GetIp() const { return IP; } + uintptr_t GetSp() const { return SP; } + uintptr_t GetFp() const { return FP; } + uintptr_t GetRa() const { return RA; } + void SetIp(uintptr_t ip) { IP = ip; } + void SetSp(uintptr_t sp) { SP = sp; } + #elif defined(UNIX_AMD64_ABI) // Param regs: rdi, rsi, rdx, rcx, r8, r9, scratch: rax, rdx (both return val), preserved: rbp, rbx, r12-r15 uintptr_t IP; diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index ae073e57c7ecd..5155fe0b1e3a4 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -239,6 +239,54 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PInvokeTransitionF m_HijackedReturnValueKind = retValueKind; } +#elif defined(TARGET_LOONGARCH64) + m_RegDisplay.pFP = (PTR_uintptr_t)PTR_HOST_MEMBER_TADDR(PInvokeTransitionFrame, pFrame, m_FramePointer); + m_RegDisplay.pRA = (PTR_uintptr_t)PTR_HOST_MEMBER_TADDR(PInvokeTransitionFrame, pFrame, m_RIP); + + ASSERT(!(pFrame->m_Flags & PTFF_SAVE_FP)); // FP should never contain a GC ref + + if (pFrame->m_Flags & PTFF_SAVE_R23) { m_RegDisplay.pR23 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R24) { m_RegDisplay.pR24 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R25) { m_RegDisplay.pR25 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & 
PTFF_SAVE_R26) { m_RegDisplay.pR26 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R27) { m_RegDisplay.pR27 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R28) { m_RegDisplay.pR28 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R29) { m_RegDisplay.pR29 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R30) { m_RegDisplay.pR30 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R31) { m_RegDisplay.pR31 = pPreservedRegsCursor++; } + + if (pFrame->m_Flags & PTFF_SAVE_SP) { m_RegDisplay.SP = *pPreservedRegsCursor++; } + + if (pFrame->m_Flags & PTFF_SAVE_R0) { m_RegDisplay.pR0 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_RA) { m_RegDisplay.pRA = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R2) { m_RegDisplay.pR2 = pPreservedRegsCursor++; } + + if (pFrame->m_Flags & PTFF_SAVE_R4) { m_RegDisplay.pR4 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R5) { m_RegDisplay.pR5 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R6) { m_RegDisplay.pR6 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R7) { m_RegDisplay.pR7 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R8) { m_RegDisplay.pR8 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R9) { m_RegDisplay.pR9 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R10) { m_RegDisplay.pR10 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R11) { m_RegDisplay.pR11 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R12) { m_RegDisplay.pR12 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R13) { m_RegDisplay.pR13 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R14) { m_RegDisplay.pR14 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R15) { m_RegDisplay.pR15 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R16) { m_RegDisplay.pR16 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & 
PTFF_SAVE_R17) { m_RegDisplay.pR17 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R18) { m_RegDisplay.pR18 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R19) { m_RegDisplay.pR19 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R20) { m_RegDisplay.pR20 = pPreservedRegsCursor++; } + if (pFrame->m_Flags & PTFF_SAVE_R21) { m_RegDisplay.pR21 = pPreservedRegsCursor++; } + + GCRefKind retValueKind = TransitionFrameFlagsToReturnKind(pFrame->m_Flags); + if (retValueKind != GCRK_Scalar) + { + m_pHijackedReturnValue = (PTR_OBJECTREF)m_RegDisplay.pR4; + m_HijackedReturnValueKind = retValueKind; + } + #else // TARGET_ARM if (pFrame->m_Flags & PTFF_SAVE_RBX) { m_RegDisplay.pRbx = pPreservedRegsCursor++; } if (pFrame->m_Flags & PTFF_SAVE_RSI) { m_RegDisplay.pRsi = pPreservedRegsCursor++; } @@ -423,6 +471,35 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PTR_PAL_LIMITED_CO m_RegDisplay.pX1 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pCtx, X1); // TODO: Copy X2-X7 when we start supporting HVA's +#elif defined(TARGET_LOONGARCH64) + // + // preserved regs + // + m_RegDisplay.pR23 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pCtx, R23); + m_RegDisplay.pR24 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pCtx, R24); + m_RegDisplay.pR25 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pCtx, R25); + m_RegDisplay.pR26 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pCtx, R26); + m_RegDisplay.pR27 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pCtx, R27); + m_RegDisplay.pR28 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pCtx, R28); + m_RegDisplay.pR29 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pCtx, R29); + m_RegDisplay.pR30 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pCtx, R30); + m_RegDisplay.pR31 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pCtx, R31); + m_RegDisplay.pFP = 
(PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pCtx, FP); + m_RegDisplay.pRA = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pCtx, RA); + + // + // preserved vfp regs + // + for (int32_t i = 0; i < 16 - 8; i++) + { + m_RegDisplay.F[i] = pCtx->F[i]; + } + // + // scratch regs + // + m_RegDisplay.pR4 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pCtx, R4); + m_RegDisplay.pR5 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pCtx, R5); + #elif defined(UNIX_AMD64_ABI) // // preserved regs @@ -611,6 +688,48 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pC m_RegDisplay.pR11 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R11); m_RegDisplay.pR12 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R12); m_RegDisplay.pLR = (PTR_uintptr_t)PTR_TO_REG(pCtx, Lr); + +#elif defined(TARGET_LOONGARCH64) + + // + // preserved regs + // + m_RegDisplay.pR23 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R23); + m_RegDisplay.pR24 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R24); + m_RegDisplay.pR25 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R25); + m_RegDisplay.pR26 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R26); + m_RegDisplay.pR27 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R27); + m_RegDisplay.pR28 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R28); + m_RegDisplay.pR29 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R29); + m_RegDisplay.pR30 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R30); + m_RegDisplay.pR31 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R31); + m_RegDisplay.pFP = (PTR_uintptr_t)PTR_TO_REG(pCtx, Fp); + m_RegDisplay.pRA = (PTR_uintptr_t)PTR_TO_REG(pCtx, Ra); + + // + // scratch regs + // + m_RegDisplay.pR0 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R0); + m_RegDisplay.pR2 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R2); + m_RegDisplay.pR4 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R4); + m_RegDisplay.pR5 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R5); + m_RegDisplay.pR6 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R6); + m_RegDisplay.pR7 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R7); + m_RegDisplay.pR8 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R8); + m_RegDisplay.pR9 = 
(PTR_uintptr_t)PTR_TO_REG(pCtx, R9); + m_RegDisplay.pR10 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R10); + m_RegDisplay.pR11 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R11); + m_RegDisplay.pR12 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R12); + m_RegDisplay.pR13 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R13); + m_RegDisplay.pR14 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R14); + m_RegDisplay.pR15 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R15); + m_RegDisplay.pR16 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R16); + m_RegDisplay.pR17 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R17); + m_RegDisplay.pR18 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R18); + m_RegDisplay.pR19 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R19); + m_RegDisplay.pR20 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R20); + m_RegDisplay.pR21 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R21); + #else PORTABILITY_ASSERT("StackFrameIterator::InternalInit"); #endif // TARGET_ARM @@ -731,6 +850,18 @@ void StackFrameIterator::UpdateFromExceptionDispatch(PTR_StackFrameIterator pSou m_RegDisplay.pX28 = thisFuncletPtrs.pX28; m_RegDisplay.pFP = thisFuncletPtrs.pFP; +#elif defined(TARGET_LOONGARCH64) + m_RegDisplay.pR23 = thisFuncletPtrs.pR23; + m_RegDisplay.pR24 = thisFuncletPtrs.pR24; + m_RegDisplay.pR25 = thisFuncletPtrs.pR25; + m_RegDisplay.pR26 = thisFuncletPtrs.pR26; + m_RegDisplay.pR27 = thisFuncletPtrs.pR27; + m_RegDisplay.pR28 = thisFuncletPtrs.pR28; + m_RegDisplay.pR29 = thisFuncletPtrs.pR29; + m_RegDisplay.pR30 = thisFuncletPtrs.pR30; + m_RegDisplay.pR31 = thisFuncletPtrs.pR31; + m_RegDisplay.pFP = thisFuncletPtrs.pFP; + #elif defined(UNIX_AMD64_ABI) // Save the preserved regs portion of the REGDISPLAY across the unwind through the C# EH dispatch code. 
m_RegDisplay.pRbp = thisFuncletPtrs.pRbp; @@ -964,12 +1095,54 @@ void StackFrameIterator::UnwindFuncletInvokeThunk() m_RegDisplay.pX27 = SP++; m_RegDisplay.pX28 = SP++; +#elif defined(TARGET_LOONGARCH64) + PTR_uint64_t f = (PTR_uint64_t)(m_RegDisplay.SP); + + for (int i = 0; i < 8; i++) + { + m_RegDisplay.F[i] = *f++; + } + + SP = (PTR_uintptr_t)f; + + if (!isFilterInvoke) + { + // RhpCallCatchFunclet puts a couple of extra things on the stack that aren't put there by the other two + // thunks, but we don't need to know what they are here, so we just skip them. + SP += EQUALS_RETURN_ADDRESS(m_ControlPC, RhpCallCatchFunclet2) ? 6 : 4; + // Save the preserved regs portion of the REGDISPLAY across the unwind through the C# EH dispatch code. + m_funcletPtrs.pR23 = m_RegDisplay.pR23; + m_funcletPtrs.pR24 = m_RegDisplay.pR24; + m_funcletPtrs.pR25 = m_RegDisplay.pR25; + m_funcletPtrs.pR26 = m_RegDisplay.pR26; + m_funcletPtrs.pR27 = m_RegDisplay.pR27; + m_funcletPtrs.pR28 = m_RegDisplay.pR28; + m_funcletPtrs.pR29 = m_RegDisplay.pR29; + m_funcletPtrs.pR30 = m_RegDisplay.pR30; + m_funcletPtrs.pR31 = m_RegDisplay.pR31; + m_funcletPtrs.pFP = m_RegDisplay.pFP; + } + + m_RegDisplay.pFP = SP++; + + m_RegDisplay.SetIP(*SP++); + + m_RegDisplay.pR23 = SP++; + m_RegDisplay.pR24 = SP++; + m_RegDisplay.pR25 = SP++; + m_RegDisplay.pR26 = SP++; + m_RegDisplay.pR27 = SP++; + m_RegDisplay.pR28 = SP++; + m_RegDisplay.pR29 = SP++; + m_RegDisplay.pR30 = SP++; + m_RegDisplay.pR31 = SP++; + #else SP = (PTR_uintptr_t)(m_RegDisplay.SP); ASSERT_UNCONDITIONALLY("NYI for this arch"); #endif -#if !defined(TARGET_ARM64) +#if !defined(TARGET_ARM64) && !defined(TARGET_LOONGARCH64) m_RegDisplay.SetIP(PCODEToPINSTR(*SP++)); #endif @@ -1104,6 +1277,30 @@ struct UniversalTransitionStackFrame { pRegisterSet->pFP = GET_POINTER_TO_FIELD(m_pushedFP); } + +#elif defined(TARGET_LOONGARCH64) + + // Conservative GC reporting must be applied to everything between the base of the + // ReturnBlock and the top of the 
StackPassedArgs. +private: + uintptr_t m_pushedFP; // ChildSP+000 CallerSP-100 (0x08 bytes) (fp) + uintptr_t m_pushedRA; // ChildSP+008 CallerSP-0F8 (0x08 bytes) (ra) + Fp128 m_fpArgRegs[8]; // ChildSP+010 CallerSP-0F0 (0x80 bytes) (q0-q7) + uintptr_t m_returnBlock[4]; // ChildSP+090 CallerSP-070 (0x40 bytes) + uintptr_t m_intArgRegs[9]; // ChildSP+0B0 CallerSP-050 (0x48 bytes) (x0-x8) + uintptr_t m_alignmentPad; // ChildSP+0F8 CallerSP-008 (0x08 bytes) + uintptr_t m_stackPassedArgs[1]; // ChildSP+100 CallerSP+000 (unknown size) + +public: + PTR_uintptr_t get_CallerSP() { return GET_POINTER_TO_FIELD(m_stackPassedArgs[0]); } + PTR_uintptr_t get_AddressOfPushedCallerIP() { return GET_POINTER_TO_FIELD(m_pushedRA); } + PTR_uintptr_t get_LowerBoundForConservativeReporting() { return GET_POINTER_TO_FIELD(m_returnBlock[0]); } + + void UnwindNonVolatileRegisters(REGDISPLAY * pRegisterSet) + { + pRegisterSet->pFP = GET_POINTER_TO_FIELD(m_pushedFP); + } + #elif defined(TARGET_WASM) private: // WASMTODO: #error NYI for this arch @@ -1175,6 +1372,8 @@ void StackFrameIterator::UnwindUniversalTransitionThunk() #define STACK_ALIGN_SIZE 16 #elif defined(TARGET_X86) #define STACK_ALIGN_SIZE 4 +#elif defined(TARGET_LOONGARCH64) +#define STACK_ALIGN_SIZE 16 #elif defined(TARGET_WASM) #define STACK_ALIGN_SIZE 4 #endif @@ -1240,6 +1439,17 @@ void StackFrameIterator::UnwindThrowSiteThunk() m_RegDisplay.pRdi = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, Rdi); m_RegDisplay.pRsi = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, Rsi); m_RegDisplay.pRbx = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, Rbx); +#elif defined(TARGET_LOONGARCH64) + m_RegDisplay.pR23 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, R23); + m_RegDisplay.pR24 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, R24); + m_RegDisplay.pR25 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, R25); + 
m_RegDisplay.pR26 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, R26); + m_RegDisplay.pR27 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, R27); + m_RegDisplay.pR28 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, R28); + m_RegDisplay.pR29 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, R29); + m_RegDisplay.pR30 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, R30); + m_RegDisplay.pR31 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, R31); + m_RegDisplay.pFP = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, FP); #else ASSERT_UNCONDITIONALLY("NYI for this arch"); #endif diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.h b/src/coreclr/nativeaot/Runtime/StackFrameIterator.h index cf7f524de8dbb..7eb6351f4dbe3 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.h +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.h @@ -179,6 +179,17 @@ class StackFrameIterator PTR_uintptr_t pX27; PTR_uintptr_t pX28; PTR_uintptr_t pFP; +#elif defined(TARGET_LOONGARCH64) + PTR_uintptr_t pR23; + PTR_uintptr_t pR24; + PTR_uintptr_t pR25; + PTR_uintptr_t pR26; + PTR_uintptr_t pR27; + PTR_uintptr_t pR28; + PTR_uintptr_t pR29; + PTR_uintptr_t pR30; + PTR_uintptr_t pR31; + PTR_uintptr_t pFP; #elif defined(UNIX_AMD64_ABI) PTR_uintptr_t pRbp; PTR_uintptr_t pRbx; diff --git a/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp b/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp index d22f30e19d9e0..c8f91a07a2819 100644 --- a/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp +++ b/src/coreclr/nativeaot/Runtime/ThunksMapping.cpp @@ -22,6 +22,8 @@ #define THUNK_SIZE 20 #elif TARGET_ARM64 #define THUNK_SIZE 16 +#elif TARGET_LOONGARCH64 +#define THUNK_SIZE 16 #else #define THUNK_SIZE (2 * OS_PAGE_SIZE) // This will cause RhpGetNumThunksPerBlock to return 0 #endif @@ -231,6 +233,28 @@ EXTERN_C void* QCALLTYPE RhAllocateThunksMapping() 
*((uint32_t*)pCurrentThunkAddress) = 0xD43E0000; pCurrentThunkAddress += 4; + +#elif TARGET_LOONGARCH64 + + //pcaddi $t7, + //pcaddi $t8, - + //ld.d $t8, $t8, + //jirl $r0, $t8, 0 + + int delta = (int)(pCurrentDataAddress - pCurrentThunkAddress); + *((uint32_t*)pCurrentThunkAddress) = 0x18000013 | (((delta & 0x3FFFFC) >> 2) << 5); + pCurrentThunkAddress += 4; + + delta += OS_PAGE_SIZE - POINTER_SIZE - (i * POINTER_SIZE * 2) - 4; + *((uint32_t*)pCurrentThunkAddress) = 0x18000014 | (((delta & 0x3FFFFC) >> 2) << 5); + pCurrentThunkAddress += 4; + + *((uint32_t*)pCurrentThunkAddress) = 0x28C00294; + pCurrentThunkAddress += 4; + + *((uint32_t*)pCurrentThunkAddress) = 0x4C000280; + pCurrentThunkAddress += 4; + #else UNREFERENCED_PARAMETER(pCurrentDataAddress); UNREFERENCED_PARAMETER(pCurrentThunkAddress); diff --git a/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp b/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp index 2cb9445144c22..450df71d5a889 100644 --- a/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp +++ b/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp @@ -56,7 +56,7 @@ void GCToEEInterface::RestartEE(bool /*bFinishedGC*/) { FireEtwGCRestartEEBegin_V1(GetClrInstanceId()); -#if defined(TARGET_ARM) || defined(TARGET_ARM64) +#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) // Flush the store buffers on all CPUs, to ensure that they all see changes made // by the GC threads. This only matters on weak memory ordered processors as // the strong memory ordered processors wouldn't have reordered the relevant reads. 
diff --git a/src/coreclr/nativeaot/Runtime/inc/TargetPtrs.h b/src/coreclr/nativeaot/Runtime/inc/TargetPtrs.h index 0ef7f5e8a84f7..ece8ae50b379e 100644 --- a/src/coreclr/nativeaot/Runtime/inc/TargetPtrs.h +++ b/src/coreclr/nativeaot/Runtime/inc/TargetPtrs.h @@ -13,6 +13,8 @@ typedef uint32_t UIntTarget; typedef uint64_t UIntTarget; #elif defined(TARGET_WASM) typedef uint32_t UIntTarget; +#elif defined(TARGET_LOONGARCH64) +typedef uint64_t UIntTarget; #else #error unexpected target architecture #endif diff --git a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h index f0ebc5b7a7e50..f72ff28caf001 100644 --- a/src/coreclr/nativeaot/Runtime/inc/rhbinder.h +++ b/src/coreclr/nativeaot/Runtime/inc/rhbinder.h @@ -338,6 +338,67 @@ enum PInvokeTransitionFrameFlags : uint64_t PTFF_THREAD_ABORT = 0x0000001000000000, // indicates that ThreadAbortException should be thrown when returning from the transition }; +#elif defined(TARGET_LOONGARCH64) +enum PInvokeTransitionFrameFlags : uint64_t +{ + // NOTE: Keep in sync with src\coreclr\nativeaot\Runtime\loongarch64\AsmMacros.h + + // NOTE: The order in which registers get pushed in the PInvokeTransitionFrame's m_PreservedRegs list has + // to match the order of these flags (that's also the order in which they are read in StackFrameIterator.cpp + + // standard preserved registers + PTFF_SAVE_R23 = 0x0000000000000001, + PTFF_SAVE_R24 = 0x0000000000000002, + PTFF_SAVE_R25 = 0x0000000000000004, + PTFF_SAVE_R26 = 0x0000000000000008, + PTFF_SAVE_R27 = 0x0000000000000010, + PTFF_SAVE_R28 = 0x0000000000000020, + PTFF_SAVE_R29 = 0x0000000000000040, + PTFF_SAVE_R30 = 0x0000000000000080, + PTFF_SAVE_R31 = 0x0000000000000100, + + PTFF_SAVE_SP = 0x0000000000000200, // Used for 'coop pinvokes' in runtime helper routines. Methods with + // PInvokes are required to have a frame pointers, but methods which + // call runtime helpers are not. 
Therefore, methods that call runtime + // helpers may need SP to seed the stackwalk. + + // Scratch registers + PTFF_SAVE_R0 = 0x0000000000000400, + PTFF_SAVE_R2 = 0x0000000000000800, + PTFF_SAVE_R4 = 0x0000000000001000, + PTFF_SAVE_R5 = 0x0000000000002000, + PTFF_SAVE_R6 = 0x0000000000004000, + PTFF_SAVE_R7 = 0x0000000000008000, + PTFF_SAVE_R8 = 0x0000000000010000, + PTFF_SAVE_R9 = 0x0000000000020000, + PTFF_SAVE_R10 = 0x0000000000040000, + PTFF_SAVE_R11 = 0x0000000000080000, + PTFF_SAVE_R12 = 0x0000000000100000, + PTFF_SAVE_R13 = 0x0000000000200000, + PTFF_SAVE_R14 = 0x0000000000400000, + PTFF_SAVE_R15 = 0x0000000000800000, + PTFF_SAVE_R16 = 0x0000000001000000, + PTFF_SAVE_R17 = 0x0000000002000000, + PTFF_SAVE_R18 = 0x0000000004000000, + PTFF_SAVE_R19 = 0x0000000008000000, + PTFF_SAVE_R20 = 0x0000000010000000, + PTFF_SAVE_R21 = 0x0000000020000000, + + PTFF_SAVE_FP = 0x0000000040000000, // should never be used, we require FP frames for methods with + // pinvoke and it is saved into the frame pointer field instead + + PTFF_SAVE_RA = 0x0000000080000000, // this is useful for the case of loop hijacking where we need both + // a return address pointing into the hijacked method and that method's + // ra register, which may hold a gc pointer + + // used by hijack handler to report return value of hijacked method + PTFF_R4_IS_GCREF = 0x0000000100000000, + PTFF_R4_IS_BYREF = 0x0000000200000000, + PTFF_R5_IS_GCREF = 0x0000000400000000, + PTFF_R5_IS_BYREF = 0x0000000800000000, + + PTFF_THREAD_ABORT = 0x0000001000000000, // indicates that ThreadAbortException should be thrown when returning from the transition +}; #else // TARGET_ARM enum PInvokeTransitionFrameFlags @@ -412,6 +473,8 @@ struct PInvokeTransitionFrame // can be an invalid pointer in universal transition cases (which never need to call GetThread) #ifdef TARGET_ARM64 uint64_t m_Flags; // PInvokeTransitionFrameFlags +#elif TARGET_LOONGARCH64 + uint64_t m_Flags; // PInvokeTransitionFrameFlags #else uint32_t 
m_Flags; // PInvokeTransitionFrameFlags #endif @@ -436,6 +499,8 @@ struct PInvokeTransitionFrame #define OFFSETOF__Thread__m_pTransitionFrame 0x40 #elif defined(TARGET_ARM64) #define OFFSETOF__Thread__m_pTransitionFrame 0x40 +#elif defined(TARGET_LOONGARCH64) +#define OFFSETOF__Thread__m_pTransitionFrame 0x40 #elif defined(TARGET_X86) #define OFFSETOF__Thread__m_pTransitionFrame 0x2c #elif defined(TARGET_ARM) diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S b/src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S new file mode 100644 index 0000000000000..a43b77c6ee959 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S @@ -0,0 +1,276 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include +#include "AsmOffsets.inc" + +// GC type flags +GC_ALLOC_FINALIZE = 1 + +// +// Rename fields of nested structs +// +OFFSETOF__Thread__m_alloc_context__alloc_ptr = OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr +OFFSETOF__Thread__m_alloc_context__alloc_limit = OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit + + + +// Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's +// allocation context then automatically fallback to the slow allocation path. +// $a0 == MethodTable + LEAF_ENTRY RhpNewFast, _TEXT + + // a1 = GetThread() +#ifdef FEATURE_EMULATED_TLS + GETTHREAD_ETLS_1 +#else + INLINE_GETTHREAD $a1 +#endif + + // + // a0 contains MethodTable pointer + // + ld.w $a2, $a0, OFFSETOF__MethodTable__m_uBaseSize + + // + // a0: MethodTable pointer + // a1: Thread pointer + // a2: base size + // + + // Load potential new object address into t3. + ld.d $t3, $a1, OFFSETOF__Thread__m_alloc_context__alloc_ptr + + // Determine whether the end of the object would lie outside of the current allocation context. 
If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + add.d $a2, $a2, $t3 + ld.d $t4, $a1, OFFSETOF__Thread__m_alloc_context__alloc_limit + bltu $t4, $a2, RhpNewFast_RarePath + + // Update the alloc pointer to account for the allocation. + st.d $a2, $a1, OFFSETOF__Thread__m_alloc_context__alloc_ptr + + // Set the new objects MethodTable pointer + st.d $a0, $t3, OFFSETOF__Object__m_pEEType + + ori $a0, $t3, 0 + jirl $r0, $ra, 0 + +RhpNewFast_RarePath: + ori $a1, $zero, 0 + b RhpNewObject + LEAF_END RhpNewFast, _TEXT + +// Allocate non-array object with finalizer. +// a0 == MethodTable + LEAF_ENTRY RhpNewFinalizable, _TEXT + ori $a1, $zero, GC_ALLOC_FINALIZE + b RhpNewObject + LEAF_END RhpNewFinalizable, _TEXT + +// Allocate non-array object. +// a0 == MethodTable +// a1 == alloc flags + NESTED_ENTRY RhpNewObject, _TEXT, NoHandler + + PUSH_COOP_PINVOKE_FRAME $a3 + + // a3: transition frame + + // Preserve the MethodTable in s0 + ori $s0, $a0, 0 + + addi.w $a2, $zero, 0 // numElements + + // Call the rest of the allocation helper. + // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) + bl C_FUNC(RhpGcAlloc) + + // Set the new objects MethodTable pointer on success. + beq $a0, $zero, NewOutOfMemory + + .cfi_remember_state + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + + .cfi_restore_state +NewOutOfMemory: + // This is the OOM failure path. We are going to tail-call to a managed helper that will throw + // an out of memory exception that the caller of this allocator understands. + + ori $a0, $s0, 0 // MethodTable pointer + ori $a1, $zero, 0 // Indicate that we should throw OOM. + + POP_COOP_PINVOKE_FRAME + b C_FUNC(RhExceptionHandling_FailedAllocation) + + NESTED_END RhpNewObject, _TEXT + +// Allocate a string. 
+// a0 == MethodTable +// a1 == element/character count + LEAF_ENTRY RhNewString, _TEXT + // Make sure computing the overall allocation size wont overflow + lu12i.w $a2, 0x3ffff // (MAX_STRING_LENGTH >> 12) & 0xFFFFF + ori $a2, $a2, 0xfdf // MAX_STRING_LENGTH & 0xFFF + bltu $a2, $a1, StringSizeOverflow + + // Compute overall allocation size (align(base size + (element size * elements), 8)). + addi.w $a2, $zero, STRING_COMPONENT_SIZE + ori $a3, $zero, 29 // STRING_BASE_SIZE + 7 + mulw.d.w $a2, $a1, $a2 + add.d $a2, $a2, $a3 // a2 = (a1[31:0] * a2[31:0])[64:0] + a3 + srli.d $a2, $a2, 3 + slli.d $a2, $a2, 3 + + // a0 == MethodTable + // a1 == element count + // a2 == string size + +#ifdef FEATURE_EMULATED_TLS + GETTHREAD_ETLS_3 +#else + INLINE_GETTHREAD $a3 +#endif + + // Load potential new object address into t3. + ld.d $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr + + // Determine whether the end of the object would lie outside of the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + add.d $a2, $a2, $t3 + ld.d $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_limit + bltu $t3, $a2, RhNewString_Rare + + // Reload new object address into r12. + ld.d $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr + + // Update the alloc pointer to account for the allocation. + st.d $a2, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr + + // Set the new objects MethodTable pointer and element count. + st.d $a0, $t3, OFFSETOF__Object__m_pEEType + st.d $a1, $t3, OFFSETOF__Array__m_Length + + // Return the object allocated in a0. + ori $a0, $t3, 0 + + jirl $r0, $ra, 0 + +StringSizeOverflow: + // We get here if the length of the final string object can not be represented as an unsigned + // 32-bit value. We are going to tail-call to a managed helper that will throw + // an OOM exception that the caller of this allocator understands. 
+ + // a0 holds MethodTable pointer already + ori $a1, $zero, 1 // Indicate that we should throw OverflowException + b C_FUNC(RhExceptionHandling_FailedAllocation) + +RhNewString_Rare: + b C_FUNC(RhpNewArrayRare) + LEAF_END RhNewString, _Text + +// Allocate one dimensional, zero based array (SZARRAY). +// $a0 == MethodTable +// $a1 == element count + LEAF_ENTRY RhpNewArray, _Text + + // We want to limit the element count to the non-negative 32-bit int range. + // If the element count is <= 0x7FFFFFFF, no overflow is possible because the component + // size is <= 0xffff (it is an unsigned 16-bit value), and the base size for the worst + // case (32 dimensional MdArray) is less than 0xffff, and thus the product fits in 64 bits. + lu12i.w $a2, 0x7ffff + ori $a2, $a2, 0xfff + bltu $a2, $a1, ArraySizeOverflow + + ld.h $a2, $a0, OFFSETOF__MethodTable__m_usComponentSize + mulw.d.w $a2, $a1, $a2 + ld.w $a3, $a0, OFFSETOF__MethodTable__m_uBaseSize + add.d $a2, $a2, $a3 + addi.d $a2, $a2, 7 + srli.d $a2, $a2, 3 + slli.d $a2, $a2, 3 + // a0 == MethodTable + // a1 == element count + // a2 == array size + + INLINE_GETTHREAD $a3 + + // Load potential new object address into t3. + ld.d $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr + + // Determine whether the end of the object would lie outside of the current allocation context. If so, + // we abandon the attempt to allocate the object directly and fall back to the slow helper. + add.d $a2, $a2, $t3 + ld.d $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_limit + bltu $t3, $a2, RhpNewArray_Rare + + // Reload new object address into t3. + ld.d $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr + + // Update the alloc pointer to account for the allocation. + st.d $a2, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr + + // Set the new objects MethodTable pointer and element count. + st.d $a0, $t3, OFFSETOF__Object__m_pEEType + st.d $a1, $t3, OFFSETOF__Array__m_Length + + // Return the object allocated in r0. 
+ ori $a0, $t3, 0 + + jirl $r0, $ra, 0 + +ArraySizeOverflow: + // We get here if the size of the final array object can not be represented as an unsigned + // 32-bit value. We are going to tail-call to a managed helper that will throw + // an overflow exception that the caller of this allocator understands. + + // $a0 holds MethodTable pointer already + ori $a1, $zero, 1 // Indicate that we should throw OverflowException + b C_FUNC(RhExceptionHandling_FailedAllocation) + +RhpNewArray_Rare: + b C_FUNC(RhpNewArrayRare) + LEAF_END RhpNewArray, _TEXT + +// Allocate one dimensional, zero based array (SZARRAY) using the slow path that calls a runtime helper. +// a0 == MethodTable +// a1 == element count +// a2 == array size + Thread::m_alloc_context::alloc_ptr +// a3 == Thread + NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler + + // Recover array size by subtracting the alloc_ptr from a2. + ld.d $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr + sub.d $a2, $a2, $t3 + + PUSH_COOP_PINVOKE_FRAME $a3 + + // Preserve data we will need later into the callee saved registers + ori $s0, $a0, 0 // Preserve MethodTable + + ori $a2, $a1, 0 // numElements + ori $a1, $zero, 0 // uFlags + + // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) + bl C_FUNC(RhpGcAlloc) + + // Set the new objects MethodTable pointer and length on success. + beq $a0, $zero, ArrayOutOfMemory + + .cfi_remember_state + POP_COOP_PINVOKE_FRAME + EPILOG_RETURN + + .cfi_restore_state +ArrayOutOfMemory: + // This is the OOM failure path. We are going to tail-call to a managed helper that will throw + // an out of memory exception that the caller of this allocator understands. + + ori $a0, $s0, 0 // MethodTable Pointer + ori $a1, $zero, 0 // Indicate that we should throw OOM. 
+ + POP_COOP_PINVOKE_FRAME + b C_FUNC(RhExceptionHandling_FailedAllocation) + + NESTED_END RhpNewArrayRare, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/AsmOffsetsCpu.h b/src/coreclr/nativeaot/Runtime/loongarch64/AsmOffsetsCpu.h new file mode 100644 index 0000000000000..0724e0f86fcff --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/loongarch64/AsmOffsetsCpu.h @@ -0,0 +1,67 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// +// This file is used by AsmOffsets.h to validate that our +// assembly-code offsets always match their C++ counterparts. +// +// NOTE: the offsets MUST be in hex notation WITHOUT the 0x prefix + +PLAT_ASM_SIZEOF(280, ExInfo) +PLAT_ASM_OFFSET(0, ExInfo, m_pPrevExInfo) +PLAT_ASM_OFFSET(8, ExInfo, m_pExContext) +PLAT_ASM_OFFSET(10, ExInfo, m_exception) +PLAT_ASM_OFFSET(18, ExInfo, m_kind) +PLAT_ASM_OFFSET(19, ExInfo, m_passNumber) +PLAT_ASM_OFFSET(1c, ExInfo, m_idxCurClause) +PLAT_ASM_OFFSET(20, ExInfo, m_frameIter) +PLAT_ASM_OFFSET(278, ExInfo, m_notifyDebuggerSP) + +PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_FramePointer) +PLAT_ASM_OFFSET(0, PInvokeTransitionFrame, m_RIP) +PLAT_ASM_OFFSET(10, PInvokeTransitionFrame, m_pThread) +PLAT_ASM_OFFSET(18, PInvokeTransitionFrame, m_Flags) +PLAT_ASM_OFFSET(20, PInvokeTransitionFrame, m_PreservedRegs) + +PLAT_ASM_SIZEOF(258, StackFrameIterator) +PLAT_ASM_OFFSET(10, StackFrameIterator, m_FramePointer) +PLAT_ASM_OFFSET(18, StackFrameIterator, m_ControlPC) +PLAT_ASM_OFFSET(20, StackFrameIterator, m_RegDisplay) +PLAT_ASM_OFFSET(248, StackFrameIterator, m_OriginalControlPC) +PLAT_ASM_OFFSET(250, StackFrameIterator, m_pPreviousTransitionFrame) + +PLAT_ASM_SIZEOF(C0, PAL_LIMITED_CONTEXT) + +PLAT_ASM_OFFSET(0, PAL_LIMITED_CONTEXT, FP) +PLAT_ASM_OFFSET(8, PAL_LIMITED_CONTEXT, RA) +PLAT_ASM_OFFSET(10, PAL_LIMITED_CONTEXT, R4) +PLAT_ASM_OFFSET(18, PAL_LIMITED_CONTEXT, R5) +PLAT_ASM_OFFSET(20, 
PAL_LIMITED_CONTEXT, R23) +PLAT_ASM_OFFSET(28, PAL_LIMITED_CONTEXT, R24) +PLAT_ASM_OFFSET(30, PAL_LIMITED_CONTEXT, R25) +PLAT_ASM_OFFSET(38, PAL_LIMITED_CONTEXT, R26) +PLAT_ASM_OFFSET(40, PAL_LIMITED_CONTEXT, R27) +PLAT_ASM_OFFSET(48, PAL_LIMITED_CONTEXT, R28) +PLAT_ASM_OFFSET(50, PAL_LIMITED_CONTEXT, R29) +PLAT_ASM_OFFSET(58, PAL_LIMITED_CONTEXT, R30) +PLAT_ASM_OFFSET(60, PAL_LIMITED_CONTEXT, R31) +PLAT_ASM_OFFSET(68, PAL_LIMITED_CONTEXT, R2) +PLAT_ASM_OFFSET(70, PAL_LIMITED_CONTEXT, SP) +PLAT_ASM_OFFSET(78, PAL_LIMITED_CONTEXT, IP) + +PLAT_ASM_SIZEOF(148, REGDISPLAY) +PLAT_ASM_OFFSET(18, REGDISPLAY, SP) + +PLAT_ASM_OFFSET(b8, REGDISPLAY, pR23) +PLAT_ASM_OFFSET(c0, REGDISPLAY, pR24) +PLAT_ASM_OFFSET(c8, REGDISPLAY, pR25) +PLAT_ASM_OFFSET(d0, REGDISPLAY, pR26) +PLAT_ASM_OFFSET(d8, REGDISPLAY, pR27) +PLAT_ASM_OFFSET(e0, REGDISPLAY, pR28) +PLAT_ASM_OFFSET(e8, REGDISPLAY, pR29) +PLAT_ASM_OFFSET(f0, REGDISPLAY, pR30) +PLAT_ASM_OFFSET(f8, REGDISPLAY, pR31) +PLAT_ASM_OFFSET(10, REGDISPLAY, pR2) +PLAT_ASM_OFFSET(b0, REGDISPLAY, pFP) +PLAT_ASM_OFFSET(8, REGDISPLAY, pRA) +PLAT_ASM_OFFSET(108, REGDISPLAY, F) diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/ExceptionHandling.S b/src/coreclr/nativeaot/Runtime/loongarch64/ExceptionHandling.S new file mode 100644 index 0000000000000..2b60eaa4c225f --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/loongarch64/ExceptionHandling.S @@ -0,0 +1,831 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#include +#include "AsmOffsets.inc" + +#define STACKSIZEOF_ExInfo ((SIZEOF__ExInfo + 15)&(~15)) + +#define HARDWARE_EXCEPTION 1 +#define SOFTWARE_EXCEPTION 0 + +.global RhpTrapThreads + +// ----------------------------------------------------------------------------- +// Macro used to create frame of exception throwing helpers (RhpThrowEx, RhpThrowHwEx) + .macro ALLOC_THROW_FRAME exceptionType + + ori $a3, $sp, 0 + + // Setup a PAL_LIMITED_CONTEXT on the stack { + .if \exceptionType == HARDWARE_EXCEPTION + addi.d $sp, $sp, -80 + .cfi_adjust_cfa_offset 0x50 + st.d $a3, $sp, 0 // a3 is the SP and a1 is the IP of the fault site + st.d $a1, $sp, 8 + .else + PROLOG_STACK_ALLOC 0x50 + .cfi_adjust_cfa_offset 0x50 + st.d $a3, $sp, 0 // a3 is the SP and ra is the IP of the fault site + st.d $ra, $sp, 8 + .endif + fst.d $f24, $sp, 0x10 + fst.d $f25, $sp, 0x18 + fst.d $f26, $sp, 0x20 + fst.d $f27, $sp, 0x28 + fst.d $f28, $sp, 0x30 + fst.d $f29, $sp, 0x38 + fst.d $f30, $sp, 0x40 + fst.d $f31, $sp, 0x48 + PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -0x70 + st.d $zero, $sp, 0x10 // locations reserved for return value, not used for exception handling + st.d $zero, $sp, 0x18 + PROLOG_SAVE_REG_PAIR 23, 24, 0x20 + PROLOG_SAVE_REG_PAIR 25, 26, 0x30 + PROLOG_SAVE_REG_PAIR 27, 28, 0x40 + PROLOG_SAVE_REG_PAIR 29, 30, 0x50 + PROLOG_SAVE_REG_PAIR 31, 2, 0x60 + // } end PAL_LIMITED_CONTEXT + + PROLOG_STACK_ALLOC STACKSIZEOF_ExInfo + .endm + +// ----------------------------------------------------------------------------- +// Macro used to create frame of funclet calling helpers (RhpCallXXXXFunclet) +// extraStackSize - extra stack space that the user of the macro can use to +// store additional registers + .macro ALLOC_CALL_FUNCLET_FRAME extraStackSize + + // Using below prolog instead of PROLOG_SAVE_REG_PAIR fp,ra, #-60! + // is intentional. Above statement would also emit instruction to save + // sp in fp. 
If sp is saved in fp in prolog then it is not expected that fp can change in the body + // of method. However, this method needs to be able to change fp before calling funclet. + // This is required to access locals in funclet. + PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED 22, 1, -0x60 + PROLOG_SAVE_REG_PAIR 23, 24, 0x10 + PROLOG_SAVE_REG_PAIR 25, 26, 0x20 + PROLOG_SAVE_REG_PAIR 27, 28, 0x30 + PROLOG_SAVE_REG_PAIR 29, 30, 0x40 + PROLOG_SAVE_REG_PAIR 31, 2, 0x50 + ori $fp, $sp, 0 + .cfi_def_cfa_register 22 //fp + + .if \extraStackSize != 0 + PROLOG_STACK_ALLOC \extraStackSize + .endif + .endm + +// ----------------------------------------------------------------------------- +// Macro used to free frame of funclet calling helpers (RhpCallXXXXFunclet) +// extraStackSize - extra stack space that the user of the macro can use to +// store additional registers. +// It needs to match the value passed to the corresponding +// ALLOC_CALL_FUNCLET_FRAME. + .macro FREE_CALL_FUNCLET_FRAME extraStackSize + + .if \extraStackSize != 0 + EPILOG_STACK_FREE \extraStackSize + .endif + + EPILOG_RESTORE_REG_PAIR 23, 24, 0x10 + EPILOG_RESTORE_REG_PAIR 25, 26, 0x20 + EPILOG_RESTORE_REG_PAIR 27, 28, 0x30 + EPILOG_RESTORE_REG_PAIR 29, 30, 0x40 + EPILOG_RESTORE_REG_PAIR 31, 2, 0x50 + EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 0x60 + .endm + + +// ----------------------------------------------------------------------------- +// Macro used to restore preserved general purpose and FP registers from REGDISPLAY +// regdisplayReg - register pointing to the REGDISPLAY structure + .macro RESTORE_PRESERVED_REGISTERS regdisplayReg + + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR23 + ld.d $s0, $t3, 0 + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR24 + ld.d $s1, $t3, 0 + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR25 + ld.d $s2, $t3, 0 + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR26 + ld.d $s3, $t3, 0 + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR27 + ld.d $s4, $t3, 0 + ld.d $t3, 
\regdisplayReg, OFFSETOF__REGDISPLAY__pR28 + ld.d $s5, $t3, 0 + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR29 + ld.d $s6, $t3, 0 + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR30 + ld.d $s7, $t3, 0 + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR31 + ld.d $s8, $t3, 0 + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pFP + ld.d $fp, $t3, 0 + // + // load FP preserved regs + // + addi.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__F + fld.d $f24, $t3, 0x00 + fld.d $f25, $t3, 0x08 + fld.d $f26, $t3, 0x10 + fld.d $f27, $t3, 0x18 + fld.d $f28, $t3, 0x20 + fld.d $f29, $t3, 0x28 + fld.d $f30, $t3, 0x30 + fld.d $f31, $t3, 0x38 + .endm + +// ----------------------------------------------------------------------------- +// Macro used to save preserved general purpose and FP registers to REGDISPLAY +// regdisplayReg - register pointing to the REGDISPLAY structure + .macro SAVE_PRESERVED_REGISTERS regdisplayReg + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR23 + st.d $s0, $t3, 0 + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR24 + st.d $s1, $t3, 0 + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR25 + st.d $s2, $t3, 0 + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR26 + st.d $s3, $t3, 0 + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR27 + st.d $s4, $t3, 0 + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR28 + st.d $s5, $t3, 0 + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR29 + st.d $s6, $t3, 0 + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR30 + st.d $s7, $t3, 0 + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR31 + st.d $s8, $t3, 0 + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pFP + st.d $fp, $t3, 0 + // + // store vfp preserved regs + // + addi.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__F + fst.d $f24, $t3, 0x00 + fst.d $f25, $t3, 0x08 + fst.d $f26, $t3, 0x10 + fst.d $f27, $t3, 0x18 + fst.d $f28, $t3, 0x20 + fst.d $f29, $t3, 0x28 + fst.d $f30, $t3, 0x30 + fst.d $f31, $t3, 0x38 + .endm + + +// 
----------------------------------------------------------------------------- +// Macro used to thrash preserved general purpose registers in REGDISPLAY +// to make sure nobody uses them +// regdisplayReg - register pointing to the REGDISPLAY structure + .macro TRASH_PRESERVED_REGISTERS_STORAGE regdisplayReg + +#if _DEBUG + lu12i.w $a3, 0xbaadd + ori $a3, $a3, 0xeed + lu32i.d $a3, 0xddeed + lu52i.d $a3, $a3, 0xbaa + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR23 + st.d $a3, $t3, 0 + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR24 + st.d $a3, $t3, 0 + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR25 + st.d $a3, $t3, 0 + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR26 + st.d $a3, $t3, 0 + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR27 + st.d $a3, $t3, 0 + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR28 + st.d $a3, $t3, 0 + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR29 + st.d $a3, $t3, 0 + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR30 + st.d $a3, $t3, 0 + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR31 + st.d $a3, $t3, 0 + ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pFP + st.d $a3, $t3, 0 +#endif // _DEBUG + .endm + +.macro GetThreadX2 + st.d $a0, $sp, -16 + st.d $a1, $sp, -8 + addi.d $sp, $sp, -16 + bl C_FUNC(RhpGetThread) + ori $a2, $a0, 0 + ld.d $a0, $sp, 0 + ld.d $a1, $sp, 8 + addi.d $sp, $sp, 16 +.endm + +#define rsp_offsetof_ExInfo 0 +#define rsp_offsetof_Context STACKSIZEOF_ExInfo + +// +// RhpThrowHwEx +// +// INPUT: a0[31:0]: exception code of fault +// a1: faulting IP +// +// OUTPUT: +// + NESTED_ENTRY RhpThrowHwEx, _TEXT, NoHandler + + ALLOC_THROW_FRAME HARDWARE_EXCEPTION + + GetThreadX2 + + addi.d $a1, $sp, rsp_offsetof_ExInfo // a1 <- ExInfo* + st.d $zero, $a1, OFFSETOF__ExInfo__m_exception // pExInfo->m_exception = null + ori $a3, $zero, 1 + st.b $a3, $a1, OFFSETOF__ExInfo__m_passNumber // pExInfo->m_passNumber = 1 + lu12i.w $a3, -1 + ori $a3, $a3, 0xfff + st.w $a3, $a1, 
OFFSETOF__ExInfo__m_idxCurClause // pExInfo->m_idxCurClause = MaxTryRegionIdx + ori $a3, $zero, 2 + st.b $a3, $a1, OFFSETOF__ExInfo__m_kind // pExInfo->m_kind = ExKind.HardwareFault + + // link the ExInfo into the thread's ExInfo chain + ld.d $a3, $a2, OFFSETOF__Thread__m_pExInfoStackHead + st.d $a3, $a1, OFFSETOF__ExInfo__m_pPrevExInfo // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + st.d $a1, $a2, OFFSETOF__Thread__m_pExInfoStackHead // m_pExInfoStackHead = pExInfo + + // set the exception context field on the ExInfo + addi.d $a2, $sp, rsp_offsetof_Context // a2 <- PAL_LIMITED_CONTEXT* + st.d $a2, $a1, OFFSETOF__ExInfo__m_pExContext // pExInfo->m_pExContext = pContext + + // a0[31:0]: exception code + // a1: ExInfo* + bl C_FUNC(RhThrowHwEx) + + ALTERNATE_ENTRY RhpThrowHwEx2 + + // no return + EMIT_BREAKPOINT + + NESTED_END RhpThrowHwEx, _TEXT + +// +// RhpThrowEx +// +// INPUT: a0: exception object +// +// OUTPUT: +// + + NESTED_ENTRY RhpThrowEx, _TEXT, NoHandler + + ALLOC_THROW_FRAME SOFTWARE_EXCEPTION + + GetThreadX2 + + // There is runtime C# code that can tail call to RhpThrowEx using a binder intrinsic. So the return + // address could have been hijacked when we were in that C# code and we must remove the hijack and + // reflect the correct return address in our exception context record. The other throw helpers don't + // need this because they cannot be tail-called from C#. + + // NOTE: we cannot use INLINE_THREAD_UNHIJACK because it will write into the stack at the location + // where the tail-calling thread had saved RA, which may not match where we have saved RA. 
+ + ld.d $a1, $a2, OFFSETOF__Thread__m_pvHijackedReturnAddress + beq $a1, $zero, NotHijacked + + ld.d $a3, $a2, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation + + // a0: exception object + // a1: hijacked return address + // a2: pThread + // a3: hijacked return address location + + addi.d $t3, $sp, (STACKSIZEOF_ExInfo + SIZEOF__PAL_LIMITED_CONTEXT) // re-compute SP at callsite + bltu $a3, $t3, TailCallWasHijacked // if (m_ppvHijackedReturnAddressLocation < SP at callsite) + + // normal case where a valid return address location is hijacked + st.d $a1, $a3, 0 + b ClearThreadState + +TailCallWasHijacked: + + // Abnormal case where the return address location is now invalid because we ended up here via a tail + // call. In this case, our hijacked return address should be the correct caller of this method. + + // stick the previous return address in RA as well as in the right spots in our PAL_LIMITED_CONTEXT. + ori $ra, $a1, 0 + st.d $ra, $sp, (rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__RA) + st.d $ra, $sp, (rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__IP) + +ClearThreadState: + + // clear the Thread's hijack state + st.d $zero, $a2, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation + st.d $zero, $a2, OFFSETOF__Thread__m_pvHijackedReturnAddress + +NotHijacked: + + addi.d $a1, $sp, rsp_offsetof_ExInfo // a1 <- ExInfo* + st.d $zero, $a1, OFFSETOF__ExInfo__m_exception // pExInfo->m_exception = null + ori $a3, $zero, 1 + st.b $a3, $a1, OFFSETOF__ExInfo__m_passNumber // pExInfo->m_passNumber = 1 + lu12i.w $a3, -1 + ori $a3, $a3, 0xfff + st.w $a3, $a1, OFFSETOF__ExInfo__m_idxCurClause // pExInfo->m_idxCurClause = MaxTryRegionIdx + ori $a3, $zero, 1 + st.b $a3, $a1, OFFSETOF__ExInfo__m_kind // pExInfo->m_kind = ExKind.Throw + + // link the ExInfo into the thread's ExInfo chain + ld.d $a3, $a2, OFFSETOF__Thread__m_pExInfoStackHead + st.d $a3, $a1, OFFSETOF__ExInfo__m_pPrevExInfo // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + st.d $a1, $a2, 
OFFSETOF__Thread__m_pExInfoStackHead // m_pExInfoStackHead = pExInfo + + // set the exception context field on the ExInfo + addi.d $a2, $sp, rsp_offsetof_Context // a2 <- PAL_LIMITED_CONTEXT* + st.d $a2, $a1, OFFSETOF__ExInfo__m_pExContext // pExInfo->m_pExContext = pContext + + // a0: exception object + // a1: ExInfo* + bl C_FUNC(RhThrowEx) + + ALTERNATE_ENTRY RhpThrowEx2 + + // no return + EMIT_BREAKPOINT + NESTED_END RhpThrowEx, _TEXT + + +// +// void FASTCALL RhpRethrow() +// +// SUMMARY: Similar to RhpThrowEx, except that it passes along the currently active ExInfo +// +// INPUT: +// +// OUTPUT: +// + + NESTED_ENTRY RhpRethrow, _TEXT, NoHandler + + ALLOC_THROW_FRAME SOFTWARE_EXCEPTION + + GetThreadX2 + + addi.d $a1, $sp, rsp_offsetof_ExInfo // a1 <- ExInfo* + st.d $zero, $a1, OFFSETOF__ExInfo__m_exception // pExInfo->m_exception = null + st.b $zero, $a1, OFFSETOF__ExInfo__m_kind // init to a deterministic value (ExKind.None) + ori $a3, $zero, 1 + st.b $a3, $a1, OFFSETOF__ExInfo__m_passNumber // pExInfo->m_passNumber = 1 + lu12i.w $a3, -1 + ori $a3, $a3, 0xfff + st.w $a3, $a1, OFFSETOF__ExInfo__m_idxCurClause // pExInfo->m_idxCurClause = MaxTryRegionIdx + + // link the ExInfo into the thread's ExInfo chain + ld.d $a3, $a2, OFFSETOF__Thread__m_pExInfoStackHead + ori $a0, $a3, 0 // a0 <- current ExInfo + st.d $a3, $a1, OFFSETOF__ExInfo__m_pPrevExInfo // pExInfo->m_pPrevExInfo = m_pExInfoStackHead + st.d $a1, $a2, OFFSETOF__Thread__m_pExInfoStackHead // m_pExInfoStackHead = pExInfo + + // set the exception context field on the ExInfo + addi.d $a2, $sp, rsp_offsetof_Context // a2 <- PAL_LIMITED_CONTEXT* + st.d $a2, $a1, OFFSETOF__ExInfo__m_pExContext // pExInfo->m_pExContext = pContext + + // a0 contains the currently active ExInfo + // a1 contains the address of the new ExInfo + bl C_FUNC(RhRethrow) + + ALTERNATE_ENTRY RhpRethrow2 + + // no return + EMIT_BREAKPOINT + NESTED_END RhpRethrow, _TEXT + +// +// void* FASTCALL RhpCallCatchFunclet(OBJECTREF exceptionObj, 
void* pHandlerIP, REGDISPLAY* pRegDisplay, +// ExInfo* pExInfo) +// +// INPUT: a0: exception object +// a1: handler funclet address +// a2: REGDISPLAY* +// a3: ExInfo* +// +// OUTPUT: +// + + NESTED_ENTRY RhpCallCatchFunclet, _TEXT, NoHandler + + ALLOC_CALL_FUNCLET_FRAME 0x70 // Size needs to be equal with ExceptionHandling.asm variant of this function + fst.d $f24, $sp, 0x00 + fst.d $f25, $sp, 0x08 + fst.d $f26, $sp, 0x10 + fst.d $f27, $sp, 0x18 + fst.d $f28, $sp, 0x20 + fst.d $f29, $sp, 0x28 + fst.d $f30, $sp, 0x30 + fst.d $f31, $sp, 0x38 + st.d $a0, $sp, 0x40 // a0 to a3 are stored to restore them anytime + st.d $a1, $sp, 0x48 + st.d $a2, $sp, 0x50 + st.d $a3, $sp, 0x58 + st.d $zero, $sp, 0x60 // $zero makes space for the local "is_not_handling_thread_abort"; last qword will store the thread obj + +#define rsp_offset_is_not_handling_thread_abort 0x60 +#define rsp_offset_a0 0x40 +#define rsp_offset_a1 0x48 +#define rsp_offset_a2 0x50 +#define rsp_offset_a3 0x58 +#define rsp_CatchFunclet_offset_thread 0x68 + + // + // clear the DoNotTriggerGc flag, trashes a4-a6 + // + + bl C_FUNC(RhpGetThread) + st.d $a0, $sp, rsp_CatchFunclet_offset_thread + ori $a5, $a0, 0 + ld.d $a0, $sp, 0x40 + ld.d $a1, $sp, 0x48 + ld.d $a2, $sp, 0x50 + ld.d $a3, $sp, 0x58 + + ld.d $a4, $a5, OFFSETOF__Thread__m_threadAbortException + sub.d $a4, $a4, $a0 + st.d $a4, $sp, rsp_offset_is_not_handling_thread_abort // Non-zero if the exception is not ThreadAbortException + + addi.d $t3, $a5, OFFSETOF__Thread__m_ThreadStateFlags + +ClearRetry_Catch: //TODO-LOONGARCH64: change ld/st to atomic instructions. 
+    ld.w  $a4, $t3, 0
+    bstrins.w  $a4, $zero, 4, 4          // $a4 = $a4 & ~TSF_DoNotTriggerGc
+    st.w  $a4, $t3, 0
+    ori  $a6, $zero, 0
+    beq  $a6, $zero, ClearSuccess_Catch
+    b  ClearRetry_Catch
+ClearSuccess_Catch:
+
+    //
+    // set preserved regs to the values expected by the funclet
+    //
+    RESTORE_PRESERVED_REGISTERS  $a2
+    //
+    // trash the values at the old homes to make sure nobody uses them
+    //
+    TRASH_PRESERVED_REGISTERS_STORAGE  $a2
+
+    //
+    // call the funclet
+    //
+    // a0 still contains the exception object
+    jirl  $ra, $a1, 0
+
+    ALTERNATE_ENTRY RhpCallCatchFunclet2
+
+    // $a0 contains resume IP
+
+    ld.d  $a2, $sp, rsp_offset_a2                    // a2 <- REGDISPLAY*
+
+#ifdef _DEBUG
+    // Call into some C++ code to validate the pop of the ExInfo. We only do this in debug because we
+    // have to spill all the preserved registers and then refill them after the call.
+
+    st.d  $a0, $sp, rsp_offset_a0
+
+    SAVE_PRESERVED_REGISTERS  $a2
+
+    ld.d  $a0, $sp, rsp_CatchFunclet_offset_thread   // a0 <- Thread*
+    ld.d  $a1, $sp, rsp_offset_a3                    // a1 <- current ExInfo*
+    ld.d  $a2, $a2, OFFSETOF__REGDISPLAY__SP         // a2 <- resume SP value
+    bl  C_FUNC(RhpValidateExInfoPop)
+
+    ld.d  $a2, $sp, rsp_offset_a2                    // a2 <- REGDISPLAY*
+
+    RESTORE_PRESERVED_REGISTERS  $a2
+
+    ld.d  $a0, $sp, rsp_offset_a0                    // reload resume IP
+#endif
+
+    ld.d  $a1, $sp, rsp_CatchFunclet_offset_thread
+
+    // We must unhijack the thread at this point because the section of stack where the hijack is applied
+    // may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack.
+    INLINE_THREAD_UNHIJACK  $a1, $a3, $t3            // Thread in a1, trashes a3 and t3
+
+    ld.d  $a3, $sp, rsp_offset_a3                    // a3 <- current ExInfo*
+    ld.d  $a2, $a2, OFFSETOF__REGDISPLAY__SP         // a2 <- resume SP value
+
+PopExInfoLoop:
+    ld.d  $a3, $a3, OFFSETOF__ExInfo__m_pPrevExInfo  // a3 <- next ExInfo
+    beq  $a3, $zero, DonePopping                     // if (pExInfo == null) { we're done }
+    blt  $a3, $a2, PopExInfoLoop                     // if (pExInfo < resume SP) { keep going }
+
+DonePopping:
+    st.d  $a3, $a1, OFFSETOF__Thread__m_pExInfoStackHead // store the new head on the Thread
+
+    PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, $a3
+
+    andi  $t7, $a3, TrapThreadsFlags_AbortInProgress_Bit
+    beq  $t7, $zero, NoAbort
+
+    ld.d  $a3, $sp, rsp_offset_is_not_handling_thread_abort
+    bne  $a3, $zero, NoAbort
+
+    // It was the ThreadAbortException, so rethrow it
+    // reset SP
+    ori  $a1, $a0, 0                                 // a1 <- continuation address as exception PC
+    addi.w  $a0, $zero, STATUS_REDHAWK_THREAD_ABORT
+    ori  $sp, $a2, 0
+    b  C_FUNC(RhpThrowHwEx)
+
+NoAbort:
+    // reset SP and jump to continuation address
+    ori  $sp, $a2, 0
+    jirl  $r0, $a0, 0
+
+#undef rsp_offset_is_not_handling_thread_abort
+#undef rsp_offset_a0
+#undef rsp_offset_a1
+#undef rsp_offset_a2
+#undef rsp_offset_a3
+#undef rsp_CatchFunclet_offset_thread
+
+    NESTED_END RhpCallCatchFunclet, _TEXT
+
+//
+// void FASTCALL RhpCallFinallyFunclet(void* pHandlerIP, REGDISPLAY* pRegDisplay)
+//
+// INPUT:  a0: handler funclet address
+//         a1: REGDISPLAY*
+//
+// OUTPUT:
+//
+
+    NESTED_ENTRY RhpCallFinallyFunclet, _TEXT, NoHandler
+
+    ALLOC_CALL_FUNCLET_FRAME 0x60  // Size needs to be equal with ExceptionHandling.asm variant of this function
+    fst.d  $f24, $sp, 0x00
+    fst.d  $f25, $sp, 0x08
+    fst.d  $f26, $sp, 0x10
+    fst.d  $f27, $sp, 0x18
+    fst.d  $f28, $sp, 0x20
+    fst.d  $f29, $sp, 0x28
+    fst.d  $f30, $sp, 0x30
+    fst.d  $f31, $sp, 0x38
+    st.d  $a0, $sp, 0x40           // a0 and a1 are saved so we have them later
+    st.d  $a1, $sp, 0x48
+
+#define rsp_offset_a1 0x48
+#define rsp_FinallyFunclet_offset_thread 0x50
+
+
+
+    // We want to suppress hijacking between invocations of subsequent finallys. We do this because we
+    // cannot tolerate a GC after one finally has run (and possibly side-effected the GC state of the
+    // method) and then been popped off the stack, leaving behind no trace of its effect.
+    //
+    // So we clear the state before and set it after invocation of the handler.
+    //
+
+    //
+    // clear the DoNotTriggerGc flag, trashes a2-a4
+    //
+
+    bl  C_FUNC(RhpGetThread)
+    st.d  $a0, $sp, rsp_FinallyFunclet_offset_thread
+    ori  $a2, $a0, 0
+    ld.d  $a0, $sp, 0x40
+    ld.d  $a1, $sp, 0x48
+
+    addi.d  $t3, $a2, OFFSETOF__Thread__m_ThreadStateFlags
+
+ClearRetry:  //TODO-LOONGARCH64: change ld/st to atomic instructions.
+    ld.w  $a4, $t3, 0
+    bstrins.w  $a4, $zero, 4, 4    // $a4 = $a4 & ~TSF_DoNotTriggerGc
+    st.w  $a4, $t3, 0
+    ori  $a3, $zero, 0
+    beq  $a3, $zero, ClearSuccess
+    b  ClearRetry
+ClearSuccess:
+
+    //
+    // set preserved regs to the values expected by the funclet
+    //
+    RESTORE_PRESERVED_REGISTERS  $a1
+    //
+    // trash the values at the old homes to make sure nobody uses them
+    //
+    TRASH_PRESERVED_REGISTERS_STORAGE  $a1
+
+    //
+    // call the funclet
+    //
+    jirl  $ra, $a0, 0
+
+    ALTERNATE_ENTRY RhpCallFinallyFunclet2
+
+    ld.d  $a1, $sp, rsp_offset_a1  // reload REGDISPLAY pointer
+
+    //
+    // save new values of preserved regs into REGDISPLAY
+    //
+    SAVE_PRESERVED_REGISTERS  $a1
+
+    //
+    // set the DoNotTriggerGc flag, trashes a1-a3
+    //
+
+    ld.d  $a2, $sp, rsp_FinallyFunclet_offset_thread
+
+    addi.d  $t3, $a2, OFFSETOF__Thread__m_ThreadStateFlags
+SetRetry:  //TODO-LOONGARCH64: change ld/st to atomic instructions.
+    ld.w  $a1, $t3, 0
+    ori  $a1, $a1, TSF_DoNotTriggerGc
+    st.w  $a1, $t3, 0
+    ori  $a3, $zero, 0
+    beq  $a3, $zero, SetSuccess
+    b  SetRetry
+SetSuccess:
+
+    fld.d  $f24, $sp, 0x00
+    fld.d  $f25, $sp, 0x08
+    fld.d  $f26, $sp, 0x10
+    fld.d  $f27, $sp, 0x18
+    fld.d  $f28, $sp, 0x20
+    fld.d  $f29, $sp, 0x28
+    fld.d  $f30, $sp, 0x30
+    fld.d  $f31, $sp, 0x38
+
+    FREE_CALL_FUNCLET_FRAME 0x60
+    EPILOG_RETURN
+
+#undef rsp_offset_a1
+#undef rsp_FinallyFunclet_offset_thread
+
+    NESTED_END RhpCallFinallyFunclet, _TEXT
+
+
+//
+// void* FASTCALL RhpCallFilterFunclet(OBJECTREF exceptionObj, void* pFilterIP, REGDISPLAY* pRegDisplay)
+//
+// INPUT:  a0: exception object
+//         a1: filter funclet address
+//         a2: REGDISPLAY*
+//
+// OUTPUT:
+//
+
+    NESTED_ENTRY RhpCallFilterFunclet, _TEXT, NoHandler
+    ALLOC_CALL_FUNCLET_FRAME 0x40
+    fst.d  $f24, $sp, 0x00
+    fst.d  $f25, $sp, 0x08
+    fst.d  $f26, $sp, 0x10
+    fst.d  $f27, $sp, 0x18
+    fst.d  $f28, $sp, 0x20
+    fst.d  $f29, $sp, 0x28
+    fst.d  $f30, $sp, 0x30
+    fst.d  $f31, $sp, 0x38
+
+    ld.d  $t3, $a2, OFFSETOF__REGDISPLAY__pFP
+    ld.d  $fp, $t3, 0
+
+    //
+    // call the funclet
+    //
+    // $a0 still contains the exception object
+    jirl  $ra, $a1, 0
+
+    ALTERNATE_ENTRY RhpCallFilterFunclet2
+
+    fld.d  $f24, $sp, 0x00
+    fld.d  $f25, $sp, 0x08
+    fld.d  $f26, $sp, 0x10
+    fld.d  $f27, $sp, 0x18
+    fld.d  $f28, $sp, 0x20
+    fld.d  $f29, $sp, 0x28
+    fld.d  $f30, $sp, 0x30
+    fld.d  $f31, $sp, 0x38
+
+    FREE_CALL_FUNCLET_FRAME 0x40
+    EPILOG_RETURN
+
+    NESTED_END RhpCallFilterFunclet, _TEXT
+
+#ifdef FEATURE_OBJCMARSHAL
+
+//
+// void* FASTCALL RhpCallPropagateExceptionCallback(void* pCallbackContext, void* pCallback, REGDISPLAY* pRegDisplay,
+//                                                  ExInfo* pExInfo, PInvokeTransitionFrame* pPreviousTransitionFrame)
+//
+// INPUT:  a0: callback context
+//         a1: callback
+//         a2: REGDISPLAY*
+//         a3: ExInfo*
+//         a4: pPreviousTransitionFrame
+//
+// OUTPUT:
+//
+
+    NESTED_ENTRY RhpCallPropagateExceptionCallback, _TEXT, NoHandler
+
+#define rsp_offset_a0 0x10
+#define rsp_offset_a1 0x18
+#define 
rsp_offset_a2 0x20
+#define rsp_offset_a3 0x28
+#define rsp_offset_a4 0x30
+#define rsp_CallPropagationCallback_offset_thread 0x38
+
+    // Using the NO_FP macro so that the debugger unwinds using SP.
+    // This makes backtraces work even after using RESTORE_PRESERVED_REGISTERS.
+    PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED 22, 1, -0x40
+    ori  $fp, $sp, 0
+    st.d  $a0, $sp, rsp_offset_a0  // a0 to a3 are stored to restore them anytime
+    st.d  $a1, $sp, rsp_offset_a1  // a0 to a3 are stored to restore them anytime
+    st.d  $a2, $sp, rsp_offset_a2
+    st.d  $a3, $sp, rsp_offset_a3
+    st.d  $a4, $sp, rsp_offset_a4
+    st.d  $zero, $sp, rsp_CallPropagationCallback_offset_thread  // $zero makes space to store the thread obj
+
+    //
+    // clear the DoNotTriggerGc flag, trashes a4-a6
+    //
+
+    bl  C_FUNC(RhpGetThread)
+    st.d  $a0, $sp, rsp_CallPropagationCallback_offset_thread
+    ori  $a5, $a0, 0
+    ld.d  $a0, $sp, rsp_offset_a0
+    ld.d  $a1, $sp, rsp_offset_a1
+    ld.d  $a2, $sp, rsp_offset_a2
+    ld.d  $a3, $sp, rsp_offset_a3
+
+    addi.d  $t3, $a5, OFFSETOF__Thread__m_ThreadStateFlags
+
+ClearRetry_Propagate:  //TODO-LOONGARCH64: change ld/st to atomic instructions.
+    ld.w  $a4, $t3, 0
+    bstrins.w  $a4, $zero, 4, 4    // $a4 = $a4 & ~TSF_DoNotTriggerGc
+    st.w  $a4, $t3, 0
+    ori  $a6, $zero, 0
+    beq  $a6, $zero, ClearSuccess_Propagate
+    b  ClearRetry_Propagate
+ClearSuccess_Propagate:
+
+    //
+    // set preserved regs to the values expected by the funclet
+    //
+    RESTORE_PRESERVED_REGISTERS  $a2
+    //
+    // trash the values at the old homes to make sure nobody uses them
+    //
+    TRASH_PRESERVED_REGISTERS_STORAGE  $a2
+
+#ifdef _DEBUG
+    // Call into some C++ code to validate the pop of the ExInfo. We only do this in debug because we
+    // have to spill all the preserved registers and then refill them after the call.
+
+    SAVE_PRESERVED_REGISTERS  $a2
+
+    ld.d  $a0, $sp, rsp_CallPropagationCallback_offset_thread  // a0 <- Thread*
+    ld.d  $a1, $sp, rsp_offset_a3                    // a1 <- current ExInfo*
+    ld.d  $a2, $a2, OFFSETOF__REGDISPLAY__SP         // a2 <- resume SP value
+    bl  C_FUNC(RhpValidateExInfoPop)
+
+    ld.d  $a2, $sp, rsp_offset_a2                    // a2 <- REGDISPLAY*
+
+    RESTORE_PRESERVED_REGISTERS  $a2
+#endif
+
+    ld.d  $a1, $sp, rsp_CallPropagationCallback_offset_thread
+
+    // We must unhijack the thread at this point because the section of stack where the hijack is applied
+    // may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack.
+    INLINE_THREAD_UNHIJACK  $a1, $a3, $t3            // Thread in a1, trashes a3 and t3
+
+    ld.d  $a3, $sp, rsp_offset_a3                    // a3 <- current ExInfo*
+    ld.d  $a2, $a2, OFFSETOF__REGDISPLAY__SP         // a2 <- resume SP value
+
+Propagate_PopExInfoLoop:
+    ld.d  $a3, $a3, OFFSETOF__ExInfo__m_pPrevExInfo  // a3 <- next ExInfo
+    beq  $a3, $zero, Propagate_DonePopping           // if (pExInfo == null) { we're done }
+    blt  $a3, $a2, Propagate_PopExInfoLoop           // if (pExInfo < resume SP) { keep going }
+
+Propagate_DonePopping:
+    st.d  $a3, $a1, OFFSETOF__Thread__m_pExInfoStackHead  // store the new head on the Thread
+
+    // restore preemptive mode
+    ld.d  $a4, $sp, rsp_offset_a4                    // pPreviousTransitionFrame
+    st.d  $a4, $a1, OFFSETOF__Thread__m_pTransitionFrame
+
+    // reset SP and RA and jump to continuation address
+    ld.d  $a0, $sp, rsp_offset_a0                    // callback context
+    ld.d  $a1, $sp, rsp_offset_a1                    // callback
+    ld.d  $a2, $sp, rsp_offset_a2                    // REGDISPLAY*
+    ld.d  $a3, $a2, OFFSETOF__REGDISPLAY__pRA        // a3 <- &resume RA value
+    ld.d  $ra, $a3, 0                                // ld.d needs an explicit displacement operand
+    ld.d  $a3, $a2, OFFSETOF__REGDISPLAY__SP         // a3 <- resume SP value
+    ori  $sp, $a3, 0
+    jirl  $r0, $a1, 0
+
+#undef rsp_offset_a0
+#undef rsp_offset_a1
+#undef rsp_offset_a2
+#undef rsp_offset_a3
+#undef rsp_CallPropagationCallback_offset_thread
+
+    NESTED_END RhpCallPropagateExceptionCallback, _TEXT
+
+#endif // FEATURE_OBJCMARSHAL
diff --git 
a/src/coreclr/nativeaot/Runtime/loongarch64/GcProbe.S b/src/coreclr/nativeaot/Runtime/loongarch64/GcProbe.S new file mode 100644 index 0000000000000..34329145ade01 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/loongarch64/GcProbe.S @@ -0,0 +1,198 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +#include +#include "AsmOffsets.inc" + +PROBE_FRAME_SIZE = 0xD0 // 4 * 8 for fixed part of PInvokeTransitionFrame (fp, ra, m_pThread, m_Flags) + + // 10 * 8 for callee saved registers + + // 1 * 8 for caller SP + + // 2 * 8 for int returns + + // 1 * 8 for alignment padding + + // 4 * 16 for FP returns + +// See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves return registers +// and accepts the register bitmask +// Call this macro first in the method (no further prolog instructions can be added after this). +// +// threadReg : register containing the Thread* (this will be preserved). +// trashReg : register that can be trashed by this macro +// BITMASK : value to initialize m_dwFlags field with (register or #constant) +.macro PUSH_PROBE_FRAME threadReg, trashReg, BITMASK + + // Define the method prolog, allocating enough stack space for the PInvokeTransitionFrame and saving + // incoming register values into it. 
+ + // First create PInvokeTransitionFrame + PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -PROBE_FRAME_SIZE // Push down stack pointer and store FP and RA + + // Slot at $sp+0x10 is reserved for Thread * + // Slot at $sp+0x18 is reserved for bitmask of saved registers + + // Save callee saved registers + PROLOG_SAVE_REG_PAIR 23, 24, 0x20 + PROLOG_SAVE_REG_PAIR 25, 26, 0x30 + PROLOG_SAVE_REG_PAIR 27, 28, 0x40 + PROLOG_SAVE_REG_PAIR 29, 30, 0x50 + PROLOG_SAVE_REG_PAIR 31, 2, 0x60 + + // Slot at $sp+0x70 is reserved for caller sp + + // Save the integer return registers + st.d $a0, $sp, 0x78 + st.d $a1, $sp, 0x80 + + // Slot at $sp+0x88 is alignment padding + + // Save the FP return registers + fst.d $f0, $sp, 0x90 + fst.d $f1, $sp, 0x98 + fst.d $f2, $sp, 0xA0 + fst.d $f3, $sp, 0xA8 + + // Perform the rest of the PInvokeTransitionFrame initialization. + st.d \threadReg, $sp, OFFSETOF__PInvokeTransitionFrame__m_pThread // Thread * (unused by stackwalker) + st.d \BITMASK, $sp, OFFSETOF__PInvokeTransitionFrame__m_pThread + 8 // save the register bitmask passed in by caller + + addi.d \trashReg, $sp, PROBE_FRAME_SIZE // recover value of caller's SP + st.d \trashReg, $sp, 0x70 // save caller's SP + + // link the frame into the Thread + ori \trashReg, $sp, 0 + st.d \trashReg, \threadReg, OFFSETOF__Thread__m_pDeferredTransitionFrame +.endm + +// +// Remove the frame from a previous call to PUSH_PROBE_FRAME from the top of the stack and restore preserved +// registers and return value to their values from before the probe was called (while also updating any +// object refs or byrefs). 
+// +.macro POP_PROBE_FRAME + + // Restore the integer return registers + ld.d $a0, $sp, 0x78 + ld.d $a1, $sp, 0x80 + + // Restore the FP return registers + fld.d $f0, $sp, 0x90 + fld.d $f1, $sp, 0x98 + fld.d $f2, $sp, 0xA0 + fld.d $f3, $sp, 0xA8 + + // Restore callee saved registers + EPILOG_RESTORE_REG_PAIR 23, 24, 0x20 + EPILOG_RESTORE_REG_PAIR 25, 26, 0x30 + EPILOG_RESTORE_REG_PAIR 27, 28, 0x40 + EPILOG_RESTORE_REG_PAIR 29, 30, 0x50 + EPILOG_RESTORE_REG_PAIR 31, 2, 0x60 + + EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, PROBE_FRAME_SIZE +.endm + +// +// The prolog for all GC suspension hijacks (normal and stress). Fixes up the hijacked return address, and +// clears the hijack state. +// +// Register state on entry: +// All registers correct for return to the original return address. +// +// Register state on exit: +// a2: thread pointer +// t3: transition frame flags for the return registers a0 and a1 +// +.macro FixupHijackedCallstack + + // a2 <- GetThread() +#ifdef FEATURE_EMULATED_TLS + GETTHREAD_ETLS_2 +#else + INLINE_GETTHREAD $a2 +#endif + + // + // Fix the stack by restoring the original return address + // + // Load m_pvHijackedReturnAddress and m_uHijackedReturnValueFlags + ld.d $ra, $a2, OFFSETOF__Thread__m_pvHijackedReturnAddress + ld.d $t3, $a2, OFFSETOF__Thread__m_pvHijackedReturnAddress + 8 + + // + // Clear hijack state + // + // Clear m_ppvHijackedReturnAddressLocation and m_pvHijackedReturnAddress + st.d $zero, $a2, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation + st.d $zero, $a2, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation + 8 + // Clear m_uHijackedReturnValueFlags + st.d $zero, $a2, OFFSETOF__Thread__m_uHijackedReturnValueFlags + +.endm + +// +// GC Probe Hijack target +// +NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler + FixupHijackedCallstack + + PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, $a3 + andi $t8, $a3, TrapThreadsFlags_TrapThreads_Bit + bne $t8, $zero, WaitForGC + jirl $r0, $ra, 0 + +WaitForGC: + lu12i.w $t7, 
((DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R4 + PTFF_SAVE_R5) >> 12) & 0xfffff + ori $t7, $t7, (DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R4 + PTFF_SAVE_R5) & 0xfff + or $t3, $t3, $t7 + b C_FUNC(RhpWaitForGC) +NESTED_END RhpGcProbeHijack + +.global C_FUNC(RhpThrowHwEx) + +NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler + PUSH_PROBE_FRAME $a2, $a3, $t3 + + ld.d $a0, $a2, OFFSETOF__Thread__m_pDeferredTransitionFrame + bl C_FUNC(RhpWaitForGC2) + + ld.d $a2,$sp, OFFSETOF__PInvokeTransitionFrame__m_Flags + andi $t8, $a2, PTFF_THREAD_ABORT_BIT + bne $t8, $zero, ThrowThreadAbort + + .cfi_remember_state + POP_PROBE_FRAME + EPILOG_RETURN + + .cfi_restore_state +ThrowThreadAbort: + POP_PROBE_FRAME + addi.w $a0, $zero, STATUS_REDHAWK_THREAD_ABORT + ori $a1, $ra, 0 // return address as exception PC + b RhpThrowHwEx +NESTED_END RhpWaitForGC + +.global C_FUNC(RhpGcPoll2) + +LEAF_ENTRY RhpGcPoll + PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, $a0 + bne $a0, $zero, C_FUNC(RhpGcPollRare) + jirl $r0, $ra, 0 +LEAF_END RhpGcPoll + +NESTED_ENTRY RhpGcPollRare, _TEXT, NoHandler + PUSH_COOP_PINVOKE_FRAME $a0 + bl RhpGcPoll2 + POP_COOP_PINVOKE_FRAME + jirl $r0, $ra, 0 +NESTED_END RhpGcPollRare + + +#ifdef FEATURE_GC_STRESS + +// +// GC Stress Hijack targets +// +LEAF_ENTRY RhpGcStressHijack, _TEXT + // NYI + EMIT_BREAKPOINT +LEAF_END RhpGcStressHijack, _TEXT + +#endif // FEATURE_GC_STRESS diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/InteropThunksHelpers.S b/src/coreclr/nativeaot/Runtime/loongarch64/InteropThunksHelpers.S new file mode 100644 index 0000000000000..c096d77796397 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/loongarch64/InteropThunksHelpers.S @@ -0,0 +1,52 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#include + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +POINTER_SIZE = 0x08 + +//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Interop Thunks Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + // + // RhCommonStub + // + // INPUT: tp: thunk's data block + // + // TRASHES: t0, t1, tp + // + LEAF_ENTRY RhCommonStub, _TEXT + // There are arbitrary callers passing arguments with arbitrary signatures. + // Custom calling convention: + // tp pointer to the current thunk's data block (data contains 2 pointer values: context + target pointers) + +#ifdef FEATURE_EMULATED_TLS + // This doesn't save and restore the floating point argument registers. If we encounter a + // target system that uses TLS emulation and modify these registers during this call we + // need to save and restore them, too + GETTHUNKDATA_ETLS_9 +#else + INLINE_GET_TLS_VAR $t0, C_FUNC(tls_thunkData) +#endif + + // t0 = base address of TLS data + // tp = address of context cell in thunk's data + + // store thunk address in thread static + ld.d $t1, $t7, 0 + st.d $t1, $t0, 0 + + // Now load the target address and jump to it. + ld.d $t7, $t7, POINTER_SIZE + jirl $r0, $t7, 0 + + LEAF_END RhCommonStub, _TEXT + + // + // IntPtr RhGetCommonStubAddress() + // + LEAF_ENTRY RhGetCommonStubAddress, _TEXT + PREPARE_EXTERNAL_VAR RhCommonStub, $a0 + jirl $r0, $ra, 0 + LEAF_END RhGetCommonStubAddress, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/MiscStubs.S b/src/coreclr/nativeaot/Runtime/loongarch64/MiscStubs.S new file mode 100644 index 0000000000000..ea5d91a1a1c1f --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/loongarch64/MiscStubs.S @@ -0,0 +1,5 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#include +#include "AsmOffsets.inc" diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/PInvoke.S b/src/coreclr/nativeaot/Runtime/loongarch64/PInvoke.S new file mode 100644 index 0000000000000..0c0c6acda4f63 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/loongarch64/PInvoke.S @@ -0,0 +1,65 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + + +#include +#include "AsmOffsets.inc" + +.global RhpTrapThreads + +// Note: these must match the defs in PInvokeTransitionFrameFlags defined in rhbinder.h +PTFF_SAVE_SP = 0x00000400 + +// Bit position for the flags above, to be used with andi+beq/bne instructions +PTFF_THREAD_ABORT_BIT = 36 + +// Bit position for the flags above, to be used with andi+beq/bne instructions +TSF_Attached_Bit = 0 +TSF_SuppressGcStress_Bit = 3 +TSF_DoNotTriggerGc_Bit = 4 + +// +// RhpPInvoke +// +// IN: a0: address of pinvoke frame +// +// This helper assumes that its callsite is as good to start the stackwalk as the actual PInvoke callsite. +// The codegenerator must treat the callsite of this helper as GC triggering and generate the GC info for it. +// Also, the codegenerator must ensure that there are no live GC references in callee saved registers. 
+//
+
+NESTED_ENTRY RhpPInvoke, _TEXT, NoHandler
+    st.d  $fp, $a0, OFFSETOF__PInvokeTransitionFrame__m_FramePointer
+    st.d  $ra, $a0, OFFSETOF__PInvokeTransitionFrame__m_RIP
+    ori  $t0, $sp, 0
+    st.d  $t0, $a0, OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs
+    ori  $t0, $zero, PTFF_SAVE_SP
+    st.d  $t0, $a0, OFFSETOF__PInvokeTransitionFrame__m_Flags
+
+    // get TLS global variable address
+
+#ifdef FEATURE_EMULATED_TLS
+    GETTHREAD_ETLS_1
+#else
+    INLINE_GETTHREAD $a1
+#endif
+
+    st.d  $a1, $a0, OFFSETOF__PInvokeTransitionFrame__m_pThread
+    st.d  $a0, $a1, OFFSETOF__Thread__m_pTransitionFrame
+    jirl  $r0, $ra, 0
+NESTED_END RhpPInvoke, _TEXT
+
+
+LEAF_ENTRY RhpPInvokeReturn, _TEXT
+    ld.d  $t0, $a0, OFFSETOF__PInvokeTransitionFrame__m_pThread
+    ori  $t1, $zero, 0
+    st.d  $t1, $t0, OFFSETOF__Thread__m_pTransitionFrame
+
+    PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, $a5
+
+    // Test the trap-threads flags just loaded into $a5; the previous code tested $t0
+    // (the Thread*, which is never zero), forcing every return through the slow path.
+    bne  $a5, $zero, 0f  // TrapThreadsFlags_None = 0
+    jirl  $r0, $ra, 0
+0:
+    // passing transition frame pointer in a0
+    b  C_FUNC(RhpWaitForGC2)
+LEAF_END RhpPInvokeReturn, _TEXT
diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/StubDispatch.S b/src/coreclr/nativeaot/Runtime/loongarch64/StubDispatch.S
new file mode 100644
index 0000000000000..138992ef1a329
--- /dev/null
+++ b/src/coreclr/nativeaot/Runtime/loongarch64/StubDispatch.S
@@ -0,0 +1,117 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include <unixasmmacros.inc>
+#include "AsmOffsets.inc"
+
+#ifdef FEATURE_CACHED_INTERFACE_DISPATCH
+
+    .extern RhpCidResolve
+    .extern RhpUniversalTransition_DebugStepTailCall
+
+    // Macro that generates code to check a single cache entry.
+    .macro CHECK_CACHE_ENTRY entry
+    // Check a single entry in the cache.
+    //  t0 : Cache data structure. Also used for target address jump.
+ // t1 : Instance MethodTable* + // t2 : Indirection cell address, preserved + // t3 : Trashed + ld.d $t3, $t0, (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16)) + bne $t1, $t3, 0f + ld.d $t0, $t0, (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16) + 8) + jirl $r0, $t0, 0 +0: + .endm + +// +// Macro that generates a stub consuming a cache with the given number of entries. +// + .macro DEFINE_INTERFACE_DISPATCH_STUB entries + + NESTED_ENTRY "RhpInterfaceDispatch\entries", _TEXT, NoHandler + + // t2 holds the indirection cell address. Load the cache pointer. + ld.d $t0, $t8, OFFSETOF__InterfaceDispatchCell__m_pCache + + // Load the MethodTable from the object instance in a0. + ALTERNATE_ENTRY RhpInterfaceDispatchAVLocation\entries + ld.d $t1, $a0, 0 + + .global CurrentEntry + .set CurrentEntry, 0 + + .rept \entries + CHECK_CACHE_ENTRY CurrentEntry + .set CurrentEntry, CurrentEntry + 1 + .endr + + // t2 still contains the indirection cell address. + b C_FUNC(RhpInterfaceDispatchSlow) + + NESTED_END "RhpInterfaceDispatch\entries", _TEXT + + .endm + +// +// Define all the stub routines we currently need. +// +// If you change or add any new dispatch stubs, exception handling might need to be aware because it refers to the +// *AVLocation symbols defined by the dispatch stubs to be able to unwind and blame user code if a NullRef happens +// during the interface dispatch. +// + DEFINE_INTERFACE_DISPATCH_STUB 1 + DEFINE_INTERFACE_DISPATCH_STUB 2 + DEFINE_INTERFACE_DISPATCH_STUB 4 + DEFINE_INTERFACE_DISPATCH_STUB 8 + DEFINE_INTERFACE_DISPATCH_STUB 16 + DEFINE_INTERFACE_DISPATCH_STUB 32 + DEFINE_INTERFACE_DISPATCH_STUB 64 + +// +// Initial dispatch on an interface when we don't have a cache yet. +// + LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT + ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch + // Trigger an AV if we're dispatching on a null this. 
+ // The exception handling infrastructure is aware of the fact that this is the first + // instruction of RhpInitialInterfaceDispatch and uses it to translate an AV here + // to a NullReferenceException at the callsite. + ld.d $zero, $a0, 0 + + // Just tail call to the cache miss helper. + b C_FUNC(RhpInterfaceDispatchSlow) + LEAF_END RhpInitialInterfaceDispatch, _TEXT + +// +// Stub dispatch routine for dispatch to a vtable slot +// + LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT + // t2 contains the interface dispatch cell address. + // load t3 to point to the vtable offset (which is stored in the m_pCache field). + ld.d $t3, $t2, OFFSETOF__InterfaceDispatchCell__m_pCache + + // Load the MethodTable from the object instance in a0, and add it to the vtable offset + // to get the address in the vtable of what we want to dereference + ld.d $t4, $a0, 0 + add.d $t3, $t3, $t4 + + // Load the target address of the vtable into t3 + ld.d $t3, $t3, 0 + + jirl $r0, $t3, 0 + LEAF_END RhpVTableOffsetDispatch, _TEXT + +// +// Cache miss case, call the runtime to resolve the target and update the cache. +// Use universal transition helper to allow an exception to flow out of resolution. +// + LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT + // t2 contains the interface dispatch cell address. + // Calling convention of the universal thunk is: + // t7: target address for the thunk to call + // t8: parameter of the thunk's target + PREPARE_EXTERNAL_VAR RhpCidResolve, $t7 + b C_FUNC(RhpUniversalTransition_DebugStepTailCall) + LEAF_END RhpInterfaceDispatchSlow, _TEXT + +#endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/UniversalTransition.S b/src/coreclr/nativeaot/Runtime/loongarch64/UniversalTransition.S new file mode 100644 index 0000000000000..79af74a1edbab --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/loongarch64/UniversalTransition.S @@ -0,0 +1,191 @@ +// Licensed to the .NET Foundation under one or more agreements. 
+// The .NET Foundation licenses this file to you under the MIT license. + +#include + +#ifdef _DEBUG +#define TRASH_SAVED_ARGUMENT_REGISTERS +#endif + +#ifdef TRASH_SAVED_ARGUMENT_REGISTERS + .global RhpIntegerTrashValues + .global RhpFpTrashValues +#endif // TRASH_SAVED_ARGUMENT_REGISTERS + +// Padding to account for the odd number of saved integer registers +#define ALIGNMENT_PADDING_SIZE (8) + +#define COUNT_ARG_REGISTERS (9) +#define INTEGER_REGISTER_SIZE (8) +#define ARGUMENT_REGISTERS_SIZE (COUNT_ARG_REGISTERS * INTEGER_REGISTER_SIZE) + +// Largest return block is 4 doubles +#define RETURN_BLOCK_SIZE (32) + +#define COUNT_FLOAT_ARG_REGISTERS (8) +#define FLOAT_REGISTER_SIZE (16) +#define FLOAT_ARG_REGISTERS_SIZE (COUNT_FLOAT_ARG_REGISTERS * FLOAT_REGISTER_SIZE) + +#define PUSHED_RA_SIZE (8) +#define PUSHED_FP_SIZE (8) + +// +// From CallerSP to ChildSP, the stack frame is composed of the following adjacent regions: +// +// ALIGNMENT_PADDING_SIZE +// ARGUMENT_REGISTERS_SIZE +// RETURN_BLOCK_SIZE +// FLOAT_ARG_REGISTERS_SIZE +// PUSHED_RA_SIZE +// PUSHED_FP_SIZE +// + +#define DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK (PUSHED_FP_SIZE + PUSHED_RA_SIZE + FLOAT_ARG_REGISTERS_SIZE) + +#define STACK_SIZE (ALIGNMENT_PADDING_SIZE + ARGUMENT_REGISTERS_SIZE + RETURN_BLOCK_SIZE + FLOAT_ARG_REGISTERS_SIZE + PUSHED_RA_SIZE + PUSHED_FP_SIZE) + +#define FLOAT_ARG_OFFSET (PUSHED_FP_SIZE + PUSHED_RA_SIZE) +#define ARGUMENT_REGISTERS_OFFSET (FLOAT_ARG_OFFSET + FLOAT_ARG_REGISTERS_SIZE + RETURN_BLOCK_SIZE) + +// +// RhpUniversalTransition +// +// At input to this function, a0-7/tp, f0-7 and the stack may contain any number of arguments. 
+// +// In addition, there are 2 extra arguments passed in the intra-procedure-call scratch register: +// t7 will contain the managed function that is to be called by this transition function +// t8 will contain the pointer sized extra argument to the managed function +// +// When invoking the callee: +// +// a0 shall contain a pointer to the TransitionBlock +// a1 shall contain the value that was in t8 at entry to this function +// +// Frame layout is: +// +// {StackPassedArgs} ChildSP+100 CallerSP+000 +// {AlignmentPad (0x8 bytes)} ChildSP+0F8 CallerSP-008 +// {IntArgRegs (a0-a7/tp) (0x48 bytes)} ChildSP+0B0 CallerSP-050 +// {ReturnBlock (0x20 bytes)} ChildSP+090 CallerSP-070 +// -- The base address of the Return block is the TransitionBlock pointer, the floating point args are +// in the neg space of the TransitionBlock pointer. Note that the callee has knowledge of the exact +// layout of all pieces of the frame that lie at or above the pushed floating point registers. +// {FpArgRegs (f0-f7) (0x80 bytes)} ChildSP+010 CallerSP-0F0 +// {PushedRA} ChildSP+008 CallerSP-0F8 +// {PushedFP} ChildSP+000 CallerSP-100 +// +// NOTE: If the frame layout ever changes, the C++ UniversalTransitionStackFrame structure +// must be updated as well. +// +// NOTE: The callee receives a pointer to the base of the ReturnBlock, and the callee has +// knowledge of the exact layout of all pieces of the frame that lie at or above the pushed +// FpArgRegs. +// +// NOTE: The stack walker guarantees that conservative GC reporting will be applied to +// everything between the base of the ReturnBlock and the top of the StackPassedArgs. 
+// + + .text + + .macro UNIVERSAL_TRANSITION FunctionName + + NESTED_ENTRY Rhp\FunctionName, _TEXT, NoHandler + + // FP and RA registers + PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -STACK_SIZE // ;; Push down stack pointer and store FP and RA + + // Floating point registers + fst.d $f0, $sp, FLOAT_ARG_OFFSET + fst.d $f1, $sp, FLOAT_ARG_OFFSET + 0x08 + fst.d $f2, $sp, FLOAT_ARG_OFFSET + 0x10 + fst.d $f3, $sp, FLOAT_ARG_OFFSET + 0x18 + fst.d $f4, $sp, FLOAT_ARG_OFFSET + 0x20 + fst.d $f5, $sp, FLOAT_ARG_OFFSET + 0x28 + fst.d $f6, $sp, FLOAT_ARG_OFFSET + 0x30 + fst.d $f7, $sp, FLOAT_ARG_OFFSET + 0x38 + + // Space for return buffer data (0x40 bytes) + + // Save argument registers + st.d $a0, $sp, ARGUMENT_REGISTERS_OFFSET + st.d $a1, $sp, ARGUMENT_REGISTERS_OFFSET + 0x08 + st.d $a2, $sp, ARGUMENT_REGISTERS_OFFSET + 0x10 + st.d $a3, $sp, ARGUMENT_REGISTERS_OFFSET + 0x18 + st.d $a4, $sp, ARGUMENT_REGISTERS_OFFSET + 0x20 + st.d $a5, $sp, ARGUMENT_REGISTERS_OFFSET + 0x28 + st.d $a6, $sp, ARGUMENT_REGISTERS_OFFSET + 0x30 + st.d $a7, $sp, ARGUMENT_REGISTERS_OFFSET + 0x38 + st.d $tp, $sp, ARGUMENT_REGISTERS_OFFSET + 0x40 + st.d $r0, $sp, ARGUMENT_REGISTERS_OFFSET + 0x48 + +#ifdef TRASH_SAVED_ARGUMENT_REGISTERS + PREPARE_EXTERNAL_VAR RhpFpTrashValues, $a1 + + fld.d $f0, $a1, 0 + fld.d $f1, $a1, 0x08 + fld.d $f2, $a1, 0x10 + fld.d $f3, $a1, 0x18 + fld.d $f4, $a1, 0x20 + fld.d $f5, $a1, 0x28 + fld.d $f6, $a1, 0x30 + fld.d $f7, $a1, 0x38 + + PREPARE_EXTERNAL_VAR RhpIntegerTrashValues, $a1 + + ld.d $a2, $a1, 0x10 + ld.d $a3, $a1, 0x18 + ld.d $a4, $a1, 0x20 + ld.d $a5, $a1, 0x28 + ld.d $a6, $a1, 0x30 + ld.d $a7, $a1, 0x38 +#endif // TRASH_SAVED_ARGUMENT_REGISTERS + + addi.d $a0, $sp, DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK // First parameter to target function is a pointer to the return block + ori $a1, $t8, 0 // Second parameter to target function + jirl $ra, $t7, 0 + + // We cannot make the label public as that tricks DIA stackwalker into thinking + // it's the beginning of a method. 
For this reason we export an auxiliary variable + // holding the address instead. + ALTERNATE_ENTRY ReturnFrom\FunctionName + + // Move the result (the target address) to t3 so it doesn't get overridden when we restore the + // argument registers. + ori $t3, $a0, 0 + + // Restore floating point registers + fld.d $f0, $sp, FLOAT_ARG_OFFSET + fld.d $f1, $sp, FLOAT_ARG_OFFSET + 0x08 + fld.d $f2, $sp, FLOAT_ARG_OFFSET + 0x10 + fld.d $f3, $sp, FLOAT_ARG_OFFSET + 0x18 + fld.d $f4, $sp, FLOAT_ARG_OFFSET + 0x20 + fld.d $f5, $sp, FLOAT_ARG_OFFSET + 0x28 + fld.d $f6, $sp, FLOAT_ARG_OFFSET + 0x30 + fld.d $f7, $sp, FLOAT_ARG_OFFSET + 0x38 + + // Restore the argument registers + ld.d $a0, $sp, ARGUMENT_REGISTERS_OFFSET + ld.d $a1, $sp, ARGUMENT_REGISTERS_OFFSET + 0x08 + ld.d $a2, $sp, ARGUMENT_REGISTERS_OFFSET + 0x10 + ld.d $a3, $sp, ARGUMENT_REGISTERS_OFFSET + 0x18 + ld.d $a4, $sp, ARGUMENT_REGISTERS_OFFSET + 0x20 + ld.d $a5, $sp, ARGUMENT_REGISTERS_OFFSET + 0x28 + ld.d $a6, $sp, ARGUMENT_REGISTERS_OFFSET + 0x30 + ld.d $a7, $sp, ARGUMENT_REGISTERS_OFFSET + 0x38 + ld.d $tp, $sp, ARGUMENT_REGISTERS_OFFSET + 0x40 + + // Restore FP and RA registers, and free the allocated stack block + EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, STACK_SIZE + + // Tailcall to the target address. + jirl $r0, $t3, 0 + + NESTED_END Rhp\FunctionName, _TEXT + + .endm + + // To enable proper step-in behavior in the debugger, we need to have two instances + // of the thunk. For the first one, the debugger steps into the call in the function, + // for the other, it steps over it. 
+ UNIVERSAL_TRANSITION UniversalTransition
+ UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall
diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/loongarch64/WriteBarriers.S
new file mode 100644
index 0000000000000..bdde2036a3f1e
--- /dev/null
+++ b/src/coreclr/nativeaot/Runtime/loongarch64/WriteBarriers.S
@@ -0,0 +1,354 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+#include <unixasmmacros.inc>
+
+// Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. This is used
+// during garbage collections to verify that object references were never written to the heap without using a
+// write barrier. Note that we are potentially racing to update the shadow heap while other threads are writing
+// new references to the real heap. Since this can not be solved perfectly without critical sections around the
+// entire update process, we instead update the shadow location and then re-check the real location (as two
+// ordered operations) and if there is a disparity we will re-write the shadow location with a special value
+// (INVALIDGCVALUE) which disables the check for that location. Since the shadow heap is only validated at GC
+// time and these write barrier operations are atomic wrt to GCs this is sufficient to guarantee that the
+// shadow heap contains only valid copies of real heap values or INVALIDGCVALUE.
+#ifdef WRITE_BARRIER_CHECK
+
+ .global $g_GCShadow
+ .global $g_GCShadowEnd
+
+ // On entry:
+ // $destReg: location to be updated
+ // $refReg: objectref to be stored
+ //
+ // On exit:
+ // t3,t4: trashed
+ // other registers are preserved
+ //
+ .macro UPDATE_GC_SHADOW destReg, refReg
+
+ // If g_GCShadow is 0, don't perform the check.
+ PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadow, $t3 + beq $t3, $zero, 1f + ori $t4, $t3, 0 + + // Save destReg since we're about to modify it (and we need the original value both within the macro and + // once we exit the macro). + ori $t4, \destReg, 0 + + // Transform destReg into the equivalent address in the shadow heap. + PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, $t3 + sub.d \destReg, \destReg, $t3 + bltu $t4, $zero, 0f + + PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadow, $t3 + add.d \destReg, \destReg, $t3 + + PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadowEnd, $t3 + bgeu \destReg, $t3, 0f + + // Update the shadow heap. + st.d \refReg, \destReg, 0 + + // The following read must be strongly ordered wrt to the write we have just performed in order to + // prevent race conditions. + dbar 0 + + // Now check that the real heap location still contains the value we just wrote into the shadow heap. + ori $t3, $t4, 0 + ld.d $t3, $t3, 0 + beq $t3, \refReg, 0f + + // Someone went and updated the real heap. We need to invalidate INVALIDGCVALUE the shadow location since we can not + // guarantee whose shadow update won. + lu12i.w $t3, ((INVALIDGCVALUE >> 12) & 0xFFFFF) + ori $t3, $t3, (INVALIDGCVALUE & 0xFFF) + st.d $t3, \destReg, 0 + +0: + // Restore original destReg value + ori \destReg, $t4, 0 + +1: + .endm + +#else // WRITE_BARRIER_CHECK + + .macro UPDATE_GC_SHADOW destReg, refReg + .endm + +#endif // WRITE_BARRIER_CHECK + +// There are several different helpers used depending on which register holds the object reference. Since all +// the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the +// name of the register that points to the location to be updated and the name of the register that holds the +// object reference (this should be in upper case as it is used in the definition of the name of the helper). + +// Define a sub-macro first that expands to the majority of the barrier implementation. 
This is used below for
+// some interlocked helpers that need an inline barrier.
+
+ // On entry:
+ // destReg: location to be updated (cannot be t3,t4)
+ // refReg: objectref to be stored (cannot be t3,t4)
+ //
+ // On exit:
+ // t3,t4: trashed
+ //
+ .macro INSERT_UNCHECKED_WRITE_BARRIER_CORE destReg, refReg
+
+ // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless
+ // we are in a debug build and write barrier checking has been enabled).
+ UPDATE_GC_SHADOW \destReg, \refReg
+
+#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP
+ // Update the write watch table if necessary
+ PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, $t3
+
+ beq $t3, $zero, 2f
+ srli.d $t5, \destReg, 12
+ add.d $t3, $t3, $t5 // SoftwareWriteWatch::AddressToTableByteIndexShift
+ ld.b $t4, $t3, 0
+ bne $t4, $zero, 2f
+ ori $t4, $zero, 0xFF
+ st.b $t4, $t3, 0
+#endif
+
+2:
+ // We can skip the card table write if the reference is to
+ // an object not on the ephemeral segment.
+ PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_low, $t3
+ bltu \refReg, $t3, 0f
+
+ PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_high, $t3
+ bgeu \refReg, $t3, 0f
+
+ // Set this object's card, if it has not already been set.
+ PREPARE_EXTERNAL_VAR_INDIRECT g_card_table, $t3
+ srli.d $t5, \destReg, 11
+ add.d $t4, $t3, $t5
+
+ // Check that this card has not already been written. Avoiding useless writes is a big win on
+ // multi-proc systems since it avoids cache thrashing.
+ ld.b $t3, $t4, 0 + ori $t5, $zero, 0xFF + beq $t3, $t5, 0f + + ori $t3, $zero, 0xFF + st.b $t3, $t4, 0 + +#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES + // Check if we need to update the card bundle table + PREPARE_EXTERNAL_VAR_INDIRECT g_card_bundle_table, $t3 + srli.d $t5, \destReg, 21 + add.d $t4, $t3, $t5 + ld.b $t3, $t4, 0 + ori $t5, $zero, 0xFF + beq $t3, $t5, 0f + + ori $t3, $zero, 0xFF + st.b $t3, $t4, 0 +#endif + +0: + // Exit label + .endm + + // On entry: + // destReg: location to be updated + // refReg: objectref to be stored + // + // On exit: + // t3, t4: trashed + // + .macro INSERT_CHECKED_WRITE_BARRIER_CORE destReg, refReg + + // The "check" of this checked write barrier - is destReg + // within the heap? if no, early out. + PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, $t3 + sltu $t4, \destReg, $t3 + + PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, $t3 + + // If \destReg >= g_lowest_address, compare \destReg to g_highest_address. + // Otherwise, set the C flag (0x2) to take the next branch. + bnez $t4, 1f + bgeu \destReg, $t3, 0f + +1: + INSERT_UNCHECKED_WRITE_BARRIER_CORE \destReg, \refReg + +0: + // Exit label + .endm + +// void JIT_ByRefWriteBarrier +// On entry: +// t8 : the source address (points to object reference to write) +// t6 : the destination address (object reference written here) +// +// On exit: +// t8 : incremented by 8 +// t6 : incremented by 8 +// t7 : trashed +// t3, t4 : trashed +// +// NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF +// if you add more trashed registers. 
+// +// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpByRefAssignRefAVLocation1 +// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA contains the return address +LEAF_ENTRY RhpByRefAssignRefLoongArch64, _TEXT + + ALTERNATE_ENTRY RhpByRefAssignRefAVLocation1 + ld.d $t7, $t8, 0 + addi.d $t8, $t8, 8 + b C_FUNC(RhpCheckedAssignRefLoongArch64) + +LEAF_END RhpByRefAssignRefLoongArch64, _TEXT + +// JIT_CheckedWriteBarrier(Object** dst, Object* src) +// +// Write barrier for writes to objects that may reside +// on the managed heap. +// +// On entry: +// t6 : the destination address (LHS of the assignment). +// May not be a heap location (hence the checked). +// t7 : the object reference (RHS of the assignment). +// +// On exit: +// t3, t4 : trashed +// t6 : incremented by 8 + LEAF_ENTRY RhpCheckedAssignRefLoongArch64, _TEXT + + // is destReg within the heap? + PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, $t3 + sltu $t4, $t6, $t3 + + PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, $t3 + sltu $t0, $t3, $t6 + or $t4, $t0, $t4 + beq $t4, $zero, C_FUNC(RhpAssignRefLoongArch64) + +NotInHeap: + ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation + st.d $t7, $t6, 0 + addi.d $t6, $t6, 8 + jirl $r0, $ra, 0 + +LEAF_END RhpCheckedAssignRefLoongArch64, _TEXT + +// JIT_WriteBarrier(Object** dst, Object* src) +// +// Write barrier for writes to objects that are known to +// reside on the managed heap. +// +// On entry: +// t6 : the destination address (LHS of the assignment). +// t7 : the object reference (RHS of the assignment). 
+// +// On exit: +// t3, t4 : trashed +// t6 : incremented by 8 +LEAF_ENTRY RhpAssignRefLoongArch64, _TEXT + + ALTERNATE_ENTRY RhpAssignRefAVLocation + st.d $t7, $t6, 0 + + INSERT_UNCHECKED_WRITE_BARRIER_CORE $t6, $t7 + + addi.d $t6, $t6, 8 + jirl $r0, $ra, 0 + +LEAF_END RhpAssignRefLoongArch64, _TEXT + +// Same as RhpAssignRefLoongArch64, but with standard ABI. +LEAF_ENTRY RhpAssignRef, _TEXT + ori $t6, $a0, 0 ; t6 = dst + ori $t7, $a1, 0 ; t7 = val + b C_FUNC(RhpAssignRefLoongArch64) +LEAF_END RhpAssignRef, _TEXT + + +// Interlocked operation helpers where the location is an objectref, thus requiring a GC write barrier upon +// successful updates. + +// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: +// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedLockCmpXchgAVLocation +// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA contains the return address + +// RhpCheckedLockCmpXchg(Object** dest, Object* value, Object* comparand) +// +// Interlocked compare exchange on objectref. +// +// On entry: +// a0: pointer to objectref +// a1: exchange value +// a2: comparand +// +// On exit: +// a0: original value of objectref +// t1, t3, t6, t4: trashed +// + LEAF_ENTRY RhpCheckedLockCmpXchg + + ori $t1, $a2, 0 + ld.d $t0, $a0, 0 + beq $t0, $t1, 12 + ori $t1, $t0, 0 + b 8 + st.d $a1, $a0, 0 + + bne $a2, $t1, CmpXchgNoUpdate + +DoCardsCmpXchg: + // We have successfully updated the value of the objectref so now we need a GC write barrier. + // The following barrier code takes the destination in $a0 and the value in $a1 so the arguments are + // already correctly set up. + + INSERT_CHECKED_WRITE_BARRIER_CORE $a0, $a1 + +CmpXchgNoUpdate: + // t1 still contains the original value. 
+ ori $a0, $t1, 0
+
+ jirl $r0, $ra, 0
+
+ LEAF_END RhpCheckedLockCmpXchg, _TEXT
+
+// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular:
+// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedXchgAVLocation
+// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA contains the return address
+
+// RhpCheckedXchg(Object** destination, Object* value)
+//
+// Interlocked exchange on objectref.
+//
+// On entry:
+// a0: pointer to objectref
+// a1: exchange value
+//
+// On exit:
+// a0: original value of objectref
+// t1: trashed
+// t3, t6, t4: trashed
+//
+ LEAF_ENTRY RhpCheckedXchg, _TEXT
+
+ ld.d $t1, $a0, 0
+ st.d $a1, $a0, 0
+
+DoCardsXchg:
+ // We have successfully updated the value of the objectref so now we need a GC write barrier.
+ // The following barrier code takes the destination in $a0 and the value in $a1 so the arguments are
+ // already correctly set up.
+
+ INSERT_CHECKED_WRITE_BARRIER_CORE $a0, $a1
+
+ // $t1 still contains the original value.
+ ori $a0, $t1, 0 + + jirl $r0, $ra, 0 + + LEAF_END RhpCheckedXchg, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/regdisplay.h b/src/coreclr/nativeaot/Runtime/regdisplay.h index 739a4eec23090..b9ee9aac1e43e 100644 --- a/src/coreclr/nativeaot/Runtime/regdisplay.h +++ b/src/coreclr/nativeaot/Runtime/regdisplay.h @@ -169,6 +169,62 @@ struct REGDISPLAY inline void SetIP(PCODE IP) { this->IP = IP; } inline void SetSP(uintptr_t SP) { this->SP = SP; } }; + +#elif defined(TARGET_LOONGARCH64) + +struct REGDISPLAY +{ + PTR_uintptr_t pR0; + PTR_uintptr_t pRA; + PTR_uintptr_t pR2; + + uintptr_t SP; + + PTR_uintptr_t pR4; + PTR_uintptr_t pR5; + PTR_uintptr_t pR6; + PTR_uintptr_t pR7; + PTR_uintptr_t pR8; + PTR_uintptr_t pR9; + PTR_uintptr_t pR10; + PTR_uintptr_t pR11; + PTR_uintptr_t pR12; + PTR_uintptr_t pR13; + PTR_uintptr_t pR14; + PTR_uintptr_t pR15; + PTR_uintptr_t pR16; + PTR_uintptr_t pR17; + PTR_uintptr_t pR18; + PTR_uintptr_t pR19; + PTR_uintptr_t pR20; + PTR_uintptr_t pR21; + PTR_uintptr_t pFP; + PTR_uintptr_t pR23; + PTR_uintptr_t pR24; + PTR_uintptr_t pR25; + PTR_uintptr_t pR26; + PTR_uintptr_t pR27; + PTR_uintptr_t pR28; + PTR_uintptr_t pR29; + PTR_uintptr_t pR30; + PTR_uintptr_t pR31; + + PCODE IP; + + uint64_t F[16-8]; // Only the bottom 64-bit value of the V registers V8..V15 needs to be preserved + // (V0-V7 and V16-V31 are not preserved according to the ABI spec). 
+ // These need to be unwound during a stack walk + // for EH, but not adjusted, so we only need + // their values, not their addresses + + inline PCODE GetIP() { return IP; } + inline uintptr_t GetSP() { return SP; } + inline uintptr_t GetFP() { return *pFP; } + + inline void SetIP(PCODE IP) { this->IP = IP; } + inline void SetSP(uintptr_t SP) { this->SP = SP; } +}; + #elif defined(TARGET_WASM) struct REGDISPLAY @@ -185,7 +241,7 @@ struct REGDISPLAY inline void SetIP(PCODE IP) { } inline void SetSP(uintptr_t SP) { } }; -#endif // HOST_X86 || HOST_AMD64 || HOST_ARM || HOST_ARM64 || HOST_WASM +#endif // HOST_X86 || HOST_AMD64 || HOST_ARM || HOST_ARM64 || HOST_WASM || HOST_LOONGARCH64 typedef REGDISPLAY * PREGDISPLAY; diff --git a/src/coreclr/nativeaot/Runtime/startup.cpp b/src/coreclr/nativeaot/Runtime/startup.cpp index f87bc947d970a..116fd40d65a5c 100644 --- a/src/coreclr/nativeaot/Runtime/startup.cpp +++ b/src/coreclr/nativeaot/Runtime/startup.cpp @@ -48,13 +48,14 @@ static bool DetectCPUFeatures(); extern RhConfig * g_pRhConfig; -#if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64) +#if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64) || defined(HOST_LOONGARCH64) // This field is inspected from the generated code to determine what intrinsics are available. EXTERN_C int g_cpuFeatures; int g_cpuFeatures = 0; // This field is defined in the generated code and sets the ISA expectations. 
EXTERN_C int g_requiredCpuFeatures; +int g_requiredCpuFeatures = 0; #endif #ifdef TARGET_UNIX @@ -177,7 +178,7 @@ static bool InitDLL(HANDLE hPalInstance) bool DetectCPUFeatures() { -#if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64) +#if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64) || defined(HOST_LOONGARCH64) g_cpuFeatures = minipal_getcpufeatures(); if ((g_cpuFeatures & g_requiredCpuFeatures) != g_requiredCpuFeatures) @@ -185,7 +186,7 @@ bool DetectCPUFeatures() PalPrintFatalError("\nThe required instruction sets are not supported by the current CPU.\n"); RhFailFast(); } -#endif // HOST_X86|| HOST_AMD64 || HOST_ARM64 +#endif // HOST_X86|| HOST_AMD64 || HOST_ARM64 || HOST_LOONGARCH64 return true; } diff --git a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkInline.h b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkInline.h index 983f17a36aba0..0b62e08d558ff 100644 --- a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkInline.h +++ b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkInline.h @@ -87,14 +87,26 @@ FORCEINLINE int64_t PalInterlockedCompareExchange64(_Inout_ int64_t volatile *pD return result; } -#if defined(HOST_AMD64) || defined(HOST_ARM64) +#if defined(HOST_AMD64) || defined(HOST_ARM64) || defined(HOST_LOONGARCH64) FORCEINLINE uint8_t PalInterlockedCompareExchange128(_Inout_ int64_t volatile *pDst, int64_t iValueHigh, int64_t iValueLow, int64_t *pComparandAndResult) { +#if defined(HOST_LOONGARCH64) + int64_t iResult0 = __sync_val_compare_and_swap(pDst, pComparandAndResult[0], iValueLow); + int64_t iResult1 = __sync_val_compare_and_swap(pDst+1, pComparandAndResult[1], iValueHigh); + + uint8_t ret = pComparandAndResult[0] == iResult0; + pComparandAndResult[0] = iResult0; + ret &= pComparandAndResult[1] == iResult1; + pComparandAndResult[1] = iResult1; + + return ret; +#else __int128_t iComparand = ((__int128_t)pComparandAndResult[1] << 64) + (uint64_t)pComparandAndResult[0]; __int128_t iResult = 
__sync_val_compare_and_swap((__int128_t volatile*)pDst, iComparand, ((__int128_t)iValueHigh << 64) + (uint64_t)iValueLow); PalInterlockedOperationBarrier(); pComparandAndResult[0] = (int64_t)iResult; pComparandAndResult[1] = (int64_t)(iResult >> 64); return iComparand == iResult; +#endif } #endif // HOST_AMD64 diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp index 8e587901f60a3..e084fb35e391f 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixContext.cpp @@ -80,7 +80,43 @@ #if HAVE___GREGSET_T -#ifdef HOST_64BIT +#if defined(HOST_LOONGARCH64) + +#define MCREG_R0(mc) ((mc).__gregs[0]) +#define MCREG_Ra(mc) ((mc).__gregs[1]) +#define MCREG_Tp(mc) ((mc).__gregs[2]) +#define MCREG_Sp(mc) ((mc).__gregs[3]) +#define MCREG_A0(mc) ((mc).__gregs[4]) +#define MCREG_A1(mc) ((mc).__gregs[5]) +#define MCREG_A2(mc) ((mc).__gregs[6]) +#define MCREG_A3(mc) ((mc).__gregs[7]) +#define MCREG_A4(mc) ((mc).__gregs[8]) +#define MCREG_A5(mc) ((mc).__gregs[9]) +#define MCREG_A6(mc) ((mc).__gregs[10]) +#define MCREG_A7(mc) ((mc).__gregs[11]) +#define MCREG_T0(mc) ((mc).__gregs[12]) +#define MCREG_T1(mc) ((mc).__gregs[13]) +#define MCREG_T2(mc) ((mc).__gregs[14]) +#define MCREG_T3(mc) ((mc).__gregs[15]) +#define MCREG_T4(mc) ((mc).__gregs[16]) +#define MCREG_T5(mc) ((mc).__gregs[17]) +#define MCREG_T6(mc) ((mc).__gregs[18]) +#define MCREG_T7(mc) ((mc).__gregs[19]) +#define MCREG_T8(mc) ((mc).__gregs[20]) +#define MCREG_X0(mc) ((mc).__gregs[21]) +#define MCREG_Fp(mc) ((mc).__gregs[22]) +#define MCREG_S0(mc) ((mc).__gregs[23]) +#define MCREG_S1(mc) ((mc).__gregs[24]) +#define MCREG_S2(mc) ((mc).__gregs[25]) +#define MCREG_S3(mc) ((mc).__gregs[26]) +#define MCREG_S4(mc) ((mc).__gregs[27]) +#define MCREG_S5(mc) ((mc).__gregs[28]) +#define MCREG_S6(mc) ((mc).__gregs[29]) +#define MCREG_S7(mc) ((mc).__gregs[30]) +#define MCREG_S8(mc) ((mc).__gregs[31]) +#define 
MCREG_Pc(mc) ((mc).__pc) + +#elif HOST_64BIT #define MCREG_Rip(mc) ((mc).__gregs[_REG_RIP]) #define MCREG_Rsp(mc) ((mc).__gregs[_REG_RSP]) #define MCREG_Rax(mc) ((mc).__gregs[_REG_RAX]) @@ -115,7 +151,43 @@ #elif HAVE_GREGSET_T -#ifdef HOST_64BIT +#if defined(HOST_LOONGARCH64) + +#define MCREG_R0(mc) ((mc).__gregs[0]) +#define MCREG_Ra(mc) ((mc).__gregs[1]) +#define MCREG_Tp(mc) ((mc).__gregs[2]) +#define MCREG_Sp(mc) ((mc).__gregs[3]) +#define MCREG_A0(mc) ((mc).__gregs[4]) +#define MCREG_A1(mc) ((mc).__gregs[5]) +#define MCREG_A2(mc) ((mc).__gregs[6]) +#define MCREG_A3(mc) ((mc).__gregs[7]) +#define MCREG_A4(mc) ((mc).__gregs[8]) +#define MCREG_A5(mc) ((mc).__gregs[9]) +#define MCREG_A6(mc) ((mc).__gregs[10]) +#define MCREG_A7(mc) ((mc).__gregs[11]) +#define MCREG_T0(mc) ((mc).__gregs[12]) +#define MCREG_T1(mc) ((mc).__gregs[13]) +#define MCREG_T2(mc) ((mc).__gregs[14]) +#define MCREG_T3(mc) ((mc).__gregs[15]) +#define MCREG_T4(mc) ((mc).__gregs[16]) +#define MCREG_T5(mc) ((mc).__gregs[17]) +#define MCREG_T6(mc) ((mc).__gregs[18]) +#define MCREG_T7(mc) ((mc).__gregs[19]) +#define MCREG_T8(mc) ((mc).__gregs[20]) +#define MCREG_X0(mc) ((mc).__gregs[21]) +#define MCREG_Fp(mc) ((mc).__gregs[22]) +#define MCREG_S0(mc) ((mc).__gregs[23]) +#define MCREG_S1(mc) ((mc).__gregs[24]) +#define MCREG_S2(mc) ((mc).__gregs[25]) +#define MCREG_S3(mc) ((mc).__gregs[26]) +#define MCREG_S4(mc) ((mc).__gregs[27]) +#define MCREG_S5(mc) ((mc).__gregs[28]) +#define MCREG_S6(mc) ((mc).__gregs[29]) +#define MCREG_S7(mc) ((mc).__gregs[30]) +#define MCREG_S8(mc) ((mc).__gregs[31]) +#define MCREG_Pc(mc) ((mc).__pc) + +#elif HOST_64BIT #define MCREG_Rip(mc) ((mc).gregs[REG_RIP]) #define MCREG_Rsp(mc) ((mc).gregs[REG_RSP]) #define MCREG_Rax(mc) ((mc).gregs[REG_RAX]) @@ -224,6 +296,42 @@ #define MCREG_Sp(mc) ((mc).sp) #define MCREG_Pc(mc) ((mc).pc) +#elif defined(HOST_LOONGARCH64) + +#define MCREG_R0(mc) ((mc).regs[0]) +#define MCREG_Ra(mc) ((mc).regs[1]) +#define MCREG_Tp(mc) ((mc).regs[2]) 
+#define MCREG_Sp(mc) ((mc).regs[3]) +#define MCREG_A0(mc) ((mc).regs[4]) +#define MCREG_A1(mc) ((mc).regs[5]) +#define MCREG_A2(mc) ((mc).regs[6]) +#define MCREG_A3(mc) ((mc).regs[7]) +#define MCREG_A4(mc) ((mc).regs[8]) +#define MCREG_A5(mc) ((mc).regs[9]) +#define MCREG_A6(mc) ((mc).regs[10]) +#define MCREG_A7(mc) ((mc).regs[11]) +#define MCREG_T0(mc) ((mc).regs[12]) +#define MCREG_T1(mc) ((mc).regs[13]) +#define MCREG_T2(mc) ((mc).regs[14]) +#define MCREG_T3(mc) ((mc).regs[15]) +#define MCREG_T4(mc) ((mc).regs[16]) +#define MCREG_T5(mc) ((mc).regs[17]) +#define MCREG_T6(mc) ((mc).regs[18]) +#define MCREG_T7(mc) ((mc).regs[19]) +#define MCREG_T8(mc) ((mc).regs[20]) +#define MCREG_X0(mc) ((mc).regs[21]) +#define MCREG_Fp(mc) ((mc).regs[22]) +#define MCREG_S0(mc) ((mc).regs[23]) +#define MCREG_S1(mc) ((mc).regs[24]) +#define MCREG_S2(mc) ((mc).regs[25]) +#define MCREG_S3(mc) ((mc).regs[26]) +#define MCREG_S4(mc) ((mc).regs[27]) +#define MCREG_S5(mc) ((mc).regs[28]) +#define MCREG_S6(mc) ((mc).regs[29]) +#define MCREG_S7(mc) ((mc).regs[30]) +#define MCREG_S8(mc) ((mc).regs[31]) +#define MCREG_Pc(mc) ((mc).pc) + #else // For FreeBSD, as found in x86/ucontext.h @@ -365,6 +473,29 @@ MCREG_X0(nativeContext->uc_mcontext) = arg0Reg; \ MCREG_X1(nativeContext->uc_mcontext) = arg1Reg; +#elif defined(HOST_LOONGARCH64) + +#define ASSIGN_CONTROL_REGS \ + ASSIGN_REG(Pc, IP) \ + ASSIGN_REG(Sp, SP) \ + ASSIGN_REG(Fp, FP) \ + ASSIGN_REG(Ra, RA) + +#define ASSIGN_INTEGER_REGS \ + ASSIGN_REG(S0, R23) \ + ASSIGN_REG(S1, R24) \ + ASSIGN_REG(S2, R25) \ + ASSIGN_REG(S3, R26) \ + ASSIGN_REG(S4, R27) \ + ASSIGN_REG(S5, R28) \ + ASSIGN_REG(S6, R29) \ + ASSIGN_REG(S7, R30) \ + ASSIGN_REG(S8, R31) + +#define ASSIGN_TWO_ARGUMENT_REGS \ + MCREG_A0(nativeContext->uc_mcontext) = arg0Reg; \ + MCREG_A1(nativeContext->uc_mcontext) = arg1Reg; + #elif defined(HOST_WASM) // TODO: determine how unwinding will work on WebAssembly #define ASSIGN_CONTROL_REGS @@ -529,6 +660,42 @@ uint64_t GetPC(void* 
context) uint64_t& UNIX_CONTEXT::R11(){ return (uint64_t&)MCREG_R11(ctx.uc_mcontext); } uint64_t& UNIX_CONTEXT::R12(){ return (uint64_t&)MCREG_R12(ctx.uc_mcontext); } +#elif TARGET_LOONGARCH64 + + uint64_t& UNIX_CONTEXT::R0() { return (uint64_t&)MCREG_R0(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R2() { return (uint64_t&)MCREG_Tp(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R4() { return (uint64_t&)MCREG_A0(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R5() { return (uint64_t&)MCREG_A1(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R6() { return (uint64_t&)MCREG_A2(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R7() { return (uint64_t&)MCREG_A3(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R8() { return (uint64_t&)MCREG_A4(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R9() { return (uint64_t&)MCREG_A5(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R10() { return (uint64_t&)MCREG_A6(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R11() { return (uint64_t&)MCREG_A7(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R12() { return (uint64_t&)MCREG_T0(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R13() { return (uint64_t&)MCREG_T1(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R14() { return (uint64_t&)MCREG_T2(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R15() { return (uint64_t&)MCREG_T3(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R16() { return (uint64_t&)MCREG_T4(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R17() { return (uint64_t&)MCREG_T5(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R18() { return (uint64_t&)MCREG_T6(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R19() { return (uint64_t&)MCREG_T7(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R20() { return (uint64_t&)MCREG_T8(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R21() { return (uint64_t&)MCREG_X0(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R23() { return (uint64_t&)MCREG_S0(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R24() { return (uint64_t&)MCREG_S1(ctx.uc_mcontext); } + uint64_t& 
UNIX_CONTEXT::R25() { return (uint64_t&)MCREG_S2(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R26() { return (uint64_t&)MCREG_S3(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R27() { return (uint64_t&)MCREG_S4(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R28() { return (uint64_t&)MCREG_S5(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R29() { return (uint64_t&)MCREG_S6(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R30() { return (uint64_t&)MCREG_S7(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::R31() { return (uint64_t&)MCREG_S8(ctx.uc_mcontext); } + uint64_t& UNIX_CONTEXT::Fp() { return (uint64_t&)MCREG_Fp(ctx.uc_mcontext); } // R22 + uint64_t& UNIX_CONTEXT::Ra() { return (uint64_t&)MCREG_Ra(ctx.uc_mcontext); } // R1 + uint64_t& UNIX_CONTEXT::Sp() { return (uint64_t&)MCREG_Sp(ctx.uc_mcontext); } // R3 + uint64_t& UNIX_CONTEXT::Pc() { return (uint64_t&)MCREG_Pc(ctx.uc_mcontext); } + #else PORTABILITY_ASSERT("UNIX_CONTEXT"); #endif // TARGET_ARM diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixContext.h b/src/coreclr/nativeaot/Runtime/unix/UnixContext.h index caddff419d373..662b697715da0 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixContext.h +++ b/src/coreclr/nativeaot/Runtime/unix/UnixContext.h @@ -158,6 +158,61 @@ struct UNIX_CONTEXT lambda((size_t*)&R11()); lambda((size_t*)&R12()); } + +#elif defined(TARGET_LOONGARCH64) + + uint64_t& R0(); + uint64_t& R2(); + uint64_t& R4(); + uint64_t& R5(); + uint64_t& R6(); + uint64_t& R7(); + uint64_t& R8(); + uint64_t& R9(); + uint64_t& R10(); + uint64_t& R11(); + uint64_t& R12(); + uint64_t& R13(); + uint64_t& R14(); + uint64_t& R15(); + uint64_t& R16(); + uint64_t& R17(); + uint64_t& R18(); + uint64_t& R19(); + uint64_t& R20(); + uint64_t& R21(); + uint64_t& R23(); + uint64_t& R24(); + uint64_t& R25(); + uint64_t& R26(); + uint64_t& R27(); + uint64_t& R28(); + uint64_t& R29(); + uint64_t& R30(); + uint64_t& R31(); + uint64_t& Fp(); // R22 + uint64_t& Ra(); // R1 + uint64_t& Sp(); // R3 + uint64_t& Pc(); + + 
uintptr_t GetIp() { return (uintptr_t)Pc(); } + uintptr_t GetSp() { return (uintptr_t)Sp(); } + + template + void ForEachPossibleObjectRef(F lambda) + { + // it is doubtful anyone would implement R0,R2,R4-R21,R23-R31 not as a contiguous array + // just in case - here are some asserts. + ASSERT(&R4() + 1 == &R5()); + ASSERT(&R4() + 10 == &R14()); + + for (uint64_t* pReg = &R0(); pReg <= &R31(); pReg++) + lambda((size_t*)pReg); + + // Ra can be used as a scratch register + lambda((size_t*)&Ra()); + } + #else PORTABILITY_ASSERT("UNIX_CONTEXT"); #endif // TARGET_ARM diff --git a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp index 3ccf59f611ecb..b12d63bf72612 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnixNativeCodeManager.cpp @@ -399,7 +399,7 @@ bool UnixNativeCodeManager::IsUnwindable(PTR_VOID pvAddress) ASSERT(((uintptr_t)pvAddress & 1) == 0); #endif -#if defined(TARGET_ARM64) || defined(TARGET_ARM) +#if defined(TARGET_ARM64) || defined(TARGET_ARM) || defined(TARGET_LOONGARCH64) MethodInfo methodInfo; FindMethodInfo(pvAddress, &methodInfo); pMethodInfo = &methodInfo; @@ -667,6 +667,61 @@ int UnixNativeCodeManager::IsInProlog(MethodInfo * pMethodInfo, PTR_VOID pvAddre return 0; +#elif defined(TARGET_LOONGARCH64) + +// 0010 1001 11xx xxxx xxxx xxxx xxxx xxxx +#define ST_BITS 0x29C00000 +#define ST_MASK 0xFFC00000 + +// addi.d $fp, $sp, x +// ori $fp, $sp, 0 +// 0000 0010 11xx xxxx xxxx xx00 0111 0110 +#define ADDI_FP_SP_BITS 0x02C00076 +#define ADDI_FP_SP_MASK 0xFFC003FF + +#define ST_RJ_MASK 0x3E0 +#define ST_RJ_FP 0x2C0 +#define ST_RJ_RA 0x20 +#define ST_RD_MASK 0x1F +#define ST_RD_SP 0x3 +#define ST_RD_FP 0x16 + + UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; + ASSERT(pNativeMethodInfo != NULL); + + uint32_t* start = (uint32_t*)pNativeMethodInfo->pMethodStartAddress; + bool savedFp = 
false; + bool savedRa = false; + bool establishedFp = false; + + for (uint32_t* pInstr = (uint32_t*)start; pInstr < pvAddress && !(savedFp && savedRa && establishedFp); pInstr++) + { + uint32_t instr = *pInstr; + + if (((instr & ST_MASK) == ST_BITS) && + ((instr & ST_RD_MASK) == ST_RD_SP || (instr & ST_RD_MASK) == ST_RD_FP)) + { + // SP/FP-relative store of pair of registers + savedFp |= (instr & ST_RJ_MASK) == ST_RJ_FP; + savedRa |= (instr & ST_RJ_MASK) == ST_RJ_RA; + } + else if ((instr & ADDI_FP_SP_MASK) == ADDI_FP_SP_BITS) + { + establishedFp = true; + } + else + { + // JIT generates other patterns into the prolog that we currently don't + // recognize (saving unpaired register, stack pointer adjustments). We + // don't need to recognize these patterns unless a compact unwinding code + // is generated for them in ILC. + // https://github.com/dotnet/runtime/issues/76371 + return -1; + } + } + + return savedFp && savedRa && establishedFp ? 0 : 1; + #else return -1; @@ -1043,6 +1098,60 @@ int UnixNativeCodeManager::TrailingEpilogueInstructionsCount(MethodInfo * pMetho return 0; } +#elif defined(TARGET_LOONGARCH64) + +// ld.d +// 0010 1000 11xx xxxx xxxx xxxx xxxx xxxx +#define LD_BITS 0xB9400000 +#define LD_MASK 0xBF400000 + +// ldx.d with register offset +// 0011 1000 0000 1100 0xxx xxxx xxxx xxxx +#define LDX_BITS 0x380C0000 +#define LDX_MASK 0xFFFF7000 + +// Branches, Exception Generating and System instruction group +// 01xx xxxx xxxx xxxx xxxx xxxx xxxx xxxx +#define BEGS_BITS 0x40000000 +#define BEGS_MASK 0xC0000000 + + UnixNativeMethodInfo * pNativeMethodInfo = (UnixNativeMethodInfo *)pMethodInfo; + ASSERT(pNativeMethodInfo != NULL); + + uint32_t* start = (uint32_t*)pNativeMethodInfo->pMethodStartAddress; + + // Since we stop on branches, the search is roughly limited by the containing basic block. + // We typically examine just 1-5 instructions and in rare cases up to 30. 
+ // + // TODO: we can also limit the search by the longest possible epilogue length, but + // we must be sure the longest length considers all possibilities, + // which is somewhat nontrivial to derive/prove. + // It does not seem urgent, but it could be nice to have a constant upper bound. + for (uint32_t* pInstr = (uint32_t*)pvAddress - 1; pInstr > start; pInstr--) + { + uint32_t instr = *pInstr; + + // check for Branches, Exception Generating and System instruction group. + // If we see such instruction before seeing FP or RA restored, we are not in an epilog. + // Note: this includes RET, BRK, branches, calls, tailcalls, fences, etc... + if ((instr & BEGS_MASK) == BEGS_BITS) + { + // not in an epilogue + break; + } + + // check for restoring FP or RA with ld.d or ldx.d + int operand = (instr >> 5) & 0x1f; + if (operand == 22 || operand == 1) + { + if ((instr & LD_MASK) == LD_BITS || + (instr & LDX_MASK) == LDX_BITS) + { + return -1; + } + } + } + #endif return 0; @@ -1085,9 +1194,9 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn // Decode the GC info for the current method to determine its return type GcInfoDecoderFlags flags = DECODE_RETURN_KIND; -#if defined(TARGET_ARM) || defined(TARGET_ARM64) +#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) flags = (GcInfoDecoderFlags)(flags | DECODE_HAS_TAILCALLS); -#endif // TARGET_ARM || TARGET_ARM64 +#endif // TARGET_ARM || TARGET_ARM64 || TARGET_LOONGARCH64 GcInfoDecoder decoder(GCInfoToken(p), flags); *pRetValueKind = GetGcRefKind(decoder.GetReturnKind()); @@ -1172,6 +1281,41 @@ bool UnixNativeCodeManager::GetReturnAddressHijackInfo(MethodInfo * pMethodIn *ppvRetAddrLocation = (PTR_PTR_VOID)pRegisterSet->pLR; return true; + +#elif defined(TARGET_LOONGARCH64) + + if (decoder.HasTailCalls()) + { + // Do not hijack functions that have tail calls, since there are two problems: + // 1. 
When a function that tail calls another one is hijacked, the RA may be + // stored at a different location in the stack frame of the tail call target. + // So just by performing tail call, the hijacked location becomes invalid and + // unhijacking would corrupt stack by writing to that location. + // 2. There is a small window after the caller pops RA from the stack in its + // epilog and before the tail called function pushes RA in its prolog when + // the hijacked return address would not be on the stack and so we would + // not be able to unhijack. + return false; + } + + PTR_uintptr_t pRA = pRegisterSet->pRA; + if (!VirtualUnwind(pMethodInfo, pRegisterSet)) + { + return false; + } + + if (pRegisterSet->pRA == pRA) + { + // This is the case when we are either: + // + // 1) In a leaf method that does not push RA on stack, OR + // 2) In the prolog/epilog of a non-leaf method that has not yet pushed RA on stack + // or has RA already popped off. + return false; + } + + *ppvRetAddrLocation = (PTR_PTR_VOID)pRegisterSet->pRA; + return true; +#else + return false; +#endif // defined(TARGET_AMD64) diff --git a/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp b/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp index 39bf9f024be60..235b543f24b2e 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp @@ -34,6 +34,9 @@ using libunwind::Registers_arm; #elif defined(TARGET_ARM64) using libunwind::Registers_arm64; using libunwind::CompactUnwinder_arm64; +#elif defined(TARGET_LOONGARCH64) +using libunwind::Registers_loongarch; +//using libunwind::CompactUnwinder_loongarch64; //TODO-LOONGARCH64 #elif defined(TARGET_X86) using libunwind::Registers_x86; #else @@ -806,6 +809,286 @@ void Registers_REGDISPLAY::setVectorRegister(int num, libunwind::v128 value) #endif // TARGET_ARM64 +#if defined(TARGET_LOONGARCH64) + +// Shim that implements methods required by libunwind over REGDISPLAY +struct 
Registers_REGDISPLAY : REGDISPLAY +{ + inline static int getArch() { return libunwind::REGISTERS_LOONGARCH; } + inline static int lastDwarfRegNum() { return _LIBUNWIND_HIGHEST_DWARF_REGISTER_LOONGARCH; } + + bool validRegister(int num) const; + bool validFloatRegister(int num) { return false; }; + bool validVectorRegister(int num) const; + + uint64_t getRegister(int num) const; + void setRegister(int num, uint64_t value, uint64_t location); + + double getFloatRegister(int num) const {abort();} + void setFloatRegister(int num, double value) {abort();} + + libunwind::v128 getVectorRegister(int num) const; + void setVectorRegister(int num, libunwind::v128 value); + + uint64_t getSP() const { return SP;} + void setSP(uint64_t value, uint64_t location) { SP = value;} + uint64_t getIP() const { return IP;} + void setIP(uint64_t value, uint64_t location) { IP = value; } + uint64_t getFP() const { return *pFP;} + void setFP(uint64_t value, uint64_t location) { pFP = (PTR_uintptr_t)location;} +}; + +inline bool Registers_REGDISPLAY::validRegister(int num) const { + if (num == UNW_REG_SP || num == UNW_LOONGARCH_R3) + return true; + + if (num == UNW_LOONGARCH_R22) + return true; + + if (num == UNW_REG_IP) + return true; + + if (num >= UNW_LOONGARCH_R0 && num <= UNW_LOONGARCH_R31) + return true; + + return false; +} + +bool Registers_REGDISPLAY::validVectorRegister(int num) const +{ + if (num >= UNW_LOONGARCH_F24 && num <= UNW_LOONGARCH_F31) + return true; + + return false; +} + +inline uint64_t Registers_REGDISPLAY::getRegister(int regNum) const { + if (regNum == UNW_REG_SP || regNum == UNW_LOONGARCH_R3) + return SP; + + if (regNum == UNW_LOONGARCH_R22) + return *pFP; + + if (regNum == UNW_LOONGARCH_R1) + return *pRA; + + if (regNum == UNW_REG_IP) + return IP; + + switch (regNum) + { + case (UNW_LOONGARCH_R0): + return *pR0; + case (UNW_LOONGARCH_R2): + return *pR2; + case (UNW_LOONGARCH_R4): + return *pR4; + case (UNW_LOONGARCH_R5): + return *pR5; + case (UNW_LOONGARCH_R6): 
+ return *pR6; + case (UNW_LOONGARCH_R7): + return *pR7; + case (UNW_LOONGARCH_R8): + return *pR8; + case (UNW_LOONGARCH_R9): + return *pR9; + case (UNW_LOONGARCH_R10): + return *pR10; + case (UNW_LOONGARCH_R11): + return *pR11; + case (UNW_LOONGARCH_R12): + return *pR12; + case (UNW_LOONGARCH_R13): + return *pR13; + case (UNW_LOONGARCH_R14): + return *pR14; + case (UNW_LOONGARCH_R15): + return *pR15; + case (UNW_LOONGARCH_R16): + return *pR16; + case (UNW_LOONGARCH_R17): + return *pR17; + case (UNW_LOONGARCH_R18): + return *pR18; + case (UNW_LOONGARCH_R19): + return *pR19; + case (UNW_LOONGARCH_R20): + return *pR20; + case (UNW_LOONGARCH_R21): + return *pR21; + case (UNW_LOONGARCH_R23): + return *pR23; + case (UNW_LOONGARCH_R24): + return *pR24; + case (UNW_LOONGARCH_R25): + return *pR25; + case (UNW_LOONGARCH_R26): + return *pR26; + case (UNW_LOONGARCH_R27): + return *pR27; + case (UNW_LOONGARCH_R28): + return *pR28; + case (UNW_LOONGARCH_R29): + return *pR29; + case (UNW_LOONGARCH_R30): + return *pR30; + case (UNW_LOONGARCH_R31): + return *pR31; + } + + PORTABILITY_ASSERT("unsupported loongarch64 register"); +} + +void Registers_REGDISPLAY::setRegister(int num, uint64_t value, uint64_t location) +{ + if (num == UNW_REG_SP || num == UNW_LOONGARCH_R3) { + SP = (uintptr_t )value; + return; + } + + if (num == UNW_LOONGARCH_R22) { + pFP = (PTR_uintptr_t)location; + return; + } + + if (num == UNW_LOONGARCH_R1) { + pRA = (PTR_uintptr_t)location; + return; + } + + if (num == UNW_REG_IP) { + IP = value; + return; + } + + switch (num) + { + case (UNW_LOONGARCH_R0): + pR0 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R2): + pR2 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R4): + pR4 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R5): + pR5 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R6): + pR6 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R7): + pR7 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R8): 
+ pR8 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R9): + pR9 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R10): + pR10 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R11): + pR11 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R12): + pR12 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R13): + pR13 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R14): + pR14 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R15): + pR15 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R16): + pR16 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R17): + pR17 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R18): + pR18 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R19): + pR19 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R20): + pR20 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R21): + pR21 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R23): + pR23 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R24): + pR24 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R25): + pR25 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R26): + pR26 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R27): + pR27 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R28): + pR28 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R29): + pR29 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R30): + pR30 = (PTR_uintptr_t)location; + break; + case (UNW_LOONGARCH_R31): + pR31 = (PTR_uintptr_t)location; + break; + default: + PORTABILITY_ASSERT("unsupported loongarch64 register"); + } +} + +libunwind::v128 Registers_REGDISPLAY::getVectorRegister(int num) const +{ + num -= UNW_LOONGARCH_F24; + + if (num < 0 || num >= sizeof(F) / sizeof(uint64_t)) + { + PORTABILITY_ASSERT("unsupported loongarch64 vector register"); + } + + libunwind::v128 result; + + result.vec[0] = 0; + result.vec[1] = 0; + result.vec[2] 
= F[num] >> 32; + result.vec[3] = F[num] & 0xFFFFFFFF; + + return result; +} + +void Registers_REGDISPLAY::setVectorRegister(int num, libunwind::v128 value) +{ + num -= UNW_LOONGARCH_F24; + + if (num < 0 || num >= sizeof(F) / sizeof(uint64_t)) + { + PORTABILITY_ASSERT("unsupported loongarch64 vector register"); + } + + F[num] = (uint64_t)value.vec[2] << 32 | (uint64_t)value.vec[3]; +} + +#endif // TARGET_LOONGARCH64 + bool UnwindHelpers::StepFrame(REGDISPLAY *regs, unw_word_t start_ip, uint32_t format, unw_word_t unwind_info) { #if _LIBUNWIND_SUPPORT_DWARF_UNWIND @@ -818,6 +1101,12 @@ bool UnwindHelpers::StepFrame(REGDISPLAY *regs, unw_word_t start_ip, uint32_t fo int stepRet = compactInst.stepWithCompactEncoding(format, start_ip, _addressSpace, *(Registers_REGDISPLAY*)regs); return stepRet == UNW_STEP_SUCCESS; } +#elif defined(TARGET_LOONGARCH64) + if ((format & UNWIND_LOONGARCH64_MODE_MASK) != UNWIND_LOONGARCH64_MODE_DWARF) { + CompactUnwinder_loongarch64 compactInst; + int stepRet = compactInst.stepWithCompactEncoding(format, start_ip, _addressSpace, *(Registers_REGDISPLAY*)regs); + return stepRet == UNW_STEP_SUCCESS; + } #elif defined(TARGET_AMD64) if ((format & UNWIND_X86_64_MODE_MASK) != UNWIND_X86_64_MODE_DWARF) { CompactUnwinder_x86_64 compactInst; @@ -867,6 +1156,8 @@ bool UnwindHelpers::GetUnwindProcInfo(PCODE pc, UnwindInfoSections &uwInfoSectio libunwind::UnwindCursor uc(_addressSpace); #elif defined(HOST_X86) libunwind::UnwindCursor uc(_addressSpace); +#elif defined(HOST_LOONGARCH64) + libunwind::UnwindCursor uc(_addressSpace); #else #error "Unwinding is not implemented for this architecture yet." 
#endif @@ -885,6 +1176,12 @@ bool UnwindHelpers::GetUnwindProcInfo(PCODE pc, UnwindInfoSections &uwInfoSectio } else { dwarfOffsetHint = procInfo->format & UNWIND_ARM64_DWARF_SECTION_OFFSET; } +#elif defined(TARGET_LOONGARCH64) + if ((procInfo->format & UNWIND_LOONGARCH64_MODE_MASK) != UNWIND_LOONGARCH64_MODE_DWARF) { + return true; + } else { + dwarfOffsetHint = procInfo->format & UNWIND_LOONGARCH64_DWARF_SECTION_OFFSET; + } #elif defined(TARGET_AMD64) if ((procInfo->format & UNWIND_X86_64_MODE_MASK) != UNWIND_X86_64_MODE_DWARF) { return true; diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc index 80f633327c830..68ba993209e42 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc @@ -42,4 +42,6 @@ #include "unixasmmacrosarm64.inc" #elif defined(HOST_X86) #include "unixasmmacrosx86.inc" +#elif defined(HOST_LOONGARCH64) +#include "unixasmmacrosloongarch64.inc" #endif diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc new file mode 100644 index 0000000000000..ff13d9d8f4053 --- /dev/null +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc @@ -0,0 +1,328 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. 
+ +#include "AsmOffsets.inc" + +.macro NESTED_ENTRY Name, Section, Handler + LEAF_ENTRY \Name, \Section + .ifnc \Handler, NoHandler + .cfi_personality 0x1b, C_FUNC(\Handler) // 0x1b == DW_EH_PE_pcrel | DW_EH_PE_sdata4 + .endif +.endm + +.macro NESTED_END Name, Section + LEAF_END \Name, \Section +.endm + +.macro PATCH_LABEL Name + .global C_FUNC(\Name) +C_FUNC(\Name): +.endm + +.macro ALTERNATE_ENTRY Name + .global C_FUNC(\Name) + .hidden C_FUNC(\Name) +C_FUNC(\Name): +.endm + +.macro LEAF_ENTRY Name, Section + .global C_FUNC(\Name) + .hidden C_FUNC(\Name) + .type \Name, %function +C_FUNC(\Name): + .cfi_startproc +.endm + +.macro LEAF_END Name, Section + .size \Name, .-\Name + .cfi_endproc +.endm + +.macro PREPARE_EXTERNAL_VAR Name, HelperReg + la.local \HelperReg, \Name +.endm + +.macro PREPARE_EXTERNAL_VAR_INDIRECT Name, HelperReg + la.local \HelperReg, \Name + ld.d \HelperReg, \HelperReg, 0 +.endm + +.macro PREPARE_EXTERNAL_VAR_INDIRECT_W Name, HelperReg + la.local \HelperReg, \Name + ld.w \HelperReg, \HelperReg, 0 +.endm + + +.macro PROLOG_STACK_ALLOC Size + addi.d $sp, $sp, -\Size +.endm + +.macro EPILOG_STACK_FREE Size + addi.d $sp, $sp, \Size + .cfi_adjust_cfa_offset -\Size +.endm + +.macro EPILOG_STACK_RESTORE + ori $sp, $fp, 0 + .cfi_restore 3 +.endm + +.macro PROLOG_SAVE_REG reg, ofs + st.d $r\reg, $sp, \ofs + .cfi_rel_offset \reg, \ofs +.endm + +.macro PROLOG_SAVE_REG_PAIR reg1, reg2, ofs + st.d $r\reg1, $sp, \ofs + st.d $r\reg2, $sp, \ofs + 8 + .cfi_rel_offset \reg1, \ofs + .cfi_rel_offset \reg2, \ofs + 8 + .ifc \reg1, $fp + ori $fp, $sp, 0 + .cfi_def_cfa_register 22 + .endif +.endm + +.macro PROLOG_SAVE_REG_PAIR_INDEXED reg1, reg2, ofs + st.d $r\reg1, $sp, \ofs + st.d $r\reg2, $sp, \ofs + 8 + addi.d $sp, $sp, \ofs + .cfi_adjust_cfa_offset -\ofs + .cfi_rel_offset \reg1, 0 + .cfi_rel_offset \reg2, 8 + .ifc \reg1, $fp + ori $fp, $sp, 0 + .cfi_def_cfa_register $fp + .endif +.endm + +.macro PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED reg1, reg2, ofs + st.d $r\reg1, 
$sp, \ofs + st.d $r\reg2, $sp, \ofs + 8 + addi.d $sp, $sp, \ofs + .cfi_adjust_cfa_offset -\ofs + .cfi_rel_offset \reg1, 0 + .cfi_rel_offset \reg2, 8 +.endm + + +.macro EPILOG_RESTORE_REG reg, ofs + ld.d $r\reg, $sp, \ofs + .cfi_restore \reg +.endm + +.macro EPILOG_RESTORE_REG_PAIR reg1, reg2, ofs + ld.d $r\reg1, $sp, \ofs + ld.d $r\reg2, $sp, \ofs + 8 + .cfi_restore \reg1 + .cfi_restore \reg2 +.endm + +.macro EPILOG_RESTORE_REG_PAIR_INDEXED reg1, reg2, ofs + ld.d $r\reg1, $sp, 0 + ld.d $r\reg2, $sp, 8 + addi.d $sp, $sp, \ofs + .cfi_restore \reg1 + .cfi_restore \reg2 + .cfi_adjust_cfa_offset -\ofs +.endm + +.macro EPILOG_RETURN + jirl $r0, $ra, 0 +.endm + +.macro EMIT_BREAKPOINT + break 0 +.endm + +.macro EPILOG_BRANCH_REG reg + + jirl $r0, \reg, 0 + +.endm + +// Loads the address of a thread-local variable into the target register, +// which cannot be a0. Preserves all other registers. +.macro INLINE_GET_TLS_VAR target, var + .ifc \target, $a0 + .error "target cannot be a0" + .endif + + st.d $a0, $sp, -0x10 + st.d $ra, $sp, -0x8 + addi.d $sp, $sp, -16 + + // This sequence of instructions is recognized and potentially patched + // by the linker (GD->IE/LE relaxation). + //la.local $a0, \var // + //ld.d \target, $a0, 0 // + //.tlsdesccall \var //TODO-LOONGARCH64 + la.tls.ie $a0, \var + //jirl $ra, \target, 0 + // End of the sequence + + ori \target, $tp, 0 + add.d \target, \target, $a0 + + ld.d $a0, $sp, 0 + ld.d $ra, $sp, 8 + addi.d $sp, $sp, 16 +.endm + +// Inlined version of RhpGetThread. Target cannot be a0. +.macro INLINE_GETTHREAD target + INLINE_GET_TLS_VAR \target, C_FUNC(tls_CurrentThread) +.endm + +// Do not use these ETLS macros in functions that already create a stack frame. 
+// Creating two stack frames in one function can confuse the unwinder/debugger + +.macro GETTHREAD_ETLS_1 + PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -32 // ;; Push down stack pointer and store FP and RA + st.d $a0, $sp, 0x10 + + bl C_FUNC(RhpGetThread) + ori $a1, $a0, 0 + + ld.d $a0, $sp, 0x10 + EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 32 +.endm + +.macro GETTHREAD_ETLS_2 + PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -32 // ;; Push down stack pointer and store FP and RA + st.d $a0, $sp, 0x10 + st.d $a1, $sp, 0x18 + + bl C_FUNC(RhpGetThread) + ori $a2, $a0, 0 + + ld.d $a0, $sp, 0x10 + ld.d $a1, $sp, 0x18 + EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 32 +.endm + +.macro GETTHREAD_ETLS_3 + PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -48 // ;; Push down stack pointer and store FP and RA + st.d $a0, $sp, 0x10 + st.d $a1, $sp, 0x18 + st.d $a2, $sp, 0x20 + + bl C_FUNC(RhpGetThread) + ori $a3, $a0, 0 + + ld.d $a0, $sp, 0x10 + ld.d $a1, $sp, 0x18 + ld.d $a2, $sp, 0x20 + EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 48 +.endm + +.macro GETTHUNKDATA_ETLS_9 + PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -96 // ;; Push down stack pointer and store FP and RA + st.d $a0, $sp, 0x10 + st.d $a1, $sp, 0x18 + st.d $a2, $sp, 0x20 + st.d $a3, $sp, 0x28 + st.d $a4, $sp, 0x30 + st.d $a5, $sp, 0x38 + st.d $a6, $sp, 0x40 + st.d $a7, $sp, 0x48 + st.d $t6, $sp, 0x50 + st.d $t7, $sp, 0x58 + + bl RhpGetThunkData + ori $t0, $a0, 0 + + ld.d $a0, $sp, 0x10 + ld.d $a1, $sp, 0x18 + ld.d $a2, $sp, 0x20 + ld.d $a3, $sp, 0x28 + ld.d $a4, $sp, 0x30 + ld.d $a5, $sp, 0x38 + ld.d $a6, $sp, 0x40 + ld.d $a7, $sp, 0x48 + ld.d $t6, $sp, 0x50 + ld.d $t7, $sp, 0x58 + EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 96 +.endm + +.macro InterlockedOperationBarrier + dbar 0 +.endm + +.macro INLINE_THREAD_UNHIJACK threadReg, trashReg1, trashReg2 + // + // Thread::Unhijack() + // + ld.d \trashReg1, \threadReg, OFFSETOF__Thread__m_pvHijackedReturnAddress + beq \trashReg1, $zero, 0f + + ld.d \trashReg2, \threadReg, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation + 
st.d \trashReg1, \trashReg2, 0 + st.d $zero, \threadReg, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation + st.d $zero, \threadReg, OFFSETOF__Thread__m_pvHijackedReturnAddress +0: +.endm + +// Note: these must match the defs in PInvokeTransitionFrameFlags +PTFF_SAVE_SP = 0x00000400 +PTFF_SAVE_R4 = 0x00000800 +PTFF_SAVE_R5 = 0x00001000 +PTFF_SAVE_ALL_PRESERVED = 0x000001FF // NOTE: r23-r31 + +DEFAULT_FRAME_SAVE_FLAGS = PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP + +.macro PUSH_COOP_PINVOKE_FRAME trashReg + + PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -0x80 // Push down stack pointer and store FP and RA + + // 0x10 bytes reserved for Thread* and flags + + // Save callee saved registers + PROLOG_SAVE_REG_PAIR 23, 24, 0x20 + PROLOG_SAVE_REG_PAIR 25, 26, 0x30 + PROLOG_SAVE_REG_PAIR 27, 28, 0x40 + PROLOG_SAVE_REG_PAIR 29, 30, 0x50 + PROLOG_SAVE_REG_PAIR 31, 2, 0x60 + + // Save the value of SP before stack allocation to the last slot in the frame (slot #15) + addi.d \trashReg, $sp, 0x80 + st.d \trashReg, $sp, 0x70 + + // Record the bitmask of saved registers in the frame (slot #3) + ori \trashReg, $zero, DEFAULT_FRAME_SAVE_FLAGS + st.d \trashReg, $sp, 0x18 + + ori \trashReg, $sp, 0 +.endm + +// Pop the frame and restore register state preserved by PUSH_COOP_PINVOKE_FRAME +.macro POP_COOP_PINVOKE_FRAME + + // $s0,$s1 + EPILOG_RESTORE_REG_PAIR 23, 24, 0x20 + EPILOG_RESTORE_REG_PAIR 25, 26, 0x30 + EPILOG_RESTORE_REG_PAIR 27, 28, 0x40 + EPILOG_RESTORE_REG_PAIR 29, 30, 0x50 + EPILOG_RESTORE_REG_PAIR 31, 2, 0x60 + EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 0x80 +.endm + +// Bit position for the flags above, to be used with andi+beq/bne instructions +PTFF_THREAD_ABORT_BIT = 36 + +// +// CONSTANTS -- INTEGER +// +#define TSF_Attached 0x01 +#define TSF_SuppressGcStress 0x08 +#define TSF_DoNotTriggerGc 0x10 +#define TSF_SuppressGcStress__OR__TSF_DoNotTriggerGC 0x18 + +// Bit position for the flags above, to be used with andi+beq/bne instructions +TrapThreadsFlags_AbortInProgress_Bit = 0 
+TrapThreadsFlags_TrapThreads_Bit = 1 + +// These must match the TrapThreadsFlags enum +#define TrapThreadsFlags_None 0 +#define TrapThreadsFlags_AbortInProgress 1 +#define TrapThreadsFlags_TrapThreads 2 diff --git a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeSystemContextFactory.cs b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeSystemContextFactory.cs index 06db5253c590c..c7a96e12ddf17 100644 --- a/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeSystemContextFactory.cs +++ b/src/coreclr/nativeaot/System.Private.TypeLoader/src/Internal/Runtime/TypeLoader/TypeSystemContextFactory.cs @@ -42,6 +42,8 @@ public static TypeSystemContext Create() TargetArchitecture.X64, #elif TARGET_WASM TargetArchitecture.Wasm32, +#elif TARGET_LOONGARCH64 + TargetArchitecture.LoongArch64, #else #error Unknown architecture #endif diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_LoongArch64/LoongArch64Emitter.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_LoongArch64/LoongArch64Emitter.cs index ada020f7ca93b..e3ab2aac040a1 100644 --- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_LoongArch64/LoongArch64Emitter.cs +++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_LoongArch64/LoongArch64Emitter.cs @@ -108,9 +108,9 @@ public void EmitJMP(ISymbolNode symbol) } else { - //Builder.EmitReloc(symbol, RelocType.IMAGE_REL_BASED_LOONGARCH64_PC); - Builder.EmitUInt(0xffffffff); // bad code. - throw new NotImplementedException(); + Builder.EmitReloc(symbol, RelocType.IMAGE_REL_BASED_LOONGARCH64_PC); + EmitPC(Register.R21); // pcalau12i R21, 0 + Builder.EmitUInt(0x4c0002a0); // jirl R0, R21, 0 } } @@ -123,7 +123,7 @@ public void EmitRETIfEqual(Register regSrc) public void EmitJE(Register regSrc, ISymbolNode symbol) { - uint offset = symbol.RepresentsIndirectionCell ? 
7u : 2u; + uint offset = symbol.RepresentsIndirectionCell ? 7u : 3u; // BNEZ regSrc, offset Builder.EmitUInt((uint)(0x44000000 | (offset << 10) | ((uint)regSrc << 5))); diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_LoongArch64/LoongArch64ReadyToRunHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_LoongArch64/LoongArch64ReadyToRunHelperNode.cs index db4d1855e20fb..fd0d0ecde0fb0 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_LoongArch64/LoongArch64ReadyToRunHelperNode.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_LoongArch64/LoongArch64ReadyToRunHelperNode.cs @@ -48,30 +48,37 @@ protected override void EmitCode(NodeFactory factory, ref LoongArch64Emitter enc case ReadyToRunHelperId.GetThreadStaticBase: { MetadataType target = (MetadataType)Target; - encoder.EmitMOV(encoder.TargetRegister.Arg2, factory.TypeThreadStaticIndex(target)); - - // First arg: address of the TypeManager slot that provides the helper with - // information about module index and the type manager instance (which is used - // for initialization on first access). 
- encoder.EmitLD(encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg2, 0); - - // Second arg: index of the type in the ThreadStatic section of the modules - encoder.EmitLD(encoder.TargetRegister.Arg1, encoder.TargetRegister.Arg2, factory.Target.PointerSize); - - if (!factory.PreinitializationManager.HasLazyStaticConstructor(target)) + if (factory.TypeThreadStaticIndex(target) is TypeThreadStaticIndexNode ti && ti.IsInlined) { - encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.GetThreadStaticBaseForType)); + throw new NotImplementedException(); } else { - encoder.EmitMOV(encoder.TargetRegister.Arg2, factory.TypeNonGCStaticsSymbol(target)); - encoder.EmitADD(encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg2, -NonGCStaticsNode.GetClassConstructorContextSize(factory.Target)); - - encoder.EmitLD(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg2, 0); - encoder.EmitXOR(encoder.TargetRegister.IntraProcedureCallScratch1, encoder.TargetRegister.Arg3, 0); - encoder.EmitJE(encoder.TargetRegister.IntraProcedureCallScratch1, factory.HelperEntrypoint(HelperEntrypoint.GetThreadStaticBaseForType)); - - encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnThreadStaticBase)); + encoder.EmitMOV(encoder.TargetRegister.Arg2, factory.TypeThreadStaticIndex(target)); + + // First arg: address of the TypeManager slot that provides the helper with + // information about module index and the type manager instance (which is used + // for initialization on first access). 
+ encoder.EmitLD(encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg2, 0); + + // Second arg: index of the type in the ThreadStatic section of the modules + encoder.EmitLD(encoder.TargetRegister.Arg1, encoder.TargetRegister.Arg2, factory.Target.PointerSize); + + if (!factory.PreinitializationManager.HasLazyStaticConstructor(target)) + { + encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.GetThreadStaticBaseForType)); + } + else + { + encoder.EmitMOV(encoder.TargetRegister.Arg2, factory.TypeNonGCStaticsSymbol(target)); + encoder.EmitADD(encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg2, -NonGCStaticsNode.GetClassConstructorContextSize(factory.Target)); + + encoder.EmitLD(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg2, 0); + encoder.EmitXOR(encoder.TargetRegister.IntraProcedureCallScratch1, encoder.TargetRegister.Arg3, 0); + encoder.EmitJE(encoder.TargetRegister.IntraProcedureCallScratch1, factory.HelperEntrypoint(HelperEntrypoint.GetThreadStaticBaseForType)); + + encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnThreadStaticBase)); + } } } break; diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs index c04658df379f7..78d84ea586130 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs @@ -62,16 +62,31 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id, break; case ReadyToRunHelper.WriteBarrier: - mangledName = context.Target.Architecture == TargetArchitecture.ARM64 ? 
"RhpAssignRefArm64" : "RhpAssignRef"; + mangledName = context.Target.Architecture switch + { + TargetArchitecture.ARM64 => "RhpAssignRefArm64", + TargetArchitecture.LoongArch64 => "RhpAssignRefLoongArch64", + _ => "RhpAssignRef" + }; break; case ReadyToRunHelper.CheckedWriteBarrier: - mangledName = context.Target.Architecture == TargetArchitecture.ARM64 ? "RhpCheckedAssignRefArm64" : "RhpCheckedAssignRef"; + mangledName = context.Target.Architecture switch + { + TargetArchitecture.ARM64 => "RhpCheckedAssignRefArm64", + TargetArchitecture.LoongArch64 => "RhpCheckedAssignRefLoongArch64", + _ => "RhpCheckedAssignRef" + }; break; case ReadyToRunHelper.BulkWriteBarrier: - mangledName = "RhBuffer_BulkMoveWithWriteBarrier"; - break; + mangledName = "RhBuffer_BulkMoveWithWriteBarrier"; + break; case ReadyToRunHelper.ByRefWriteBarrier: - mangledName = context.Target.Architecture == TargetArchitecture.ARM64 ? "RhpByRefAssignRefArm64" : "RhpByRefAssignRef"; + mangledName = context.Target.Architecture switch + { + TargetArchitecture.ARM64 => "RhpByRefAssignRefArm64", + TargetArchitecture.LoongArch64 => "RhpByRefAssignRefLoongArch64", + _ => "RhpByRefAssignRef" + }; break; case ReadyToRunHelper.WriteBarrier_EAX: mangledName = "RhpAssignRefEAX"; diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CodeView/CodeViewNative.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CodeView/CodeViewNative.cs index 431ef42dc27ad..f28dea4d0aa60 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CodeView/CodeViewNative.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CodeView/CodeViewNative.cs @@ -2089,6 +2089,77 @@ public enum CodeViewRegister : ushort CV_AMD64_YMM15D1 = 685, CV_AMD64_YMM15D2 = 686, CV_AMD64_YMM15D3 = 687, + + // Register set for LoongArch64 + CV_LOONGARCH64_NOREG = CV_REG_NONE, + + // General purpose 64-bit integer registers + CV_LOONGARCH64_R0 = 10, + CV_LOONGARCH64_RA = 11, + 
CV_LOONGARCH64_TP = 12, + CV_LOONGARCH64_SP = 13, + CV_LOONGARCH64_A0 = 14, + CV_LOONGARCH64_A1 = 15, + CV_LOONGARCH64_A2 = 16, + CV_LOONGARCH64_A3 = 17, + CV_LOONGARCH64_A4 = 18, + CV_LOONGARCH64_A5 = 19, + CV_LOONGARCH64_A6 = 20, + CV_LOONGARCH64_A7 = 21, + CV_LOONGARCH64_T0 = 22, + CV_LOONGARCH64_T1 = 23, + CV_LOONGARCH64_T2 = 24, + CV_LOONGARCH64_T3 = 25, + CV_LOONGARCH64_T4 = 26, + CV_LOONGARCH64_T5 = 27, + CV_LOONGARCH64_T6 = 28, + CV_LOONGARCH64_T7 = 29, + CV_LOONGARCH64_T8 = 30, + CV_LOONGARCH64_X0 = 31, + CV_LOONGARCH64_FP = 32, + CV_LOONGARCH64_S0 = 33, + CV_LOONGARCH64_S1 = 34, + CV_LOONGARCH64_S2 = 35, + CV_LOONGARCH64_S3 = 36, + CV_LOONGARCH64_S4 = 37, + CV_LOONGARCH64_S5 = 38, + CV_LOONGARCH64_S6 = 39, + CV_LOONGARCH64_S7 = 40, + CV_LOONGARCH64_S8 = 41, + + // 64-bit floating point registers + CV_LOONGARCH64_F0 = 50, + CV_LOONGARCH64_F1 = 51, + CV_LOONGARCH64_F2 = 52, + CV_LOONGARCH64_F3 = 53, + CV_LOONGARCH64_F4 = 54, + CV_LOONGARCH64_F5 = 55, + CV_LOONGARCH64_F6 = 56, + CV_LOONGARCH64_F7 = 57, + CV_LOONGARCH64_F8 = 58, + CV_LOONGARCH64_F9 = 59, + CV_LOONGARCH64_F10 = 60, + CV_LOONGARCH64_F11 = 61, + CV_LOONGARCH64_F12 = 62, + CV_LOONGARCH64_F13 = 63, + CV_LOONGARCH64_F14 = 64, + CV_LOONGARCH64_F15 = 65, + CV_LOONGARCH64_F16 = 66, + CV_LOONGARCH64_F17 = 67, + CV_LOONGARCH64_F18 = 68, + CV_LOONGARCH64_F19 = 69, + CV_LOONGARCH64_F20 = 70, + CV_LOONGARCH64_F21 = 71, + CV_LOONGARCH64_F22 = 72, + CV_LOONGARCH64_F23 = 73, + CV_LOONGARCH64_F24 = 74, + CV_LOONGARCH64_F25 = 75, + CV_LOONGARCH64_F26 = 76, + CV_LOONGARCH64_F27 = 77, + CV_LOONGARCH64_F28 = 78, + CV_LOONGARCH64_F29 = 79, + CV_LOONGARCH64_F30 = 80, + CV_LOONGARCH64_F31 = 81, } // Matches CV_access_e diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CodeView/CodeViewSymbolsBuilder.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CodeView/CodeViewSymbolsBuilder.cs index e8faabae51939..40b7395798f38 100644 --- 
a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CodeView/CodeViewSymbolsBuilder.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CodeView/CodeViewSymbolsBuilder.cs @@ -105,6 +105,12 @@ private CodeViewRegister GetCVRegNum(uint regNum) // TODO: Floating point return CV_REG_NONE; + case TargetArchitecture.LoongArch64: + if (regNum <= 32) + return (CodeViewRegister)(regNum + (uint)CV_LOONGARCH64_R0); + // TODO: Floating point + return CV_REG_NONE; + default: return CV_REG_NONE; } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfBuilder.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfBuilder.cs index d79abeea30c5d..643f14056bd4d 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfBuilder.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfBuilder.cs @@ -80,6 +80,12 @@ public DwarfBuilder( _codeRelocType = RelocType.IMAGE_REL_BASED_HIGHLOW; break; + case TargetArchitecture.LoongArch64: + _targetPointerSize = 8; + _frameRegister = 22; // FP + _codeRelocType = RelocType.IMAGE_REL_BASED_DIR64; + break; + default: throw new NotSupportedException("Unsupported architecture"); } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCie.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCie.cs index 7497734283314..3aa4817732993 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCie.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCie.cs @@ -80,6 +80,19 @@ public DwarfCie(TargetArchitecture targetArchitecture) InitialCFAOffset = 8; break; + case TargetArchitecture.LoongArch64: + CodeAlignFactor = 1; + DataAlignFactor = -4; + ReturnAddressRegister = 1; // RA + Instructions = new byte[] + { + DW_CFA_def_cfa, + 3, // SP + 0, // Offset from SP + }; + InitialCFAOffset = 0; 
+ break; + default: throw new NotSupportedException("Unsupported architecture"); } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfExpressionBuilder.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfExpressionBuilder.cs index 98fb159e04774..89c2188774416 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfExpressionBuilder.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfExpressionBuilder.cs @@ -161,6 +161,10 @@ public static int DwarfRegNum(TargetArchitecture architecture, int regNum) _ => regNum - (int)RegNumX86.REGNUM_COUNT + 32 // FP registers }; + case TargetArchitecture.LoongArch64: + // Normal registers are directly mapped + return regNum; + default: throw new NotSupportedException(); } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfNative.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfNative.cs index 9abdc3361e049..8288f7fe8bd35 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfNative.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfNative.cs @@ -25,6 +25,7 @@ internal static class ElfNative public const ushort EM_ARM = 40; public const ushort EM_X86_64 = 62; public const ushort EM_AARCH64 = 183; + public const ushort EM_LOONGARCH = 258; // Section header type public const uint SHT_NULL = 0; @@ -435,5 +436,122 @@ internal static class ElfNative public const uint R_AARCH64_TLS_TPREL = 1030; public const uint R_AARCH64_TLSDESC = 1031; public const uint R_AARCH64_IRELATIVE = 1032; + + // Relocations (loongarch64) + public const uint R_LARCH_NONE = 0; + public const uint R_LARCH_32 = 1; + public const uint R_LARCH_64 = 2; + public const uint R_LARCH_RELATIVE = 3; + public const uint R_LARCH_COPY = 4; + public const uint R_LARCH_JUMP_SLOT = 5; + public const uint R_LARCH_TLS_DTPMOD32 = 6; + public const uint 
R_LARCH_TLS_DTPMOD64 = 7; + public const uint R_LARCH_TLS_DTPREL32 = 8; + public const uint R_LARCH_TLS_DTPREL64 = 9; + public const uint R_LARCH_TLS_TPREL32 = 10; + public const uint R_LARCH_TLS_TPREL64 = 11; + public const uint R_LARCH_IRELATIVE = 12; + public const uint R_LARCH_MARK_LA = 20; + public const uint R_LARCH_MARK_PCREL = 21; + public const uint R_LARCH_SOP_PUSH_PCREL = 22; + public const uint R_LARCH_SOP_PUSH_ABSOLUTE = 23; + public const uint R_LARCH_SOP_PUSH_DUP = 24; + public const uint R_LARCH_SOP_PUSH_GPREL = 25; + public const uint R_LARCH_SOP_PUSH_TLS_TPREL = 26; + public const uint R_LARCH_SOP_PUSH_TLS_GOT = 27; + public const uint R_LARCH_SOP_PUSH_TLS_GD = 28; + public const uint R_LARCH_SOP_PUSH_PLT_PCREL = 29; + public const uint R_LARCH_SOP_ASSERT = 30; + public const uint R_LARCH_SOP_NOT = 31; + public const uint R_LARCH_SOP_SUB = 32; + public const uint R_LARCH_SOP_SL = 33; + public const uint R_LARCH_SOP_SR = 34; + public const uint R_LARCH_SOP_ADD = 35; + public const uint R_LARCH_SOP_AND = 36; + public const uint R_LARCH_SOP_IF_ELSE = 37; + public const uint R_LARCH_SOP_POP_32_S_10_5 = 38; + public const uint R_LARCH_SOP_POP_32_U_10_12 = 39; + public const uint R_LARCH_SOP_POP_32_S_10_12 = 40; + public const uint R_LARCH_SOP_POP_32_S_10_16 = 41; + public const uint R_LARCH_SOP_POP_32_S_10_16_S2 = 42; + public const uint R_LARCH_SOP_POP_32_S_5_20 = 43; + public const uint R_LARCH_SOP_POP_32_S_0_5_10_16_S2 = 44; + public const uint R_LARCH_SOP_POP_32_S_0_10_10_16_S2 = 45; + public const uint R_LARCH_SOP_POP_32_U = 46; + public const uint R_LARCH_ADD8 = 47; + public const uint R_LARCH_ADD16 = 48; + public const uint R_LARCH_ADD24 = 49; + public const uint R_LARCH_ADD32 = 50; + public const uint R_LARCH_ADD64 = 51; + public const uint R_LARCH_SUB8 = 52; + public const uint R_LARCH_SUB16 = 53; + public const uint R_LARCH_SUB24 = 54; + public const uint R_LARCH_SUB32 = 55; + public const uint R_LARCH_SUB64 = 56; + public const uint 
R_LARCH_GNU_VTINHERIT = 57; + public const uint R_LARCH_GNU_VTENTRY = 58; + public const uint R_LARCH_B16 = 64; + public const uint R_LARCH_B21 = 65; + public const uint R_LARCH_B26 = 66; + public const uint R_LARCH_ABS_HI20 = 67; + public const uint R_LARCH_ABS_LO12 = 68; + public const uint R_LARCH_ABS64_LO20 = 69; + public const uint R_LARCH_ABS64_HI12 = 70; + public const uint R_LARCH_PCALA_HI20 = 71; + public const uint R_LARCH_PCALA_LO12 = 72; + public const uint R_LARCH_PCALA64_LO20 = 73; + public const uint R_LARCH_PCALA64_HI12 = 74; + public const uint R_LARCH_GOT_PC_HI20 = 75; + public const uint R_LARCH_GOT_PC_LO12 = 76; + public const uint R_LARCH_GOT64_PC_LO20 = 77; + public const uint R_LARCH_GOT64_PC_HI12 = 78; + public const uint R_LARCH_GOT_HI20 = 79; + public const uint R_LARCH_GOT_LO12 = 80; + public const uint R_LARCH_GOT64_LO20 = 81; + public const uint R_LARCH_GOT64_HI12 = 82; + public const uint R_LARCH_TLS_LE_HI20 = 83; + public const uint R_LARCH_TLS_LE_LO12 = 84; + public const uint R_LARCH_TLS_LE64_LO20 = 85; + public const uint R_LARCH_TLS_LE64_HI12 = 86; + public const uint R_LARCH_TLS_IE_PC_HI20 = 87; + public const uint R_LARCH_TLS_IE_PC_LO12 = 88; + public const uint R_LARCH_TLS_IE64_PC_LO20 = 89; + public const uint R_LARCH_TLS_IE64_PC_HI12 = 90; + public const uint R_LARCH_TLS_IE_HI20 = 91; + public const uint R_LARCH_TLS_IE_LO12 = 92; + public const uint R_LARCH_TLS_IE64_LO20 = 93; + public const uint R_LARCH_TLS_IE64_HI12 = 94; + public const uint R_LARCH_TLS_LD_PC_HI20 = 95; + public const uint R_LARCH_TLS_LD_HI20 = 96; + public const uint R_LARCH_TLS_GD_PC_HI20 = 97; + public const uint R_LARCH_TLS_GD_HI20 = 98; + public const uint R_LARCH_32_PCREL = 99; + public const uint R_LARCH_RELAX = 100; + public const uint R_LARCH_ALIGN = 102; + public const uint R_LARCH_PCREL20_S2 = 103; + public const uint R_LARCH_ADD6 = 105; + public const uint R_LARCH_SUB6 = 106; + public const uint R_LARCH_ADD_ULEB128 = 107; + public const uint 
R_LARCH_SUB_ULEB128 = 108; + public const uint R_LARCH_64_PCREL = 109; + public const uint R_LARCH_CALL36 = 110; + public const uint R_LARCH_TLS_DESC32 = 13; + public const uint R_LARCH_TLS_DESC64 = 14; + public const uint R_LARCH_TLS_DESC_PC_HI20 = 111; + public const uint R_LARCH_TLS_DESC_PC_LO12 = 112; + public const uint R_LARCH_TLS_DESC64_PC_LO20 = 113; + public const uint R_LARCH_TLS_DESC64_PC_HI12 = 114; + public const uint R_LARCH_TLS_DESC_HI20 = 115; + public const uint R_LARCH_TLS_DESC_LO12 = 116; + public const uint R_LARCH_TLS_DESC64_LO20 = 117; + public const uint R_LARCH_TLS_DESC64_HI12 = 118; + public const uint R_LARCH_TLS_DESC_LD = 119; + public const uint R_LARCH_TLS_DESC_CALL = 120; + public const uint R_LARCH_TLS_LE_HI20_R = 121; + public const uint R_LARCH_TLS_LE_ADD_R = 122; + public const uint R_LARCH_TLS_LE_LO12_R = 123; + public const uint R_LARCH_TLS_LD_PCREL20_S2 = 124; + public const uint R_LARCH_TLS_GD_PCREL20_S2 = 125; + public const uint R_LARCH_TLS_DESC_PCREL20_S2 = 126; } } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs index 2cd90d06977e0..228a6fb5d41f1 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs @@ -59,6 +59,7 @@ public ElfObjectWriter(NodeFactory factory, ObjectWritingOptions options) TargetArchitecture.X64 => EM_X86_64, TargetArchitecture.ARM => EM_ARM, TargetArchitecture.ARM64 => EM_AARCH64, + TargetArchitecture.LoongArch64 => EM_LOONGARCH, _ => throw new NotSupportedException("Unsupported architecture") }; _useInlineRelocationAddends = _machine is EM_386 or EM_ARM; @@ -358,6 +359,9 @@ private protected override void EmitRelocations(int sectionIndex, List rel } } + private void EmitRelocationsLoongArch64(int sectionIndex, List relocationList) + { + if 
(relocationList.Count > 0) + { + Span relocationEntry = stackalloc byte[24]; + var relocationStream = new MemoryStream(24 * relocationList.Count); + _sections[sectionIndex].RelocationStream = relocationStream; + foreach (SymbolicRelocation symbolicRelocation in relocationList) + { + uint symbolIndex = _symbolNameToIndex[symbolicRelocation.SymbolName]; + uint type = symbolicRelocation.Type switch + { + IMAGE_REL_BASED_DIR64 => R_LARCH_64, + IMAGE_REL_BASED_HIGHLOW => R_LARCH_32, + IMAGE_REL_BASED_RELPTR32 => R_LARCH_32_PCREL, + IMAGE_REL_BASED_LOONGARCH64_PC => R_LARCH_PCALA_HI20, + IMAGE_REL_BASED_LOONGARCH64_JIR => R_LARCH_CALL36, + _ => throw new NotSupportedException("Unknown relocation type: " + symbolicRelocation.Type) + }; + + BinaryPrimitives.WriteUInt64LittleEndian(relocationEntry, (ulong)symbolicRelocation.Offset); + BinaryPrimitives.WriteUInt64LittleEndian(relocationEntry.Slice(8), ((ulong)symbolIndex << 32) | type); + BinaryPrimitives.WriteInt64LittleEndian(relocationEntry.Slice(16), symbolicRelocation.Addend); + relocationStream.Write(relocationEntry); + + if (symbolicRelocation.Type is IMAGE_REL_BASED_LOONGARCH64_PC) + { + BinaryPrimitives.WriteUInt64LittleEndian(relocationEntry, (ulong)symbolicRelocation.Offset + 4); + BinaryPrimitives.WriteUInt64LittleEndian(relocationEntry.Slice(8), ((ulong)symbolIndex << 32) | type + 1); + BinaryPrimitives.WriteInt64LittleEndian(relocationEntry.Slice(16), symbolicRelocation.Addend); + relocationStream.Write(relocationEntry); + } + } + } + } + private protected override void EmitSectionsAndLayout() { if (_machine == EM_ARM) @@ -762,7 +802,12 @@ private void EmitObjectFile(FileStream outputFileStream) SectionHeaderEntryCount = sectionCount < SHN_LORESERVE ? (ushort)sectionCount : (ushort)0u, StringTableIndex = strTabSectionIndex < SHN_LORESERVE ? (ushort)strTabSectionIndex : (ushort)SHN_XINDEX, // For ARM32 claim conformance with the EABI specification - Flags = _machine is EM_ARM ? 
0x05000000u : 0u, + Flags = _machine switch + { + EM_ARM => 0x05000000u, + EM_LOONGARCH => 0x43u, + _ => 0u + }, }; elfHeader.Write(outputFileStream); diff --git a/src/native/external/llvm-libunwind/src/CompactUnwinder.hpp b/src/native/external/llvm-libunwind/src/CompactUnwinder.hpp index 5cb825e2fd3e9..cdadeb169a572 100644 --- a/src/native/external/llvm-libunwind/src/CompactUnwinder.hpp +++ b/src/native/external/llvm-libunwind/src/CompactUnwinder.hpp @@ -692,6 +692,10 @@ int CompactUnwinder_arm64::stepWithCompactEncodingFrame( } #endif // _LIBUNWIND_TARGET_AARCH64 +#if defined(_LIBUNWIND_TARGET_LOONGARCH64) + // TODO-LOONGARCH64: support libunwind on LoongArch64.(nativeaot) +#endif // _LIBUNWIND_TARGET_LOONGARCH64 + } // namespace libunwind diff --git a/src/native/external/llvm-libunwind/src/Registers.hpp b/src/native/external/llvm-libunwind/src/Registers.hpp index 4e1d75519ef35..927d810ff7946 100644 --- a/src/native/external/llvm-libunwind/src/Registers.hpp +++ b/src/native/external/llvm-libunwind/src/Registers.hpp @@ -5268,7 +5268,8 @@ class _LIBUNWIND_HIDDEN Registers_loongarch { bool validRegister(int num) const; uint64_t getRegister(int num) const; - void setRegister(int num, uint64_t value); + void setRegister(int num, uint64_t value, uint64_t location); + uint64_t getRegisterLocation(int num) const { return 0; } bool validFloatRegister(int num) const; double getFloatRegister(int num) const; void setFloatRegister(int num, double value); @@ -5283,9 +5284,9 @@ class _LIBUNWIND_HIDDEN Registers_loongarch { static int getArch() { return REGISTERS_LOONGARCH; } uint64_t getSP() const { return _registers.__r[3]; } - void setSP(uint64_t value) { _registers.__r[3] = value; } + void setSP(uint64_t value, uint64_t location) { _registers.__r[3] = value; } uint64_t getIP() const { return _registers.__pc; } - void setIP(uint64_t value) { _registers.__pc = value; } + void setIP(uint64_t value, uint64_t location) { _registers.__pc = value; } private: struct 
loongarch_thread_state_t { @@ -5337,7 +5338,7 @@ inline uint64_t Registers_loongarch::getRegister(int regNum) const { _LIBUNWIND_ABORT("unsupported loongarch register"); } -inline void Registers_loongarch::setRegister(int regNum, uint64_t value) { +inline void Registers_loongarch::setRegister(int regNum, uint64_t value, uint64_t location) { if (regNum >= UNW_LOONGARCH_R0 && regNum <= UNW_LOONGARCH_R31) _registers.__r[regNum - UNW_LOONGARCH_R0] = value; else if (regNum == UNW_REG_IP) From 88f82689a1142fb508a9d3f0e47c1985787ca192 Mon Sep 17 00:00:00 2001 From: sunlijun Date: Tue, 25 Jun 2024 19:48:05 +0800 Subject: [PATCH 02/10] Correct mistakes in the first patch. --- src/coreclr/nativeaot/Runtime/PalRedhawk.h | 9 +-- .../nativeaot/Runtime/PalRedhawkCommon.h | 4 +- .../Runtime/loongarch64/ExceptionHandling.S | 2 +- .../nativeaot/Runtime/loongarch64/PInvoke.S | 7 +- .../Runtime/loongarch64/WriteBarriers.S | 10 +-- src/coreclr/nativeaot/Runtime/regdisplay.h | 10 +-- .../nativeaot/Runtime/unix/UnwindHelpers.cpp | 1 - .../Runtime/unix/unixasmmacrosloongarch64.inc | 6 +- .../LoongArch64ReadyToRunHelperNode.cs | 48 ++++++------- .../ILCompiler.Compiler/Compiler/JitHelper.cs | 14 +--- .../ObjectWriter/CodeView/CodeViewNative.cs | 71 ------------------- .../CodeView/CodeViewSymbolsBuilder.cs | 6 -- .../Compiler/ObjectWriter/Dwarf/DwarfCie.cs | 6 +- .../Compiler/ObjectWriter/ElfObjectWriter.cs | 5 +- .../include/__libunwind_config.h | 4 +- .../llvm-libunwind/src/CompactUnwinder.hpp | 4 -- .../external/llvm-libunwind/src/Registers.hpp | 36 ++++++++-- 17 files changed, 79 insertions(+), 164 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/PalRedhawk.h b/src/coreclr/nativeaot/Runtime/PalRedhawk.h index ef96825c15e37..0ca8c4084fe96 100644 --- a/src/coreclr/nativeaot/Runtime/PalRedhawk.h +++ b/src/coreclr/nativeaot/Runtime/PalRedhawk.h @@ -462,11 +462,6 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT { #define LOONGARCH64_MAX_BREAKPOINTS 8 #define LOONGARCH64_MAX_WATCHPOINTS 
2 -typedef struct _NEON128 { - uint64_t Low; - int64_t High; -} NEON128, *PNEON128; - typedef struct DECLSPEC_ALIGN(16) _CONTEXT { // // Control flags. @@ -520,9 +515,9 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT { uint64_t Pc; // - // Floating Point/NEON Registers + // Floating Point Registers // - NEON128 V[32]; + uint64_t F[32]; uint32_t Fpcr; uint32_t Fpsr; diff --git a/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h b/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h index d47a12c39fed8..be655a6470c3b 100644 --- a/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h +++ b/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h @@ -111,8 +111,8 @@ struct PAL_LIMITED_CONTEXT uintptr_t SP; uintptr_t IP; - uint64_t F[16 - 8]; // Only the bottom 64-bit value of the V registers V8..V15 needs to be preserved - // (V0-V7 and V16-V31 are not preserved according to the ABI spec). + uint64_t F[32 - 24]; // Only the F registers F24..F31 needs to be preserved + // (F0-F23 are not preserved according to the ABI spec). 
uintptr_t GetIp() const { return IP; } diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/ExceptionHandling.S b/src/coreclr/nativeaot/Runtime/loongarch64/ExceptionHandling.S index 2b60eaa4c225f..9541face3fba6 100644 --- a/src/coreclr/nativeaot/Runtime/loongarch64/ExceptionHandling.S +++ b/src/coreclr/nativeaot/Runtime/loongarch64/ExceptionHandling.S @@ -19,7 +19,7 @@ // Setup a PAL_LIMITED_CONTEXT on the stack { .if \exceptionType == HARDWARE_EXCEPTION - addi.d $sp, $sp, -80 + addi.d $sp, $sp, -0x50 .cfi_adjust_cfa_offset 0x50 st.d $a3, $sp, 0 // a3 is the SP and a1 is the IP of the fault site st.d $a1, $sp, 8 diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/PInvoke.S b/src/coreclr/nativeaot/Runtime/loongarch64/PInvoke.S index 0c0c6acda4f63..0bce06069a2f3 100644 --- a/src/coreclr/nativeaot/Runtime/loongarch64/PInvoke.S +++ b/src/coreclr/nativeaot/Runtime/loongarch64/PInvoke.S @@ -8,16 +8,11 @@ .global RhpTrapThreads // Note: these must match the defs in PInvokeTransitionFrameFlags defined in rhbinder.h -PTFF_SAVE_SP = 0x00000400 +PTFF_SAVE_SP = 0x00000200 // Bit position for the flags above, to be used with andi+beq/bne instructions PTFF_THREAD_ABORT_BIT = 36 -// Bit position for the flags above, to be used with andi+beq/bne instructions -TSF_Attached_Bit = 0 -TSF_SuppressGcStress_Bit = 3 -TSF_DoNotTriggerGc_Bit = 4 - // // RhpPInvoke // diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/loongarch64/WriteBarriers.S index bdde2036a3f1e..ebdb83f93a93b 100644 --- a/src/coreclr/nativeaot/Runtime/loongarch64/WriteBarriers.S +++ b/src/coreclr/nativeaot/Runtime/loongarch64/WriteBarriers.S @@ -198,14 +198,14 @@ // WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: // - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpByRefAssignRefAVLocation1 // - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA 
contains the return address -LEAF_ENTRY RhpByRefAssignRefLoongArch64, _TEXT +LEAF_ENTRY RhpByRefAssignRef, _TEXT ALTERNATE_ENTRY RhpByRefAssignRefAVLocation1 ld.d $t7, $t8, 0 addi.d $t8, $t8, 8 - b C_FUNC(RhpCheckedAssignRefLoongArch64) + b C_FUNC(RhpCheckedAssignRef) -LEAF_END RhpByRefAssignRefLoongArch64, _TEXT +LEAF_END RhpByRefAssignRef, _TEXT // JIT_CheckedWriteBarrier(Object** dst, Object* src) // @@ -220,7 +220,7 @@ LEAF_END RhpByRefAssignRefLoongArch64, _TEXT // On exit: // t3, t4 : trashed // t6 : incremented by 8 - LEAF_ENTRY RhpCheckedAssignRefLoongArch64, _TEXT + LEAF_ENTRY RhpCheckedAssignRef, _TEXT // is destReg within the heap? PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, $t3 @@ -237,7 +237,7 @@ NotInHeap: addi.d $t6, $t6, 8 jirl $r0, $ra, 0 -LEAF_END RhpCheckedAssignRefLoongArch64, _TEXT +LEAF_END RhpCheckedAssignRef, _TEXT // JIT_WriteBarrier(Object** dst, Object* src) // diff --git a/src/coreclr/nativeaot/Runtime/regdisplay.h b/src/coreclr/nativeaot/Runtime/regdisplay.h index b9ee9aac1e43e..e95c9d5fd71d6 100644 --- a/src/coreclr/nativeaot/Runtime/regdisplay.h +++ b/src/coreclr/nativeaot/Runtime/regdisplay.h @@ -211,11 +211,11 @@ struct REGDISPLAY PCODE IP; - uint64_t F[16-8]; // Only the bottom 64-bit value of the V registers V8..V15 needs to be preserved - // (V0-V7 and V16-V31 are not preserved according to the ABI spec). - // These need to be unwound during a stack walk - // for EH, but not adjusted, so we only need - // their values, not their addresses + uint64_t F[32-24]; // Only the F registers F24..F31 needs to be preserved + // (F0-F23 are not preserved according to the ABI spec). 
+ // These need to be unwound during a stack walk + // for EH, but not adjusted, so we only need + // their values, not their addresses inline PCODE GetIP() { return IP; } inline uintptr_t GetSP() { return SP; } diff --git a/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp b/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp index 235b543f24b2e..67701b45dd948 100644 --- a/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/UnwindHelpers.cpp @@ -36,7 +36,6 @@ using libunwind::Registers_arm64; using libunwind::CompactUnwinder_arm64; #elif defined(TARGET_LOONGARCH64) using libunwind::Registers_loongarch; -//using libunwind::CompactUnwinder_loongarch64; //TODO-LOONGARCH64 #elif defined(TARGET_X86) using libunwind::Registers_x86; #else diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc index ff13d9d8f4053..e016e5a1c0bc8 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc @@ -264,9 +264,9 @@ C_FUNC(\Name): .endm // Note: these must match the defs in PInvokeTransitionFrameFlags -PTFF_SAVE_SP = 0x00000400 -PTFF_SAVE_R4 = 0x00000800 -PTFF_SAVE_R5 = 0x00001000 +PTFF_SAVE_SP = 0x00000200 +PTFF_SAVE_R4 = 0x00001000 +PTFF_SAVE_R5 = 0x00002000 PTFF_SAVE_ALL_PRESERVED = 0x000001FF // NOTE: r23-r31 DEFAULT_FRAME_SAVE_FLAGS = PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_LoongArch64/LoongArch64ReadyToRunHelperNode.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_LoongArch64/LoongArch64ReadyToRunHelperNode.cs index fd0d0ecde0fb0..8656626e85406 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_LoongArch64/LoongArch64ReadyToRunHelperNode.cs +++ 
b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/DependencyAnalysis/Target_LoongArch64/LoongArch64ReadyToRunHelperNode.cs @@ -48,37 +48,31 @@ protected override void EmitCode(NodeFactory factory, ref LoongArch64Emitter enc case ReadyToRunHelperId.GetThreadStaticBase: { MetadataType target = (MetadataType)Target; - if (factory.TypeThreadStaticIndex(target) is TypeThreadStaticIndexNode ti && ti.IsInlined) + + encoder.EmitMOV(encoder.TargetRegister.Arg2, factory.TypeThreadStaticIndex(target)); + + // First arg: address of the TypeManager slot that provides the helper with + // information about module index and the type manager instance (which is used + // for initialization on first access). + encoder.EmitLD(encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg2, 0); + + // Second arg: index of the type in the ThreadStatic section of the modules + encoder.EmitLD(encoder.TargetRegister.Arg1, encoder.TargetRegister.Arg2, factory.Target.PointerSize); + + if (!factory.PreinitializationManager.HasLazyStaticConstructor(target)) { - throw new NotImplementedException(); + encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.GetThreadStaticBaseForType)); } else { - encoder.EmitMOV(encoder.TargetRegister.Arg2, factory.TypeThreadStaticIndex(target)); - - // First arg: address of the TypeManager slot that provides the helper with - // information about module index and the type manager instance (which is used - // for initialization on first access). 
- encoder.EmitLD(encoder.TargetRegister.Arg0, encoder.TargetRegister.Arg2, 0); - - // Second arg: index of the type in the ThreadStatic section of the modules - encoder.EmitLD(encoder.TargetRegister.Arg1, encoder.TargetRegister.Arg2, factory.Target.PointerSize); - - if (!factory.PreinitializationManager.HasLazyStaticConstructor(target)) - { - encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.GetThreadStaticBaseForType)); - } - else - { - encoder.EmitMOV(encoder.TargetRegister.Arg2, factory.TypeNonGCStaticsSymbol(target)); - encoder.EmitADD(encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg2, -NonGCStaticsNode.GetClassConstructorContextSize(factory.Target)); - - encoder.EmitLD(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg2, 0); - encoder.EmitXOR(encoder.TargetRegister.IntraProcedureCallScratch1, encoder.TargetRegister.Arg3, 0); - encoder.EmitJE(encoder.TargetRegister.IntraProcedureCallScratch1, factory.HelperEntrypoint(HelperEntrypoint.GetThreadStaticBaseForType)); - - encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnThreadStaticBase)); - } + encoder.EmitMOV(encoder.TargetRegister.Arg2, factory.TypeNonGCStaticsSymbol(target)); + encoder.EmitADD(encoder.TargetRegister.Arg2, encoder.TargetRegister.Arg2, -NonGCStaticsNode.GetClassConstructorContextSize(factory.Target)); + + encoder.EmitLD(encoder.TargetRegister.Arg3, encoder.TargetRegister.Arg2, 0); + encoder.EmitXOR(encoder.TargetRegister.IntraProcedureCallScratch1, encoder.TargetRegister.Arg3, 0); + encoder.EmitJE(encoder.TargetRegister.IntraProcedureCallScratch1, factory.HelperEntrypoint(HelperEntrypoint.GetThreadStaticBaseForType)); + + encoder.EmitJMP(factory.HelperEntrypoint(HelperEntrypoint.EnsureClassConstructorRunAndReturnThreadStaticBase)); } } break; diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs index 78d84ea586130..54f80651288da 100644 --- 
a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/JitHelper.cs @@ -70,23 +70,13 @@ public static void GetEntryPoint(TypeSystemContext context, ReadyToRunHelper id, }; break; case ReadyToRunHelper.CheckedWriteBarrier: - mangledName = context.Target.Architecture switch - { - TargetArchitecture.ARM64 => "RhpCheckedAssignRefArm64", - TargetArchitecture.LoongArch64 => "RhpCheckedAssignRefLoongArch64", - _ => "RhpCheckedAssignRef" - }; + mangledName = context.Target.Architecture == TargetArchitecture.ARM64 ? "RhpCheckedAssignRefArm64" : "RhpCheckedAssignRef"; break; case ReadyToRunHelper.BulkWriteBarrier: mangledName = "RhBuffer_BulkMoveWithWriteBarrier"; break; case ReadyToRunHelper.ByRefWriteBarrier: - mangledName = context.Target.Architecture switch - { - TargetArchitecture.ARM64 => "RhpByRefAssignRefArm64", - TargetArchitecture.LoongArch64 => "RhpByRefAssignRefLoongArch64", - _ => "RhpByRefAssignRef" - }; + mangledName = context.Target.Architecture == TargetArchitecture.ARM64 ? 
"RhpByRefAssignRefArm64" : "RhpByRefAssignRef"; break; case ReadyToRunHelper.WriteBarrier_EAX: mangledName = "RhpAssignRefEAX"; diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CodeView/CodeViewNative.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CodeView/CodeViewNative.cs index f28dea4d0aa60..431ef42dc27ad 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CodeView/CodeViewNative.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CodeView/CodeViewNative.cs @@ -2089,77 +2089,6 @@ public enum CodeViewRegister : ushort CV_AMD64_YMM15D1 = 685, CV_AMD64_YMM15D2 = 686, CV_AMD64_YMM15D3 = 687, - - // Register set for LoongArch64 - CV_LOONGARCH64_NOREG = CV_REG_NONE, - - // General purpose 64-bit integer registers - CV_LOONGARCH64_R0 = 10, - CV_LOONGARCH64_RA = 11, - CV_LOONGARCH64_TP = 12, - CV_LOONGARCH64_SP = 13, - CV_LOONGARCH64_A0 = 14, - CV_LOONGARCH64_A1 = 15, - CV_LOONGARCH64_A2 = 16, - CV_LOONGARCH64_A3 = 17, - CV_LOONGARCH64_A4 = 18, - CV_LOONGARCH64_A5 = 19, - CV_LOONGARCH64_A6 = 20, - CV_LOONGARCH64_A7 = 21, - CV_LOONGARCH64_T0 = 22, - CV_LOONGARCH64_T1 = 23, - CV_LOONGARCH64_T2 = 24, - CV_LOONGARCH64_T3 = 25, - CV_LOONGARCH64_T4 = 26, - CV_LOONGARCH64_T5 = 27, - CV_LOONGARCH64_T6 = 28, - CV_LOONGARCH64_T7 = 29, - CV_LOONGARCH64_T8 = 30, - CV_LOONGARCH64_X0 = 31, - CV_LOONGARCH64_FP = 32, - CV_LOONGARCH64_S0 = 33, - CV_LOONGARCH64_S1 = 34, - CV_LOONGARCH64_S2 = 35, - CV_LOONGARCH64_S3 = 36, - CV_LOONGARCH64_S4 = 37, - CV_LOONGARCH64_S5 = 38, - CV_LOONGARCH64_S6 = 39, - CV_LOONGARCH64_S7 = 40, - CV_LOONGARCH64_S8 = 41, - - // 64-bit floating point registers - CV_LOONGARCH64_F0 = 50, - CV_LOONGARCH64_F1 = 51, - CV_LOONGARCH64_F2 = 52, - CV_LOONGARCH64_F3 = 53, - CV_LOONGARCH64_F4 = 54, - CV_LOONGARCH64_F5 = 55, - CV_LOONGARCH64_F6 = 56, - CV_LOONGARCH64_F7 = 57, - CV_LOONGARCH64_F8 = 58, - CV_LOONGARCH64_F9 = 59, - CV_LOONGARCH64_F10 = 60, - CV_LOONGARCH64_F11 = 
61, - CV_LOONGARCH64_F12 = 62, - CV_LOONGARCH64_F13 = 63, - CV_LOONGARCH64_F14 = 64, - CV_LOONGARCH64_F15 = 65, - CV_LOONGARCH64_F16 = 66, - CV_LOONGARCH64_F17 = 67, - CV_LOONGARCH64_F18 = 68, - CV_LOONGARCH64_F19 = 69, - CV_LOONGARCH64_F20 = 70, - CV_LOONGARCH64_F21 = 71, - CV_LOONGARCH64_F22 = 72, - CV_LOONGARCH64_F23 = 73, - CV_LOONGARCH64_F24 = 74, - CV_LOONGARCH64_F25 = 75, - CV_LOONGARCH64_F26 = 76, - CV_LOONGARCH64_F27 = 77, - CV_LOONGARCH64_F28 = 78, - CV_LOONGARCH64_F29 = 79, - CV_LOONGARCH64_F30 = 80, - CV_LOONGARCH64_F31 = 81, } // Matches CV_access_e diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CodeView/CodeViewSymbolsBuilder.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CodeView/CodeViewSymbolsBuilder.cs index 40b7395798f38..e8faabae51939 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CodeView/CodeViewSymbolsBuilder.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/CodeView/CodeViewSymbolsBuilder.cs @@ -105,12 +105,6 @@ private CodeViewRegister GetCVRegNum(uint regNum) // TODO: Floating point return CV_REG_NONE; - case TargetArchitecture.LoongArch64: - if (regNum <= 32) - return (CodeViewRegister)(regNum + (uint)CV_LOONGARCH64_R0); - // TODO: Floating point - return CV_REG_NONE; - default: return CV_REG_NONE; } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCie.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCie.cs index 3aa4817732993..e5303e64f1aa7 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCie.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCie.cs @@ -84,12 +84,12 @@ public DwarfCie(TargetArchitecture targetArchitecture) CodeAlignFactor = 1; DataAlignFactor = -4; ReturnAddressRegister = 1; // RA - Instructions = new byte[] - { + Instructions = + [ DW_CFA_def_cfa, 3, // SP 0, // Offset from SP 
- }; + ]; InitialCFAOffset = 0; break; diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs index 228a6fb5d41f1..8d2dcfe576a5c 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs @@ -801,11 +801,10 @@ private void EmitObjectFile(FileStream outputFileStream) SectionHeaderEntrySize = (ushort)ElfSectionHeader.GetSize(), SectionHeaderEntryCount = sectionCount < SHN_LORESERVE ? (ushort)sectionCount : (ushort)0u, StringTableIndex = strTabSectionIndex < SHN_LORESERVE ? (ushort)strTabSectionIndex : (ushort)SHN_XINDEX, - // For ARM32 claim conformance with the EABI specification Flags = _machine switch { - EM_ARM => 0x05000000u, - EM_LOONGARCH => 0x43u, + EM_ARM => 0x05000000u, // For ARM32 claim conformance with the EABI specification + EM_LOONGARCH => 0x43u, // For LoongArch claim conformance with the EABI specification _ => 0u }, }; diff --git a/src/native/external/llvm-libunwind/include/__libunwind_config.h b/src/native/external/llvm-libunwind/include/__libunwind_config.h index ecfe7be0d12f6..d521890f17f86 100644 --- a/src/native/external/llvm-libunwind/include/__libunwind_config.h +++ b/src/native/external/llvm-libunwind/include/__libunwind_config.h @@ -173,8 +173,8 @@ #elif defined(__loongarch__) #define _LIBUNWIND_TARGET_LOONGARCH 1 #if __loongarch_grlen == 64 -#define _LIBUNWIND_CONTEXT_SIZE 65 -#define _LIBUNWIND_CURSOR_SIZE 77 +#define _LIBUNWIND_CONTEXT_SIZE 98 +#define _LIBUNWIND_CURSOR_SIZE 110 #elif defined(HOST_WASM) #define _LIBUNWIND_TARGET_WASM 1 // TODO: Determine the right values diff --git a/src/native/external/llvm-libunwind/src/CompactUnwinder.hpp b/src/native/external/llvm-libunwind/src/CompactUnwinder.hpp index cdadeb169a572..5cb825e2fd3e9 100644 --- 
a/src/native/external/llvm-libunwind/src/CompactUnwinder.hpp +++ b/src/native/external/llvm-libunwind/src/CompactUnwinder.hpp @@ -692,10 +692,6 @@ int CompactUnwinder_arm64::stepWithCompactEncodingFrame( } #endif // _LIBUNWIND_TARGET_AARCH64 -#if defined(_LIBUNWIND_TARGET_LOONGARCH64) - // TODO-LOONGARCH64: support libunwind on LoongArch64.(nativeaot) -#endif // _LIBUNWIND_TARGET_LOONGARCH64 - } // namespace libunwind diff --git a/src/native/external/llvm-libunwind/src/Registers.hpp b/src/native/external/llvm-libunwind/src/Registers.hpp index 927d810ff7946..b76f24ea67da5 100644 --- a/src/native/external/llvm-libunwind/src/Registers.hpp +++ b/src/native/external/llvm-libunwind/src/Registers.hpp @@ -5269,13 +5269,13 @@ class _LIBUNWIND_HIDDEN Registers_loongarch { bool validRegister(int num) const; uint64_t getRegister(int num) const; void setRegister(int num, uint64_t value, uint64_t location); - uint64_t getRegisterLocation(int num) const { return 0; } bool validFloatRegister(int num) const; double getFloatRegister(int num) const; void setFloatRegister(int num, double value); bool validVectorRegister(int num) const; v128 getVectorRegister(int num) const; void setVectorRegister(int num, v128 value); + uint64_t getRegisterLocation(int num) const; static const char *getRegisterName(int num); void jumpto(); static constexpr int lastDwarfRegNum() { @@ -5289,12 +5289,18 @@ class _LIBUNWIND_HIDDEN Registers_loongarch { void setIP(uint64_t value, uint64_t location) { _registers.__pc = value; } private: - struct loongarch_thread_state_t { + struct GPRs { + uint64_t __r[32]; + uint64_t __pc; + }; + + struct GPRLocations { uint64_t __r[32]; uint64_t __pc; }; - loongarch_thread_state_t _registers; + GPRs _registers; + GPRLocations _registerLocations; #if __loongarch_frlen == 64 double _floats[32]; #endif @@ -5304,6 +5310,7 @@ inline Registers_loongarch::Registers_loongarch(const void *registers) { static_assert((check_fit::does_fit), "loongarch registers do not fit into 
unw_context_t"); memcpy(&_registers, registers, sizeof(_registers)); + memset(&_registerLocations, 0, sizeof(_registerLocations)); static_assert(sizeof(_registers) == 0x108, "expected float registers to be at offset 264"); #if __loongarch_frlen == 64 @@ -5314,6 +5321,7 @@ inline Registers_loongarch::Registers_loongarch(const void *registers) { inline Registers_loongarch::Registers_loongarch() { memset(&_registers, 0, sizeof(_registers)); + memset(&_registerLocations, 0, sizeof(_registerLocations)); #if __loongarch_frlen == 64 memset(&_floats, 0, sizeof(_floats)); #endif @@ -5339,16 +5347,32 @@ inline uint64_t Registers_loongarch::getRegister(int regNum) const { } inline void Registers_loongarch::setRegister(int regNum, uint64_t value, uint64_t location) { - if (regNum >= UNW_LOONGARCH_R0 && regNum <= UNW_LOONGARCH_R31) + if (regNum >= UNW_LOONGARCH_R0 && regNum <= UNW_LOONGARCH_R31) { _registers.__r[regNum - UNW_LOONGARCH_R0] = value; - else if (regNum == UNW_REG_IP) + _registerLocations.__r[regNum - UNW_LOONGARCH_R0] = location; + } + else if (regNum == UNW_REG_IP) { _registers.__pc = value; - else if (regNum == UNW_REG_SP) + _registerLocations.__pc = location; + } + else if (regNum == UNW_REG_SP) { _registers.__r[3] = value; + _registerLocations.__r[3] = location; + } else _LIBUNWIND_ABORT("unsupported loongarch register"); } +inline uint64_t Registers_loongarch::getRegisterLocation(int regNum) const { + if (regNum == UNW_REG_IP) + return _registerLocations.__pc; + if (regNum == UNW_REG_SP) + return _registerLocations.__r[3]; + if ((regNum >= 0) && (regNum < 32)) + return _registerLocations.__r[regNum]; + _LIBUNWIND_ABORT("unsupported loongarch64 register"); +} + inline const char *Registers_loongarch::getRegisterName(int regNum) { switch (regNum) { case UNW_REG_IP: From cdc759189a2fa33842eb24256a9c90326b92667d Mon Sep 17 00:00:00 2001 From: Sun Lijun Date: Tue, 25 Jun 2024 22:08:45 +0800 Subject: [PATCH 03/10] Update 
src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs Co-authored-by: Filip Navara --- .../Compiler/ObjectWriter/ElfObjectWriter.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs index 8d2dcfe576a5c..9f3d877b602b6 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs @@ -804,7 +804,7 @@ private void EmitObjectFile(FileStream outputFileStream) Flags = _machine switch { EM_ARM => 0x05000000u, // For ARM32 claim conformance with the EABI specification - EM_LOONGARCH => 0x43u, // For LoongArch claim conformance with the EABI specification + EM_LOONGARCH => 0x43u, // Per the LoongArch ELF psABI, specify the ABI version (1) and modifiers (64-bit GPRs, 64-bit FPRs) _ => 0u }, }; From eb0f3a6300390ed00ae7244d09c0dc5c30093d3a Mon Sep 17 00:00:00 2001 From: sunlijun Date: Thu, 27 Jun 2024 17:13:24 +0800 Subject: [PATCH 04/10] [LoongArch64] Part-4: Add changes in nativeaot-runtime and everything else about nativeaot. 
--- .../nativeaot/Runtime/loongarch64/AllocFast.S | 276 ------ .../Runtime/loongarch64/AsmOffsetsCpu.h | 67 -- .../Runtime/loongarch64/ExceptionHandling.S | 831 ------------------ .../nativeaot/Runtime/loongarch64/GcProbe.S | 198 ----- .../loongarch64/InteropThunksHelpers.S | 52 -- .../nativeaot/Runtime/loongarch64/MiscStubs.S | 5 - .../nativeaot/Runtime/loongarch64/PInvoke.S | 60 -- .../Runtime/loongarch64/StubDispatch.S | 117 --- .../Runtime/loongarch64/UniversalTransition.S | 191 ---- .../Runtime/loongarch64/WriteBarriers.S | 354 -------- .../nativeaot/Runtime/unix/unixasmmacros.inc | 2 - .../Runtime/unix/unixasmmacrosloongarch64.inc | 328 ------- .../Target_LoongArch64/LoongArch64Emitter.cs | 8 +- .../ObjectWriter/Dwarf/DwarfBuilder.cs | 6 - .../Compiler/ObjectWriter/Dwarf/DwarfCie.cs | 13 - .../Dwarf/DwarfExpressionBuilder.cs | 4 - .../Compiler/ObjectWriter/ElfNative.cs | 118 --- .../Compiler/ObjectWriter/ElfObjectWriter.cs | 48 +- 18 files changed, 6 insertions(+), 2672 deletions(-) delete mode 100644 src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S delete mode 100644 src/coreclr/nativeaot/Runtime/loongarch64/AsmOffsetsCpu.h delete mode 100644 src/coreclr/nativeaot/Runtime/loongarch64/ExceptionHandling.S delete mode 100644 src/coreclr/nativeaot/Runtime/loongarch64/GcProbe.S delete mode 100644 src/coreclr/nativeaot/Runtime/loongarch64/InteropThunksHelpers.S delete mode 100644 src/coreclr/nativeaot/Runtime/loongarch64/MiscStubs.S delete mode 100644 src/coreclr/nativeaot/Runtime/loongarch64/PInvoke.S delete mode 100644 src/coreclr/nativeaot/Runtime/loongarch64/StubDispatch.S delete mode 100644 src/coreclr/nativeaot/Runtime/loongarch64/UniversalTransition.S delete mode 100644 src/coreclr/nativeaot/Runtime/loongarch64/WriteBarriers.S delete mode 100644 src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S b/src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S deleted file mode 
100644 index a43b77c6ee959..0000000000000 --- a/src/coreclr/nativeaot/Runtime/loongarch64/AllocFast.S +++ /dev/null @@ -1,276 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include -#include "AsmOffsets.inc" - -// GC type flags -GC_ALLOC_FINALIZE = 1 - -// -// Rename fields of nested structs -// -OFFSETOF__Thread__m_alloc_context__alloc_ptr = OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_ptr -OFFSETOF__Thread__m_alloc_context__alloc_limit = OFFSETOF__Thread__m_rgbAllocContextBuffer + OFFSETOF__gc_alloc_context__alloc_limit - - - -// Allocate non-array, non-finalizable object. If the allocation doesn't fit into the current thread's -// allocation context then automatically fallback to the slow allocation path. -// $a0 == MethodTable - LEAF_ENTRY RhpNewFast, _TEXT - - // a1 = GetThread() -#ifdef FEATURE_EMULATED_TLS - GETTHREAD_ETLS_1 -#else - INLINE_GETTHREAD $a1 -#endif - - // - // a0 contains MethodTable pointer - // - ld.w $a2, $a0, OFFSETOF__MethodTable__m_uBaseSize - - // - // a0: MethodTable pointer - // a1: Thread pointer - // a2: base size - // - - // Load potential new object address into t3. - ld.d $t3, $a1, OFFSETOF__Thread__m_alloc_context__alloc_ptr - - // Determine whether the end of the object would lie outside of the current allocation context. If so, - // we abandon the attempt to allocate the object directly and fall back to the slow helper. - add.d $a2, $a2, $t3 - ld.d $t4, $a1, OFFSETOF__Thread__m_alloc_context__alloc_limit - bltu $t4, $a2, RhpNewFast_RarePath - - // Update the alloc pointer to account for the allocation. 
- st.d $a2, $a1, OFFSETOF__Thread__m_alloc_context__alloc_ptr - - // Set the new objects MethodTable pointer - st.d $a0, $t3, OFFSETOF__Object__m_pEEType - - ori $a0, $t3, 0 - jirl $r0, $ra, 0 - -RhpNewFast_RarePath: - ori $a1, $zero, 0 - b RhpNewObject - LEAF_END RhpNewFast, _TEXT - -// Allocate non-array object with finalizer. -// a0 == MethodTable - LEAF_ENTRY RhpNewFinalizable, _TEXT - ori $a1, $zero, GC_ALLOC_FINALIZE - b RhpNewObject - LEAF_END RhpNewFinalizable, _TEXT - -// Allocate non-array object. -// a0 == MethodTable -// a1 == alloc flags - NESTED_ENTRY RhpNewObject, _TEXT, NoHandler - - PUSH_COOP_PINVOKE_FRAME $a3 - - // a3: transition frame - - // Preserve the MethodTable in s0 - ori $s0, $a0, 0 - - addi.w $a2, $zero, 0 // numElements - - // Call the rest of the allocation helper. - // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) - bl C_FUNC(RhpGcAlloc) - - // Set the new objects MethodTable pointer on success. - beq $a0, $zero, NewOutOfMemory - - .cfi_remember_state - POP_COOP_PINVOKE_FRAME - EPILOG_RETURN - - .cfi_restore_state -NewOutOfMemory: - // This is the OOM failure path. We are going to tail-call to a managed helper that will throw - // an out of memory exception that the caller of this allocator understands. - - ori $a0, $s0, 0 // MethodTable pointer - ori $a1, $zero, 0 // Indicate that we should throw OOM. - - POP_COOP_PINVOKE_FRAME - b C_FUNC(RhExceptionHandling_FailedAllocation) - - NESTED_END RhpNewObject, _TEXT - -// Allocate a string. -// a0 == MethodTable -// a1 == element/character count - LEAF_ENTRY RhNewString, _TEXT - // Make sure computing the overall allocation size wont overflow - lu12i.w $a2, 0x3ffff // (MAX_STRING_LENGTH >> 12) & 0xFFFFF - ori $a2, $a2, 0xfdf // MAX_STRING_LENGTH & 0xFFF - bltu $a2, $a1, StringSizeOverflow - - // Compute overall allocation size (align(base size + (element size * elements), 8)). 
- addi.w $a2, $zero, STRING_COMPONENT_SIZE - ori $a3, $zero, 29 // STRING_BASE_SIZE + 7 - mulw.d.w $a2, $a1, $a2 - add.d $a2, $a2, $a3 // a2 = (a1[31:0] * a2[31:0])[64:0] + a3 - srli.d $a2, $a2, 3 - slli.d $a2, $a2, 3 - - // a0 == MethodTable - // a1 == element count - // a2 == string size - -#ifdef FEATURE_EMULATED_TLS - GETTHREAD_ETLS_3 -#else - INLINE_GETTHREAD $a3 -#endif - - // Load potential new object address into t3. - ld.d $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr - - // Determine whether the end of the object would lie outside of the current allocation context. If so, - // we abandon the attempt to allocate the object directly and fall back to the slow helper. - add.d $a2, $a2, $t3 - ld.d $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_limit - bltu $t3, $a2, RhNewString_Rare - - // Reload new object address into r12. - ld.d $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr - - // Update the alloc pointer to account for the allocation. - st.d $a2, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr - - // Set the new objects MethodTable pointer and element count. - st.d $a0, $t3, OFFSETOF__Object__m_pEEType - st.d $a1, $t3, OFFSETOF__Array__m_Length - - // Return the object allocated in a0. - ori $a0, $t3, 0 - - jirl $r0, $ra, 0 - -StringSizeOverflow: - // We get here if the length of the final string object can not be represented as an unsigned - // 32-bit value. We are going to tail-call to a managed helper that will throw - // an OOM exception that the caller of this allocator understands. - - // a0 holds MethodTable pointer already - ori $a1, $zero, 1 // Indicate that we should throw OverflowException - b C_FUNC(RhExceptionHandling_FailedAllocation) - -RhNewString_Rare: - b C_FUNC(RhpNewArrayRare) - LEAF_END RhNewString, _Text - -// Allocate one dimensional, zero based array (SZARRAY). 
-// $a0 == MethodTable -// $a1 == element count - LEAF_ENTRY RhpNewArray, _Text - - // We want to limit the element count to the non-negative 32-bit int range. - // If the element count is <= 0x7FFFFFFF, no overflow is possible because the component - // size is <= 0xffff (it is an unsigned 16-bit value), and the base size for the worst - // case (32 dimensional MdArray) is less than 0xffff, and thus the product fits in 64 bits. - lu12i.w $a2, 0x7ffff - ori $a2, $a2, 0xfff - bltu $a2, $a1, ArraySizeOverflow - - ld.h $a2, $a0, OFFSETOF__MethodTable__m_usComponentSize - mulw.d.w $a2, $a1, $a2 - ld.w $a3, $a0, OFFSETOF__MethodTable__m_uBaseSize - add.d $a2, $a2, $a3 - addi.d $a2, $a2, 7 - srli.d $a2, $a2, 3 - slli.d $a2, $a2, 3 - // a0 == MethodTable - // a1 == element count - // a2 == array size - - INLINE_GETTHREAD $a3 - - // Load potential new object address into t3. - ld.d $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr - - // Determine whether the end of the object would lie outside of the current allocation context. If so, - // we abandon the attempt to allocate the object directly and fall back to the slow helper. - add.d $a2, $a2, $t3 - ld.d $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_limit - bltu $t3, $a2, RhpNewArray_Rare - - // Reload new object address into t3. - ld.d $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr - - // Update the alloc pointer to account for the allocation. - st.d $a2, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr - - // Set the new objects MethodTable pointer and element count. - st.d $a0, $t3, OFFSETOF__Object__m_pEEType - st.d $a1, $t3, OFFSETOF__Array__m_Length - - // Return the object allocated in r0. - ori $a0, $t3, 0 - - jirl $r0, $ra, 0 - -ArraySizeOverflow: - // We get here if the size of the final array object can not be represented as an unsigned - // 32-bit value. We are going to tail-call to a managed helper that will throw - // an overflow exception that the caller of this allocator understands. 
- - // $a0 holds MethodTable pointer already - ori $a1, $zero, 1 // Indicate that we should throw OverflowException - b C_FUNC(RhExceptionHandling_FailedAllocation) - -RhpNewArray_Rare: - b C_FUNC(RhpNewArrayRare) - LEAF_END RhpNewArray, _TEXT - -// Allocate one dimensional, zero based array (SZARRAY) using the slow path that calls a runtime helper. -// a0 == MethodTable -// a1 == element count -// a2 == array size + Thread::m_alloc_context::alloc_ptr -// a3 == Thread - NESTED_ENTRY RhpNewArrayRare, _TEXT, NoHandler - - // Recover array size by subtracting the alloc_ptr from a2. - ld.d $t3, $a3, OFFSETOF__Thread__m_alloc_context__alloc_ptr - sub.d $a2, $a2, $t3 - - PUSH_COOP_PINVOKE_FRAME $a3 - - // Preserve data we will need later into the callee saved registers - ori $s0, $a0, 0 // Preserve MethodTable - - ori $a2, $a1, 0 // numElements - ori $a1, $zero, 0 // uFlags - - // void* RhpGcAlloc(MethodTable *pEEType, uint32_t uFlags, uintptr_t numElements, void * pTransitionFrame) - bl C_FUNC(RhpGcAlloc) - - // Set the new objects MethodTable pointer and length on success. - beq $a0, $zero, ArrayOutOfMemory - - .cfi_remember_state - POP_COOP_PINVOKE_FRAME - EPILOG_RETURN - - .cfi_restore_state -ArrayOutOfMemory: - // This is the OOM failure path. We are going to tail-call to a managed helper that will throw - // an out of memory exception that the caller of this allocator understands. - - ori $a0, $s0, 0 // MethodTable Pointer - ori $a1, $zero, 0 // Indicate that we should throw OOM. - - POP_COOP_PINVOKE_FRAME - b C_FUNC(RhExceptionHandling_FailedAllocation) - - NESTED_END RhpNewArrayRare, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/AsmOffsetsCpu.h b/src/coreclr/nativeaot/Runtime/loongarch64/AsmOffsetsCpu.h deleted file mode 100644 index 0724e0f86fcff..0000000000000 --- a/src/coreclr/nativeaot/Runtime/loongarch64/AsmOffsetsCpu.h +++ /dev/null @@ -1,67 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. 
-// The .NET Foundation licenses this file to you under the MIT license. - -// -// This file is used by AsmOffsets.h to validate that our -// assembly-code offsets always match their C++ counterparts. -// -// NOTE: the offsets MUST be in hex notation WITHOUT the 0x prefix - -PLAT_ASM_SIZEOF(280, ExInfo) -PLAT_ASM_OFFSET(0, ExInfo, m_pPrevExInfo) -PLAT_ASM_OFFSET(8, ExInfo, m_pExContext) -PLAT_ASM_OFFSET(10, ExInfo, m_exception) -PLAT_ASM_OFFSET(18, ExInfo, m_kind) -PLAT_ASM_OFFSET(19, ExInfo, m_passNumber) -PLAT_ASM_OFFSET(1c, ExInfo, m_idxCurClause) -PLAT_ASM_OFFSET(20, ExInfo, m_frameIter) -PLAT_ASM_OFFSET(278, ExInfo, m_notifyDebuggerSP) - -PLAT_ASM_OFFSET(8, PInvokeTransitionFrame, m_FramePointer) -PLAT_ASM_OFFSET(0, PInvokeTransitionFrame, m_RIP) -PLAT_ASM_OFFSET(10, PInvokeTransitionFrame, m_pThread) -PLAT_ASM_OFFSET(18, PInvokeTransitionFrame, m_Flags) -PLAT_ASM_OFFSET(20, PInvokeTransitionFrame, m_PreservedRegs) - -PLAT_ASM_SIZEOF(258, StackFrameIterator) -PLAT_ASM_OFFSET(10, StackFrameIterator, m_FramePointer) -PLAT_ASM_OFFSET(18, StackFrameIterator, m_ControlPC) -PLAT_ASM_OFFSET(20, StackFrameIterator, m_RegDisplay) -PLAT_ASM_OFFSET(248, StackFrameIterator, m_OriginalControlPC) -PLAT_ASM_OFFSET(250, StackFrameIterator, m_pPreviousTransitionFrame) - -PLAT_ASM_SIZEOF(C0, PAL_LIMITED_CONTEXT) - -PLAT_ASM_OFFSET(0, PAL_LIMITED_CONTEXT, FP) -PLAT_ASM_OFFSET(8, PAL_LIMITED_CONTEXT, RA) -PLAT_ASM_OFFSET(10, PAL_LIMITED_CONTEXT, R4) -PLAT_ASM_OFFSET(18, PAL_LIMITED_CONTEXT, R5) -PLAT_ASM_OFFSET(20, PAL_LIMITED_CONTEXT, R23) -PLAT_ASM_OFFSET(28, PAL_LIMITED_CONTEXT, R24) -PLAT_ASM_OFFSET(30, PAL_LIMITED_CONTEXT, R25) -PLAT_ASM_OFFSET(38, PAL_LIMITED_CONTEXT, R26) -PLAT_ASM_OFFSET(40, PAL_LIMITED_CONTEXT, R27) -PLAT_ASM_OFFSET(48, PAL_LIMITED_CONTEXT, R28) -PLAT_ASM_OFFSET(50, PAL_LIMITED_CONTEXT, R29) -PLAT_ASM_OFFSET(58, PAL_LIMITED_CONTEXT, R30) -PLAT_ASM_OFFSET(60, PAL_LIMITED_CONTEXT, R31) -PLAT_ASM_OFFSET(68, PAL_LIMITED_CONTEXT, R2) -PLAT_ASM_OFFSET(70, 
PAL_LIMITED_CONTEXT, SP) -PLAT_ASM_OFFSET(78, PAL_LIMITED_CONTEXT, IP) - -PLAT_ASM_SIZEOF(148, REGDISPLAY) -PLAT_ASM_OFFSET(18, REGDISPLAY, SP) - -PLAT_ASM_OFFSET(b8, REGDISPLAY, pR23) -PLAT_ASM_OFFSET(c0, REGDISPLAY, pR24) -PLAT_ASM_OFFSET(c8, REGDISPLAY, pR25) -PLAT_ASM_OFFSET(d0, REGDISPLAY, pR26) -PLAT_ASM_OFFSET(d8, REGDISPLAY, pR27) -PLAT_ASM_OFFSET(e0, REGDISPLAY, pR28) -PLAT_ASM_OFFSET(e8, REGDISPLAY, pR29) -PLAT_ASM_OFFSET(f0, REGDISPLAY, pR30) -PLAT_ASM_OFFSET(f8, REGDISPLAY, pR31) -PLAT_ASM_OFFSET(10, REGDISPLAY, pR2) -PLAT_ASM_OFFSET(b0, REGDISPLAY, pFP) -PLAT_ASM_OFFSET(8, REGDISPLAY, pRA) -PLAT_ASM_OFFSET(108, REGDISPLAY, F) diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/ExceptionHandling.S b/src/coreclr/nativeaot/Runtime/loongarch64/ExceptionHandling.S deleted file mode 100644 index 9541face3fba6..0000000000000 --- a/src/coreclr/nativeaot/Runtime/loongarch64/ExceptionHandling.S +++ /dev/null @@ -1,831 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -#include -#include "AsmOffsets.inc" - -#define STACKSIZEOF_ExInfo ((SIZEOF__ExInfo + 15)&(~15)) - -#define HARDWARE_EXCEPTION 1 -#define SOFTWARE_EXCEPTION 0 - -.global RhpTrapThreads - -// ----------------------------------------------------------------------------- -// Macro used to create frame of exception throwing helpers (RhpThrowEx, RhpThrowHwEx) - .macro ALLOC_THROW_FRAME exceptionType - - ori $a3, $sp, 0 - - // Setup a PAL_LIMITED_CONTEXT on the stack { - .if \exceptionType == HARDWARE_EXCEPTION - addi.d $sp, $sp, -0x50 - .cfi_adjust_cfa_offset 0x50 - st.d $a3, $sp, 0 // a3 is the SP and a1 is the IP of the fault site - st.d $a1, $sp, 8 - .else - PROLOG_STACK_ALLOC 0x50 - .cfi_adjust_cfa_offset 0x50 - st.d $a3, $sp, 0 // a3 is the SP and ra is the IP of the fault site - st.d $ra, $sp, 8 - .endif - fst.d $f24, $sp, 0x10 - fst.d $f25, $sp, 0x18 - fst.d $f26, $sp, 0x20 - fst.d $f27, $sp, 0x28 - fst.d $f28, $sp, 0x30 - fst.d $f29, $sp, 0x38 - fst.d $f30, $sp, 0x40 - fst.d $f31, $sp, 0x48 - PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -0x70 - st.d $zero, $sp, 0x10 // locations reserved for return value, not used for exception handling - st.d $zero, $sp, 0x18 - PROLOG_SAVE_REG_PAIR 23, 24, 0x20 - PROLOG_SAVE_REG_PAIR 25, 26, 0x30 - PROLOG_SAVE_REG_PAIR 27, 28, 0x40 - PROLOG_SAVE_REG_PAIR 29, 30, 0x50 - PROLOG_SAVE_REG_PAIR 31, 2, 0x60 - // } end PAL_LIMITED_CONTEXT - - PROLOG_STACK_ALLOC STACKSIZEOF_ExInfo - .endm - -// ----------------------------------------------------------------------------- -// Macro used to create frame of funclet calling helpers (RhpCallXXXXFunclet) -// extraStackSize - extra stack space that the user of the macro can use to -// store additional registers - .macro ALLOC_CALL_FUNCLET_FRAME extraStackSize - - // Using below prolog instead of PROLOG_SAVE_REG_PAIR fp,ra, #-60! - // is intentional. Above statement would also emit instruction to save - // sp in fp. 
If sp is saved in fp in prolog then it is not expected that fp can change in the body - // of method. However, this method needs to be able to change fp before calling funclet. - // This is required to access locals in funclet. - PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED 22, 1, -0x60 - PROLOG_SAVE_REG_PAIR 23, 24, 0x10 - PROLOG_SAVE_REG_PAIR 25, 26, 0x20 - PROLOG_SAVE_REG_PAIR 27, 28, 0x30 - PROLOG_SAVE_REG_PAIR 29, 30, 0x40 - PROLOG_SAVE_REG_PAIR 31, 2, 0x50 - ori $fp, $sp, 0 - .cfi_def_cfa_register 22 //fp - - .if \extraStackSize != 0 - PROLOG_STACK_ALLOC \extraStackSize - .endif - .endm - -// ----------------------------------------------------------------------------- -// Macro used to free frame of funclet calling helpers (RhpCallXXXXFunclet) -// extraStackSize - extra stack space that the user of the macro can use to -// store additional registers. -// It needs to match the value passed to the corresponding -// ALLOC_CALL_FUNCLET_FRAME. - .macro FREE_CALL_FUNCLET_FRAME extraStackSize - - .if \extraStackSize != 0 - EPILOG_STACK_FREE \extraStackSize - .endif - - EPILOG_RESTORE_REG_PAIR 23, 24, 0x10 - EPILOG_RESTORE_REG_PAIR 25, 26, 0x20 - EPILOG_RESTORE_REG_PAIR 27, 28, 0x30 - EPILOG_RESTORE_REG_PAIR 29, 30, 0x40 - EPILOG_RESTORE_REG_PAIR 31, 2, 0x50 - EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 0x60 - .endm - - -// ----------------------------------------------------------------------------- -// Macro used to restore preserved general purpose and FP registers from REGDISPLAY -// regdisplayReg - register pointing to the REGDISPLAY structure - .macro RESTORE_PRESERVED_REGISTERS regdisplayReg - - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR23 - ld.d $s0, $t3, 0 - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR24 - ld.d $s1, $t3, 0 - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR25 - ld.d $s2, $t3, 0 - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR26 - ld.d $s3, $t3, 0 - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR27 - ld.d $s4, $t3, 0 - ld.d $t3, 
\regdisplayReg, OFFSETOF__REGDISPLAY__pR28 - ld.d $s5, $t3, 0 - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR29 - ld.d $s6, $t3, 0 - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR30 - ld.d $s7, $t3, 0 - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR31 - ld.d $s8, $t3, 0 - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pFP - ld.d $fp, $t3, 0 - // - // load FP preserved regs - // - addi.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__F - fld.d $f24, $t3, 0x00 - fld.d $f25, $t3, 0x08 - fld.d $f26, $t3, 0x10 - fld.d $f27, $t3, 0x18 - fld.d $f28, $t3, 0x20 - fld.d $f29, $t3, 0x28 - fld.d $f30, $t3, 0x30 - fld.d $f31, $t3, 0x38 - .endm - -// ----------------------------------------------------------------------------- -// Macro used to save preserved general purpose and FP registers to REGDISPLAY -// regdisplayReg - register pointing to the REGDISPLAY structure - .macro SAVE_PRESERVED_REGISTERS regdisplayReg - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR23 - st.d $s0, $t3, 0 - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR24 - st.d $s1, $t3, 0 - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR25 - st.d $s2, $t3, 0 - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR26 - st.d $s3, $t3, 0 - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR27 - st.d $s4, $t3, 0 - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR28 - st.d $s5, $t3, 0 - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR29 - st.d $s6, $t3, 0 - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR30 - st.d $s7, $t3, 0 - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR31 - st.d $s8, $t3, 0 - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pFP - st.d $fp, $t3, 0 - // - // store vfp preserved regs - // - addi.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__F - fst.d $f24, $t3, 0x00 - fst.d $f25, $t3, 0x08 - fst.d $f26, $t3, 0x10 - fst.d $f27, $t3, 0x18 - fst.d $f28, $t3, 0x20 - fst.d $f29, $t3, 0x28 - fst.d $f30, $t3, 0x30 - fst.d $f31, $t3, 0x38 - .endm - - -// 
----------------------------------------------------------------------------- -// Macro used to thrash preserved general purpose registers in REGDISPLAY -// to make sure nobody uses them -// regdisplayReg - register pointing to the REGDISPLAY structure - .macro TRASH_PRESERVED_REGISTERS_STORAGE regdisplayReg - -#if _DEBUG - lu12i.w $a3, 0xbaadd - ori $a3, $a3, 0xeed - lu32i.d $a3, 0xddeed - lu52i.d $a3, $a3, 0xbaa - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR23 - st.d $a3, $t3, 0 - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR24 - st.d $a3, $t3, 0 - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR25 - st.d $a3, $t3, 0 - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR26 - st.d $a3, $t3, 0 - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR27 - st.d $a3, $t3, 0 - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR28 - st.d $a3, $t3, 0 - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR29 - st.d $a3, $t3, 0 - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR30 - st.d $a3, $t3, 0 - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pR31 - st.d $a3, $t3, 0 - ld.d $t3, \regdisplayReg, OFFSETOF__REGDISPLAY__pFP - st.d $a3, $t3, 0 -#endif // _DEBUG - .endm - -.macro GetThreadX2 - st.d $a0, $sp, -16 - st.d $a1, $sp, -8 - addi.d $sp, $sp, -16 - bl C_FUNC(RhpGetThread) - ori $a2, $a0, 0 - ld.d $a0, $sp, 0 - ld.d $a1, $sp, 8 - addi.d $sp, $sp, 16 -.endm - -#define rsp_offsetof_ExInfo 0 -#define rsp_offsetof_Context STACKSIZEOF_ExInfo - -// -// RhpThrowHwEx -// -// INPUT: a0[31:0]: exception code of fault -// a1: faulting IP -// -// OUTPUT: -// - NESTED_ENTRY RhpThrowHwEx, _TEXT, NoHandler - - ALLOC_THROW_FRAME HARDWARE_EXCEPTION - - GetThreadX2 - - addi.d $a1, $sp, rsp_offsetof_ExInfo // a1 <- ExInfo* - st.d $zero, $a1, OFFSETOF__ExInfo__m_exception // pExInfo->m_exception = null - ori $a3, $zero, 1 - st.b $a3, $a1, OFFSETOF__ExInfo__m_passNumber // pExInfo->m_passNumber = 1 - lu12i.w $a3, -1 - ori $a3, $a3, 0xfff - st.w $a3, $a1, 
OFFSETOF__ExInfo__m_idxCurClause // pExInfo->m_idxCurClause = MaxTryRegionIdx - ori $a3, $zero, 2 - st.b $a3, $a1, OFFSETOF__ExInfo__m_kind // pExInfo->m_kind = ExKind.HardwareFault - - // link the ExInfo into the thread's ExInfo chain - ld.d $a3, $a2, OFFSETOF__Thread__m_pExInfoStackHead - st.d $a3, $a1, OFFSETOF__ExInfo__m_pPrevExInfo // pExInfo->m_pPrevExInfo = m_pExInfoStackHead - st.d $a1, $a2, OFFSETOF__Thread__m_pExInfoStackHead // m_pExInfoStackHead = pExInfo - - // set the exception context field on the ExInfo - addi.d $a2, $sp, rsp_offsetof_Context // a2 <- PAL_LIMITED_CONTEXT* - st.d $a2, $a1, OFFSETOF__ExInfo__m_pExContext // pExInfo->m_pExContext = pContext - - // a0[31:0]: exception code - // a1: ExInfo* - bl C_FUNC(RhThrowHwEx) - - ALTERNATE_ENTRY RhpThrowHwEx2 - - // no return - EMIT_BREAKPOINT - - NESTED_END RhpThrowHwEx, _TEXT - -// -// RhpThrowEx -// -// INPUT: a0: exception object -// -// OUTPUT: -// - - NESTED_ENTRY RhpThrowEx, _TEXT, NoHandler - - ALLOC_THROW_FRAME SOFTWARE_EXCEPTION - - GetThreadX2 - - // There is runtime C# code that can tail call to RhpThrowEx using a binder intrinsic. So the return - // address could have been hijacked when we were in that C# code and we must remove the hijack and - // reflect the correct return address in our exception context record. The other throw helpers don't - // need this because they cannot be tail-called from C#. - - // NOTE: we cannot use INLINE_THREAD_UNHIJACK because it will write into the stack at the location - // where the tail-calling thread had saved RA, which may not match where we have saved RA. 
- - ld.d $a1, $a2, OFFSETOF__Thread__m_pvHijackedReturnAddress - beq $a1, $zero, NotHijacked - - ld.d $a3, $a2, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation - - // a0: exception object - // a1: hijacked return address - // a2: pThread - // a3: hijacked return address location - - addi.d $t3, $sp, (STACKSIZEOF_ExInfo + SIZEOF__PAL_LIMITED_CONTEXT) // re-compute SP at callsite - bltu $a3, $t3, TailCallWasHijacked // if (m_ppvHijackedReturnAddressLocation < SP at callsite) - - // normal case where a valid return address location is hijacked - st.d $a1, $a3, 0 - b ClearThreadState - -TailCallWasHijacked: - - // Abnormal case where the return address location is now invalid because we ended up here via a tail - // call. In this case, our hijacked return address should be the correct caller of this method. - - // stick the previous return address in RA as well as in the right spots in our PAL_LIMITED_CONTEXT. - ori $ra, $a1, 0 - st.d $ra, $sp, (rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__RA) - st.d $ra, $sp, (rsp_offsetof_Context + OFFSETOF__PAL_LIMITED_CONTEXT__IP) - -ClearThreadState: - - // clear the Thread's hijack state - st.d $zero, $a2, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation - st.d $zero, $a2, OFFSETOF__Thread__m_pvHijackedReturnAddress - -NotHijacked: - - addi.d $a1, $sp, rsp_offsetof_ExInfo // a1 <- ExInfo* - st.d $zero, $a1, OFFSETOF__ExInfo__m_exception // pExInfo->m_exception = null - ori $a3, $zero, 1 - st.b $a3, $a1, OFFSETOF__ExInfo__m_passNumber // pExInfo->m_passNumber = 1 - lu12i.w $a3, -1 - ori $a3, $a3, 0xfff - st.w $a3, $a1, OFFSETOF__ExInfo__m_idxCurClause // pExInfo->m_idxCurClause = MaxTryRegionIdx - ori $a3, $zero, 1 - st.b $a3, $a1, OFFSETOF__ExInfo__m_kind // pExInfo->m_kind = ExKind.Throw - - // link the ExInfo into the thread's ExInfo chain - ld.d $a3, $a2, OFFSETOF__Thread__m_pExInfoStackHead - st.d $a3, $a1, OFFSETOF__ExInfo__m_pPrevExInfo // pExInfo->m_pPrevExInfo = m_pExInfoStackHead - st.d $a1, $a2, 
OFFSETOF__Thread__m_pExInfoStackHead // m_pExInfoStackHead = pExInfo - - // set the exception context field on the ExInfo - addi.d $a2, $sp, rsp_offsetof_Context // a2 <- PAL_LIMITED_CONTEXT* - st.d $a2, $a1, OFFSETOF__ExInfo__m_pExContext // pExInfo->m_pExContext = pContext - - // a0: exception object - // a1: ExInfo* - bl C_FUNC(RhThrowEx) - - ALTERNATE_ENTRY RhpThrowEx2 - - // no return - EMIT_BREAKPOINT - NESTED_END RhpThrowEx, _TEXT - - -// -// void FASTCALL RhpRethrow() -// -// SUMMARY: Similar to RhpThrowEx, except that it passes along the currently active ExInfo -// -// INPUT: -// -// OUTPUT: -// - - NESTED_ENTRY RhpRethrow, _TEXT, NoHandler - - ALLOC_THROW_FRAME SOFTWARE_EXCEPTION - - GetThreadX2 - - addi.d $a1, $sp, rsp_offsetof_ExInfo // a1 <- ExInfo* - st.d $zero, $a1, OFFSETOF__ExInfo__m_exception // pExInfo->m_exception = null - st.b $zero, $a1, OFFSETOF__ExInfo__m_kind // init to a deterministic value (ExKind.None) - ori $a3, $zero, 1 - st.b $a3, $a1, OFFSETOF__ExInfo__m_passNumber // pExInfo->m_passNumber = 1 - lu12i.w $a3, -1 - ori $a3, $a3, 0xfff - st.w $a3, $a1, OFFSETOF__ExInfo__m_idxCurClause // pExInfo->m_idxCurClause = MaxTryRegionIdx - - // link the ExInfo into the thread's ExInfo chain - ld.d $a3, $a2, OFFSETOF__Thread__m_pExInfoStackHead - ori $a0, $a3, 0 // a0 <- current ExInfo - st.d $a3, $a1, OFFSETOF__ExInfo__m_pPrevExInfo // pExInfo->m_pPrevExInfo = m_pExInfoStackHead - st.d $a1, $a2, OFFSETOF__Thread__m_pExInfoStackHead // m_pExInfoStackHead = pExInfo - - // set the exception context field on the ExInfo - addi.d $a2, $sp, rsp_offsetof_Context // a2 <- PAL_LIMITED_CONTEXT* - st.d $a2, $a1, OFFSETOF__ExInfo__m_pExContext // pExInfo->m_pExContext = pContext - - // a0 contains the currently active ExInfo - // a1 contains the address of the new ExInfo - bl C_FUNC(RhRethrow) - - ALTERNATE_ENTRY RhpRethrow2 - - // no return - EMIT_BREAKPOINT - NESTED_END RhpRethrow, _TEXT - -// -// void* FASTCALL RhpCallCatchFunclet(OBJECTREF exceptionObj, 
void* pHandlerIP, REGDISPLAY* pRegDisplay, -// ExInfo* pExInfo) -// -// INPUT: a0: exception object -// a1: handler funclet address -// a2: REGDISPLAY* -// a3: ExInfo* -// -// OUTPUT: -// - - NESTED_ENTRY RhpCallCatchFunclet, _TEXT, NoHandler - - ALLOC_CALL_FUNCLET_FRAME 0x70 // Size needs to be equal with ExceptionHandling.asm variant of this function - fst.d $f24, $sp, 0x00 - fst.d $f25, $sp, 0x08 - fst.d $f26, $sp, 0x10 - fst.d $f27, $sp, 0x18 - fst.d $f28, $sp, 0x20 - fst.d $f29, $sp, 0x28 - fst.d $f30, $sp, 0x30 - fst.d $f31, $sp, 0x38 - st.d $a0, $sp, 0x40 // a0 to a3 are stored to restore them anytime - st.d $a1, $sp, 0x48 - st.d $a2, $sp, 0x50 - st.d $a3, $sp, 0x58 - st.d $zero, $sp, 0x60 // $zero makes space for the local "is_not_handling_thread_abort"; last qword will store the thread obj - -#define rsp_offset_is_not_handling_thread_abort 0x60 -#define rsp_offset_a0 0x40 -#define rsp_offset_a1 0x48 -#define rsp_offset_a2 0x50 -#define rsp_offset_a3 0x58 -#define rsp_CatchFunclet_offset_thread 0x68 - - // - // clear the DoNotTriggerGc flag, trashes a4-a6 - // - - bl C_FUNC(RhpGetThread) - st.d $a0, $sp, rsp_CatchFunclet_offset_thread - ori $a5, $a0, 0 - ld.d $a0, $sp, 0x40 - ld.d $a1, $sp, 0x48 - ld.d $a2, $sp, 0x50 - ld.d $a3, $sp, 0x58 - - ld.d $a4, $a5, OFFSETOF__Thread__m_threadAbortException - sub.d $a4, $a4, $a0 - st.d $a4, $sp, rsp_offset_is_not_handling_thread_abort // Non-zero if the exception is not ThreadAbortException - - addi.d $t3, $a5, OFFSETOF__Thread__m_ThreadStateFlags - -ClearRetry_Catch: //TODO-LOONGARCH64: change ld/st to atomic instructions. 
- ld.w $a4, $t3, 0 - bstrins.w $a4, $zero, 4, 4 // $a4 = $a4 & ~TSF_DoNotTriggerGc - st.w $a4, $t3, 0 - ori $a6, $zero, 0 - beq $a6, $zero, ClearSuccess_Catch - b ClearRetry_Catch -ClearSuccess_Catch: - - // - // set preserved regs to the values expected by the funclet - // - RESTORE_PRESERVED_REGISTERS $a2 - // - // trash the values at the old homes to make sure nobody uses them - // - TRASH_PRESERVED_REGISTERS_STORAGE $a2 - - // - // call the funclet - // - // a0 still contains the exception object - jirl $ra, $a1, 0 - - ALTERNATE_ENTRY RhpCallCatchFunclet2 - - // $a0 contains resume IP - - ld.d $a2, $sp, rsp_offset_a2 // a2 <- REGDISPLAY* - -#ifdef _DEBUG - // Call into some C++ code to validate the pop of the ExInfo. We only do this in debug because we - // have to spill all the preserved registers and then refill them after the call. - - st.d $a0, $sp, rsp_offset_a0 - - SAVE_PRESERVED_REGISTERS $a2 - - ld.d $a0, $sp, rsp_CatchFunclet_offset_thread // a0 <- Thread* - ld.d $a1, $sp, rsp_offset_a3 // a1 <- current ExInfo* - ld.d $a2, $a2, OFFSETOF__REGDISPLAY__SP // a2 <- resume SP value - bl C_FUNC(RhpValidateExInfoPop) - - ld.d $a2, $sp, rsp_offset_a2 // a2 <- REGDISPLAY* - - RESTORE_PRESERVED_REGISTERS $a2 - - ld.d $a0, $sp, rsp_offset_a0 // reload resume IP -#endif - - ld.d $a1, $sp, rsp_CatchFunclet_offset_thread - - // We must unhijack the thread at this point because the section of stack where the hijack is applied - // may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack. 
- INLINE_THREAD_UNHIJACK $a1, $a3, $t3 // Thread in a1, trashes a3 and t3 - - ld.d $a3, $sp, rsp_offset_a3 // a3 <- current ExInfo* - ld.d $a2, $a2, OFFSETOF__REGDISPLAY__SP // a2 <- resume SP value - -PopExInfoLoop: - ld.d $a3, $a3, OFFSETOF__ExInfo__m_pPrevExInfo // a3 <- next ExInfo - beq $a3, $zero, DonePopping // if (pExInfo == null) { we're done } - blt $a3, $a2, PopExInfoLoop // if (pExInfo < resume SP} { keep going } - -DonePopping: - st.d $a3, $a1, OFFSETOF__Thread__m_pExInfoStackHead // store the new head on the Thread - - PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, $a3 - - andi $t7, $a3, TrapThreadsFlags_AbortInProgress_Bit - beq $t7, $zero, NoAbort - - ld.d $a3, $sp, rsp_offset_is_not_handling_thread_abort - bne $a3, $zero, NoAbort - - // It was the ThreadAbortException, so rethrow it - // reset SP - ori $a1, $a0, 0 // a1 <- continuation address as exception PC - addi.w $a0, $zero, STATUS_REDHAWK_THREAD_ABORT - ori $sp, $a2, 0 - b C_FUNC(RhpThrowHwEx) - -NoAbort: - // reset SP and jump to continuation address - ori $sp, $a2, 0 - jirl $r0, $a0, 0 - -#undef rsp_offset_is_not_handling_thread_abort -#undef rsp_offset_a0 -#undef rsp_offset_a1 -#undef rsp_offset_a2 -#undef rsp_offset_a3 -#undef rsp_CatchFunclet_offset_thread - - NESTED_END RhpCallCatchFunclet, _Text - -// -// void FASTCALL RhpCallFinallyFunclet(void* pHandlerIP, REGDISPLAY* pRegDisplay) -// -// INPUT: a0: handler funclet address -// a1: REGDISPLAY* -// -// OUTPUT: -// - - NESTED_ENTRY RhpCallFinallyFunclet, _TEXT, NoHandler - - ALLOC_CALL_FUNCLET_FRAME 0x60 // Size needs to be equal with ExceptionHandling.asm variant of this function - fst.d $f24, $sp, 0x00 - fst.d $f25, $sp, 0x08 - fst.d $f26, $sp, 0x10 - fst.d $f27, $sp, 0x18 - fst.d $f28, $sp, 0x20 - fst.d $f29, $sp, 0x28 - fst.d $f30, $sp, 0x30 - fst.d $f31, $sp, 0x38 - st.d $a0, $sp, 0x40 // a0 and a1 are saved so we have them later - st.d $a1, $sp, 0x48 - -#define rsp_offset_a1 0x48 -#define rsp_FinallyFunclet_offset_thread 0x50 - - 
- // We want to suppress hijacking between invocations of subsequent finallys. We do this because we - // cannot tolerate a GC after one finally has run (and possibly side-effected the GC state of the - // method) and then been popped off the stack, leaving behind no trace of its effect. - // - // So we clear the state before and set it after invocation of the handler. - // - - // - // clear the DoNotTriggerGc flag, trashes a2-a4 - // - - bl C_FUNC(RhpGetThread) - st.d $a0, $sp, rsp_FinallyFunclet_offset_thread - ori $a2, $a0, 0 - ld.d $a0, $sp, 0x40 - ld.d $a1, $sp, 0x48 - - addi.d $t3, $a2, OFFSETOF__Thread__m_ThreadStateFlags - -ClearRetry: //TODO-LOONGARCH64: change ld/st to atomic instructions. - ld.w $a4, $t3, 0 - bstrins.w $a4, $zero, 4, 4 // $a4 = $a4 & ~TSF_DoNotTriggerGc - st.w $a4, $t3, 0 - ori $a3, $zero, 0 - beq $a3, $zero, ClearSuccess - b ClearRetry -ClearSuccess: - - // - // set preserved regs to the values expected by the funclet - // - RESTORE_PRESERVED_REGISTERS $a1 - // - // trash the values at the old homes to make sure nobody uses them - // - TRASH_PRESERVED_REGISTERS_STORAGE $a1 - - // - // call the funclet - // - jirl $ra, $a0, 0 - - ALTERNATE_ENTRY RhpCallFinallyFunclet2 - - ld.d $a1, $sp, rsp_offset_a1 // reload REGDISPLAY pointer - - // - // save new values of preserved regs into REGDISPLAY - // - SAVE_PRESERVED_REGISTERS $a1 - - // - // set the DoNotTriggerGc flag, trashes a1-a3 - // - - ld.d $a2, $sp, rsp_FinallyFunclet_offset_thread - - addi.d $t3, $a2, OFFSETOF__Thread__m_ThreadStateFlags -SetRetry: //TODO-LOONGARCH64: change ld/st to atomic instructions. 
- ld.w $a1, $t3, 0 - ori $a1, $a1, TSF_DoNotTriggerGc - st.w $a1, $t3, 0 - ori $a3, $zero, 0 - beq $a3, $zero, SetSuccess - b SetRetry -SetSuccess: - - fld.d $f24, $sp, 0x00 - fld.d $f25, $sp, 0x08 - fld.d $f26, $sp, 0x10 - fld.d $f27, $sp, 0x18 - fld.d $f28, $sp, 0x20 - fld.d $f29, $sp, 0x28 - fld.d $f30, $sp, 0x30 - fld.d $f31, $sp, 0x38 - - FREE_CALL_FUNCLET_FRAME 0x60 - EPILOG_RETURN - -#undef rsp_offset_a1 -#undef rsp_FinallyFunclet_offset_thread - - NESTED_END RhpCallFinallyFunclet, _Text - - -// -// void* FASTCALL RhpCallFilterFunclet(OBJECTREF exceptionObj, void* pFilterIP, REGDISPLAY* pRegDisplay) -// -// INPUT: a0: exception object -// a1: filter funclet address -// a2: REGDISPLAY* -// -// OUTPUT: -// - - NESTED_ENTRY RhpCallFilterFunclet, _TEXT, NoHandler - ALLOC_CALL_FUNCLET_FRAME 0x40 - fst.d $f24, $sp, 0x00 - fst.d $f25, $sp, 0x08 - fst.d $f26, $sp, 0x10 - fst.d $f27, $sp, 0x18 - fst.d $f28, $sp, 0x20 - fst.d $f29, $sp, 0x28 - fst.d $f30, $sp, 0x30 - fst.d $f31, $sp, 0x38 - - ld.d $t3, $a2, OFFSETOF__REGDISPLAY__pFP - ld.d $fp, $t3, 0 - - // - // call the funclet - // - // $a0 still contains the exception object - jirl $ra, $a1, 0 - - ALTERNATE_ENTRY RhpCallFilterFunclet2 - - fld.d $f24, $sp, 0x00 - fld.d $f25, $sp, 0x08 - fld.d $f26, $sp, 0x10 - fld.d $f27, $sp, 0x18 - fld.d $f28, $sp, 0x20 - fld.d $f29, $sp, 0x28 - fld.d $f30, $sp, 0x30 - fld.d $f31, $sp, 0x38 - - FREE_CALL_FUNCLET_FRAME 0x40 - EPILOG_RETURN - - NESTED_END RhpCallFilterFunclet, Text - -#ifdef FEATURE_OBJCMARSHAL - -// -// void* FASTCALL RhpCallPropagateExceptionCallback(void* pCallbackContext, void* pCallback, REGDISPLAY* pRegDisplay, -// ExInfo* pExInfo, PInvokeTransitionFrame* pPreviousTransitionFrame) -// -// INPUT: a0: callback context -// a1: callback -// a2: REGDISPLAY* -// a3: ExInfo* -// a4: pPreviousTransitionFrame -// -// OUTPUT: -// - - NESTED_ENTRY RhpCallPropagateExceptionCallback, _TEXT, NoHandler - -#define rsp_offset_a0 0x10 -#define rsp_offset_a1 0x18 -#define 
rsp_offset_a2 0x20 -#define rsp_offset_a3 0x28 -#define rsp_offset_a4 0x30 -#define rsp_CallPropagationCallback_offset_thread 0x38 - - // Using the NO_FP macro so that the debugger unwinds using SP. - // This makes backtraces work even after using RESTORE_PRESERVED_REGISTERS. - PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED 22, 1, -0x40 - ori $fp, $sp, 0 - st.d $a0, $sp, rsp_offset_a0 // a0 to a3 are stored to restore them anytime - st.d $a1, $sp, rsp_offset_a1 // a0 to a3 are stored to restore them anytime - st.d $a2, $sp, rsp_offset_a2 - st.d $a3, $sp, rsp_offset_a3 - st.d $a4, $sp, rsp_offset_a4 - st.d $zero, $sp, rsp_CallPropagationCallback_offset_thread // $zero makes space to store the thread obj - - // - // clear the DoNotTriggerGc flag, trashes a4-a6 - // - - bl C_FUNC(RhpGetThread) - st.d $a0, $sp, rsp_CallPropagationCallback_offset_thread - ori $a5, $a0, 0 - ld.d $a0, $sp, rsp_offset_a0 - ld.d $a1, $sp, rsp_offset_a1 - ld.d $a2, $sp, rsp_offset_a2 - ld.d $a3, $sp, rsp_offset_a3 - - addi.d $t3, $a5, OFFSETOF__Thread__m_ThreadStateFlags - -ClearRetry_Propagate: //TODO-LOONGARCH64: change ld/st to atomic instructions. - ld.w $a4, $t3, 0 - bstrins.w $a4, $zero, 4, 4 // $a4 = $a4 & ~TSF_DoNotTriggerGc - st.w $a4, $t3, 0 - ori $a6, $zero, 0 - beq $a6, $zero, ClearSuccess_Propagate - b ClearRetry_Propagate -ClearSuccess_Propagate: - - // - // set preserved regs to the values expected by the funclet - // - RESTORE_PRESERVED_REGISTERS $a2 - // - // trash the values at the old homes to make sure nobody uses them - // - TRASH_PRESERVED_REGISTERS_STORAGE $a2 - -#ifdef _DEBUG - // Call into some C++ code to validate the pop of the ExInfo. We only do this in debug because we - // have to spill all the preserved registers and then refill them after the call. 
- - SAVE_PRESERVED_REGISTERS $a2 - - ld.d $a0, $sp, rsp_CallPropagationCallback_offset_thread // a0 <- Thread* - ld.d $a1, $sp, rsp_offset_a3 // a1 <- current ExInfo* - ld.d $a2, $a2, OFFSETOF__REGDISPLAY__SP // a2 <- resume SP value - bl C_FUNC(RhpValidateExInfoPop) - - ld.d $a2, $sp, rsp_offset_a2 // a2 <- REGDISPLAY* - - RESTORE_PRESERVED_REGISTERS $a2 -#endif - - ld.d $a1, $sp, rsp_CallPropagationCallback_offset_thread - - // We must unhijack the thread at this point because the section of stack where the hijack is applied - // may go dead. If it does, then the next time we try to unhijack the thread, it will corrupt the stack. - INLINE_THREAD_UNHIJACK $a1, $a3, $t3 // Thread in a1, trashes a3 and t3 - - ld.d $a3, $sp, rsp_offset_a3 // a3 <- current ExInfo* - ld.d $a2, $a2, OFFSETOF__REGDISPLAY__SP // a2 <- resume SP value - -Propagate_PopExInfoLoop: - ld.d $a3, $a3, OFFSETOF__ExInfo__m_pPrevExInfo // a3 <- next ExInfo - beq $a3, $zero, Propagate_DonePopping // if (pExInfo == null) { we're done } - blt $a3, $a2, Propagate_PopExInfoLoop // if (pExInfo < resume SP} { keep going } - -Propagate_DonePopping: - st.d $a3, $a1, OFFSETOF__Thread__m_pExInfoStackHead // store the new head on the Thread - - // restore preemptive mode - ld.d $a4, $sp, rsp_offset_a4 // pPreviousTransitionFrame - st.d $a4, $a1, OFFSETOF__Thread__m_pTransitionFrame - - // reset SP and RA and jump to continuation address - ld.d $a0, $sp, rsp_offset_a0 // callback context - ld.d $a1, $sp, rsp_offset_a1 // callback - ld.d $a2, $sp, rsp_offset_a2 // REGDISPLAY* - ld.d $a3, $a2, OFFSETOF__REGDISPLAY__pRA // a3 <- &resume RA value - ld.d $ra, $a3 - ld.d $a3, $a2, OFFSETOF__REGDISPLAY__SP // a3 <- resume SP value - ori $sp, $a3, 0 - jirl $r0, $a1, 0 - -#undef rsp_offset_a0 -#undef rsp_offset_a1 -#undef rsp_offset_a2 -#undef rsp_offset_a3 -#undef rsp_CallPropagationCallback_offset_thread - - NESTED_END RhpCallPropagateExceptionCallback, _Text - -#endif // FEATURE_OBJCMARSHAL diff --git 
a/src/coreclr/nativeaot/Runtime/loongarch64/GcProbe.S b/src/coreclr/nativeaot/Runtime/loongarch64/GcProbe.S deleted file mode 100644 index 34329145ade01..0000000000000 --- a/src/coreclr/nativeaot/Runtime/loongarch64/GcProbe.S +++ /dev/null @@ -1,198 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include -#include "AsmOffsets.inc" - -PROBE_FRAME_SIZE = 0xD0 // 4 * 8 for fixed part of PInvokeTransitionFrame (fp, ra, m_pThread, m_Flags) + - // 10 * 8 for callee saved registers + - // 1 * 8 for caller SP + - // 2 * 8 for int returns + - // 1 * 8 for alignment padding + - // 4 * 16 for FP returns - -// See PUSH_COOP_PINVOKE_FRAME, this macro is very similar, but also saves return registers -// and accepts the register bitmask -// Call this macro first in the method (no further prolog instructions can be added after this). -// -// threadReg : register containing the Thread* (this will be preserved). -// trashReg : register that can be trashed by this macro -// BITMASK : value to initialize m_dwFlags field with (register or #constant) -.macro PUSH_PROBE_FRAME threadReg, trashReg, BITMASK - - // Define the method prolog, allocating enough stack space for the PInvokeTransitionFrame and saving - // incoming register values into it. 
- - // First create PInvokeTransitionFrame - PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -PROBE_FRAME_SIZE // Push down stack pointer and store FP and RA - - // Slot at $sp+0x10 is reserved for Thread * - // Slot at $sp+0x18 is reserved for bitmask of saved registers - - // Save callee saved registers - PROLOG_SAVE_REG_PAIR 23, 24, 0x20 - PROLOG_SAVE_REG_PAIR 25, 26, 0x30 - PROLOG_SAVE_REG_PAIR 27, 28, 0x40 - PROLOG_SAVE_REG_PAIR 29, 30, 0x50 - PROLOG_SAVE_REG_PAIR 31, 2, 0x60 - - // Slot at $sp+0x70 is reserved for caller sp - - // Save the integer return registers - st.d $a0, $sp, 0x78 - st.d $a1, $sp, 0x80 - - // Slot at $sp+0x88 is alignment padding - - // Save the FP return registers - fst.d $f0, $sp, 0x90 - fst.d $f1, $sp, 0x98 - fst.d $f2, $sp, 0xA0 - fst.d $f3, $sp, 0xA8 - - // Perform the rest of the PInvokeTransitionFrame initialization. - st.d \threadReg, $sp, OFFSETOF__PInvokeTransitionFrame__m_pThread // Thread * (unused by stackwalker) - st.d \BITMASK, $sp, OFFSETOF__PInvokeTransitionFrame__m_pThread + 8 // save the register bitmask passed in by caller - - addi.d \trashReg, $sp, PROBE_FRAME_SIZE // recover value of caller's SP - st.d \trashReg, $sp, 0x70 // save caller's SP - - // link the frame into the Thread - ori \trashReg, $sp, 0 - st.d \trashReg, \threadReg, OFFSETOF__Thread__m_pDeferredTransitionFrame -.endm - -// -// Remove the frame from a previous call to PUSH_PROBE_FRAME from the top of the stack and restore preserved -// registers and return value to their values from before the probe was called (while also updating any -// object refs or byrefs). 
-// -.macro POP_PROBE_FRAME - - // Restore the integer return registers - ld.d $a0, $sp, 0x78 - ld.d $a1, $sp, 0x80 - - // Restore the FP return registers - fld.d $f0, $sp, 0x90 - fld.d $f1, $sp, 0x98 - fld.d $f2, $sp, 0xA0 - fld.d $f3, $sp, 0xA8 - - // Restore callee saved registers - EPILOG_RESTORE_REG_PAIR 23, 24, 0x20 - EPILOG_RESTORE_REG_PAIR 25, 26, 0x30 - EPILOG_RESTORE_REG_PAIR 27, 28, 0x40 - EPILOG_RESTORE_REG_PAIR 29, 30, 0x50 - EPILOG_RESTORE_REG_PAIR 31, 2, 0x60 - - EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, PROBE_FRAME_SIZE -.endm - -// -// The prolog for all GC suspension hijacks (normal and stress). Fixes up the hijacked return address, and -// clears the hijack state. -// -// Register state on entry: -// All registers correct for return to the original return address. -// -// Register state on exit: -// a2: thread pointer -// t3: transition frame flags for the return registers a0 and a1 -// -.macro FixupHijackedCallstack - - // a2 <- GetThread() -#ifdef FEATURE_EMULATED_TLS - GETTHREAD_ETLS_2 -#else - INLINE_GETTHREAD $a2 -#endif - - // - // Fix the stack by restoring the original return address - // - // Load m_pvHijackedReturnAddress and m_uHijackedReturnValueFlags - ld.d $ra, $a2, OFFSETOF__Thread__m_pvHijackedReturnAddress - ld.d $t3, $a2, OFFSETOF__Thread__m_pvHijackedReturnAddress + 8 - - // - // Clear hijack state - // - // Clear m_ppvHijackedReturnAddressLocation and m_pvHijackedReturnAddress - st.d $zero, $a2, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation - st.d $zero, $a2, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation + 8 - // Clear m_uHijackedReturnValueFlags - st.d $zero, $a2, OFFSETOF__Thread__m_uHijackedReturnValueFlags - -.endm - -// -// GC Probe Hijack target -// -NESTED_ENTRY RhpGcProbeHijack, _TEXT, NoHandler - FixupHijackedCallstack - - PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, $a3 - andi $t8, $a3, TrapThreadsFlags_TrapThreads_Bit - bne $t8, $zero, WaitForGC - jirl $r0, $ra, 0 - -WaitForGC: - lu12i.w $t7, 
((DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R4 + PTFF_SAVE_R5) >> 12) & 0xfffff - ori $t7, $t7, (DEFAULT_FRAME_SAVE_FLAGS + PTFF_SAVE_R4 + PTFF_SAVE_R5) & 0xfff - or $t3, $t3, $t7 - b C_FUNC(RhpWaitForGC) -NESTED_END RhpGcProbeHijack - -.global C_FUNC(RhpThrowHwEx) - -NESTED_ENTRY RhpWaitForGC, _TEXT, NoHandler - PUSH_PROBE_FRAME $a2, $a3, $t3 - - ld.d $a0, $a2, OFFSETOF__Thread__m_pDeferredTransitionFrame - bl C_FUNC(RhpWaitForGC2) - - ld.d $a2,$sp, OFFSETOF__PInvokeTransitionFrame__m_Flags - andi $t8, $a2, PTFF_THREAD_ABORT_BIT - bne $t8, $zero, ThrowThreadAbort - - .cfi_remember_state - POP_PROBE_FRAME - EPILOG_RETURN - - .cfi_restore_state -ThrowThreadAbort: - POP_PROBE_FRAME - addi.w $a0, $zero, STATUS_REDHAWK_THREAD_ABORT - ori $a1, $ra, 0 // return address as exception PC - b RhpThrowHwEx -NESTED_END RhpWaitForGC - -.global C_FUNC(RhpGcPoll2) - -LEAF_ENTRY RhpGcPoll - PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, $a0 - bne $a0, $zero, C_FUNC(RhpGcPollRare) - jirl $r0, $ra, 0 -LEAF_END RhpGcPoll - -NESTED_ENTRY RhpGcPollRare, _TEXT, NoHandler - PUSH_COOP_PINVOKE_FRAME $a0 - bl RhpGcPoll2 - POP_COOP_PINVOKE_FRAME - jirl $r0, $ra, 0 -NESTED_END RhpGcPollRare - - -#ifdef FEATURE_GC_STRESS - -// -// GC Stress Hijack targets -// -LEAF_ENTRY RhpGcStressHijack, _TEXT - // NYI - EMIT_BREAKPOINT -LEAF_END RhpGcStressHijack, _TEXT - -#endif // FEATURE_GC_STRESS diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/InteropThunksHelpers.S b/src/coreclr/nativeaot/Runtime/loongarch64/InteropThunksHelpers.S deleted file mode 100644 index c096d77796397..0000000000000 --- a/src/coreclr/nativeaot/Runtime/loongarch64/InteropThunksHelpers.S +++ /dev/null @@ -1,52 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -#include - -//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; DATA SECTIONS ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -POINTER_SIZE = 0x08 - -//;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; Interop Thunks Helpers ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - - // - // RhCommonStub - // - // INPUT: tp: thunk's data block - // - // TRASHES: t0, t1, tp - // - LEAF_ENTRY RhCommonStub, _TEXT - // There are arbitrary callers passing arguments with arbitrary signatures. - // Custom calling convention: - // tp pointer to the current thunk's data block (data contains 2 pointer values: context + target pointers) - -#ifdef FEATURE_EMULATED_TLS - // This doesn't save and restore the floating point argument registers. If we encounter a - // target system that uses TLS emulation and modify these registers during this call we - // need to save and restore them, too - GETTHUNKDATA_ETLS_9 -#else - INLINE_GET_TLS_VAR $t0, C_FUNC(tls_thunkData) -#endif - - // t0 = base address of TLS data - // tp = address of context cell in thunk's data - - // store thunk address in thread static - ld.d $t1, $t7, 0 - st.d $t1, $t0, 0 - - // Now load the target address and jump to it. - ld.d $t7, $t7, POINTER_SIZE - jirl $r0, $t7, 0 - - LEAF_END RhCommonStub, _TEXT - - // - // IntPtr RhGetCommonStubAddress() - // - LEAF_ENTRY RhGetCommonStubAddress, _TEXT - PREPARE_EXTERNAL_VAR RhCommonStub, $a0 - jirl $r0, $ra, 0 - LEAF_END RhGetCommonStubAddress, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/MiscStubs.S b/src/coreclr/nativeaot/Runtime/loongarch64/MiscStubs.S deleted file mode 100644 index ea5d91a1a1c1f..0000000000000 --- a/src/coreclr/nativeaot/Runtime/loongarch64/MiscStubs.S +++ /dev/null @@ -1,5 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. 
- -#include -#include "AsmOffsets.inc" diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/PInvoke.S b/src/coreclr/nativeaot/Runtime/loongarch64/PInvoke.S deleted file mode 100644 index 0bce06069a2f3..0000000000000 --- a/src/coreclr/nativeaot/Runtime/loongarch64/PInvoke.S +++ /dev/null @@ -1,60 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - - -#include -#include "AsmOffsets.inc" - -.global RhpTrapThreads - -// Note: these must match the defs in PInvokeTransitionFrameFlags defined in rhbinder.h -PTFF_SAVE_SP = 0x00000200 - -// Bit position for the flags above, to be used with andi+beq/bne instructions -PTFF_THREAD_ABORT_BIT = 36 - -// -// RhpPInvoke -// -// IN: a0: address of pinvoke frame -// -// This helper assumes that its callsite is as good to start the stackwalk as the actual PInvoke callsite. -// The codegenerator must treat the callsite of this helper as GC triggering and generate the GC info for it. -// Also, the codegenerator must ensure that there are no live GC references in callee saved registers. 
-// - -NESTED_ENTRY RhpPInvoke, _TEXT, NoHandler - st.d $fp, $a0, OFFSETOF__PInvokeTransitionFrame__m_FramePointer - st.d $ra, $a0, OFFSETOF__PInvokeTransitionFrame__m_RIP - ori $t0, $sp, 0 - st.d $t0, $a0, OFFSETOF__PInvokeTransitionFrame__m_PreservedRegs - ori $t0, $zero, PTFF_SAVE_SP - st.d $t0, $a0, OFFSETOF__PInvokeTransitionFrame__m_Flags - - // get TLS global variable address - -#ifdef FEATURE_EMULATED_TLS - GETTHREAD_ETLS_1 -#else - INLINE_GETTHREAD $a1 -#endif - - st.d $a1, $a0, OFFSETOF__PInvokeTransitionFrame__m_pThread - st.d $a0, $a1, OFFSETOF__Thread__m_pTransitionFrame - jirl $r0, $ra, 0 -NESTED_END RhpPInvoke, _TEXT - - -LEAF_ENTRY RhpPInvokeReturn, _TEXT - ld.d $t0, $a0, OFFSETOF__PInvokeTransitionFrame__m_pThread - ori $t1, $zero, 0 - st.d $t1, $t0, OFFSETOF__Thread__m_pTransitionFrame - - PREPARE_EXTERNAL_VAR_INDIRECT_W RhpTrapThreads, $a5 - - bne $t0, $zero, 0f // TrapThreadsFlags_None = 0 - jirl $r0, $ra, 0 -0: - // passing transition frame pointer in x0 - b C_FUNC(RhpWaitForGC2) -LEAF_END RhpPInvokeReturn, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/StubDispatch.S b/src/coreclr/nativeaot/Runtime/loongarch64/StubDispatch.S deleted file mode 100644 index 138992ef1a329..0000000000000 --- a/src/coreclr/nativeaot/Runtime/loongarch64/StubDispatch.S +++ /dev/null @@ -1,117 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include -#include "AsmOffsets.inc" - -#ifdef FEATURE_CACHED_INTERFACE_DISPATCH - - .extern RhpCidResolve - .extern RhpUniversalTransition_DebugStepTailCall - - // Macro that generates code to check a single cache entry. - .macro CHECK_CACHE_ENTRY entry - // Check a single entry in the cache. - // t0 : Cache data structure. Also used for target address jump. 
- // t1 : Instance MethodTable* - // t2 : Indirection cell address, preserved - // t3 : Trashed - ld.d $t3, $t0, (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16)) - bne $t1, $t3, 0f - ld.d $t0, $t0, (OFFSETOF__InterfaceDispatchCache__m_rgEntries + (\entry * 16) + 8) - jirl $r0, $t0, 0 -0: - .endm - -// -// Macro that generates a stub consuming a cache with the given number of entries. -// - .macro DEFINE_INTERFACE_DISPATCH_STUB entries - - NESTED_ENTRY "RhpInterfaceDispatch\entries", _TEXT, NoHandler - - // t2 holds the indirection cell address. Load the cache pointer. - ld.d $t0, $t8, OFFSETOF__InterfaceDispatchCell__m_pCache - - // Load the MethodTable from the object instance in a0. - ALTERNATE_ENTRY RhpInterfaceDispatchAVLocation\entries - ld.d $t1, $a0, 0 - - .global CurrentEntry - .set CurrentEntry, 0 - - .rept \entries - CHECK_CACHE_ENTRY CurrentEntry - .set CurrentEntry, CurrentEntry + 1 - .endr - - // t2 still contains the indirection cell address. - b C_FUNC(RhpInterfaceDispatchSlow) - - NESTED_END "RhpInterfaceDispatch\entries", _TEXT - - .endm - -// -// Define all the stub routines we currently need. -// -// If you change or add any new dispatch stubs, exception handling might need to be aware because it refers to the -// *AVLocation symbols defined by the dispatch stubs to be able to unwind and blame user code if a NullRef happens -// during the interface dispatch. -// - DEFINE_INTERFACE_DISPATCH_STUB 1 - DEFINE_INTERFACE_DISPATCH_STUB 2 - DEFINE_INTERFACE_DISPATCH_STUB 4 - DEFINE_INTERFACE_DISPATCH_STUB 8 - DEFINE_INTERFACE_DISPATCH_STUB 16 - DEFINE_INTERFACE_DISPATCH_STUB 32 - DEFINE_INTERFACE_DISPATCH_STUB 64 - -// -// Initial dispatch on an interface when we don't have a cache yet. -// - LEAF_ENTRY RhpInitialInterfaceDispatch, _TEXT - ALTERNATE_ENTRY RhpInitialDynamicInterfaceDispatch - // Trigger an AV if we're dispatching on a null this. 
- // The exception handling infrastructure is aware of the fact that this is the first - // instruction of RhpInitialInterfaceDispatch and uses it to translate an AV here - // to a NullReferenceException at the callsite. - ld.d $zero, $a0, 0 - - // Just tail call to the cache miss helper. - b C_FUNC(RhpInterfaceDispatchSlow) - LEAF_END RhpInitialInterfaceDispatch, _TEXT - -// -// Stub dispatch routine for dispatch to a vtable slot -// - LEAF_ENTRY RhpVTableOffsetDispatch, _TEXT - // t2 contains the interface dispatch cell address. - // load t3 to point to the vtable offset (which is stored in the m_pCache field). - ld.d $t3, $t2, OFFSETOF__InterfaceDispatchCell__m_pCache - - // Load the MethodTable from the object instance in a0, and add it to the vtable offset - // to get the address in the vtable of what we want to dereference - ld.d $t4, $a0, 0 - add.d $t3, $t3, $t4 - - // Load the target address of the vtable into t3 - ld.d $t3, $t3, 0 - - jirl $r0, $t3, 0 - LEAF_END RhpVTableOffsetDispatch, _TEXT - -// -// Cache miss case, call the runtime to resolve the target and update the cache. -// Use universal transition helper to allow an exception to flow out of resolution. -// - LEAF_ENTRY RhpInterfaceDispatchSlow, _TEXT - // t2 contains the interface dispatch cell address. - // Calling convention of the universal thunk is: - // t7: target address for the thunk to call - // t8: parameter of the thunk's target - PREPARE_EXTERNAL_VAR RhpCidResolve, $t7 - b C_FUNC(RhpUniversalTransition_DebugStepTailCall) - LEAF_END RhpInterfaceDispatchSlow, _TEXT - -#endif // FEATURE_CACHED_INTERFACE_DISPATCH diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/UniversalTransition.S b/src/coreclr/nativeaot/Runtime/loongarch64/UniversalTransition.S deleted file mode 100644 index 79af74a1edbab..0000000000000 --- a/src/coreclr/nativeaot/Runtime/loongarch64/UniversalTransition.S +++ /dev/null @@ -1,191 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. 
-// The .NET Foundation licenses this file to you under the MIT license. - -#include - -#ifdef _DEBUG -#define TRASH_SAVED_ARGUMENT_REGISTERS -#endif - -#ifdef TRASH_SAVED_ARGUMENT_REGISTERS - .global RhpIntegerTrashValues - .global RhpFpTrashValues -#endif // TRASH_SAVED_ARGUMENT_REGISTERS - -// Padding to account for the odd number of saved integer registers -#define ALIGNMENT_PADDING_SIZE (8) - -#define COUNT_ARG_REGISTERS (9) -#define INTEGER_REGISTER_SIZE (8) -#define ARGUMENT_REGISTERS_SIZE (COUNT_ARG_REGISTERS * INTEGER_REGISTER_SIZE) - -// Largest return block is 4 doubles -#define RETURN_BLOCK_SIZE (32) - -#define COUNT_FLOAT_ARG_REGISTERS (8) -#define FLOAT_REGISTER_SIZE (16) -#define FLOAT_ARG_REGISTERS_SIZE (COUNT_FLOAT_ARG_REGISTERS * FLOAT_REGISTER_SIZE) - -#define PUSHED_RA_SIZE (8) -#define PUSHED_FP_SIZE (8) - -// -// From CallerSP to ChildSP, the stack frame is composed of the following adjacent regions: -// -// ALIGNMENT_PADDING_SIZE -// ARGUMENT_REGISTERS_SIZE -// RETURN_BLOCK_SIZE -// FLOAT_ARG_REGISTERS_SIZE -// PUSHED_RA_SIZE -// PUSHED_FP_SIZE -// - -#define DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK (PUSHED_FP_SIZE + PUSHED_RA_SIZE + FLOAT_ARG_REGISTERS_SIZE) - -#define STACK_SIZE (ALIGNMENT_PADDING_SIZE + ARGUMENT_REGISTERS_SIZE + RETURN_BLOCK_SIZE + FLOAT_ARG_REGISTERS_SIZE + PUSHED_RA_SIZE + PUSHED_FP_SIZE) - -#define FLOAT_ARG_OFFSET (PUSHED_FP_SIZE + PUSHED_RA_SIZE) -#define ARGUMENT_REGISTERS_OFFSET (FLOAT_ARG_OFFSET + FLOAT_ARG_REGISTERS_SIZE + RETURN_BLOCK_SIZE) - -// -// RhpUniversalTransition -// -// At input to this function, a0-7/tp, f0-7 and the stack may contain any number of arguments. 
-// -// In addition, there are 2 extra arguments passed in the intra-procedure-call scratch register: -// t7 will contain the managed function that is to be called by this transition function -// t8 will contain the pointer sized extra argument to the managed function -// -// When invoking the callee: -// -// a0 shall contain a pointer to the TransitionBlock -// a1 shall contain the value that was in t8 at entry to this function -// -// Frame layout is: -// -// {StackPassedArgs} ChildSP+100 CallerSP+000 -// {AlignmentPad (0x8 bytes)} ChildSP+0F8 CallerSP-008 -// {IntArgRegs (a0-a7/tp) (0x48 bytes)} ChildSP+0B0 CallerSP-050 -// {ReturnBlock (0x20 bytes)} ChildSP+090 CallerSP-070 -// -- The base address of the Return block is the TransitionBlock pointer, the floating point args are -// in the neg space of the TransitionBlock pointer. Note that the callee has knowledge of the exact -// layout of all pieces of the frame that lie at or above the pushed floating point registers. -// {FpArgRegs (f0-f7) (0x80 bytes)} ChildSP+010 CallerSP-0F0 -// {PushedRA} ChildSP+008 CallerSP-0F8 -// {PushedFP} ChildSP+000 CallerSP-100 -// -// NOTE: If the frame layout ever changes, the C++ UniversalTransitionStackFrame structure -// must be updated as well. -// -// NOTE: The callee receives a pointer to the base of the ReturnBlock, and the callee has -// knowledge of the exact layout of all pieces of the frame that lie at or above the pushed -// FpArgRegs. -// -// NOTE: The stack walker guarantees that conservative GC reporting will be applied to -// everything between the base of the ReturnBlock and the top of the StackPassedArgs. 
-// - - .text - - .macro UNIVERSAL_TRANSITION FunctionName - - NESTED_ENTRY Rhp\FunctionName, _TEXT, NoHandler - - // FP and RA registers - PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -STACK_SIZE // ;; Push down stack pointer and store FP and RA - - // Floating point registers - fst.d $f0, $sp, FLOAT_ARG_OFFSET - fst.d $f1, $sp, FLOAT_ARG_OFFSET + 0x08 - fst.d $f2, $sp, FLOAT_ARG_OFFSET + 0x10 - fst.d $f3, $sp, FLOAT_ARG_OFFSET + 0x18 - fst.d $f4, $sp, FLOAT_ARG_OFFSET + 0x20 - fst.d $f5, $sp, FLOAT_ARG_OFFSET + 0x28 - fst.d $f6, $sp, FLOAT_ARG_OFFSET + 0x30 - fst.d $f7, $sp, FLOAT_ARG_OFFSET + 0x38 - - // Space for return buffer data (0x40 bytes) - - // Save argument registers - st.d $a0, $sp, ARGUMENT_REGISTERS_OFFSET - st.d $a1, $sp, ARGUMENT_REGISTERS_OFFSET + 0x08 - st.d $a2, $sp, ARGUMENT_REGISTERS_OFFSET + 0x10 - st.d $a3, $sp, ARGUMENT_REGISTERS_OFFSET + 0x18 - st.d $a4, $sp, ARGUMENT_REGISTERS_OFFSET + 0x20 - st.d $a5, $sp, ARGUMENT_REGISTERS_OFFSET + 0x28 - st.d $a6, $sp, ARGUMENT_REGISTERS_OFFSET + 0x30 - st.d $a7, $sp, ARGUMENT_REGISTERS_OFFSET + 0x38 - st.d $tp, $sp, ARGUMENT_REGISTERS_OFFSET + 0x40 - st.d $r0, $sp, ARGUMENT_REGISTERS_OFFSET + 0x48 - -#ifdef TRASH_SAVED_ARGUMENT_REGISTERS - PREPARE_EXTERNAL_VAR RhpFpTrashValues, $a1 - - fld.d $f0, $a1, 0 - fld.d $f1, $a1, 0x08 - fld.d $f2, $a1, 0x10 - fld.d $f3, $a1, 0x18 - fld.d $f4, $a1, 0x20 - fld.d $f5, $a1, 0x28 - fld.d $f6, $a1, 0x30 - fld.d $f7, $a1, 0x38 - - PREPARE_EXTERNAL_VAR RhpIntegerTrashValues, $a1 - - ld.d $a2, $a1, 0x10 - ld.d $a3, $a1, 0x18 - ld.d $a4, $a1, 0x20 - ld.d $a5, $a1, 0x28 - ld.d $a6, $a1, 0x30 - ld.d $a7, $a1, 0x38 -#endif // TRASH_SAVED_ARGUMENT_REGISTERS - - addi.d $a0, $sp, DISTANCE_FROM_CHILDSP_TO_RETURN_BLOCK // First parameter to target function is a pointer to the return block - ori $a1, $t8, 0 // Second parameter to target function - jirl $ra, $t7, 0 - - // We cannot make the label public as that tricks DIA stackwalker into thinking - // it's the beginning of a method. 
For this reason we export an auxiliary variable - // holding the address instead. - ALTERNATE_ENTRY ReturnFrom\FunctionName - - // Move the result (the target address) to t3 so it doesn't get overridden when we restore the - // argument registers. - ori $t3, $a0, 0 - - // Restore floating point registers - fld.d $f0, $sp, FLOAT_ARG_OFFSET - fld.d $f1, $sp, FLOAT_ARG_OFFSET + 0x08 - fld.d $f2, $sp, FLOAT_ARG_OFFSET + 0x10 - fld.d $f3, $sp, FLOAT_ARG_OFFSET + 0x18 - fld.d $f4, $sp, FLOAT_ARG_OFFSET + 0x20 - fld.d $f5, $sp, FLOAT_ARG_OFFSET + 0x28 - fld.d $f6, $sp, FLOAT_ARG_OFFSET + 0x30 - fld.d $f7, $sp, FLOAT_ARG_OFFSET + 0x38 - - // Restore the argument registers - ld.d $a0, $sp, ARGUMENT_REGISTERS_OFFSET - ld.d $a1, $sp, ARGUMENT_REGISTERS_OFFSET + 0x08 - ld.d $a2, $sp, ARGUMENT_REGISTERS_OFFSET + 0x10 - ld.d $a3, $sp, ARGUMENT_REGISTERS_OFFSET + 0x18 - ld.d $a4, $sp, ARGUMENT_REGISTERS_OFFSET + 0x20 - ld.d $a5, $sp, ARGUMENT_REGISTERS_OFFSET + 0x28 - ld.d $a6, $sp, ARGUMENT_REGISTERS_OFFSET + 0x30 - ld.d $a7, $sp, ARGUMENT_REGISTERS_OFFSET + 0x38 - ld.d $tp, $sp, ARGUMENT_REGISTERS_OFFSET + 0x40 - - // Restore FP and RA registers, and free the allocated stack block - EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, STACK_SIZE - - // Tailcall to the target address. - jirl $r0, $t3, 0 - - NESTED_END Rhp\FunctionName, _TEXT - - .endm - - // To enable proper step-in behavior in the debugger, we need to have two instances - // of the thunk. For the first one, the debugger steps into the call in the function, - // for the other, it steps over it. 
- UNIVERSAL_TRANSITION UniversalTransition - UNIVERSAL_TRANSITION UniversalTransition_DebugStepTailCall diff --git a/src/coreclr/nativeaot/Runtime/loongarch64/WriteBarriers.S b/src/coreclr/nativeaot/Runtime/loongarch64/WriteBarriers.S deleted file mode 100644 index ebdb83f93a93b..0000000000000 --- a/src/coreclr/nativeaot/Runtime/loongarch64/WriteBarriers.S +++ /dev/null @@ -1,354 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include - -// Macro used to copy contents of newly updated GC heap locations to a shadow copy of the heap. This is used -// during garbage collections to verify that object references where never written to the heap without using a -// write barrier. Note that we are potentially racing to update the shadow heap while other threads are writing -// new references to the real heap. Since this can not be solved perfectly without critical sections around the -// entire update process, we instead update the shadow location and then re-check the real location (as two -// ordered operations) and if there is a disparity we will re-write the shadow location with a special value -// (INVALIDGCVALUE) which disables the check for that location. Since the shadow heap is only validated at GC -// time and these write barrier operations are atomic wrt to GCs this is sufficient to guarantee that the -// shadow heap contains only valid copies of real heap values or INVALIDGCVALUE. -#ifdef WRITE_BARRIER_CHECK - - .global $g_GCShadow - .global $g_GCShadowEnd - - // On entry: - // $destReg: location to be updated - // $refReg: objectref to be stored - // - // On exit: - // t3,t4: trashed - // other registers are preserved - // - .macro UPDATE_GC_SHADOW destReg, refReg - - // If g_GCShadow is 0, don't perform the check. 
- PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadow, $t3 - beq $t3, $zero, 1f - ori $t4, $t3, 0 - - // Save destReg since we're about to modify it (and we need the original value both within the macro and - // once we exit the macro). - ori $t4, \destReg, 0 - - // Transform destReg into the equivalent address in the shadow heap. - PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, $t3 - sub.d \destReg, \destReg, $t3 - bltu $t4, $zero, 0f - - PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadow, $t3 - add.d \destReg, \destReg, $t3 - - PREPARE_EXTERNAL_VAR_INDIRECT g_GCShadowEnd, $t3 - bgeu \destReg, $t3, 0f - - // Update the shadow heap. - st.d \refReg, \destReg, 0 - - // The following read must be strongly ordered wrt to the write we have just performed in order to - // prevent race conditions. - dbar 0 - - // Now check that the real heap location still contains the value we just wrote into the shadow heap. - ori $t3, $t4, 0 - ld.d $t3, $t3, 0 - beq $t3, \refReg, 0f - - // Someone went and updated the real heap. We need to invalidate INVALIDGCVALUE the shadow location since we can not - // guarantee whose shadow update won. - lu12i.w $t3, ((INVALIDGCVALUE >> 12) & 0xFFFFF) - ori $t3, $t3, (INVALIDGCVALUE & 0xFFF) - st.d $t3, \destReg, 0 - -0: - // Restore original destReg value - ori \destReg, $t4, 0 - -1: - .endm - -#else // WRITE_BARRIER_CHECK - - .macro UPDATE_GC_SHADOW destReg, refReg - .endm - -#endif // WRITE_BARRIER_CHECK - -// There are several different helpers used depending on which register holds the object reference. Since all -// the helpers have identical structure we use a macro to define this structure. Two arguments are taken, the -// name of the register that points to the location to be updated and the name of the register that holds the -// object reference (this should be in upper case as it is used in the definition of the name of the helper). - -// Define a sub-macro first that expands to the majority of the barrier implementation. 
This is used below for -// some interlocked helpers that need an inline barrier. - - // On entry: - // destReg: location to be updated (cannot be t3,t4) - // refReg: objectref to be stored (cannot be t3,t4) - // - // On exit: - // t3,t4: trashed - // - .macro INSERT_UNCHECKED_WRITE_BARRIER_CORE destReg, refReg - - // Update the shadow copy of the heap with the same value just written to the same heap. (A no-op unless - // we are in a debug build and write barrier checking has been enabled). - UPDATE_GC_SHADOW \destReg, \refReg - -#ifdef FEATURE_USE_SOFTWARE_WRITE_WATCH_FOR_GC_HEAP - // Update the write watch table if necessary - PREPARE_EXTERNAL_VAR_INDIRECT g_write_watch_table, $t3 - - beq $t3, $zero, 2f - srli.d $t5, \destReg, 12 - add.d $t3, $t3, $t5 // SoftwareWriteWatch::AddressToTableByteIndexShift - ld.b $t4, $t3, 0 - bne $t4, $zero, 2f - ori $t4, $zero, 0xFF - st.b $t4, $t3, 0 -#endif - -2: - // We can skip the card table write if the reference is to - // an object not on the epehemeral segment. - PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_low, $t3 - bltu \refReg, $t3, 0f - - PREPARE_EXTERNAL_VAR_INDIRECT g_ephemeral_high, $t3 - bgeu \refReg, $t3, 0f - - // Set this objects card, if it has not already been set. - PREPARE_EXTERNAL_VAR_INDIRECT g_card_table, $t3 - srli.d $t5, \destReg, 11 - add.d $t4, $t3, $t5 - - // Check that this card has not already been written. Avoiding useless writes is a big win on - // multi-proc systems since it avoids cache thrashing. 
- ld.b $t3, $t4, 0 - ori $t5, $zero, 0xFF - beq $t3, $t5, 0f - - ori $t3, $zero, 0xFF - st.b $t3, $t4, 0 - -#ifdef FEATURE_MANUALLY_MANAGED_CARD_BUNDLES - // Check if we need to update the card bundle table - PREPARE_EXTERNAL_VAR_INDIRECT g_card_bundle_table, $t3 - srli.d $t5, \destReg, 21 - add.d $t4, $t3, $t5 - ld.b $t3, $t4, 0 - ori $t5, $zero, 0xFF - beq $t3, $t5, 0f - - ori $t3, $zero, 0xFF - st.b $t3, $t4, 0 -#endif - -0: - // Exit label - .endm - - // On entry: - // destReg: location to be updated - // refReg: objectref to be stored - // - // On exit: - // t3, t4: trashed - // - .macro INSERT_CHECKED_WRITE_BARRIER_CORE destReg, refReg - - // The "check" of this checked write barrier - is destReg - // within the heap? if no, early out. - PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, $t3 - sltu $t4, \destReg, $t3 - - PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, $t3 - - // If \destReg >= g_lowest_address, compare \destReg to g_highest_address. - // Otherwise, set the C flag (0x2) to take the next branch. - bnez $t4, 1f - bgeu \destReg, $t3, 0f - -1: - INSERT_UNCHECKED_WRITE_BARRIER_CORE \destReg, \refReg - -0: - // Exit label - .endm - -// void JIT_ByRefWriteBarrier -// On entry: -// t8 : the source address (points to object reference to write) -// t6 : the destination address (object reference written here) -// -// On exit: -// t8 : incremented by 8 -// t6 : incremented by 8 -// t7 : trashed -// t3, t4 : trashed -// -// NOTE: Keep in sync with RBM_CALLEE_TRASH_WRITEBARRIER_BYREF and RBM_CALLEE_GCTRASH_WRITEBARRIER_BYREF -// if you add more trashed registers. 
-// -// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: -// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpByRefAssignRefAVLocation1 -// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA contains the return address -LEAF_ENTRY RhpByRefAssignRef, _TEXT - - ALTERNATE_ENTRY RhpByRefAssignRefAVLocation1 - ld.d $t7, $t8, 0 - addi.d $t8, $t8, 8 - b C_FUNC(RhpCheckedAssignRef) - -LEAF_END RhpByRefAssignRef, _TEXT - -// JIT_CheckedWriteBarrier(Object** dst, Object* src) -// -// Write barrier for writes to objects that may reside -// on the managed heap. -// -// On entry: -// t6 : the destination address (LHS of the assignment). -// May not be a heap location (hence the checked). -// t7 : the object reference (RHS of the assignment). -// -// On exit: -// t3, t4 : trashed -// t6 : incremented by 8 - LEAF_ENTRY RhpCheckedAssignRef, _TEXT - - // is destReg within the heap? - PREPARE_EXTERNAL_VAR_INDIRECT g_lowest_address, $t3 - sltu $t4, $t6, $t3 - - PREPARE_EXTERNAL_VAR_INDIRECT g_highest_address, $t3 - sltu $t0, $t3, $t6 - or $t4, $t0, $t4 - beq $t4, $zero, C_FUNC(RhpAssignRefLoongArch64) - -NotInHeap: - ALTERNATE_ENTRY RhpCheckedAssignRefAVLocation - st.d $t7, $t6, 0 - addi.d $t6, $t6, 8 - jirl $r0, $ra, 0 - -LEAF_END RhpCheckedAssignRef, _TEXT - -// JIT_WriteBarrier(Object** dst, Object* src) -// -// Write barrier for writes to objects that are known to -// reside on the managed heap. -// -// On entry: -// t6 : the destination address (LHS of the assignment). -// t7 : the object reference (RHS of the assignment). 
-// -// On exit: -// t3, t4 : trashed -// t6 : incremented by 8 -LEAF_ENTRY RhpAssignRefLoongArch64, _TEXT - - ALTERNATE_ENTRY RhpAssignRefAVLocation - st.d $t7, $t6, 0 - - INSERT_UNCHECKED_WRITE_BARRIER_CORE $t6, $t7 - - addi.d $t6, $t6, 8 - jirl $r0, $ra, 0 - -LEAF_END RhpAssignRefLoongArch64, _TEXT - -// Same as RhpAssignRefLoongArch64, but with standard ABI. -LEAF_ENTRY RhpAssignRef, _TEXT - ori $t6, $a0, 0 ; t6 = dst - ori $t7, $a1, 0 ; t7 = val - b C_FUNC(RhpAssignRefLoongArch64) -LEAF_END RhpAssignRef, _TEXT - - -// Interlocked operation helpers where the location is an objectref, thus requiring a GC write barrier upon -// successful updates. - -// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: -// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen at RhpCheckedLockCmpXchgAVLocation -// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA contains the return address - -// RhpCheckedLockCmpXchg(Object** dest, Object* value, Object* comparand) -// -// Interlocked compare exchange on objectref. -// -// On entry: -// a0: pointer to objectref -// a1: exchange value -// a2: comparand -// -// On exit: -// a0: original value of objectref -// t1, t3, t6, t4: trashed -// - LEAF_ENTRY RhpCheckedLockCmpXchg - - ori $t1, $a2, 0 - ld.d $t0, $a0, 0 - beq $t0, $t1, 12 - ori $t1, $t0, 0 - b 8 - st.d $a1, $a0, 0 - - bne $a2, $t1, CmpXchgNoUpdate - -DoCardsCmpXchg: - // We have successfully updated the value of the objectref so now we need a GC write barrier. - // The following barrier code takes the destination in $a0 and the value in $a1 so the arguments are - // already correctly set up. - - INSERT_CHECKED_WRITE_BARRIER_CORE $a0, $a1 - -CmpXchgNoUpdate: - // t1 still contains the original value. 
- ori $a0, $t1, 0 - - jirl $r0, $ra, 0 - - LEAF_END RhpCheckedLockCmpXchg, _TEXT - -// WARNING: Code in EHHelpers.cpp makes assumptions about write barrier code, in particular: -// - Function "InWriteBarrierHelper" assumes an AV due to passed in null pointer will happen within at RhpCheckedXchgAVLocation -// - Function "UnwindSimpleHelperToCaller" assumes no registers were pushed and RA contains the return address - -// RhpCheckedXchg(Object** destination, Object* value) -// -// Interlocked exchange on objectref. -// -// On entry: -// a0: pointer to objectref -// a1: exchange value -// -// On exit: -// a0: original value of objectref -// t1: trashed -// t3, t6, t4: trashed -// - LEAF_ENTRY RhpCheckedXchg, _TEXT - - ld.d $t1, $a0, 0 - st.d $a1, $a0, 0 - -DoCardsXchg: - // We have successfully updated the value of the objectref so now we need a GC write barrier. - // The following barrier code takes the destination in $a0 and the value in $a1 so the arguments are - // already correctly set up. - - INSERT_CHECKED_WRITE_BARRIER_CORE $a0, $a1 - - // $t1 still contains the original value. 
- ori $a0, $t1, 0 - - jirl $r0, $ra, 0 - - LEAF_END RhpCheckedXchg, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc index 68ba993209e42..80f633327c830 100644 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc +++ b/src/coreclr/nativeaot/Runtime/unix/unixasmmacros.inc @@ -42,6 +42,4 @@ #include "unixasmmacrosarm64.inc" #elif defined(HOST_X86) #include "unixasmmacrosx86.inc" -#elif defined(HOST_LOONGARCH64) -#include "unixasmmacrosloongarch64.inc" #endif diff --git a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc b/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc deleted file mode 100644 index e016e5a1c0bc8..0000000000000 --- a/src/coreclr/nativeaot/Runtime/unix/unixasmmacrosloongarch64.inc +++ /dev/null @@ -1,328 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -#include "AsmOffsets.inc" - -.macro NESTED_ENTRY Name, Section, Handler - LEAF_ENTRY \Name, \Section - .ifnc \Handler, NoHandler - .cfi_personality 0x1b, C_FUNC(\Handler) // 0x1b == DW_EH_PE_pcrel | DW_EH_PE_sdata4 - .endif -.endm - -.macro NESTED_END Name, Section - LEAF_END \Name, \Section -.endm - -.macro PATCH_LABEL Name - .global C_FUNC(\Name) -C_FUNC(\Name): -.endm - -.macro ALTERNATE_ENTRY Name - .global C_FUNC(\Name) - .hidden C_FUNC(\Name) -C_FUNC(\Name): -.endm - -.macro LEAF_ENTRY Name, Section - .global C_FUNC(\Name) - .hidden C_FUNC(\Name) - .type \Name, %function -C_FUNC(\Name): - .cfi_startproc -.endm - -.macro LEAF_END Name, Section - .size \Name, .-\Name - .cfi_endproc -.endm - -.macro PREPARE_EXTERNAL_VAR Name, HelperReg - la.local \HelperReg, \Name -.endm - -.macro PREPARE_EXTERNAL_VAR_INDIRECT Name, HelperReg - la.local \HelperReg, \Name - ld.d \HelperReg, \HelperReg, 0 -.endm - -.macro PREPARE_EXTERNAL_VAR_INDIRECT_W Name, HelperReg - la.local \HelperReg, \Name - ld.w 
\HelperReg, \HelperReg, 0 -.endm - - -.macro PROLOG_STACK_ALLOC Size - addi.d $sp, $sp, -\Size -.endm - -.macro EPILOG_STACK_FREE Size - addi.d $sp, $sp, \Size - .cfi_adjust_cfa_offset -\Size -.endm - -.macro EPILOG_STACK_RESTORE - ori $sp, $fp, 0 - .cfi_restore 3 -.endm - -.macro PROLOG_SAVE_REG reg, ofs - st.d $r\reg, $sp, \ofs - .cfi_rel_offset \reg, \ofs -.endm - -.macro PROLOG_SAVE_REG_PAIR reg1, reg2, ofs - st.d $r\reg1, $sp, \ofs - st.d $r\reg2, $sp, \ofs + 8 - .cfi_rel_offset \reg1, \ofs - .cfi_rel_offset \reg2, \ofs + 8 - .ifc \reg1, $fp - ori $fp, $sp, 0 - .cfi_def_cfa_register 22 - .endif -.endm - -.macro PROLOG_SAVE_REG_PAIR_INDEXED reg1, reg2, ofs - st.d $r\reg1, $sp, \ofs - st.d $r\reg2, $sp, \ofs + 8 - addi.d $sp, $sp, \ofs - .cfi_adjust_cfa_offset -\ofs - .cfi_rel_offset \reg1, 0 - .cfi_rel_offset \reg2, 8 - .ifc \reg1, $fp - ori $fp, $sp, 0 - .cfi_def_cfa_register $fp - .endif -.endm - -.macro PROLOG_SAVE_REG_PAIR_NO_FP_INDEXED reg1, reg2, ofs - st.d $r\reg1, $sp, \ofs - st.d $r\reg2, $sp, \ofs + 8 - addi.d $sp, $sp, \ofs - .cfi_adjust_cfa_offset -\ofs - .cfi_rel_offset \reg1, 0 - .cfi_rel_offset \reg2, 8 -.endm - - -.macro EPILOG_RESTORE_REG reg, ofs - ld.d $r\reg, $sp, \ofs - .cfi_restore \reg -.endm - -.macro EPILOG_RESTORE_REG_PAIR reg1, reg2, ofs - ld.d $r\reg1, $sp, \ofs - ld.d $r\reg2, $sp, \ofs + 8 - .cfi_restore \reg1 - .cfi_restore \reg2 -.endm - -.macro EPILOG_RESTORE_REG_PAIR_INDEXED reg1, reg2, ofs - ld.d $r\reg1, $sp, 0 - ld.d $r\reg2, $sp, 8 - addi.d $sp, $sp, \ofs - .cfi_restore \reg1 - .cfi_restore \reg2 - .cfi_adjust_cfa_offset -\ofs -.endm - -.macro EPILOG_RETURN - jirl $r0, $ra, 0 -.endm - -.macro EMIT_BREAKPOINT - break 0 -.endm - -.macro EPILOG_BRANCH_REG reg - - jirl $r0, \reg, 0 - -.endm - -// Loads the address of a thread-local variable into the target register, -// which cannot be a0. Preserves all other registers. 
-.macro INLINE_GET_TLS_VAR target, var - .ifc \target, $a0 - .error "target cannot be a0" - .endif - - st.d $a0, $sp, -0x10 - st.d $ra, $sp, -0x8 - addi.d $sp, $sp, -16 - - // This sequence of instructions is recognized and potentially patched - // by the linker (GD->IE/LE relaxation). - //la.local $a0, \var // - //ld.d \target, $a0, 0 // - //.tlsdesccall \var //TODO-LOONGARCH64 - la.tls.ie $a0, \var - //jirl $ra, \target, 0 - // End of the sequence - - ori \target, $tp, 0 - add.d \target, \target, $a0 - - ld.d $a0, $sp, 0 - ld.d $ra, $sp, 8 - addi.d $sp, $sp, 16 -.endm - -// Inlined version of RhpGetThread. Target cannot be a0. -.macro INLINE_GETTHREAD target - INLINE_GET_TLS_VAR \target, C_FUNC(tls_CurrentThread) -.endm - -// Do not use these ETLS macros in functions that already create a stack frame. -// Creating two stack frames in one function can confuse the unwinder/debugger - -.macro GETTHREAD_ETLS_1 - PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -32 // ;; Push down stack pointer and store FP and RA - st.d $a0, $sp, 0x10 - - bl C_FUNC(RhpGetThread) - ori $a1, $a0, 0 - - ld.d $a0, $sp, 0x10 - EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 32 -.endm - -.macro GETTHREAD_ETLS_2 - PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -32 // ;; Push down stack pointer and store FP and RA - st.d $a0, $sp, 0x10 - st.d $a1, $sp, 0x18 - - bl C_FUNC(RhpGetThread) - ori $a2, $a0, 0 - - ld.d $a0, $sp, 0x10 - ld.d $a1, $sp, 0x18 - EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 32 -.endm - -.macro GETTHREAD_ETLS_3 - PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -48 // ;; Push down stack pointer and store FP and RA - st.d $a0, $sp, 0x10 - st.d $a1, $sp, 0x18 - st.d $a2, $sp, 0x20 - - bl C_FUNC(RhpGetThread) - ori $a3, $a0, 0 - - ld.d $a0, $sp, 0x10 - ld.d $a1, $sp, 0x18 - ld.d $a2, $sp, 0x20 - EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 48 -.endm - -.macro GETTHUNKDATA_ETLS_9 - PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -96 // ;; Push down stack pointer and store FP and RA - st.d $a0, $sp, 0x10 - st.d $a1, $sp, 0x18 - st.d $a2, $sp, 
0x20 - st.d $a3, $sp, 0x28 - st.d $a4, $sp, 0x30 - st.d $a5, $sp, 0x38 - st.d $a6, $sp, 0x40 - st.d $a7, $sp, 0x48 - st.d $t6, $sp, 0x50 - st.d $t7, $sp, 0x58 - - bl RhpGetThunkData - ori $t0, $a0, 0 - - ld.d $a0, $sp, 0x10 - ld.d $a1, $sp, 0x18 - ld.d $a2, $sp, 0x20 - ld.d $a3, $sp, 0x28 - ld.d $a4, $sp, 0x30 - ld.d $a5, $sp, 0x38 - ld.d $a6, $sp, 0x40 - ld.d $a7, $sp, 0x48 - ld.d $t6, $sp, 0x50 - ld.d $t7, $sp, 0x58 - EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 96 -.endm - -.macro InterlockedOperationBarrier - dbar 0 -.endm - -.macro INLINE_THREAD_UNHIJACK threadReg, trashReg1, trashReg2 - // - // Thread::Unhijack() - // - ld.d \trashReg1, \threadReg, OFFSETOF__Thread__m_pvHijackedReturnAddress - beq \trashReg1, $zero, 0f - - ld.d \trashReg2, \threadReg, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation - st.d \trashReg1, \trashReg2, 0 - st.d $zero, \threadReg, OFFSETOF__Thread__m_ppvHijackedReturnAddressLocation - st.d $zero, \threadReg, OFFSETOF__Thread__m_pvHijackedReturnAddress -0: -.endm - -// Note: these must match the defs in PInvokeTransitionFrameFlags -PTFF_SAVE_SP = 0x00000200 -PTFF_SAVE_R4 = 0x00001000 -PTFF_SAVE_R5 = 0x00002000 -PTFF_SAVE_ALL_PRESERVED = 0x000001FF // NOTE: r23-r31 - -DEFAULT_FRAME_SAVE_FLAGS = PTFF_SAVE_ALL_PRESERVED + PTFF_SAVE_SP - -.macro PUSH_COOP_PINVOKE_FRAME trashReg - - PROLOG_SAVE_REG_PAIR_INDEXED 22, 1, -0x80 // Push down stack pointer and store FP and RA - - // 0x10 bytes reserved for Thread* and flags - - // Save callee saved registers - PROLOG_SAVE_REG_PAIR 23, 24, 0x20 - PROLOG_SAVE_REG_PAIR 25, 26, 0x30 - PROLOG_SAVE_REG_PAIR 27, 28, 0x40 - PROLOG_SAVE_REG_PAIR 29, 30, 0x50 - PROLOG_SAVE_REG_PAIR 31, 2, 0x60 - - // Save the value of SP before stack allocation to the last slot in the frame (slot #15) - addi.d \trashReg, $sp, 0x80 - st.d \trashReg, $sp, 0x70 - - // Record the bitmask of saved registers in the frame (slot #3) - ori \trashReg, $zero, DEFAULT_FRAME_SAVE_FLAGS - st.d \trashReg, $sp, 0x18 - - ori \trashReg, 
$sp, 0 -.endm - -// Pop the frame and restore register state preserved by PUSH_COOP_PINVOKE_FRAME -.macro POP_COOP_PINVOKE_FRAME - - // $s0,$s1 - EPILOG_RESTORE_REG_PAIR 23, 24, 0x20 - EPILOG_RESTORE_REG_PAIR 25, 26, 0x30 - EPILOG_RESTORE_REG_PAIR 27, 28, 0x40 - EPILOG_RESTORE_REG_PAIR 29, 30, 0x50 - EPILOG_RESTORE_REG_PAIR 31, 2, 0x60 - EPILOG_RESTORE_REG_PAIR_INDEXED 22, 1, 0x80 -.endm - -// Bit position for the flags above, to be used with andi+beq/bne instructions -PTFF_THREAD_ABORT_BIT = 36 - -// -// CONSTANTS -- INTEGER -// -#define TSF_Attached 0x01 -#define TSF_SuppressGcStress 0x08 -#define TSF_DoNotTriggerGc 0x10 -#define TSF_SuppressGcStress__OR__TSF_DoNotTriggerGC 0x18 - -// Bit position for the flags above, to be used with andi+beq/bne instructions -TrapThreadsFlags_AbortInProgress_Bit = 0 -TrapThreadsFlags_TrapThreads_Bit = 1 - -// These must match the TrapThreadsFlags enum -#define TrapThreadsFlags_None 0 -#define TrapThreadsFlags_AbortInProgress 1 -#define TrapThreadsFlags_TrapThreads 2 diff --git a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_LoongArch64/LoongArch64Emitter.cs b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_LoongArch64/LoongArch64Emitter.cs index e3ab2aac040a1..ada020f7ca93b 100644 --- a/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_LoongArch64/LoongArch64Emitter.cs +++ b/src/coreclr/tools/Common/Compiler/DependencyAnalysis/Target_LoongArch64/LoongArch64Emitter.cs @@ -108,9 +108,9 @@ public void EmitJMP(ISymbolNode symbol) } else { - Builder.EmitReloc(symbol, RelocType.IMAGE_REL_BASED_LOONGARCH64_PC); - EmitPC(Register.R21); // pcalau12i R21, 0 - Builder.EmitUInt(0x4c0002a0); // jirl R0, R21, 0 + //Builder.EmitReloc(symbol, RelocType.IMAGE_REL_BASED_LOONGARCH64_PC); + Builder.EmitUInt(0xffffffff); // bad code. 
+ throw new NotImplementedException(); } } @@ -123,7 +123,7 @@ public void EmitRETIfEqual(Register regSrc) public void EmitJE(Register regSrc, ISymbolNode symbol) { - uint offset = symbol.RepresentsIndirectionCell ? 7u : 3u; + uint offset = symbol.RepresentsIndirectionCell ? 7u : 2u; // BNEZ regSrc, offset Builder.EmitUInt((uint)(0x44000000 | (offset << 10) | ((uint)regSrc << 5))); diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfBuilder.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfBuilder.cs index 643f14056bd4d..d79abeea30c5d 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfBuilder.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfBuilder.cs @@ -80,12 +80,6 @@ public DwarfBuilder( _codeRelocType = RelocType.IMAGE_REL_BASED_HIGHLOW; break; - case TargetArchitecture.LoongArch64: - _targetPointerSize = 8; - _frameRegister = 22; // FP - _codeRelocType = RelocType.IMAGE_REL_BASED_DIR64; - break; - default: throw new NotSupportedException("Unsupported architecture"); } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCie.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCie.cs index e5303e64f1aa7..7497734283314 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCie.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfCie.cs @@ -80,19 +80,6 @@ public DwarfCie(TargetArchitecture targetArchitecture) InitialCFAOffset = 8; break; - case TargetArchitecture.LoongArch64: - CodeAlignFactor = 1; - DataAlignFactor = -4; - ReturnAddressRegister = 1; // RA - Instructions = - [ - DW_CFA_def_cfa, - 3, // SP - 0, // Offset from SP - ]; - InitialCFAOffset = 0; - break; - default: throw new NotSupportedException("Unsupported architecture"); } diff --git 
a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfExpressionBuilder.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfExpressionBuilder.cs index 89c2188774416..98fb159e04774 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfExpressionBuilder.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/Dwarf/DwarfExpressionBuilder.cs @@ -161,10 +161,6 @@ public static int DwarfRegNum(TargetArchitecture architecture, int regNum) _ => regNum - (int)RegNumX86.REGNUM_COUNT + 32 // FP registers }; - case TargetArchitecture.LoongArch64: - // Normal registers are directly mapped - return regNum; - default: throw new NotSupportedException(); } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfNative.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfNative.cs index 8288f7fe8bd35..9abdc3361e049 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfNative.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfNative.cs @@ -25,7 +25,6 @@ internal static class ElfNative public const ushort EM_ARM = 40; public const ushort EM_X86_64 = 62; public const ushort EM_AARCH64 = 183; - public const ushort EM_LOONGARCH = 258; // Section header type public const uint SHT_NULL = 0; @@ -436,122 +435,5 @@ internal static class ElfNative public const uint R_AARCH64_TLS_TPREL = 1030; public const uint R_AARCH64_TLSDESC = 1031; public const uint R_AARCH64_IRELATIVE = 1032; - - // Relocations (loongarch64) - public const uint R_LARCH_NONE = 0; - public const uint R_LARCH_32 = 1; - public const uint R_LARCH_64 = 2; - public const uint R_LARCH_RELATIVE = 3; - public const uint R_LARCH_COPY = 4; - public const uint R_LARCH_JUMP_SLOT = 5; - public const uint R_LARCH_TLS_DTPMOD32 = 6; - public const uint R_LARCH_TLS_DTPMOD64 = 7; - public const uint R_LARCH_TLS_DTPREL32 = 8; - public const uint 
R_LARCH_TLS_DTPREL64 = 9; - public const uint R_LARCH_TLS_TPREL32 = 10; - public const uint R_LARCH_TLS_TPREL64 = 11; - public const uint R_LARCH_IRELATIVE = 12; - public const uint R_LARCH_MARK_LA = 20; - public const uint R_LARCH_MARK_PCREL = 21; - public const uint R_LARCH_SOP_PUSH_PCREL = 22; - public const uint R_LARCH_SOP_PUSH_ABSOLUTE = 23; - public const uint R_LARCH_SOP_PUSH_DUP = 24; - public const uint R_LARCH_SOP_PUSH_GPREL = 25; - public const uint R_LARCH_SOP_PUSH_TLS_TPREL = 26; - public const uint R_LARCH_SOP_PUSH_TLS_GOT = 27; - public const uint R_LARCH_SOP_PUSH_TLS_GD = 28; - public const uint R_LARCH_SOP_PUSH_PLT_PCREL = 29; - public const uint R_LARCH_SOP_ASSERT = 30; - public const uint R_LARCH_SOP_NOT = 31; - public const uint R_LARCH_SOP_SUB = 32; - public const uint R_LARCH_SOP_SL = 33; - public const uint R_LARCH_SOP_SR = 34; - public const uint R_LARCH_SOP_ADD = 35; - public const uint R_LARCH_SOP_AND = 36; - public const uint R_LARCH_SOP_IF_ELSE = 37; - public const uint R_LARCH_SOP_POP_32_S_10_5 = 38; - public const uint R_LARCH_SOP_POP_32_U_10_12 = 39; - public const uint R_LARCH_SOP_POP_32_S_10_12 = 40; - public const uint R_LARCH_SOP_POP_32_S_10_16 = 41; - public const uint R_LARCH_SOP_POP_32_S_10_16_S2 = 42; - public const uint R_LARCH_SOP_POP_32_S_5_20 = 43; - public const uint R_LARCH_SOP_POP_32_S_0_5_10_16_S2 = 44; - public const uint R_LARCH_SOP_POP_32_S_0_10_10_16_S2 = 45; - public const uint R_LARCH_SOP_POP_32_U = 46; - public const uint R_LARCH_ADD8 = 47; - public const uint R_LARCH_ADD16 = 48; - public const uint R_LARCH_ADD24 = 49; - public const uint R_LARCH_ADD32 = 50; - public const uint R_LARCH_ADD64 = 51; - public const uint R_LARCH_SUB8 = 52; - public const uint R_LARCH_SUB16 = 53; - public const uint R_LARCH_SUB24 = 54; - public const uint R_LARCH_SUB32 = 55; - public const uint R_LARCH_SUB64 = 56; - public const uint R_LARCH_GNU_VTINHERIT = 57; - public const uint R_LARCH_GNU_VTENTRY = 58; - public const uint 
R_LARCH_B16 = 64; - public const uint R_LARCH_B21 = 65; - public const uint R_LARCH_B26 = 66; - public const uint R_LARCH_ABS_HI20 = 67; - public const uint R_LARCH_ABS_LO12 = 68; - public const uint R_LARCH_ABS64_LO20 = 69; - public const uint R_LARCH_ABS64_HI12 = 70; - public const uint R_LARCH_PCALA_HI20 = 71; - public const uint R_LARCH_PCALA_LO12 = 72; - public const uint R_LARCH_PCALA64_LO20 = 73; - public const uint R_LARCH_PCALA64_HI12 = 74; - public const uint R_LARCH_GOT_PC_HI20 = 75; - public const uint R_LARCH_GOT_PC_LO12 = 76; - public const uint R_LARCH_GOT64_PC_LO20 = 77; - public const uint R_LARCH_GOT64_PC_HI12 = 78; - public const uint R_LARCH_GOT_HI20 = 79; - public const uint R_LARCH_GOT_LO12 = 80; - public const uint R_LARCH_GOT64_LO20 = 81; - public const uint R_LARCH_GOT64_HI12 = 82; - public const uint R_LARCH_TLS_LE_HI20 = 83; - public const uint R_LARCH_TLS_LE_LO12 = 84; - public const uint R_LARCH_TLS_LE64_LO20 = 85; - public const uint R_LARCH_TLS_LE64_HI12 = 86; - public const uint R_LARCH_TLS_IE_PC_HI20 = 87; - public const uint R_LARCH_TLS_IE_PC_LO12 = 88; - public const uint R_LARCH_TLS_IE64_PC_LO20 = 89; - public const uint R_LARCH_TLS_IE64_PC_HI12 = 90; - public const uint R_LARCH_TLS_IE_HI20 = 91; - public const uint R_LARCH_TLS_IE_LO12 = 92; - public const uint R_LARCH_TLS_IE64_LO20 = 93; - public const uint R_LARCH_TLS_IE64_HI12 = 94; - public const uint R_LARCH_TLS_LD_PC_HI20 = 95; - public const uint R_LARCH_TLS_LD_HI20 = 96; - public const uint R_LARCH_TLS_GD_PC_HI20 = 97; - public const uint R_LARCH_TLS_GD_HI20 = 98; - public const uint R_LARCH_32_PCREL = 99; - public const uint R_LARCH_RELAX = 100; - public const uint R_LARCH_ALIGN = 102; - public const uint R_LARCH_PCREL20_S2 = 103; - public const uint R_LARCH_ADD6 = 105; - public const uint R_LARCH_SUB6 = 106; - public const uint R_LARCH_ADD_ULEB128 = 107; - public const uint R_LARCH_SUB_ULEB128 = 108; - public const uint R_LARCH_64_PCREL = 109; - public const uint 
R_LARCH_CALL36 = 110; - public const uint R_LARCH_TLS_DESC32 = 13; - public const uint R_LARCH_TLS_DESC64 = 14; - public const uint R_LARCH_TLS_DESC_PC_HI20 = 111; - public const uint R_LARCH_TLS_DESC_PC_LO12 = 112; - public const uint R_LARCH_TLS_DESC64_PC_LO20 = 113; - public const uint R_LARCH_TLS_DESC64_PC_HI12 = 114; - public const uint R_LARCH_TLS_DESC_HI20 = 115; - public const uint R_LARCH_TLS_DESC_LO12 = 116; - public const uint R_LARCH_TLS_DESC64_LO20 = 117; - public const uint R_LARCH_TLS_DESC64_HI12 = 118; - public const uint R_LARCH_TLS_DESC_LD = 119; - public const uint R_LARCH_TLS_DESC_CALL = 120; - public const uint R_LARCH_TLS_LE_HI20_R = 121; - public const uint R_LARCH_TLS_LE_ADD_R = 122; - public const uint R_LARCH_TLS_LE_LO12_R = 123; - public const uint R_LARCH_TLS_LD_PCREL20_S2 = 124; - public const uint R_LARCH_TLS_GD_PCREL20_S2 = 125; - public const uint R_LARCH_TLS_DESC_PCREL20_S2 = 126; } } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs index 9f3d877b602b6..2cd90d06977e0 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ObjectWriter/ElfObjectWriter.cs @@ -59,7 +59,6 @@ public ElfObjectWriter(NodeFactory factory, ObjectWritingOptions options) TargetArchitecture.X64 => EM_X86_64, TargetArchitecture.ARM => EM_ARM, TargetArchitecture.ARM64 => EM_AARCH64, - TargetArchitecture.LoongArch64 => EM_LOONGARCH, _ => throw new NotSupportedException("Unsupported architecture") }; _useInlineRelocationAddends = _machine is EM_386 or EM_ARM; @@ -359,9 +358,6 @@ private protected override void EmitRelocations(int sectionIndex, List rel } } - private void EmitRelocationsLoongArch64(int sectionIndex, List relocationList) - { - if (relocationList.Count > 0) - { - Span relocationEntry = stackalloc byte[24]; - var 
relocationStream = new MemoryStream(24 * relocationList.Count); - _sections[sectionIndex].RelocationStream = relocationStream; - foreach (SymbolicRelocation symbolicRelocation in relocationList) - { - uint symbolIndex = _symbolNameToIndex[symbolicRelocation.SymbolName]; - uint type = symbolicRelocation.Type switch - { - IMAGE_REL_BASED_DIR64 => R_LARCH_64, - IMAGE_REL_BASED_HIGHLOW => R_LARCH_32, - IMAGE_REL_BASED_RELPTR32 => R_LARCH_32_PCREL, - IMAGE_REL_BASED_LOONGARCH64_PC => R_LARCH_PCALA_HI20, - IMAGE_REL_BASED_LOONGARCH64_JIR => R_LARCH_CALL36, - _ => throw new NotSupportedException("Unknown relocation type: " + symbolicRelocation.Type) - }; - - BinaryPrimitives.WriteUInt64LittleEndian(relocationEntry, (ulong)symbolicRelocation.Offset); - BinaryPrimitives.WriteUInt64LittleEndian(relocationEntry.Slice(8), ((ulong)symbolIndex << 32) | type); - BinaryPrimitives.WriteInt64LittleEndian(relocationEntry.Slice(16), symbolicRelocation.Addend); - relocationStream.Write(relocationEntry); - - if (symbolicRelocation.Type is IMAGE_REL_BASED_LOONGARCH64_PC) - { - BinaryPrimitives.WriteUInt64LittleEndian(relocationEntry, (ulong)symbolicRelocation.Offset + 4); - BinaryPrimitives.WriteUInt64LittleEndian(relocationEntry.Slice(8), ((ulong)symbolIndex << 32) | type + 1); - BinaryPrimitives.WriteInt64LittleEndian(relocationEntry.Slice(16), symbolicRelocation.Addend); - relocationStream.Write(relocationEntry); - } - } - } - } - private protected override void EmitSectionsAndLayout() { if (_machine == EM_ARM) @@ -801,12 +761,8 @@ private void EmitObjectFile(FileStream outputFileStream) SectionHeaderEntrySize = (ushort)ElfSectionHeader.GetSize(), SectionHeaderEntryCount = sectionCount < SHN_LORESERVE ? (ushort)sectionCount : (ushort)0u, StringTableIndex = strTabSectionIndex < SHN_LORESERVE ? 
(ushort)strTabSectionIndex : (ushort)SHN_XINDEX, - Flags = _machine switch - { - EM_ARM => 0x05000000u, // For ARM32 claim conformance with the EABI specification - EM_LOONGARCH => 0x43u, // For LoongArch ELF psABI specify the ABI version (1) and modifiers (64-bit GPRs, 64-bit FPRs) - _ => 0u - }, + // For ARM32 claim conformance with the EABI specification + Flags = _machine is EM_ARM ? 0x05000000u : 0u, }; elfHeader.Write(outputFileStream); From a947936a9d5e4ac107b912a4467f44b192d860e5 Mon Sep 17 00:00:00 2001 From: sunlijun Date: Fri, 28 Jun 2024 17:45:56 +0800 Subject: [PATCH 05/10] Update EHHelpers.cpp and startup.cpp in nativeaot-runtime. --- src/coreclr/nativeaot/Runtime/EHHelpers.cpp | 49 ------------------- src/coreclr/nativeaot/Runtime/startup.cpp | 1 - .../ExpectedIsaFeaturesRootProvider.cs | 3 +- 3 files changed, 2 insertions(+), 51 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp index fc5f4dcf48c9a..de35a123e012a 100644 --- a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp @@ -211,7 +211,6 @@ FCIMPL3(void, RhpCopyContextFromExInfo, void * pOSContext, int32_t cbOSContext, } FCIMPLEND -#if defined(HOST_AMD64) || defined(HOST_ARM) || defined(HOST_X86) || defined(HOST_ARM64) || defined(HOST_LOONGARCH64) struct DISPATCHER_CONTEXT { uintptr_t ControlPc; @@ -273,56 +272,8 @@ EXTERN_C int32_t __stdcall RhpPInvokeExceptionGuard(PEXCEPTION_RECORD pExc return 0; } -#else -EXTERN_C int32_t RhpPInvokeExceptionGuard() -{ - ASSERT_UNCONDITIONALLY("RhpPInvokeExceptionGuard NYI for this architecture!"); - RhFailFast(); - return 0; -} -#endif -#if defined(HOST_AMD64) || defined(HOST_ARM) || defined(HOST_X86) || defined(HOST_ARM64) || defined(HOST_WASM) || defined(HOST_LOONGARCH64) FCDECL2(void, RhpThrowHwEx, int exceptionCode, TADDR faultingIP); -#else -FCIMPL0(void, RhpThrowHwEx) -{ - ASSERT_UNCONDITIONALLY("RhpThrowHwEx NYI for this architecture!"); -} 
-FCIMPLEND -FCIMPL0(void, RhpThrowEx) -{ - ASSERT_UNCONDITIONALLY("RhpThrowEx NYI for this architecture!"); -} -FCIMPLEND -FCIMPL0(void, RhpCallCatchFunclet) -{ - ASSERT_UNCONDITIONALLY("RhpCallCatchFunclet NYI for this architecture!"); -} -FCIMPLEND -FCIMPL0(void, RhpCallFinallyFunclet) -{ - ASSERT_UNCONDITIONALLY("RhpCallFinallyFunclet NYI for this architecture!"); -} -FCIMPLEND -FCIMPL0(void, RhpCallFilterFunclet) -{ - ASSERT_UNCONDITIONALLY("RhpCallFilterFunclet NYI for this architecture!"); -} -FCIMPLEND -FCIMPL0(void, RhpRethrow) -{ - ASSERT_UNCONDITIONALLY("RhpRethrow NYI for this architecture!"); -} -FCIMPLEND - -EXTERN_C void* RhpCallCatchFunclet2 = NULL; -EXTERN_C void* RhpCallFinallyFunclet2 = NULL; -EXTERN_C void* RhpCallFilterFunclet2 = NULL; -EXTERN_C void* RhpThrowEx2 = NULL; -EXTERN_C void* RhpThrowHwEx2 = NULL; -EXTERN_C void* RhpRethrow2 = NULL; -#endif EXTERN_C CODE_LOCATION RhpAssignRefAVLocation; #if defined(HOST_X86) diff --git a/src/coreclr/nativeaot/Runtime/startup.cpp b/src/coreclr/nativeaot/Runtime/startup.cpp index 116fd40d65a5c..c43bf0ffd57c7 100644 --- a/src/coreclr/nativeaot/Runtime/startup.cpp +++ b/src/coreclr/nativeaot/Runtime/startup.cpp @@ -55,7 +55,6 @@ int g_cpuFeatures = 0; // This field is defined in the generated code and sets the ISA expectations. 
EXTERN_C int g_requiredCpuFeatures; -int g_requiredCpuFeatures = 0; #endif #ifdef TARGET_UNIX diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ExpectedIsaFeaturesRootProvider.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ExpectedIsaFeaturesRootProvider.cs index 90f7b77a4f76f..1b656a168df16 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ExpectedIsaFeaturesRootProvider.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ExpectedIsaFeaturesRootProvider.cs @@ -20,7 +20,8 @@ void ICompilationRootProvider.AddCompilationRoots(IRootingServiceProvider rootPr { if (_isaSupport.Architecture == TargetArchitecture.X64 || _isaSupport.Architecture == TargetArchitecture.X86 - || _isaSupport.Architecture == TargetArchitecture.ARM64) + || _isaSupport.Architecture == TargetArchitecture.ARM64 + || _isaSupport.Architecture == TargetArchitecture.LoongArch64) { int isaFlags = HardwareIntrinsicHelpers.GetRuntimeRequiredIsaFlags(_isaSupport); byte[] bytes = BitConverter.GetBytes(isaFlags); From 9d378f9a60bb2d383e7f4fe4f77a5272cf0632c4 Mon Sep 17 00:00:00 2001 From: sunlijun Date: Mon, 1 Jul 2024 11:25:46 +0800 Subject: [PATCH 06/10] Revert src/coreclr/nativeaot/Runtime/startup.cpp. --- src/coreclr/nativeaot/Runtime/startup.cpp | 6 +++--- .../Compiler/ExpectedIsaFeaturesRootProvider.cs | 3 +-- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/startup.cpp b/src/coreclr/nativeaot/Runtime/startup.cpp index c43bf0ffd57c7..f87bc947d970a 100644 --- a/src/coreclr/nativeaot/Runtime/startup.cpp +++ b/src/coreclr/nativeaot/Runtime/startup.cpp @@ -48,7 +48,7 @@ static bool DetectCPUFeatures(); extern RhConfig * g_pRhConfig; -#if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64) || defined(HOST_LOONGARCH64) +#if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64) // This field is inspected from the generated code to determine what intrinsics are available. 
EXTERN_C int g_cpuFeatures; int g_cpuFeatures = 0; @@ -177,7 +177,7 @@ static bool InitDLL(HANDLE hPalInstance) bool DetectCPUFeatures() { -#if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64) || defined(HOST_LOONGARCH64) +#if defined(HOST_X86) || defined(HOST_AMD64) || defined(HOST_ARM64) g_cpuFeatures = minipal_getcpufeatures(); if ((g_cpuFeatures & g_requiredCpuFeatures) != g_requiredCpuFeatures) @@ -185,7 +185,7 @@ bool DetectCPUFeatures() PalPrintFatalError("\nThe required instruction sets are not supported by the current CPU.\n"); RhFailFast(); } -#endif // HOST_X86|| HOST_AMD64 || HOST_ARM64 || HOST_LOONGARCH64 +#endif // HOST_X86|| HOST_AMD64 || HOST_ARM64 return true; } diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ExpectedIsaFeaturesRootProvider.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ExpectedIsaFeaturesRootProvider.cs index 1b656a168df16..90f7b77a4f76f 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ExpectedIsaFeaturesRootProvider.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/ExpectedIsaFeaturesRootProvider.cs @@ -20,8 +20,7 @@ void ICompilationRootProvider.AddCompilationRoots(IRootingServiceProvider rootPr { if (_isaSupport.Architecture == TargetArchitecture.X64 || _isaSupport.Architecture == TargetArchitecture.X86 - || _isaSupport.Architecture == TargetArchitecture.ARM64 - || _isaSupport.Architecture == TargetArchitecture.LoongArch64) + || _isaSupport.Architecture == TargetArchitecture.ARM64) { int isaFlags = HardwareIntrinsicHelpers.GetRuntimeRequiredIsaFlags(_isaSupport); byte[] bytes = BitConverter.GetBytes(isaFlags); From 809b6386ad54e0ddbebf869bddf08aa31d0b358a Mon Sep 17 00:00:00 2001 From: sunlijun Date: Tue, 2 Jul 2024 10:59:24 +0800 Subject: [PATCH 07/10] Revert src/coreclr/nativeaot/Runtime/unix/PalRedhawkInline.h --- .../nativeaot/Runtime/unix/PalRedhawkInline.h | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git 
a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkInline.h b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkInline.h index 0b62e08d558ff..983f17a36aba0 100644 --- a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkInline.h +++ b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkInline.h @@ -87,26 +87,14 @@ FORCEINLINE int64_t PalInterlockedCompareExchange64(_Inout_ int64_t volatile *pD return result; } -#if defined(HOST_AMD64) || defined(HOST_ARM64) || defined(HOST_LOONGARCH64) +#if defined(HOST_AMD64) || defined(HOST_ARM64) FORCEINLINE uint8_t PalInterlockedCompareExchange128(_Inout_ int64_t volatile *pDst, int64_t iValueHigh, int64_t iValueLow, int64_t *pComparandAndResult) { -#if defined(HOST_LOONGARCH64) - int64_t iResult0 = __sync_val_compare_and_swap(pDst, pComparandAndResult[0], iValueLow); - int64_t iResult1 = __sync_val_compare_and_swap(pDst+1, pComparandAndResult[1], iValueHigh); - - uint8_t ret = pComparandAndResult[0] == iResult0; - pComparandAndResult[0] = iResult0; - ret &= pComparandAndResult[1] == iResult1; - pComparandAndResult[1] = iResult1; - - return ret; -#else __int128_t iComparand = ((__int128_t)pComparandAndResult[1] << 64) + (uint64_t)pComparandAndResult[0]; __int128_t iResult = __sync_val_compare_and_swap((__int128_t volatile*)pDst, iComparand, ((__int128_t)iValueHigh << 64) + (uint64_t)iValueLow); PalInterlockedOperationBarrier(); pComparandAndResult[0] = (int64_t)iResult; pComparandAndResult[1] = (int64_t)(iResult >> 64); return iComparand == iResult; -#endif } #endif // HOST_AMD64 From c60ed12fca326f15269495101baeb67ae5930ef5 Mon Sep 17 00:00:00 2001 From: Sun Lijun Date: Wed, 3 Jul 2024 15:54:44 +0800 Subject: [PATCH 08/10] Update src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h Co-authored-by: Jan Kotas --- src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h b/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h index 
be655a6470c3b..d72b9d9f8e3d7 100644 --- a/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h +++ b/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h @@ -111,7 +111,7 @@ struct PAL_LIMITED_CONTEXT uintptr_t SP; uintptr_t IP; - uint64_t F[32 - 24]; // Only the F registers F24..F31 needs to be preserved + uint64_t F[32 - 24]; // Only the F registers F24..F31 need to be preserved // (F0-F23 are not preserved according to the ABI spec). From 474d483efe8326f64106b9a18f9778f6b3e538bb Mon Sep 17 00:00:00 2001 From: sunlijun Date: Thu, 4 Jul 2024 09:47:40 +0800 Subject: [PATCH 09/10] Update MiscHelpers.cpp and PalRedhawk.h in nativeaot-runtime --- src/coreclr/nativeaot/Runtime/MiscHelpers.cpp | 10 +- src/coreclr/nativeaot/Runtime/PalRedhawk.h | 91 ++++++++----------- 2 files changed, 42 insertions(+), 59 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp b/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp index 30a9489d4b4e7..3351326ad3071 100644 --- a/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/MiscHelpers.cpp @@ -358,16 +358,16 @@ FCIMPL1(uint8_t *, RhGetCodeTarget, uint8_t * pCodeOrg) return *pIatCell; } // is this an unboxing stub followed by a relative jump? 
- // pcalau12i $r21, imm20; jirl $r0, $r21, imm16 + // pcaddu18i $r21, imm20; jirl $r0, $r21, imm16 else if (unboxingStub && - (pCode[0] & 0xfe00001f) == 0x1a000015 && + (pCode[0] & 0xfe00001f) == 0x1e000015 && (pCode[1] & 0xfc0003ff) == 0x4c0002a0) { // relative jump - dist is relative to the instruction - // offset = SignExtend(immhi10:immlo16:'00', 64); - int64_t distToTarget = ((((int64_t)pCode[0] & ~0x1f) << 39) >> 32); + // offset = SignExtend(immhi20:immlo16:'00', 64); + int64_t distToTarget = ((((int64_t)pCode[0] & ~0x1f) << 39) >> 26); distToTarget += ((((int64_t)pCode[1] & ~0x3ff) << 38) >> 46); - return (uint8_t *)(((int64_t)pCode & ~0xfff) + distToTarget); + return (uint8_t *)((int64_t)pCode + distToTarget); } #else UNREFERENCED_PARAMETER(unboxingStub); diff --git a/src/coreclr/nativeaot/Runtime/PalRedhawk.h b/src/coreclr/nativeaot/Runtime/PalRedhawk.h index 0ca8c4084fe96..16b719a6cb43c 100644 --- a/src/coreclr/nativeaot/Runtime/PalRedhawk.h +++ b/src/coreclr/nativeaot/Runtime/PalRedhawk.h @@ -450,7 +450,7 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT { #elif defined(HOST_LOONGARCH64) -#define CONTEXT_LOONGARCH64 0x00400000L +#define CONTEXT_LOONGARCH64 0x00800000L #define CONTEXT_CONTROL (CONTEXT_LOONGARCH64 | 0x1L) #define CONTEXT_INTEGER (CONTEXT_LOONGARCH64 | 0x2L) @@ -471,63 +471,46 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT { // // Integer registers // - uint32_t Csr; // NZVF + DAIF + CurrentEL + SPSel - union { - struct { - uint64_t R0; - uint64_t R2; - uint64_t R4; - uint64_t R5; - uint64_t R6; - uint64_t R7; - uint64_t R8; - uint64_t R9; - uint64_t R10; - uint64_t R11; - uint64_t R12; - uint64_t R13; - uint64_t R14; - uint64_t R15; - uint64_t R16; - uint64_t R17; - uint64_t R18; - uint64_t R19; - uint64_t R20; - uint64_t R21; - uint64_t R23; - uint64_t R24; - uint64_t R25; - uint64_t R26; - uint64_t R27; - uint64_t R28; - uint64_t R29; - uint64_t R30; - uint64_t R31; -#pragma warning(push) -#pragma warning(disable:4201) // nameless struct - 
}; - uint64_t R[29]; - }; -#pragma warning(pop) - uint64_t Fp; // R22 - uint64_t Ra; // R1 - uint64_t Sp; // R3 + uint64_t R0; + uint64_t Ra; + uint64_t R2; + uint64_t Sp; + uint64_t R4; + uint64_t R5; + uint64_t R6; + uint64_t R7; + uint64_t R8; + uint64_t R9; + uint64_t R10; + uint64_t R11; + uint64_t R12; + uint64_t R13; + uint64_t R14; + uint64_t R15; + uint64_t R16; + uint64_t R17; + uint64_t R18; + uint64_t R19; + uint64_t R20; + uint64_t R21; + uint64_t Fp; + uint64_t R23; + uint64_t R24; + uint64_t R25; + uint64_t R26; + uint64_t R27; + uint64_t R28; + uint64_t R29; + uint64_t R30; + uint64_t R31; uint64_t Pc; // - // Floating Point Registers - // - uint64_t F[32]; - uint32_t Fpcr; - uint32_t Fpsr; - - // - // Debug registers + // Floating Point Registers: FPR64/LSX/LASX. // - uint32_t Bcr[LOONGARCH64_MAX_BREAKPOINTS]; - uint64_t Bvr[LOONGARCH64_MAX_BREAKPOINTS]; - uint32_t Wcr[LOONGARCH64_MAX_WATCHPOINTS]; - uint64_t Wvr[LOONGARCH64_MAX_WATCHPOINTS]; + uint64_t F[4*32]; + uint64_t Fcc; + uint32_t Fcsr; void SetIp(uintptr_t ip) { Pc = ip; } void SetArg0Reg(uintptr_t val) { R4 = val; } From c8ab84bfb6c2832cc8bac26c04c1e46391d1c8a2 Mon Sep 17 00:00:00 2001 From: sunlijun Date: Fri, 5 Jul 2024 09:16:54 +0800 Subject: [PATCH 10/10] Update src/coreclr/nativeaot/Runtime/gcenv.ee.cpp --- src/coreclr/nativeaot/Runtime/gcenv.ee.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp b/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp index 450df71d5a889..f041e499c11d4 100644 --- a/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp +++ b/src/coreclr/nativeaot/Runtime/gcenv.ee.cpp @@ -56,7 +56,7 @@ void GCToEEInterface::RestartEE(bool /*bFinishedGC*/) { FireEtwGCRestartEEBegin_V1(GetClrInstanceId()); -#if defined(TARGET_ARM) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) +#if !defined(TARGET_X86) && !defined(TARGET_AMD64) // Flush the store buffers on all CPUs, to ensure that they all see changes made // by the GC 
threads. This only matters on weak memory ordered processors as // the strong memory ordered processors wouldn't have reordered the relevant reads. @@ -64,7 +64,7 @@ void GCToEEInterface::RestartEE(bool /*bFinishedGC*/) // the runtime was suspended and that will return to cooperative mode after the runtime // is restarted. ::FlushProcessWriteBuffers(); -#endif //TARGET_ARM || TARGET_ARM64 +#endif // !defined(TARGET_X86) && !defined(TARGET_AMD64) SyncClean::CleanUp();