Skip to content

Commit

Permalink
[RISC-V] Enable On Stack Replacement (dotnet#96558)
Browse files Browse the repository at this point in the history
* [RISC-V] Implement On Stack Replacement

Note: Pinned local test is failing.

* [RISC-V] Apply suggestions from code review

Co-authored-by: Tomasz Sowiński <[email protected]>

* [RISC-V] apply jit-format

* [RISC-V] Cosmetic changes after code review

* [RISC-V] Changes assuming the memory page size is always equal to 4KiB

* [RISC-V] Remove stack probing

* [RISC-V] Replace GetEmitter() with emit

* [RISC-V] Sync frame type 1 in genPushCalleeSavedRegisters with genPopCalleeSavedRegisters

* [RISC-V] Fix assembly emitted by genStackProbe

* [RISC-V] Apply jit-formatter

---------

Co-authored-by: Tomasz Sowiński <[email protected]>
  • Loading branch information
2 people authored and tmds committed Jan 23, 2024
1 parent d5d7660 commit 0552936
Show file tree
Hide file tree
Showing 10 changed files with 989 additions and 547 deletions.
4 changes: 2 additions & 2 deletions src/coreclr/clrdefinitions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -184,9 +184,9 @@ endif(FEATURE_ENABLE_NO_ADDRESS_SPACE_RANDOMIZATION)
add_definitions(-DFEATURE_SVR_GC)
add_definitions(-DFEATURE_SYMDIFF)
add_compile_definitions(FEATURE_TIERED_COMPILATION)
if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64)
if (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64 OR CLR_CMAKE_TARGET_ARCH_RISCV64)
add_compile_definitions(FEATURE_ON_STACK_REPLACEMENT)
endif (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64)
endif (CLR_CMAKE_TARGET_ARCH_AMD64 OR CLR_CMAKE_TARGET_ARCH_ARM64 OR CLR_CMAKE_TARGET_ARCH_LOONGARCH64 OR CLR_CMAKE_TARGET_ARCH_RISCV64)
add_compile_definitions(FEATURE_PGO)
if (CLR_CMAKE_TARGET_WIN32)
add_definitions(-DFEATURE_TYPEEQUIVALENCE)
Expand Down
6 changes: 3 additions & 3 deletions src/coreclr/inc/clrconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -569,11 +569,11 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_HillClimbing_GainExponent,
#endif // _DEBUG
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TieredCompilation, W("TieredCompilation"), 1, "Enables tiered compilation")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TC_QuickJit, W("TC_QuickJit"), 1, "For methods that would be jitted, enable using quick JIT when appropriate.")
#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
#if defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TC_QuickJitForLoops, W("TC_QuickJitForLoops"), 1, "When quick JIT is enabled, quick JIT may also be used for methods that contain loops.")
#else // !(defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64))
#else // !(defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64))
RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TC_QuickJitForLoops, W("TC_QuickJitForLoops"), 0, "When quick JIT is enabled, quick JIT may also be used for methods that contain loops.")
#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TC_AggressiveTiering, W("TC_AggressiveTiering"), 0, "Transition through tiers aggressively.")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TC_CallCountThreshold, W("TC_CallCountThreshold"), TC_CallCountThreshold, "Number of times a method must be called in tier 0 after which it is promoted to the next tier.")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_TC_CallCountingDelayMs, W("TC_CallCountingDelayMs"), TC_CallCountingDelayMs, "A perpetual delay in milliseconds that is applied to call counting in tier 0 and jitting at higher tiers, while there is startup-like activity.")
Expand Down
12 changes: 8 additions & 4 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -265,7 +265,7 @@ class CodeGen final : public CodeGenInterface
void genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbered, RegState* regState);
#endif
void genEnregisterIncomingStackArgs();
#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
void genEnregisterOSRArgsAndLocals(regNumber initReg, bool* pInitRegZeroed);
#else
void genEnregisterOSRArgsAndLocals();
Expand Down Expand Up @@ -345,6 +345,10 @@ class CodeGen final : public CodeGenInterface
void genOSRSaveRemainingCalleeSavedRegisters();
#endif // TARGET_AMD64

#if defined(TARGET_RISCV64)
void genStackProbe(ssize_t frameSize, regNumber rOffset, regNumber rLimit, regNumber rPageSize);
#endif

void genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn);

void genPoisonFrame(regMaskTP bbRegLiveIn);
Expand Down Expand Up @@ -450,11 +454,11 @@ class CodeGen final : public CodeGenInterface
regMaskTP fiSaveRegs; // Set of callee-saved registers saved in the funclet prolog (includes RA)
int fiFunction_CallerSP_to_FP_delta; // Delta between caller SP and the frame pointer in the parent function
// (negative)
int fiSP_to_FPRA_save_delta; // FP/RA register save offset from SP (positive)
int fiSP_to_CalleeSaved_delta; // CalleeSaved register save offset from SP (positive)
int fiCalleeSavedPadding; // CalleeSaved offset padding (positive)
int fiSP_to_PSP_slot_delta; // PSP slot offset from SP (positive)
int fiCallerSP_to_PSP_slot_delta; // PSP slot offset from Caller SP (negative)
int fiFrameType; // Funclet frame types are numbered. See genFuncletProlog() for details.
int fiSpDelta1; // Stack pointer delta 1 (negative)
int fiSpDelta; // Stack pointer delta (negative)
};

FuncletFrameInfoDsc genFuncletInfo;
Expand Down
30 changes: 21 additions & 9 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4750,7 +4750,7 @@ void CodeGen::genZeroInitFrame(int untrLclHi, int untrLclLo, regNumber initReg,
// initReg -- scratch register to use if needed
// pInitRegZeroed -- [IN,OUT] if init reg is zero (on entry/exit)
//
#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
void CodeGen::genEnregisterOSRArgsAndLocals(regNumber initReg, bool* pInitRegZeroed)
#else
void CodeGen::genEnregisterOSRArgsAndLocals()
Expand Down Expand Up @@ -4891,7 +4891,7 @@ void CodeGen::genEnregisterOSRArgsAndLocals()

GetEmitter()->emitIns_R_AR(ins_Load(lclTyp), size, varDsc->GetRegNum(), genFramePointerReg(), offset);

#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
#elif defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)

// Patchpoint offset is from top of Tier0 frame
//
Expand Down Expand Up @@ -4923,7 +4923,7 @@ void CodeGen::genEnregisterOSRArgsAndLocals()

genInstrWithConstant(ins_Load(lclTyp), size, varDsc->GetRegNum(), genFramePointerReg(), offset, initReg);
*pInitRegZeroed = false;
#endif
#endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64
}
}

Expand Down Expand Up @@ -5530,7 +5530,7 @@ void CodeGen::genFnProlog()
psiBegProlog();
}

#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
// For arm64, LoongArch64 and RISC-V64 OSR, emit a "phantom prolog" to account for the actions taken
// in the tier0 frame that impact FP and SP on entry to the OSR method.
//
Expand All @@ -5545,7 +5545,7 @@ void CodeGen::genFnProlog()
// SP is tier0 method's SP.
compiler->unwindAllocStack(tier0FrameSize);
}
#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)

#ifdef DEBUG

Expand Down Expand Up @@ -5875,13 +5875,25 @@ void CodeGen::genFnProlog()
{
initReg = REG_SCRATCH;
}
#elif defined(TARGET_RISCV64)
// For RISC-V64 OSR root frames, we may need a scratch register for large
// offset addresses. Use a register that won't be allocated.
if (isRoot && compiler->opts.IsOSR())
{
initReg = REG_SCRATCH; // REG_T0
}
#endif

#ifndef TARGET_LOONGARCH64
#if !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
// For LoongArch64's OSR root frames, we may need a scratch register for large
// offset addresses. But this does not conflict with the REG_PINVOKE_FRAME.
//
// RISC-V64's OSR root frames are similar to LoongArch64's. In this case
// REG_SCRATCH also shouldn't conflict with REG_PINVOKE_FRAME, even if
// technically they are the same register - REG_T0.
//
noway_assert(!compiler->compMethodRequiresPInvokeFrame() || (initReg != REG_PINVOKE_FRAME));
#endif
#endif // !TARGET_LOONGARCH64 && !TARGET_RISCV64

#if defined(TARGET_AMD64)
// If we are a varargs call, in order to set up the arguments correctly this
Expand Down Expand Up @@ -6192,7 +6204,7 @@ void CodeGen::genFnProlog()
// Otherwise we'll do some of these fetches twice.
//
CLANG_FORMAT_COMMENT_ANCHOR;
#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64)
#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
genEnregisterOSRArgsAndLocals(initReg, &initRegZeroed);
#else
genEnregisterOSRArgsAndLocals();
Expand Down Expand Up @@ -6250,7 +6262,7 @@ void CodeGen::genFnProlog()
assignIncomingRegisterArgs(&intRegState);
#else
assignIncomingRegisterArgs(&intRegState);
#endif
#endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64

#endif // TARGET_LOONGARCH64 || TARGET_RISCV64

Expand Down
Loading

0 comments on commit 0552936

Please sign in to comment.