Skip to content

Commit

Permalink
Fix build breaks
Browse files Browse the repository at this point in the history
  • Loading branch information
jkotas committed Oct 8, 2024
1 parent de3cc06 commit 4f82273
Show file tree
Hide file tree
Showing 6 changed files with 13 additions and 263 deletions.
6 changes: 0 additions & 6 deletions src/coreclr/vm/arm64/cgencpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -506,12 +506,6 @@ class StubLinkerCPU : public StubLinker
void EmitLoadRegReg(IntReg Xt, IntReg Xn, IntReg Xm, DWORD option);

void EmitCallRegister(IntReg reg);
void EmitProlog(unsigned short cIntRegArgs,
unsigned short cVecRegArgs,
unsigned short cCalleeSavedRegs,
unsigned short cbStackSpace = 0);

void EmitEpilog();

void EmitRet(IntReg reg);

Expand Down
137 changes: 0 additions & 137 deletions src/coreclr/vm/arm64/stubs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1174,143 +1174,6 @@ void StubLinkerCPU::EmitJumpRegister(IntReg regTarget)
Emit32((DWORD) (0x3587C0<<10 | regTarget<<5));
}

//------------------------------------------------------------------------
// Emits the ARM64 stub prolog and records its shape (via DescribeProlog) so
// that EmitEpilog and the unwind info can be generated to match it.
//
// Arguments:
//    cIntRegArgs      - number of integer argument registers (x0..) to spill; at most 8
//    cVecRegArgs      - number of vector argument registers (v0..) to spill; at most 8
//    cCalleeSavedRegs - number of callee-saved registers (x19..) to save; at most 10
//    cbStackSpace     - bytes of local stack space requested; any padding needed to keep
//                       the frame 16-byte aligned is added to this area
//
// Throws kNotSupportedException when the padded frame would exceed 504 bytes.
//
void StubLinkerCPU::EmitProlog(unsigned short cIntRegArgs, unsigned short cVecRegArgs, unsigned short cCalleeSavedRegs, unsigned short cbStackSpace)
{
    _ASSERTE(!m_fProlog);
    _ASSERTE(cCalleeSavedRegs <= 10);
    _ASSERTE(cVecRegArgs <= 8);
    _ASSERTE(cIntRegArgs <= 8);

    // Do the size arithmetic in a wide type. Narrowing to 'unsigned short' before the range
    // check below would let an oversized request wrap around and slip past the limit.
    const unsigned numberOfEntriesOnStack = 2u + cIntRegArgs + cVecRegArgs + cCalleeSavedRegs; // 2 for fp, lr
    const size_t cbRegisterSlots = numberOfEntriesOnStack * sizeof(void*);

    // Stack needs to be 16 byte (2 qword) aligned. Compute the required padding before saving it
    const size_t cbPaddedFrame = ALIGN_UP(cbStackSpace + cbRegisterSlots, 2*sizeof(void*));

    // N.B. Although a single "sub sp" can adjust SP by up to 4KB, the frame is limited to 504 bytes so
    // that the prolog stays efficiently expressible in unwind codes. The largest offset in the typical
    // unwindinfo encodings that we use is 504, so allocations larger than 504 bytes would require setting
    // the SP in multiple strides, which would complicate both prolog and epilog generation as well as
    // unwindinfo generation.
    // The check runs before anything is recorded or emitted so a rejected request leaves no trace.
    _ASSERTE((cbPaddedFrame <= 504) && "NYI:ARM64 Implement StubLinker prologs with larger than 504 bytes of frame size");
    if (cbPaddedFrame > 504)
        COMPlusThrow(kNotSupportedException);

    // Both values are now known to fit: cbPaddedFrame <= 504.
    const unsigned short totalPaddedFrameSize = static_cast<unsigned short>(cbPaddedFrame);
    // The padding is going to be applied to the local stack
    cbStackSpace = static_cast<unsigned short>(cbPaddedFrame - cbRegisterSlots);

    // Record the parameters of this prolog so that we can generate a matching epilog and unwind info.
    DescribeProlog(cIntRegArgs, cVecRegArgs, cCalleeSavedRegs, cbStackSpace);

    // Here is how the stack would look like (Stack grows up)
    // [Low Address]
    //            +------------+
    //      SP -> |            | <-+
    //            :            :   | Stack Frame, (i.e outgoing arguments) including padding
    //            |            | <-+
    //            +------------+
    //            |    FP      |
    //            +------------+
    //            |    LR      |
    //            +------------+
    //            |    X19     | <-+
    //            +------------+   |
    //            :            :   | Callee-saved registers
    //            +------------+   |
    //            |    X28     | <-+
    //            +------------+
    //            |    V0      | <-+
    //            +------------+   |
    //            :            :   | Vec Args
    //            +------------+   |
    //            |    V7      | <-+
    //            +------------+
    //            |    X0      | <-+
    //            +------------+   |
    //            :            :   | Int Args
    //            +------------+   |
    //            |    X7      | <-+
    //            +------------+
    //  Old SP -> |[Stack Args]|
    // [High Address]

    // Regarding the order of operations in the prolog and epilog;
    // If the prolog and the epilog match each other we can simplify emitting the unwind codes and save a few
    // bytes of unwind codes by making prolog and epilog share the same unwind codes.
    // In order to do that we need to make the epilog be the reverse of the prolog.
    // But we wouldn't want to add restoring of the argument registers as that's completely unnecessary.
    // Besides, saving argument registers cannot be expressed by the unwind code encodings.
    // So, we'll push saving the argument registers to the very last in the prolog, skip restoring it in epilog,
    // and also skip reporting it to the OS.
    //
    // Another bit that we can save is resetting the frame pointer.
    // This is not necessary when the SP doesn't get modified beyond prolog and epilog. (i.e no alloca/localloc)
    // And in that case we don't need to report setting up the FP either.

    // 1. Relocate SP
    EmitSubImm(RegSp, RegSp, totalPaddedFrameSize);

    unsigned cbOffset = 2*sizeof(void*) + cbStackSpace; // 2 is for fp,lr

    // 2. Store callee-saved registers (x19 upwards): pairs first, then the odd one out.
    for (unsigned short i = 0; i < (cCalleeSavedRegs/2)*2; i += 2)
        EmitLoadStoreRegPairImm(eSTORE, IntReg(19+i), IntReg(19+i+1), RegSp, cbOffset + i*sizeof(void*));
    if ((cCalleeSavedRegs % 2) == 1)
    {
        // The unpaired register is the last callee-saved one, i.e. x(19 + count - 1).
        // It must use the same x19 base as the pairs above and as EmitEpilog.
        EmitLoadStoreRegImm(eSTORE, IntReg(19 + cCalleeSavedRegs - 1), RegSp, cbOffset + (cCalleeSavedRegs-1)*sizeof(void*));
    }

    // 3. Store FP/LR
    EmitLoadStoreRegPairImm(eSTORE, RegFp, RegLr, RegSp, cbStackSpace);

    // 4. Set the frame pointer
    EmitMovReg(RegFp, RegSp);

    // 5. Store floating point argument registers
    cbOffset += cCalleeSavedRegs*sizeof(void*);
    for (unsigned short i = 0; i < (cVecRegArgs/2)*2; i += 2)
        EmitLoadStoreRegPairImm(eSTORE, VecReg(i), VecReg(i+1), RegSp, cbOffset + i*sizeof(void*));
    if ((cVecRegArgs % 2) == 1)
        EmitLoadStoreRegImm(eSTORE, VecReg(cVecRegArgs-1), RegSp, cbOffset + (cVecRegArgs-1)*sizeof(void*));

    // 6. Store int argument registers
    cbOffset += cVecRegArgs*sizeof(void*);
    for (unsigned short i = 0; i < (cIntRegArgs/2)*2; i += 2)
        EmitLoadStoreRegPairImm(eSTORE, IntReg(i), IntReg(i+1), RegSp, cbOffset + i*sizeof(void*));
    if ((cIntRegArgs % 2) == 1)
        EmitLoadStoreRegImm(eSTORE, IntReg(cIntRegArgs-1), RegSp, cbOffset + (cIntRegArgs-1)*sizeof(void*));
}

//------------------------------------------------------------------------
// Emits the ARM64 stub epilog matching the prolog previously recorded by
// EmitProlog: reloads FP/LR and the callee-saved registers, releases the
// frame and returns through LR. The steps mirror the prolog in reverse order.
//
void StubLinkerCPU::EmitEpilog()
{
    _ASSERTE(m_fProlog);

    // 6. Restore int argument registers
    //    nop: We don't need to. They are scratch registers

    // 5. Restore floating point argument registers
    //    nop: We don't need to. They are scratch registers

    // 4. Restore the SP from FP
    //    N.B. We're assuming that the stublinker stubs don't do alloca, hence nop

    // 3. Restore FP/LR
    EmitLoadStoreRegPairImm(eLOAD, RegFp, RegLr, RegSp, m_cbStackSpace);

    // 2. Restore the callee-saved registers (x19 upwards): the odd one out first, then the
    //    pairs from the highest slot down, i.e. the reverse of the prolog.
    unsigned cbOffset = 2*sizeof(void*) + m_cbStackSpace; // 2 is for fp,lr
    if ((m_cCalleeSavedRegs % 2) == 1)
    {
        // Reload x(19 + count - 1), the register the prolog stored in this slot. Without the
        // x19 base this would reload an argument register instead (x0 for a count of 1,
        // which would clobber the return value).
        EmitLoadStoreRegImm(eLOAD, IntReg(19 + m_cCalleeSavedRegs - 1), RegSp, cbOffset + (m_cCalleeSavedRegs-1)*sizeof(void*));
    }
    for (int i = (m_cCalleeSavedRegs/2)*2 - 2; i >= 0; i -= 2)
        EmitLoadStoreRegPairImm(eLOAD, IntReg(19+i), IntReg(19+i+1), RegSp, cbOffset + i*sizeof(void*));

    // 1. Restore SP
    EmitAddImm(RegSp, RegSp, GetStackFrameSize());
    EmitRet(RegLr);
}

void StubLinkerCPU::EmitRet(IntReg Xn)
{
// Encoding: 1101011001011111000000| Rn |00000
Expand Down
3 changes: 0 additions & 3 deletions src/coreclr/vm/riscv64/cgencpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -411,9 +411,6 @@ class StubLinkerCPU : public StubLinker
void EmitLoad(FloatReg dest, IntReg srcAddr, int offset = 0);
void EmitStore(IntReg src, IntReg destAddr, int offset = 0);
void EmitStore(FloatReg src, IntReg destAddr, int offset = 0);

void EmitProlog(unsigned short cIntRegArgs, unsigned short cFpRegArgs, unsigned short cbStackSpace = 0);
void EmitEpilog();
};


Expand Down
110 changes: 0 additions & 110 deletions src/coreclr/vm/riscv64/stubs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1115,116 +1115,6 @@ void StubLinkerCPU::EmitJumpRegister(IntReg regTarget)
Emit32(0x00000067 | (regTarget << 15));
}

//------------------------------------------------------------------------
// Emits the RISC-V 64 stub prolog and records its shape (via DescribeProlog)
// so that EmitEpilog and the unwind info can be generated to match it.
//
// Arguments:
//    cIntRegArgs  - number of integer argument registers (x10..) to spill; at most 8
//    cFpRegArgs   - number of floating point argument registers (f10..) to spill; at most 8
//    cbStackSpace - bytes of local stack space requested; any padding needed to keep
//                   the frame 16-byte aligned is added to this area
//
// Throws kNotSupportedException when the padded frame would exceed 504 bytes.
//
void StubLinkerCPU::EmitProlog(unsigned short cIntRegArgs, unsigned short cFpRegArgs, unsigned short cbStackSpace)
{
    _ASSERTE(!m_fProlog);
    _ASSERTE(cFpRegArgs <= 8);
    _ASSERTE(cIntRegArgs <= 8);

    // Do the size arithmetic in a wide type. Narrowing to 'unsigned short' before the range
    // check below would let an oversized request wrap around and slip past the limit.
    const unsigned numberOfEntriesOnStack = 2u + cIntRegArgs + cFpRegArgs; // 2 for fp, ra
    const size_t cbRegisterSlots = numberOfEntriesOnStack * sizeof(void*);

    // Stack needs to be 16 byte aligned. Compute the required padding before saving it
    const size_t cbPaddedFrame = ALIGN_UP(cbStackSpace + cbRegisterSlots, 2 * sizeof(void*));

    // N.B. The frame is limited to 504 bytes so that the prolog stays efficiently expressible in unwind
    // codes. The largest offset in the typical unwindinfo encodings that we use is 504, so allocations
    // larger than 504 bytes would require setting the SP in multiple strides, which would complicate both
    // prolog and epilog generation as well as unwindinfo generation.
    // The check runs before anything is recorded or emitted so a rejected request leaves no trace.
    _ASSERTE((cbPaddedFrame <= 504) && "NYI:RISCV64 Implement StubLinker prologs with larger than 504 bytes of frame size");
    if (cbPaddedFrame > 504)
        COMPlusThrow(kNotSupportedException);

    // Both values are now known to fit: cbPaddedFrame <= 504.
    const unsigned short totalPaddedFrameSize = static_cast<unsigned short>(cbPaddedFrame);
    // The padding is going to be applied to the local stack
    cbStackSpace = static_cast<unsigned short>(cbPaddedFrame - cbRegisterSlots);

    // Record the parameters of this prolog so that we can generate a matching epilog and unwind info.
    DescribeProlog(cIntRegArgs, cFpRegArgs, cbStackSpace);

    // Here is how the stack would look like (Stack grows up)
    // [Low Address]
    //            +------------+
    //      SP -> |            | <-+
    //            :            :   | Stack Frame, (i.e outgoing arguments) including padding
    //            |            | <-+
    //            +------------+
    //            |    FP      |
    //            +------------+
    //            |    RA      |
    //            +------------+
    //            |    F10     | <-+
    //            +------------+   |
    //            :            :   | Fp Args
    //            +------------+   |
    //            |    F17     | <-+
    //            +------------+
    //            |    X10     | <-+
    //            +------------+   |
    //            :            :   | Int Args
    //            +------------+   |
    //            |    X17     | <-+
    //            +------------+
    //  Old SP -> |[Stack Args]|
    // [High Address]

    // Regarding the order of operations in the prolog and epilog;
    // If the prolog and the epilog match each other we can simplify emitting the unwind codes and save a few
    // bytes of unwind codes by making prolog and epilog share the same unwind codes.
    // In order to do that we need to make the epilog be the reverse of the prolog.
    // But we wouldn't want to add restoring of the argument registers as that's completely unnecessary.
    // Besides, saving argument registers cannot be expressed by the unwind code encodings.
    // So, we'll push saving the argument registers to the very last in the prolog, skip restoring it in epilog,
    // and also skip reporting it to the OS.
    //
    // Another bit that we can save is resetting the frame pointer.
    // This is not necessary when the SP doesn't get modified beyond prolog and epilog. (i.e no alloca/localloc)
    // And in that case we don't need to report setting up the FP either.

    // 1. Relocate SP
    EmitSubImm(RegSp, RegSp, totalPaddedFrameSize);

    unsigned cbOffset = 2 * sizeof(void*) + cbStackSpace; // 2 is for fp, ra

    // 2. Store FP/RA
    EmitStore(RegFp, RegSp, cbStackSpace);
    EmitStore(RegRa, RegSp, cbStackSpace + sizeof(void*));

    // 3. Set the frame pointer
    EmitMovReg(RegFp, RegSp);

    // 4. Store floating point argument registers (f10 upwards)
    for (unsigned short i = 0; i < cFpRegArgs; i++)
        EmitStore(FloatReg(i + 10), RegSp, cbOffset + i * sizeof(void*));

    // 5. Store int argument registers (x10 upwards)
    cbOffset += cFpRegArgs * sizeof(void*);
    for (unsigned short i = 0; i < cIntRegArgs; i++)
        EmitStore(IntReg(i + 10), RegSp, cbOffset + i * sizeof(void*));
}

//------------------------------------------------------------------------
// Emits the RISC-V 64 stub epilog: reloads FP and RA from the slots just above
// the local stack area, releases the frame and jumps to the return address.
// The steps are numbered to mirror the prolog in reverse order.
//
void StubLinkerCPU::EmitEpilog()
{
    _ASSERTE(m_fProlog);

    // Steps 5 and 4 (restoring the int and floating point argument registers) are
    // intentionally omitted: they are scratch registers.
    //
    // Step 3 (restoring SP from FP) is also omitted.
    // N.B. We're assuming that the stublinker stubs don't do alloca.

    // 2. Restore FP/RA. FP lives right after the local stack space, RA one slot above it.
    const auto cbFpSlot = m_cbStackSpace;
    const auto cbRaSlot = m_cbStackSpace + sizeof(void*);
    EmitLoad(RegFp, RegSp, cbFpSlot);
    EmitLoad(RegRa, RegSp, cbRaSlot);

    // 1. Restore SP
    EmitAddImm(RegSp, RegSp, GetStackFrameSize());

    // Return to the caller: jalr x0, 0(ra)
    EmitJumpRegister(RegRa);
}

// Instruction types as per RISC-V Spec, Chapter 24 RV32/64G Instruction Set Listings
static unsigned ITypeInstr(unsigned opcode, unsigned funct3, unsigned rd, unsigned rs1, int imm12)
{
Expand Down
6 changes: 0 additions & 6 deletions src/coreclr/vm/stublink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,12 +115,6 @@ StubLinker::StubLinker()
m_cbStackFrame = 0;
m_fPushArgRegs = FALSE;
#endif
#ifdef TARGET_RISCV64
m_fProlog = FALSE;
m_cIntRegArgs = 0;
m_cFpRegArgs = 0;
m_cbStackSpace = 0;
#endif
}


Expand Down
14 changes: 13 additions & 1 deletion src/coreclr/vm/stublink.h
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,11 @@ class StubLinker

// Marks the stub as containing only data (the default) or clears that mark.
// A data-only stub does not need FlushInstructionCache.
void SetDataOnly(BOOL fDataOnly = TRUE)
{
    LIMITED_METHOD_CONTRACT;
    m_fDataOnly = fDataOnly;
}

public:
#ifdef TARGET_ARM
void DescribeProlog(UINT cCalleeSavedRegs, UINT cbStackFrame, BOOL fPushArgRegs);
#endif

public:

//---------------------------------------------------------------
// Generate the actual stub. The returned stub has a refcount of 1.
Expand All @@ -190,6 +194,14 @@ class StubLinker
// internals.
BOOL m_fDataOnly; // the stub contains only data - does not need FlushInstructionCache

#ifdef TARGET_ARM
protected:
BOOL m_fProlog; // True if DescribeProlog has been called
UINT m_cCalleeSavedRegs; // Count of callee saved registers (0 == none, 1 == r4, 2 ==
// r4-r5 etc. up to 8 == r4-r11)
UINT m_cbStackFrame; // Count of bytes in the stack frame (excl of saved regs)
BOOL m_fPushArgRegs; // If true, r0-r3 are saved before callee saved regs
#endif // TARGET_ARM

CodeRun *AppendNewEmptyCodeRun();

Expand Down

0 comments on commit 4f82273

Please sign in to comment.