Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix regressions due to pipeline stalls introduced in #59415 #59497

Merged
merged 12 commits into from
Sep 23, 2021
Merged
10 changes: 5 additions & 5 deletions src/coreclr/jit/_typeinfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -405,8 +405,8 @@ class typeInfo
}

unsigned type = li.m_flags & TI_FLAG_DATA_MASK;
assert(TI_ERROR <
TI_ONLY_ENUM); // TI_ERROR looks like it needs more than enum. This optimises the success case a bit
assert(TI_ERROR < TI_ONLY_ENUM); // TI_ERROR looks like it needs more than enum. This optimises the success
// case a bit
if (type > TI_ONLY_ENUM)
{
return true;
Expand Down Expand Up @@ -664,9 +664,9 @@ class typeInfo
// as primitives
bool IsValueClassWithClsHnd() const
{
if ((GetType() == TI_STRUCT) ||
(m_cls && GetType() != TI_REF && GetType() != TI_METHOD &&
GetType() != TI_ERROR)) // necessary because if byref bit is set, we return TI_ERROR)
if ((GetType() == TI_STRUCT) || (m_cls && GetType() != TI_REF && GetType() != TI_METHOD &&
GetType() != TI_ERROR)) // necessary because if byref bit is set, we return
// TI_ERROR)
{
return true;
}
Expand Down
12 changes: 6 additions & 6 deletions src/coreclr/jit/assertionprop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2416,8 +2416,8 @@ AssertionInfo Compiler::optAssertionGenJtrue(GenTree* tree)
ValueNum op1VN = vnStore->VNConservativeNormalValue(op1->gtVNPair);
ValueNum op2VN = vnStore->VNConservativeNormalValue(op2->gtVNPair);
// If op1 is lcl and op2 is const or lcl, create assertion.
if ((op1->gtOper == GT_LCL_VAR) &&
((op2->OperKind() & GTK_CONST) || (op2->gtOper == GT_LCL_VAR))) // Fix for Dev10 851483
if ((op1->gtOper == GT_LCL_VAR) && ((op2->OperKind() & GTK_CONST) || (op2->gtOper == GT_LCL_VAR))) // Fix for Dev10
// 851483
{
return optCreateJtrueAssertions(op1, op2, assertionKind);
}
Expand Down Expand Up @@ -2741,10 +2741,10 @@ AssertionIndex Compiler::optAssertionIsSubrange(GenTree* tree, IntegralRange ran
for (AssertionIndex index = 1; index <= optAssertionCount; index++)
{
AssertionDsc* curAssertion = optGetAssertion(index);
if ((optLocalAssertionProp ||
BitVecOps::IsMember(apTraits, assertions, index - 1)) && // either local prop or use propagated assertions
(curAssertion->assertionKind == OAK_SUBRANGE) &&
(curAssertion->op1.kind == O1K_LCLVAR))
if ((optLocalAssertionProp || BitVecOps::IsMember(apTraits, assertions, index - 1)) && // either local prop or
// use propagated
// assertions
(curAssertion->assertionKind == OAK_SUBRANGE) && (curAssertion->op1.kind == O1K_LCLVAR))
{
// For local assertion prop use comparison on locals, and use comparison on vns for global prop.
bool isEqual = optLocalAssertionProp
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/jit/codegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1483,8 +1483,8 @@ void CodeGen::genCaptureFuncletPrologEpilogInfo()

if (compiler->lvaPSPSym != BAD_VAR_NUM)
{
if (CallerSP_to_PSP_slot_delta !=
compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for debugging
if (CallerSP_to_PSP_slot_delta != compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym)) // for
// debugging
{
printf("lvaGetCallerSPRelativeOffset(lvaPSPSym): %d\n",
compiler->lvaGetCallerSPRelativeOffset(compiler->lvaPSPSym));
Expand Down
3 changes: 2 additions & 1 deletion src/coreclr/jit/codegenlinear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,8 @@ void CodeGen::genCodeForBBlist()
genMarkLabelsForCodegen();

assert(!compiler->fgFirstBBScratch ||
compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch has to be first.
compiler->fgFirstBB == compiler->fgFirstBBScratch); // compiler->fgFirstBBScratch
// has to be first.

/* Initialize structures used in the block list iteration */
genInitialize();
Expand Down
101 changes: 32 additions & 69 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1808,8 +1808,8 @@ void CodeGen::genCodeForTreeNode(GenTree* treeNode)

// The last slot is reserved for ICodeManager::FixContext(ppEndRegion)
unsigned filterEndOffsetSlotOffs;
PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) >
TARGET_POINTER_SIZE); // below doesn't underflow.
PREFIX_ASSUME(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) > TARGET_POINTER_SIZE); // below doesn't
// underflow.
filterEndOffsetSlotOffs =
(unsigned)(compiler->lvaLclSize(compiler->lvaShadowSPslotsVar) - TARGET_POINTER_SIZE);

Expand Down Expand Up @@ -2837,21 +2837,8 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)
#endif
if (bytesWritten + regSize > size)
{
#ifdef TARGET_AMD64
if (size - bytesWritten <= XMM_REGSIZE_BYTES)
{
regSize = XMM_REGSIZE_BYTES;
}

// Shift dstOffset back to use full SIMD move
unsigned shiftBack = regSize - (size - bytesWritten);
assert(shiftBack <= regSize);
bytesWritten -= shiftBack;
dstOffset -= shiftBack;
#else
assert(srcIntReg != REG_NA);
break;
#endif
}

if (dstLclNum != BAD_VAR_NUM)
Expand All @@ -2866,6 +2853,11 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)

dstOffset += regSize;
bytesWritten += regSize;

if (regSize == YMM_REGSIZE_BYTES && size - bytesWritten < YMM_REGSIZE_BYTES)
{
regSize = XMM_REGSIZE_BYTES;
}
}

size -= bytesWritten;
Expand Down Expand Up @@ -3083,65 +3075,36 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node)
? YMM_REGSIZE_BYTES
: XMM_REGSIZE_BYTES;

for (; size >= regSize; size -= regSize, srcOffset += regSize, dstOffset += regSize)
while (size >= regSize)
{
if (srcLclNum != BAD_VAR_NUM)
{
emit->emitIns_R_S(simdMov, EA_ATTR(regSize), tempReg, srcLclNum, srcOffset);
}
else
for (; size >= regSize; size -= regSize, srcOffset += regSize, dstOffset += regSize)
{
emit->emitIns_R_ARX(simdMov, EA_ATTR(regSize), tempReg, srcAddrBaseReg, srcAddrIndexReg,
srcAddrIndexScale, srcOffset);
}
if (srcLclNum != BAD_VAR_NUM)
{
emit->emitIns_R_S(simdMov, EA_ATTR(regSize), tempReg, srcLclNum, srcOffset);
}
else
{
emit->emitIns_R_ARX(simdMov, EA_ATTR(regSize), tempReg, srcAddrBaseReg, srcAddrIndexReg,
srcAddrIndexScale, srcOffset);
}

if (dstLclNum != BAD_VAR_NUM)
{
emit->emitIns_S_R(simdMov, EA_ATTR(regSize), tempReg, dstLclNum, dstOffset);
}
else
{
emit->emitIns_ARX_R(simdMov, EA_ATTR(regSize), tempReg, dstAddrBaseReg, dstAddrIndexReg,
dstAddrIndexScale, dstOffset);
if (dstLclNum != BAD_VAR_NUM)
{
emit->emitIns_S_R(simdMov, EA_ATTR(regSize), tempReg, dstLclNum, dstOffset);
}
else
{
emit->emitIns_ARX_R(simdMov, EA_ATTR(regSize), tempReg, dstAddrBaseReg, dstAddrIndexReg,
dstAddrIndexScale, dstOffset);
}
}
}

if (size > 0)
{
if (size <= XMM_REGSIZE_BYTES)
if (regSize == YMM_REGSIZE_BYTES)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Probably worth adding a line of comment that further moves are not possible using YMM and so we will try XMM.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, good point. I'll add a note.

{
regSize = XMM_REGSIZE_BYTES;
}

// Copy the remainder by moving the last regSize bytes of the buffer
unsigned shiftBack = regSize - size;
assert(shiftBack <= regSize);

srcOffset -= shiftBack;
dstOffset -= shiftBack;

if (srcLclNum != BAD_VAR_NUM)
{
emit->emitIns_R_S(simdMov, EA_ATTR(regSize), tempReg, srcLclNum, srcOffset);
}
else
{
emit->emitIns_R_ARX(simdMov, EA_ATTR(regSize), tempReg, srcAddrBaseReg, srcAddrIndexReg,
srcAddrIndexScale, srcOffset);
}

if (dstLclNum != BAD_VAR_NUM)
{
emit->emitIns_S_R(simdMov, EA_ATTR(regSize), tempReg, dstLclNum, dstOffset);
}
else
{
emit->emitIns_ARX_R(simdMov, EA_ATTR(regSize), tempReg, dstAddrBaseReg, dstAddrIndexReg,
dstAddrIndexScale, dstOffset);
}
}

return;
}

// Fill the remainder with normal loads/stores
Expand Down Expand Up @@ -8483,10 +8446,10 @@ void CodeGen::genEmitHelperCall(unsigned helper, int argSize, emitAttr retSize,
}

/*****************************************************************************
* Unit testing of the XArch emitter: generate a bunch of instructions into the prolog
* (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late
* disassembler thinks the instructions as the same as we do.
*/
* Unit testing of the XArch emitter: generate a bunch of instructions into the prolog
* (it's as good a place as any), then use COMPlus_JitLateDisasm=* to see if the late
* disassembler thinks the instructions as the same as we do.
*/

// Uncomment "#define ALL_ARM64_EMITTER_UNIT_TESTS" to run all the unit tests here.
// After adding a unit test, and verifying it works, put it under this #ifdef, so we don't see it run every time.
Expand Down
6 changes: 1 addition & 5 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4519,7 +4519,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
// PostImportPhase: cleanup inlinees
//
auto postImportPhase = [this]() {

// If this is a viable inline candidate
if (compIsForInlining() && !compDonotInline())
{
Expand Down Expand Up @@ -4603,7 +4602,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
// that we can generate code out of them.
//
auto morphInitPhase = [this]() {

// Initialize the BlockSet epoch
NewBasicBlockEpoch();

Expand Down Expand Up @@ -4812,7 +4810,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
// Promote struct locals
//
auto promoteStructsPhase = [this]() {

// For x64 and ARM64 we need to mark irregular parameters
lvaRefCountState = RCS_EARLY;
fgResetImplicitByRefRefCount();
Expand Down Expand Up @@ -7011,8 +7008,7 @@ int jitNativeCode(CORINFO_METHOD_HANDLE methodHnd,
setErrorTrap(nullptr, Param*, pParam, pParamOuter)
{
if (pParam->inlineInfo)
{
// Lazily create the inlinee compiler object
{ // Lazily create the inlinee compiler object
if (pParam->inlineInfo->InlinerCompiler->InlineeCompiler == nullptr)
{
pParam->inlineInfo->InlinerCompiler->InlineeCompiler =
Expand Down
8 changes: 4 additions & 4 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -4147,10 +4147,10 @@ class Compiler
enum
{
PREFIX_TAILCALL_EXPLICIT = 0x00000001, // call has "tail" IL prefix
PREFIX_TAILCALL_IMPLICIT =
0x00000010, // call is treated as having "tail" prefix even though there is no "tail" IL prefix
PREFIX_TAILCALL_STRESS =
0x00000100, // call doesn't "tail" IL prefix but is treated as explicit because of tail call stress
PREFIX_TAILCALL_IMPLICIT = 0x00000010, // call is treated as having "tail" prefix even though there is no "tail"
// IL prefix
PREFIX_TAILCALL_STRESS = 0x00000100, // call doesn't "tail" IL prefix but is treated as explicit because of tail
// call stress
PREFIX_TAILCALL = (PREFIX_TAILCALL_EXPLICIT | PREFIX_TAILCALL_IMPLICIT | PREFIX_TAILCALL_STRESS),
PREFIX_VOLATILE = 0x00001000,
PREFIX_UNALIGNED = 0x00010000,
Expand Down
28 changes: 14 additions & 14 deletions src/coreclr/jit/compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -149,9 +149,9 @@ unsigned __int64 genFindHighestBit(unsigned __int64 mask)
#endif // 0

/*****************************************************************************
*
* Return true if the given 64-bit value has exactly zero or one bits set.
*/
*
* Return true if the given 64-bit value has exactly zero or one bits set.
*/

template <typename T>
inline bool genMaxOneBit(T value)
Expand All @@ -160,19 +160,19 @@ inline bool genMaxOneBit(T value)
}

/*****************************************************************************
*
* Return true if the given 32-bit value has exactly zero or one bits set.
*/
*
* Return true if the given 32-bit value has exactly zero or one bits set.
*/

inline bool genMaxOneBit(unsigned value)
{
return (value & (value - 1)) == 0;
}

/*****************************************************************************
*
* Return true if the given 64-bit value has exactly one bit set.
*/
*
* Return true if the given 64-bit value has exactly one bit set.
*/

template <typename T>
inline bool genExactlyOneBit(T value)
Expand All @@ -181,9 +181,9 @@ inline bool genExactlyOneBit(T value)
}

/*****************************************************************************
*
* Return true if the given 32-bit value has exactly zero or one bits set.
*/
*
* Return true if the given 32-bit value has exactly zero or one bits set.
*/

inline bool genExactlyOneBit(unsigned value)
{
Expand Down Expand Up @@ -833,8 +833,8 @@ inline Statement* Compiler::gtNewStmt(GenTree* expr, IL_OFFSETX offset)
inline GenTree* Compiler::gtNewOperNode(genTreeOps oper, var_types type, GenTree* op1, bool doSimplifications)
{
assert((GenTree::OperKind(oper) & (GTK_UNOP | GTK_BINOP)) != 0);
assert((GenTree::OperKind(oper) & GTK_EXOP) ==
0); // Can't use this to construct any types that extend unary/binary operator.
assert((GenTree::OperKind(oper) & GTK_EXOP) == 0); // Can't use this to construct any types that extend unary/binary
// operator.
assert(op1 != nullptr || oper == GT_RETFILT || oper == GT_NOP || (oper == GT_RETURN && type == TYP_VOID));

if (doSimplifications)
Expand Down
8 changes: 4 additions & 4 deletions src/coreclr/jit/ee_il_dll.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1257,10 +1257,10 @@ const char* jitHlpFuncTable[CORINFO_HELP_COUNT] = {
};

/*****************************************************************************
*
* Filter wrapper to handle exception filtering.
* On Unix compilers don't support SEH.
*/
*
* Filter wrapper to handle exception filtering.
* On Unix compilers don't support SEH.
*/

struct FilterSuperPMIExceptionsParam_ee_il
{
Expand Down
1 change: 0 additions & 1 deletion src/coreclr/jit/eeinterface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,6 @@ const char* Compiler::eeGetMethodFullName(CORINFO_METHOD_HANDLE hnd)

bool success = eeRunWithSPMIErrorTrap<FilterSuperPMIExceptionsParam_eeinterface>(
[](FilterSuperPMIExceptionsParam_eeinterface* pParam) {

/* figure out the signature */

pParam->pThis->eeGetMethodSig(pParam->hnd, &pParam->sig);
Expand Down
8 changes: 4 additions & 4 deletions src/coreclr/jit/emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2125,10 +2125,10 @@ void emitter::emitEndFnEpilog()
// because the only instruction is the last one and thus a slight
// underestimation of the epilog size is harmless (since the EIP
// can not be between instructions).
assert(emitEpilogCnt == 1 ||
(emitExitSeqSize - newSize) <= 5 // delta between size of various forms of jmp (size is either 6 or 5),
// and various forms of ret (size is either 1 or 3). The combination can
// be anything between 1 and 5.
assert(emitEpilogCnt == 1 || (emitExitSeqSize - newSize) <= 5 // delta between size of various forms of jmp
// (size is either 6 or 5), and various forms of
// ret (size is either 1 or 3). The combination
// can be anything between 1 and 5.
);
emitExitSeqSize = newSize;
}
Expand Down
6 changes: 3 additions & 3 deletions src/coreclr/jit/emitarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3188,8 +3188,8 @@ void emitter::emitIns_R_R_R(instruction ins,
case INS_mul:
if (insMustSetFlags(flags))
{
assert(reg1 !=
REG_PC); // VM debugging single stepper doesn't support PC register with this instruction.
assert(reg1 != REG_PC); // VM debugging single stepper doesn't support PC register with this
// instruction.
assert(reg2 != REG_PC);
assert(reg3 != REG_PC);

Expand Down Expand Up @@ -5754,7 +5754,7 @@ BYTE* emitter::emitOutputIT(BYTE* dst, instruction ins, insFormat fmt, code_t co
#endif // FEATURE_ITINSTRUCTION

/*****************************************************************************
*
*
* Append the machine code corresponding to the given instruction descriptor
* to the code block at '*dp'; the base of the code block is 'bp', and 'ig'
* is the instruction group that contains the instruction. Updates '*dp' to
Expand Down
Loading