Skip to content
4 changes: 1 addition & 3 deletions src/coreclr/jit/fgbasic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -888,12 +888,10 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
const bool isForceInline = (info.compFlags & CORINFO_FLG_FORCEINLINE) != 0;
const bool makeInlineObservations = (compInlineResult != nullptr);
const bool isInlining = compIsForInlining();
const bool isPreJit = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT);
const bool isTier1 = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER1);
unsigned retBlocks = 0;
int prefixFlags = 0;
bool preciseScan = makeInlineObservations && compInlineResult->GetPolicy()->RequiresPreciseScan();
const bool resolveTokens = preciseScan && (isPreJit || isTier1);
const bool resolveTokens = preciseScan;

// Track offsets where IL instructions begin in DEBUG builds. Used to
// validate debug info generated by the JIT.
Expand Down
31 changes: 28 additions & 3 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3948,10 +3948,12 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
costSz = 2 * 2;
break;

#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH)
#if defined(FEATURE_HW_INTRINSICS)
case GT_HWINTRINSIC:
{
if (tree->AsHWIntrinsic()->OperIsMemoryLoadOrStore())
GenTreeHWIntrinsic* hwTree = tree->AsHWIntrinsic();
#if defined(TARGET_XARCH)
if (hwTree->OperIsMemoryLoadOrStore())
{
costEx = IND_COST_EX;
costSz = 2;
Expand All @@ -3964,9 +3966,32 @@ unsigned Compiler::gtSetEvalOrder(GenTree* tree)
goto DONE;
}
}
#endif

switch (hwTree->gtHWIntrinsicId)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to review costing for HW intrinsics more broadly?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We likely should. We probably aren't accounting for cases where helper intrinsics are more expensive than they appear, or for cases where operands cost less because of the special handling that hardware intrinsics get.

There are also probably cases where operands (like scalar DBL_CNS) currently participate in overall CSE but shouldn't in certain situations.

{
#if defined(TARGET_XARCH)
case NI_Vector128_Create:
case NI_Vector256_Create:
#elif defined(TARGET_ARM64)
case NI_Vector64_Create:
case NI_Vector128_Create:
#endif
{
if (hwTree->gtGetOp1()->OperIsConst() && (hwTree->gtGetOp2() == nullptr))
{
// Vector.Create(cns) is cheap but not that cheap to be (1,1)
costEx = 2;
costSz = 2;
}
break;
}
default:
break;
}
}
break;
#endif // FEATURE_HW_INTRINSICS && TARGET_XARCH
#endif // FEATURE_HW_INTRINSICS

case GT_BLK:
case GT_IND:
Expand Down