Skip to content

Commit 0ddc132

Browse files
authored
Fix perf issues discovered in "For software performance, can you always trust inlining" blog (#61408)
1 parent 9872424 commit 0ddc132

File tree

2 files changed

+47
-20
lines changed

2 files changed

+47
-20
lines changed

src/coreclr/jit/fgbasic.cpp

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -894,12 +894,10 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
894894
const bool isForceInline = (info.compFlags & CORINFO_FLG_FORCEINLINE) != 0;
895895
const bool makeInlineObservations = (compInlineResult != nullptr);
896896
const bool isInlining = compIsForInlining();
897-
const bool isPreJit = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT);
898-
const bool isTier1 = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER1);
899897
unsigned retBlocks = 0;
900898
int prefixFlags = 0;
901899
bool preciseScan = makeInlineObservations && compInlineResult->GetPolicy()->RequiresPreciseScan();
902-
const bool resolveTokens = preciseScan && (isPreJit || isTier1);
900+
const bool resolveTokens = preciseScan;
903901

904902
// Track offsets where IL instructions begin in DEBUG builds. Used to
905903
// validate debug info generated by the JIT.

src/coreclr/jit/gentree.cpp

Lines changed: 46 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -2620,29 +2620,58 @@ unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp)
26202620
unsigned level = 0;
26212621
unsigned lvl2 = 0;
26222622

2623-
#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH)
2624-
if (multiOp->OperIs(GT_HWINTRINSIC) && (multiOp->GetOperandCount() == 1) &&
2625-
multiOp->AsHWIntrinsic()->OperIsMemoryLoadOrStore())
2623+
#if defined(FEATURE_HW_INTRINSICS)
2624+
if (multiOp->OperIs(GT_HWINTRINSIC))
26262625
{
2627-
costEx = IND_COST_EX;
2628-
costSz = 2;
2626+
GenTreeHWIntrinsic* hwTree = multiOp->AsHWIntrinsic();
2627+
#if defined(TARGET_XARCH)
2628+
if ((hwTree->GetOperandCount() == 1) && hwTree->OperIsMemoryLoadOrStore())
2629+
{
2630+
costEx = IND_COST_EX;
2631+
costSz = 2;
26292632

2630-
GenTree* addr = multiOp->Op(1)->gtEffectiveVal();
2631-
level = gtSetEvalOrder(addr);
2633+
GenTree* addr = hwTree->Op(1)->gtEffectiveVal();
2634+
level = gtSetEvalOrder(addr);
26322635

2633-
// See if we can form a complex addressing mode.
2634-
if (addr->OperIs(GT_ADD) && gtMarkAddrMode(addr, &costEx, &costSz, multiOp->TypeGet()))
2635-
{
2636-
// Nothing to do, costs have been set.
2636+
// See if we can form a complex addressing mode.
2637+
if (addr->OperIs(GT_ADD) && gtMarkAddrMode(addr, &costEx, &costSz, hwTree->TypeGet()))
2638+
{
2639+
// Nothing to do, costs have been set.
2640+
}
2641+
else
2642+
{
2643+
costEx += addr->GetCostEx();
2644+
costSz += addr->GetCostSz();
2645+
}
2646+
2647+
hwTree->SetCosts(costEx, costSz);
2648+
return level;
26372649
}
2638-
else
2650+
#endif
2651+
switch (hwTree->GetHWIntrinsicId())
26392652
{
2640-
costEx += addr->GetCostEx();
2641-
costSz += addr->GetCostSz();
2653+
#if defined(TARGET_XARCH)
2654+
case NI_Vector128_Create:
2655+
case NI_Vector256_Create:
2656+
#elif defined(TARGET_ARM64)
2657+
case NI_Vector64_Create:
2658+
case NI_Vector128_Create:
2659+
#endif
2660+
{
2661+
if ((hwTree->GetOperandCount() == 1) && hwTree->Op(1)->OperIsConst())
2662+
{
2663+
// Vector.Create(cns) is cheap, but not cheap enough to be costed as (1,1)
2664+
costEx = IND_COST_EX;
2665+
costSz = 2;
2666+
level = gtSetEvalOrder(hwTree->Op(1));
2667+
hwTree->SetCosts(costEx, costSz);
2668+
return level;
2669+
}
2670+
break;
2671+
}
2672+
default:
2673+
break;
26422674
}
2643-
2644-
multiOp->SetCosts(costEx, costSz);
2645-
return level;
26462675
}
26472676
#endif // defined(FEATURE_HW_INTRINSICS)
26482677

0 commit comments

Comments
 (0)