Skip to content
4 changes: 1 addition & 3 deletions src/coreclr/jit/fgbasic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -894,12 +894,10 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
const bool isForceInline = (info.compFlags & CORINFO_FLG_FORCEINLINE) != 0;
const bool makeInlineObservations = (compInlineResult != nullptr);
const bool isInlining = compIsForInlining();
const bool isPreJit = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_PREJIT);
const bool isTier1 = opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER1);
unsigned retBlocks = 0;
int prefixFlags = 0;
bool preciseScan = makeInlineObservations && compInlineResult->GetPolicy()->RequiresPreciseScan();
const bool resolveTokens = preciseScan && (isPreJit || isTier1);
const bool resolveTokens = preciseScan;

// Track offsets where IL instructions begin in DEBUG builds. Used to
// validate debug info generated by the JIT.
Expand Down
63 changes: 46 additions & 17 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2620,29 +2620,58 @@ unsigned Compiler::gtSetMultiOpOrder(GenTreeMultiOp* multiOp)
unsigned level = 0;
unsigned lvl2 = 0;

#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH)
if (multiOp->OperIs(GT_HWINTRINSIC) && (multiOp->GetOperandCount() == 1) &&
multiOp->AsHWIntrinsic()->OperIsMemoryLoadOrStore())
#if defined(FEATURE_HW_INTRINSICS)
if (multiOp->OperIs(GT_HWINTRINSIC))
{
costEx = IND_COST_EX;
costSz = 2;
GenTreeHWIntrinsic* hwTree = multiOp->AsHWIntrinsic();
#if defined(TARGET_XARCH)
if ((hwTree->GetOperandCount() == 1) && hwTree->OperIsMemoryLoadOrStore())
{
costEx = IND_COST_EX;
costSz = 2;

GenTree* addr = multiOp->Op(1)->gtEffectiveVal();
level = gtSetEvalOrder(addr);
GenTree* addr = hwTree->Op(1)->gtEffectiveVal();
level = gtSetEvalOrder(addr);

// See if we can form a complex addressing mode.
if (addr->OperIs(GT_ADD) && gtMarkAddrMode(addr, &costEx, &costSz, multiOp->TypeGet()))
{
// Nothing to do, costs have been set.
// See if we can form a complex addressing mode.
if (addr->OperIs(GT_ADD) && gtMarkAddrMode(addr, &costEx, &costSz, hwTree->TypeGet()))
{
// Nothing to do, costs have been set.
}
else
{
costEx += addr->GetCostEx();
costSz += addr->GetCostSz();
}

hwTree->SetCosts(costEx, costSz);
return level;
}
else
#endif
switch (hwTree->GetHWIntrinsicId())
{
costEx += addr->GetCostEx();
costSz += addr->GetCostSz();
#if defined(TARGET_XARCH)
case NI_Vector128_Create:
case NI_Vector256_Create:
#elif defined(TARGET_ARM64)
case NI_Vector64_Create:
case NI_Vector128_Create:
#endif
{
if ((hwTree->GetOperandCount() == 1) && hwTree->Op(1)->OperIsConst())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why are we only doing OperandCount == 1?

What about the cases where OperandCount == 2 through OperandCount == 32? Are those being properly tracked as "expensive" and getting CSEd?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tannergooding Yeah, they are automatically assigned a higher cost because of their multiple arguments, so the problem doesn't reproduce for them. But that's a good point: I guess Vector128.Create(1,2,3,4,5,6,7,8) currently gets a very high cost, while in reality it should still be 3/2.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's fine to log a separate issue for this and cover it there rather than here.

{
// Vector.Create(cns) is cheap but not that cheap to be (1,1)
costEx = IND_COST_EX;
costSz = 2;
level = gtSetEvalOrder(hwTree->Op(1));
hwTree->SetCosts(costEx, costSz);
return level;
}
break;
}
default:
break;
}

multiOp->SetCosts(costEx, costSz);
return level;
}
#endif // defined(FEATURE_SIMD) || defined(FEATURE_HW_INTRINSICS)

Expand Down