Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
117 changes: 116 additions & 1 deletion src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25286,6 +25286,18 @@ bool GenTreeHWIntrinsic::OperIsCreateScalarUnsafe() const
}
}

//------------------------------------------------------------------------
// OperIsBitwiseHWIntrinsic: Is this HWIntrinsic a bitwise logic intrinsic node.
//
// Return Value:
// Whether "this" is a bitwise logic intrinsic node.
//
bool GenTreeHWIntrinsic::OperIsBitwiseHWIntrinsic() const
{
genTreeOps Oper = HWOperGet();
return Oper == GT_AND || Oper == GT_OR || Oper == GT_XOR || Oper == GT_AND_NOT;
}

//------------------------------------------------------------------------------
// OperRequiresAsgFlag : Check whether the operation requires GTF_ASG flag regardless
// of the children's flags.
Expand Down Expand Up @@ -25474,7 +25486,7 @@ void GenTreeHWIntrinsic::Initialize(NamedIntrinsic intrinsicId)
//------------------------------------------------------------------------------
// HWOperGet : Returns Oper based on the HWIntrinsicId
//
genTreeOps GenTreeHWIntrinsic::HWOperGet()
genTreeOps GenTreeHWIntrinsic::HWOperGet() const
{
switch (GetHWIntrinsicId())
{
Expand All @@ -25483,6 +25495,8 @@ genTreeOps GenTreeHWIntrinsic::HWOperGet()
case NI_SSE2_And:
case NI_AVX_And:
case NI_AVX2_And:
case NI_AVX512F_And:
case NI_AVX512DQ_And:
#elif defined(TARGET_ARM64)
case NI_AdvSimd_And:
#endif
Expand All @@ -25502,13 +25516,40 @@ genTreeOps GenTreeHWIntrinsic::HWOperGet()
case NI_SSE2_Xor:
case NI_AVX_Xor:
case NI_AVX2_Xor:
case NI_AVX512F_Xor:
case NI_AVX512DQ_Xor:
#elif defined(TARGET_ARM64)
case NI_AdvSimd_Xor:
#endif
{
return GT_XOR;
}

#if defined(TARGET_XARCH)
case NI_SSE_Or:
case NI_SSE2_Or:
case NI_AVX_Or:
case NI_AVX2_Or:
case NI_AVX512F_Or:
case NI_AVX512DQ_Or:
#elif defined(TARGET_ARM64)
case NI_AdvSimd_Or:
#endif
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to either include the `{ return GT_OR; }` as part of the `#if` block or, more ideally, add the relevant ARM64 case:

#elif defined(TARGET_ARM64)
    case NI_AdvSimd_Or:
#endif

{
return GT_OR;
}

#if defined(TARGET_XARCH)
case NI_SSE_AndNot:
case NI_SSE2_AndNot:
case NI_AVX_AndNot:
case NI_AVX2_AndNot:
case NI_AVX512F_AndNot:
case NI_AVX512DQ_AndNot:
{
return GT_AND_NOT;
}
#endif
// TODO: Handle other cases

default:
Expand Down Expand Up @@ -26295,6 +26336,80 @@ unsigned GenTreeHWIntrinsic::GetResultOpNumForRmwIntrinsic(GenTree* use, GenTree

return 0;
}

//------------------------------------------------------------------------
// GetTernaryControlByte: Compute the ternary logic control byte that is equivalent to
// applying the bitwise operation of "this" followed by the bitwise operation of `second`.
//
// Arguments:
//    second - the bitwise HWIntrinsic node that has "this" as one of its two operands
//
// Return Value:
//    The control byte encoding `(A op1 B) op2 C`, where op1 is the operation of "this"
//    and op2 is the operation of `second`.
//
// Notes:
//    Only GT_AND, GT_OR and GT_XOR are handled for either node; any other operation
//    reaches unreached().
//
uint8_t GenTreeHWIntrinsic::GetTernaryControlByte(GenTreeHWIntrinsic* second) const
{
    // The expected shape of the tree is:
    //
    //           /- A
    //           +- B
    //      t1 = binary logical op1     <- "this"
    //
    //           /- C
    //           +- t1
    //      t2 = binary logical op2     <- "second"
    //
    // Each of the three inputs is represented by a fixed key:
    //   * A: 0xF0
    //   * B: 0xCC
    //   * C: 0xAA
    //
    // Performing the same operations on the keys yields the control byte. For example,
    // (A & B) ^ C is encoded as (0xF0 & 0xCC) ^ 0xAA, which is 0x6A.
    assert(second->Op(1) == this || second->Op(2) == this);

    const uint8_t keyA = 0xF0;
    const uint8_t keyB = 0xCC;
    const uint8_t keyC = 0xAA;

    const genTreeOps innerOper = HWOperGet();
    const genTreeOps outerOper = second->HWOperGet();

    // Step 1: combine the keys of A and B using the operation of "this".
    uint8_t innerKey = 0;

    switch (innerOper)
    {
        case GT_AND:
        {
            innerKey = keyA & keyB;
            break;
        }

        case GT_OR:
        {
            innerKey = keyA | keyB;
            break;
        }

        case GT_XOR:
        {
            innerKey = keyA ^ keyB;
            break;
        }

        default:
        {
            unreached();
        }
    }

    // Step 2: combine that intermediate key with the key of C using the operation of `second`.
    uint8_t controlByte = 0;

    switch (outerOper)
    {
        case GT_AND:
        {
            controlByte = innerKey & keyC;
            break;
        }

        case GT_OR:
        {
            controlByte = innerKey | keyC;
            break;
        }

        case GT_XOR:
        {
            controlByte = innerKey ^ keyC;
            break;
        }

        default:
        {
            unreached();
        }
    }

    return controlByte;
}
#endif // TARGET_XARCH && FEATURE_HW_INTRINSICS

unsigned GenTreeLclFld::GetSize() const
Expand Down
4 changes: 3 additions & 1 deletion src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -6249,11 +6249,13 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic
bool OperIsEmbBroadcastCompatible() const;
bool OperIsBroadcastScalar() const;
bool OperIsCreateScalarUnsafe() const;
bool OperIsBitwiseHWIntrinsic() const;

bool OperRequiresAsgFlag() const;
bool OperRequiresCallFlag() const;

unsigned GetResultOpNumForRmwIntrinsic(GenTree* use, GenTree* op1, GenTree* op2, GenTree* op3);
uint8_t GetTernaryControlByte(GenTreeHWIntrinsic* second) const;

ClassLayout* GetLayout(Compiler* compiler) const;

Expand Down Expand Up @@ -6343,7 +6345,7 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic

static bool Equals(GenTreeHWIntrinsic* op1, GenTreeHWIntrinsic* op2);

genTreeOps HWOperGet();
genTreeOps HWOperGet() const;

private:
void SetHWIntrinsicId(NamedIntrinsic intrinsicId);
Expand Down
76 changes: 76 additions & 0 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1662,6 +1662,82 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
LowerFusedMultiplyAdd(node);
break;

case NI_SSE_And:
case NI_SSE2_And:
case NI_AVX_And:
case NI_AVX2_And:
case NI_AVX512F_And:
case NI_AVX512DQ_And:
case NI_SSE_Or:
case NI_SSE2_Or:
case NI_AVX_Or:
case NI_AVX2_Or:
case NI_AVX512F_Or:
case NI_AVX512DQ_Or:
case NI_SSE_Xor:
case NI_SSE2_Xor:
case NI_AVX_Xor:
case NI_AVX2_Xor:
case NI_AVX512F_Xor:
case NI_AVX512DQ_Xor:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are there any concerns around this becoming out of sync with OperIsBitwiseHWIntrinsic?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This list contains the intrinsics that can be folded into ternary logic; ANDNOT-related intrinsics cannot currently be folded. On the other hand, in my view OperIsBitwiseHWIntrinsic should behave consistently with its name, so I included ANDNOT there.

I could leave a comment there explaining this difference, if that would make things clearer.

{
if (!comp->IsBaselineVector512IsaSupportedOpportunistically())
{
break;
}
GenTree* op1 = node->Op(1);
GenTree* op2 = node->Op(2);

LIR::Use use;
if (BlockRange().TryGetUse(node, &use))
{
// search for structure like:
/*
/- A
+- B
t1 = binary logical op1

/- C
+- t1
t2 = binary logical op2
*/
GenTree* second = use.User();
if (!second->OperIs(GT_HWINTRINSIC) || !second->AsHWIntrinsic()->OperIsBitwiseHWIntrinsic())
{
break;
}

if (second->AsHWIntrinsic()->HWOperGet() == GT_AND_NOT)
{
// currently ANDNOT logic cannot be optimized by the ternary node.
break;
}
GenTree* op3 = second->AsHWIntrinsic()->Op(1) == node ? second->AsHWIntrinsic()->Op(2)
: second->AsHWIntrinsic()->Op(1);
GenTree* control = comp->gtNewIconNode(node->GetTernaryControlByte(second->AsHWIntrinsic()));
CorInfoType simdBaseJitType = node->GetSimdBaseJitType();
unsigned simdSize = node->GetSimdSize();
var_types simdType = Compiler::getSIMDTypeForSize(simdSize);
GenTree* ternaryNode =
comp->gtNewSimdTernaryLogicNode(simdType, op1, op2, op3, control, simdBaseJitType, simdSize);
BlockRange().InsertBefore(second, control, ternaryNode);
LIR::Use finalRes;
if (BlockRange().TryGetUse(second, &finalRes))
{
finalRes.ReplaceWith(ternaryNode);
}
else
{
ternaryNode->SetUnusedValue();
}
GenTree* next = node->gtNext;
BlockRange().Remove(node);
BlockRange().Remove(second);
return next;
}
break;
}

default:
break;
}
Expand Down