Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5405,6 +5405,9 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)

switch (intrinsicId)
{
case NI_Vector128_ToScalar:
case NI_Vector256_ToScalar:
case NI_Vector512_ToScalar:
case NI_SSE2_ConvertToInt32:
case NI_SSE2_ConvertToUInt32:
case NI_SSE2_X64_ConvertToInt64:
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/decomposelongs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1702,11 +1702,15 @@ GenTree* DecomposeLongs::DecomposeHWIntrinsic(LIR::Use& use)
case NI_Vector128_GetElement:
case NI_Vector256_GetElement:
case NI_Vector512_GetElement:
{
return DecomposeHWIntrinsicGetElement(use, hwintrinsicTree);
}

default:
{
noway_assert(!"unexpected GT_HWINTRINSIC node in long decomposition");
break;
}
}

return nullptr;
Expand Down
46 changes: 45 additions & 1 deletion src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19067,6 +19067,9 @@ bool GenTree::isContainableHWIntrinsic() const
}

case NI_Vector128_GetElement:
case NI_Vector128_ToScalar:
case NI_Vector256_ToScalar:
case NI_Vector512_ToScalar:
case NI_SSE2_ConvertToInt32:
case NI_SSE2_ConvertToUInt32:
case NI_SSE2_X64_ConvertToInt64:
Expand Down Expand Up @@ -22057,27 +22060,56 @@ GenTree* Compiler::gtNewSimdGetElementNode(
assert(varTypeIsArithmetic(simdBaseType));

#if defined(TARGET_XARCH)
bool useToScalar = op2->IsIntegralConst(0);

#if defined(TARGET_X86)
// We handle decomposition via GetElement for simplicity
useToScalar &= !varTypeIsLong(simdBaseType);
#endif // TARGET_X86

if (useToScalar)
{
intrinsicId = NI_Vector128_ToScalar;

if (simdSize == 64)
{
intrinsicId = NI_Vector512_ToScalar;
}
else if (simdSize == 32)
{
intrinsicId = NI_Vector256_ToScalar;
}

return gtNewSimdHWIntrinsicNode(type, op1, intrinsicId, simdBaseJitType, simdSize);
}

switch (simdBaseType)
{
// Using software fallback if simdBaseType is not supported by hardware
case TYP_BYTE:
case TYP_UBYTE:
case TYP_INT:
case TYP_UINT:
case TYP_LONG:
case TYP_ULONG:
{
// Using software fallback if simdBaseType is not supported by hardware
assert(compIsaSupportedDebugOnly(InstructionSet_SSE41));
break;
}

case TYP_DOUBLE:
case TYP_FLOAT:
case TYP_SHORT:
case TYP_USHORT:
{
assert(compIsaSupportedDebugOnly(InstructionSet_SSE2));
break;
}

default:
{
unreached();
}
}

if (simdSize == 64)
Expand All @@ -22089,6 +22121,18 @@ GenTree* Compiler::gtNewSimdGetElementNode(
intrinsicId = NI_Vector256_GetElement;
}
#elif defined(TARGET_ARM64)
if (op2->IsIntegralConst(0))
{
intrinsicId = NI_Vector128_ToScalar;

if (simdSize == 8)
{
intrinsicId = NI_Vector64_ToScalar;
}

return gtNewSimdHWIntrinsicNode(type, op1, intrinsicId, simdBaseJitType, simdSize);
}

if (simdSize == 8)
{
intrinsicId = NI_Vector64_GetElement;
Expand Down
30 changes: 23 additions & 7 deletions src/coreclr/jit/hwintrinsicxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1678,34 +1678,48 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
{
assert(sig->numArgs == 2);

op2 = impStackTop(0).val;

switch (simdBaseType)
{
// Using software fallback if simdBaseType is not supported by hardware
case TYP_BYTE:
case TYP_UBYTE:
case TYP_INT:
case TYP_UINT:
case TYP_LONG:
case TYP_ULONG:
if (!compExactlyDependsOn(InstructionSet_SSE41))
{
bool useToScalar = op2->IsIntegralConst(0);

#if defined(TARGET_X86)
useToScalar &= !varTypeIsLong(simdBaseType);
#endif // TARGET_X86

if (!useToScalar && !compExactlyDependsOn(InstructionSet_SSE41))
{
// Using software fallback if simdBaseType is not supported by hardware
return nullptr;
}
break;
}

case TYP_DOUBLE:
case TYP_FLOAT:
case TYP_SHORT:
case TYP_USHORT:
{
// short/ushort/float/double is supported by SSE2
break;
}

default:
{
unreached();
}
}

GenTree* op2 = impPopStack().val;
GenTree* op1 = impSIMDPopStack();
impPopStack();
op1 = impSIMDPopStack();

retNode = gtNewSimdGetElementNode(retType, op1, op2, simdBaseJitType, simdSize);
break;
Expand Down Expand Up @@ -2543,16 +2557,18 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
{
assert(sig->numArgs == 1);

op1 = impSIMDPopStack();

#if defined(TARGET_X86)
if (varTypeIsLong(simdBaseType))
{
// TODO-XARCH-CQ: It may be beneficial to decompose this operation
// Create a GetElement node which handles decomposition
op2 = gtNewIconNode(0);
retNode = gtNewSimdGetElementNode(retType, op1, op2, simdBaseJitType, simdSize);
break;
}
#endif // TARGET_X86

// TODO-XARCH-CQ: It may be beneficial to import this as GetElement(0)
op1 = impSIMDPopStack();
retNode = gtNewSimdHWIntrinsicNode(retType, op1, intrinsic, simdBaseJitType, simdSize);
break;
}
Expand Down
Loading