Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions src/coreclr/jit/fgbasic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1482,6 +1482,19 @@ void Compiler::fgFindJumpTargets(const BYTE* codeAddr, IL_OFFSET codeSize, Fixed
case NI_VectorT256_AsVectorUInt32:
case NI_VectorT256_AsVectorUInt64:
case NI_VectorT256_op_UnaryPlus:
case NI_Vector512_As:
case NI_Vector512_AsByte:
case NI_Vector512_AsDouble:
case NI_Vector512_AsInt16:
case NI_Vector512_AsInt32:
case NI_Vector512_AsInt64:
case NI_Vector512_AsNInt:
case NI_Vector512_AsNUInt:
case NI_Vector512_AsSByte:
case NI_Vector512_AsSingle:
case NI_Vector512_AsUInt16:
case NI_Vector512_AsUInt32:
case NI_Vector512_AsUInt64:
#endif // TARGET_XARCH
#endif // FEATURE_HW_INTRINSICS
case NI_SRCS_UNSAFE_As:
Expand Down
34 changes: 30 additions & 4 deletions src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1102,12 +1102,21 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node)
}

case NI_Vector128_ToVector256:
case NI_Vector128_ToVector512:
case NI_Vector256_ToVector512:
{
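// ToVector256 and ToVector512 have zero-extend semantics in order to ensure they are deterministic.
// We always emit a move to the target register, even when op1Reg == targetReg,
// in order to ensure that the bits above the source vector are zeroed: Bits MAXVL-1:128
// for a 128-bit source, or Bits MAXVL-1:256 for Vector256_ToVector512.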

attr = emitTypeSize(TYP_SIMD16);
if (intrinsicId == NI_Vector256_ToVector512)
{
attr = emitTypeSize(TYP_SIMD32);
}
else
{
attr = emitTypeSize(TYP_SIMD16);
}

if (op1->isContained() || op1->isUsedFromSpillTemp())
{
Expand All @@ -1124,15 +1133,24 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node)
case NI_Vector128_ToVector256Unsafe:
case NI_Vector256_ToVector512Unsafe:
case NI_Vector256_GetLower:
case NI_Vector512_GetLower:
case NI_Vector512_GetLower128:
{
if (op1->isContained() || op1->isUsedFromSpillTemp())
{
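// We want to always emit the narrowest encoding that covers the bits we need here:
// EA_32BYTE for Vector512_GetLower and Vector256_ToVector512Unsafe, EA_16BYTE otherwise.
//
// For the ToVector*Unsafe intrinsics the upper bits don't matter, and for the GetLower
// intrinsics we only actually need the lower 32 or 16 bytes, so we can just be "more efficient"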

genHWIntrinsic_R_RM(node, ins, EA_16BYTE, targetReg, op1);
if ((intrinsicId == NI_Vector512_GetLower) || (intrinsicId == NI_Vector256_ToVector512Unsafe))
{
attr = emitTypeSize(TYP_SIMD32);
}
else
{
attr = emitTypeSize(TYP_SIMD16);
}
genHWIntrinsic_R_RM(node, ins, attr, targetReg, op1);
}
else
{
Expand All @@ -1143,7 +1161,15 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node)
// so the upper bits aren't impactful either allowing the same.

// Just use movaps for reg->reg moves as it has zero-latency on modern CPUs
emit->emitIns_Mov(INS_movaps, EA_32BYTE, targetReg, op1Reg, /* canSkip */ true);
if ((intrinsicId == NI_Vector128_ToVector256Unsafe) || (intrinsicId == NI_Vector256_GetLower))
{
attr = emitTypeSize(TYP_SIMD32);
}
else
{
attr = emitTypeSize(TYP_SIMD64);
}
emit->emitIns_Mov(INS_movaps, attr, targetReg, op1Reg, /* canSkip */ true);
}
break;
}
Expand Down
Loading