Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 30 additions & 2 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5806,9 +5806,37 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType, compiler);
attr = emitActualTypeSize(Compiler::getSIMDTypeForSize(hwintrinsic->GetSimdSize()));

if (intrinsicId == NI_X86Base_Extract)
// We may have opportunistically selected an EVEX-only instruction
// that isn't actually required, so fall back to the VEX-compatible
// encoding to potentially save on the number of bytes emitted.

switch (ins)
{
ins = INS_pextrw_sse42;
case INS_pextrw:
{
// The encoding which supports containment is SSE4.1+ only
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_SSE42));

ins = INS_pextrw_sse42;
break;
}

case INS_vextractf64x2:
{
ins = INS_vextractf32x4;
break;
}

case INS_vextracti64x2:
{
ins = INS_vextracti32x4;
break;
}

default:
{
break;
}
}

// The hardware intrinsics take unsigned bytes between [0, 255].
Expand Down
9 changes: 7 additions & 2 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20539,7 +20539,7 @@ bool GenTree::isEmbeddedBroadcastCompatibleHWIntrinsic(Compiler* comp) const
{
NamedIntrinsic intrinsicId = AsHWIntrinsic()->GetHWIntrinsicId();
var_types simdBaseType = AsHWIntrinsic()->GetSimdBaseType();
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, simdBaseType, nullptr);
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, simdBaseType, comp);

if (comp->codeGen->instIsEmbeddedBroadcastCompatible(ins))
{
Expand Down Expand Up @@ -20784,7 +20784,12 @@ bool GenTree::isEmbeddedMaskingCompatible(Compiler* comp, unsigned tgtMaskSize,

if (tgtSimdBaseJitType != CORINFO_TYPE_UNDEF)
{
ins = HWIntrinsicInfo::lookupIns(intrinsic, simdBaseType, comp);
var_types tgtSimdBaseType = JitType2PreciseVarType(tgtSimdBaseJitType);

instruction tgtIns = HWIntrinsicInfo::lookupIns(intrinsic, tgtSimdBaseType, comp);
assert(ins != tgtIns);

ins = tgtIns;
maskBaseSize = CodeGenInterface::instKMaskBaseSize(ins);
}

Expand Down
167 changes: 103 additions & 64 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,108 +87,147 @@ instruction HWIntrinsicInfo::lookupIns(NamedIntrinsic id, var_types type, Compil
#endif // TARGET_X86

#if defined(TARGET_XARCH)
instruction evexIns = ins;

switch (ins)
if (comp != nullptr)
{
case INS_movdqa32:
instruction evexIns = ins;

switch (ins)
{
if (varTypeIsLong(type))
case INS_movdqa32:
{
evexIns = INS_vmovdqa64;
if (varTypeIsLong(type))
{
evexIns = INS_vmovdqa64;
}
break;
}
break;
}

case INS_movdqu32:
{
if (varTypeIsLong(type))
case INS_movdqu32:
{
evexIns = INS_vmovdqu64;
if (varTypeIsLong(type))
{
evexIns = INS_vmovdqu64;
}
break;
}
break;
}

case INS_vbroadcastf32x4:
{
if (type == TYP_DOUBLE)
case INS_pandd:
{
evexIns = INS_vbroadcastf64x2;
if (varTypeIsLong(type))
{
evexIns = INS_vpandq;
}
break;
}
break;
}

case INS_vbroadcasti32x4:
{
if (varTypeIsLong(type))
case INS_pandnd:
{
evexIns = INS_vbroadcasti64x2;
if (varTypeIsLong(type))
{
evexIns = INS_vpandnq;
}
break;
}
break;
}

case INS_vextractf32x4:
{
if (type == TYP_DOUBLE)
case INS_pord:
{
evexIns = INS_vextractf64x2;
if (varTypeIsLong(type))
{
evexIns = INS_vporq;
}
break;
}
else if (varTypeIsInt(type))

case INS_pxord:
{
evexIns = INS_vextracti32x4;
if (varTypeIsLong(type))
{
evexIns = INS_vpxorq;
}
break;
}
else if (varTypeIsLong(type))

case INS_vbroadcastf32x4:
{
evexIns = INS_vextracti64x2;
if (type == TYP_DOUBLE)
{
evexIns = INS_vbroadcastf64x2;
}
break;
}
break;
}

case INS_vextracti32x4:
{
if (varTypeIsLong(type))
case INS_vbroadcasti32x4:
{
if (varTypeIsLong(type))
{
evexIns = INS_vbroadcasti64x2;
}
break;
}

case INS_vextractf32x4:
{
evexIns = INS_vextracti64x2;
if (type == TYP_DOUBLE)
{
evexIns = INS_vextractf64x2;
}
else if (varTypeIsInt(type))
{
evexIns = INS_vextracti32x4;
}
else if (varTypeIsLong(type))
{
evexIns = INS_vextracti64x2;
}
break;
}
break;
}

case INS_vinsertf32x4:
{
if (type == TYP_DOUBLE)
case INS_vextracti32x4:
{
evexIns = INS_vinsertf64x2;
if (varTypeIsLong(type))
{
evexIns = INS_vextracti64x2;
}
break;
}
else if (varTypeIsInt(type))

case INS_vinsertf32x4:
{
evexIns = INS_vinserti32x4;
if (type == TYP_DOUBLE)
{
evexIns = INS_vinsertf64x2;
}
else if (varTypeIsInt(type))
{
evexIns = INS_vinserti32x4;
}
else if (varTypeIsLong(type))
{
evexIns = INS_vinserti64x2;
}
break;
}
else if (varTypeIsLong(type))

case INS_vinserti32x4:
{
evexIns = INS_vinserti64x2;
if (varTypeIsLong(type))
{
evexIns = INS_vinserti64x2;
}
break;
}
break;
}

case INS_vinserti32x4:
{
if (varTypeIsLong(type))
default:
{
evexIns = INS_vinserti64x2;
break;
}
break;
}

default:
if ((evexIns != ins) && comp->canUseEvexEncoding())
{
break;
ins = evexIns;
}
}

if ((evexIns != ins) && (comp != nullptr) && comp->canUseEvexEncoding())
{
ins = evexIns;
}
#endif // TARGET_XARCH

return ins;
Expand Down
38 changes: 38 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1059,6 +1059,44 @@ void CodeGen::genHWIntrinsic_R_RM(
{
instOptions = AddEmbBroadcastMode(instOptions);
}
else if ((instOptions == INS_OPTS_NONE) && !GetEmitter()->IsVexEncodableInstruction(ins))
{
// We may have opportunistically selected an EVEX-only instruction
// that isn't actually required, so fall back to the VEX-compatible
// encoding to potentially save on the number of bytes emitted.

switch (ins)
{
case INS_vbroadcastf64x2:
{
ins = INS_vbroadcastf32x4;
break;
}

case INS_vbroadcasti64x2:
{
ins = INS_vbroadcasti32x4;
break;
}

case INS_vmovdqa64:
{
ins = INS_movdqa32;
break;
}

case INS_vmovdqu64:
{
ins = INS_movdqu32;
break;
}

default:
{
break;
}
}
}

OperandDesc rmOpDesc = genOperandDesc(ins, rmOp);

Expand Down
Loading
Loading