@@ -10312,6 +10312,11 @@ static bool isNonZeroElementsInOrder(const APInt &Zeroable,
1031210312 return true;
1031310313}
1031410314
10315+ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
10316+ ArrayRef<SDValue> Ops, SelectionDAG &DAG,
10317+ const X86Subtarget &Subtarget,
10318+ unsigned Depth = 0);
10319+
1031510320/// Try to lower a shuffle with a single PSHUFB of V1 or V2.
1031610321static SDValue lowerShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
1031710322 ArrayRef<int> Mask, SDValue V1,
@@ -10692,7 +10697,8 @@ static SDValue lowerShuffleAsVTRUNC(const SDLoc &DL, MVT VT, SDValue V1,
1069210697 SelectionDAG &DAG) {
1069310698 assert((VT.is128BitVector() || VT.is256BitVector()) &&
1069410699 "Unexpected VTRUNC type");
10695- if (!Subtarget.hasAVX512())
10700+ if (!Subtarget.hasAVX512() ||
10701+ (VT.is256BitVector() && !Subtarget.useAVX512Regs()))
1069610702 return SDValue();
1069710703
1069810704 unsigned NumElts = VT.getVectorNumElements();
@@ -10721,30 +10727,19 @@ static SDValue lowerShuffleAsVTRUNC(const SDLoc &DL, MVT VT, SDValue V1,
1072110727 bool UndefUppers =
1072210728 UpperElts > 0 && isUndefInRange(Mask, NumSrcElts, UpperElts);
1072310729
10730+ // As we're using both sources, we need to concat them together
10731+ // and truncate from the double-sized src.
10732+ MVT ConcatVT = VT.getDoubleNumVectorElementsVT();
10733+
1072410734 // For offset truncations, ensure that the concat is cheap.
10725- if (Offset) {
10726- auto IsCheapConcat = [&](SDValue Lo, SDValue Hi) {
10727- if (Lo.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
10728- Hi.getOpcode() == ISD::EXTRACT_SUBVECTOR)
10729- return Lo.getOperand(0) == Hi.getOperand(0);
10730- if (ISD::isNormalLoad(Lo.getNode()) &&
10731- ISD::isNormalLoad(Hi.getNode())) {
10732- auto *LDLo = cast<LoadSDNode>(Lo);
10733- auto *LDHi = cast<LoadSDNode>(Hi);
10734- return DAG.areNonVolatileConsecutiveLoads(
10735- LDHi, LDLo, Lo.getValueType().getStoreSize(), 1);
10736- }
10737- return false;
10738- };
10739- if (!IsCheapConcat(peekThroughBitcasts(V1), peekThroughBitcasts(V2)))
10735+ SDValue Src =
10736+ combineConcatVectorOps(DL, ConcatVT, {V1, V2}, DAG, Subtarget);
10737+ if (!Src) {
10738+ if (Offset)
1074010739 continue;
10740+ Src = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
1074110741 }
1074210742
10743- // As we're using both sources then we need to concat them together
10744- // and truncate from the double-sized src.
10745- MVT ConcatVT = MVT::getVectorVT(VT.getScalarType(), NumElts * 2);
10746- SDValue Src = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);
10747-
1074810743 MVT SrcSVT = MVT::getIntegerVT(SrcEltBits);
1074910744 MVT SrcVT = MVT::getVectorVT(SrcSVT, NumSrcElts);
1075010745 Src = DAG.getBitcast(SrcVT, Src);
@@ -42183,11 +42178,6 @@ static SDValue canonicalizeLaneShuffleWithRepeatedOps(SDValue V,
4218342178 return SDValue();
4218442179}
4218542180
42186- static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
42187- ArrayRef<SDValue> Ops, SelectionDAG &DAG,
42188- const X86Subtarget &Subtarget,
42189- unsigned Depth = 0);
42190-
4219142181/// Try to combine x86 target specific shuffles.
4219242182static SDValue combineTargetShuffle(SDValue N, const SDLoc &DL,
4219342183 SelectionDAG &DAG,
0 commit comments