diff --git a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 3b034f6c37f66..a5a859ed57445 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -2893,6 +2893,145 @@ Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {
   return new ShuffleVectorInst(NewBO, SVI.getShuffleMask());
 }
 
+/// Describes whether and how a shuffle operand can be compacted.
+struct ShuffleOperandCompaction {
+  /// Whether this operand can be compacted (has a single use and is either
+  /// a constant or another shuffle instruction).
+  bool CanCompact;
+  /// Conservative heuristic: whether this operand's compaction justifies
+  /// the overall transformation (true for constants; false for shuffles).
+  bool ShouldCompact;
+  /// The minimal width required for the compacted vector.
+  unsigned CompactedWidth;
+  /// Function to create the compacted operand if the transformation applies.
+  std::function<Value *(unsigned, InstCombiner::BuilderTy &)> Apply;
+};
+
+/// Attempt to narrow/compact a constant vector used in a shuffle by removing
+/// elements that are not referenced by the shuffle mask.
+static ShuffleOperandCompaction
+compactShuffleOperand(Constant *ShuffleInput,
+                      MutableArrayRef<int> UserShuffleMask, int IndexStart) {
+  auto *VecTy = cast<FixedVectorType>(ShuffleInput->getType());
+  unsigned Width = VecTy->getNumElements();
+
+  // Collect only the constant elements that are actually used.
+  SmallVector<Constant *> CompactedElts;
+  // Map from original element index to compacted index.
+  SmallVector<int> IndexRemap(Width, -1);
+
+  for (int &MaskElt : UserShuffleMask) {
+    if (MaskElt >= IndexStart && MaskElt < IndexStart + (int)Width) {
+      int RelMaskElt = MaskElt - IndexStart;
+      if (IndexRemap[RelMaskElt] < 0) {
+        IndexRemap[RelMaskElt] = CompactedElts.size() + IndexStart;
+        CompactedElts.push_back(ShuffleInput->getAggregateElement(RelMaskElt));
+      }
+      MaskElt = IndexRemap[RelMaskElt];
+    }
+  }
+
+  return {true, true, static_cast<unsigned>(CompactedElts.size()),
+          [CompactedElts = std::move(CompactedElts),
+           VecTy](unsigned PaddedWidth,
+                  InstCombiner::BuilderTy &Builder) -> Value * {
+            // Pad with poison to reach the requested width.
+            SmallVector<Constant *> PaddedElts(CompactedElts);
+            while (PaddedElts.size() < PaddedWidth)
+              PaddedElts.push_back(PoisonValue::get(VecTy->getElementType()));
+
+            return ConstantVector::get(PaddedElts);
+          }};
+}
+
+/// Attempt to narrow/compact a shuffle instruction used in a shuffle by
+/// removing elements that are not referenced by the shuffle mask.
+static ShuffleOperandCompaction
+compactShuffleOperand(ShuffleVectorInst *ShuffleInput,
+                      MutableArrayRef<int> UserShuffleMask, int IndexStart) {
+  auto *VecTy = cast<FixedVectorType>(ShuffleInput->getType());
+  unsigned Width = VecTy->getNumElements();
+
+  // Collect only the shuffle mask elements that are actually used.
+  SmallVector<int> CompactedMask;
+  // Map from original element index to compacted index.
+  SmallVector<int> IndexRemap(Width, -1);
+
+  for (int &MaskElt : UserShuffleMask) {
+    if (MaskElt >= IndexStart && MaskElt < IndexStart + (int)Width) {
+      int RelMaskElt = MaskElt - IndexStart;
+      if (IndexRemap[RelMaskElt] < 0) {
+        IndexRemap[RelMaskElt] = CompactedMask.size() + IndexStart;
+        CompactedMask.push_back(ShuffleInput->getMaskValue(RelMaskElt));
+      }
+      MaskElt = IndexRemap[RelMaskElt];
+    }
+  }
+
+  return {true, false, static_cast<unsigned>(CompactedMask.size()),
+          [CompactedMask = std::move(CompactedMask),
+           ShuffleInput](unsigned PaddedWidth,
+                         InstCombiner::BuilderTy &Builder) -> Value * {
+            // Pad with poison mask elements to reach the requested width.
+            SmallVector<int> PaddedMask(CompactedMask);
+            while (PaddedMask.size() < PaddedWidth)
+              PaddedMask.push_back(PoisonMaskElem);
+
+            return Builder.CreateShuffleVector(ShuffleInput->getOperand(0),
+                                               ShuffleInput->getOperand(1),
+                                               PaddedMask);
+          }};
+}
+
+/// Try to narrow/compact a shuffle operand by eliminating elements that are
+/// not used by the shuffle mask. This updates the shuffle mask in-place to
+/// reflect the compaction. Returns information about whether compaction is
+/// possible and a lambda to apply the compaction if beneficial.
+static ShuffleOperandCompaction
+compactShuffleOperand(Value *ShuffleInput, MutableArrayRef<int> ShuffleMask,
+                      int IndexStart) {
+  if (ShuffleInput->getNumUses() > 1)
+    return {false, false, 0, nullptr};
+
+  if (auto *C = dyn_cast<Constant>(ShuffleInput))
+    return compactShuffleOperand(C, ShuffleMask, IndexStart);
+  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(ShuffleInput))
+    return compactShuffleOperand(Shuf, ShuffleMask, IndexStart);
+
+  return {false, false, 0, nullptr};
+}
+
+/// Try to narrow the shuffle by eliminating unused elements from the operands.
+static Instruction *tryCompactShuffleOperands(ShuffleVectorInst &SVI,
+                                              InstCombinerImpl &IC) {
+  Value *LHS = SVI.getOperand(0);
+  Value *RHS = SVI.getOperand(1);
+  ArrayRef<int> Mask = SVI.getShuffleMask();
+  unsigned LHSWidth = cast<FixedVectorType>(LHS->getType())->getNumElements();
+
+  SmallVector<int> NewMask(Mask.begin(), Mask.end());
+  ShuffleOperandCompaction LHSCompact = compactShuffleOperand(LHS, NewMask, 0);
+  ShuffleOperandCompaction RHSCompact =
+      compactShuffleOperand(RHS, NewMask, LHSWidth);
+  if (LHSCompact.CanCompact && RHSCompact.CanCompact &&
+      (LHSCompact.ShouldCompact || RHSCompact.ShouldCompact)) {
+    unsigned CompactWidth =
+        std::max(LHSCompact.CompactedWidth, RHSCompact.CompactedWidth);
+    if (CompactWidth < LHSWidth) {
+      IC.replaceOperand(SVI, 0, LHSCompact.Apply(CompactWidth, IC.Builder));
+      IC.replaceOperand(SVI, 1, RHSCompact.Apply(CompactWidth, IC.Builder));
+      // Adjust RHS indices in the mask to account for the new LHS width.
+      for (int &MaskElt : NewMask)
+        if (MaskElt >= (int)LHSWidth)
+          MaskElt = MaskElt - LHSWidth + CompactWidth;
+      SVI.setShuffleMask(NewMask);
+      return &SVI;
+    }
+  }
+
+  return nullptr;
+}
+
 Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   Value *LHS = SVI.getOperand(0);
   Value *RHS = SVI.getOperand(1);
@@ -3172,7 +3311,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   if (!match(RHSShuffle->getOperand(1), m_Poison()))
     RHSShuffle = nullptr;
   if (!LHSShuffle && !RHSShuffle)
-    return MadeChange ? &SVI : nullptr;
+    return MadeChange ? &SVI : tryCompactShuffleOperands(SVI, *this);
 
   Value* LHSOp0 = nullptr;
   Value* LHSOp1 = nullptr;
@@ -3212,7 +3351,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
   }
 
   if (newLHS == LHS && newRHS == RHS)
-    return MadeChange ? &SVI : nullptr;
+    return MadeChange ? &SVI : tryCompactShuffleOperands(SVI, *this);
 
   ArrayRef<int> LHSMask;
   ArrayRef<int> RHSMask;
@@ -3294,5 +3433,5 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
     return new ShuffleVectorInst(newLHS, newRHS, newMask);
   }
 
-  return MadeChange ? &SVI : nullptr;
+  return MadeChange ? &SVI : tryCompactShuffleOperands(SVI, *this);
 }
diff --git a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
index 056caabb6d60a..818a83fcc4103 100644
--- a/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
+++ b/llvm/test/Transforms/InstCombine/AMDGPU/simplify-demanded-vector-elts-lane-intrinsics.ll
@@ -115,8 +115,7 @@ define <2 x i16> @extract_elt32_v4i16_readfirstlane(<4 x i16> %src) {
 ; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32>
 ; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
-; CHECK-NEXT: [[VEC:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32>
 ; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]]
 ;
   %vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
@@ -287,8 +286,7 @@ define <2 x i32> @extract_elt13_v4i32_readfirstlane(<4 x i32> %src) {
 ; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SRC]], <4 x i32> poison, <3 x i32>
 ; CHECK-NEXT: [[TMP2:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]])
-; CHECK-NEXT: [[VEC:%.*]] = shufflevector <3 x i32> [[TMP2]], <3 x i32> poison, <4 x i32>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <3 x i32> [[TMP2]], <3 x i32> poison, <2 x i32>
 ; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]]
 ;
   %vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src)
@@ -328,8 +326,7 @@ define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1(i32 %src0,
 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32>
 ; CHECK-NEXT: [[TMP3:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP2]])
-; CHECK-NEXT: [[VEC:%.*]] = shufflevector <3 x i32> [[TMP3]], <3 x i32> poison, <4 x i32>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <3 x i32> [[TMP3]], <3 x i32> poison, <2 x i32>
 ; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]]
 ;
   %ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 1
@@ -372,8 +369,7 @@ define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergenc
 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32>
 ; CHECK-NEXT: [[TMP3:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP2]]) [ "convergencectrl"(token [[T]]) ]
-; CHECK-NEXT: [[VEC:%.*]] = shufflevector <3 x i32> [[TMP3]], <3 x i32> poison, <4 x i32>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <3 x i32> [[TMP3]], <3 x i32> poison, <2 x i32>
 ; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]]
 ;
   %t = call token @llvm.experimental.convergence.entry()
@@ -413,8 +409,7 @@ define <2 x i32> @extract_elt13_v8i32_readfirstlane(<8 x i32> %src) {
 ; CHECK-SAME: <8 x i32> [[SRC:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[SRC]], <8 x i32> poison, <3 x i32>
 ; CHECK-NEXT: [[TMP2:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]])
-; CHECK-NEXT: [[VEC:%.*]] = shufflevector <3 x i32> [[TMP2]], <3 x i32> poison, <8 x i32>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> poison, <2 x i32>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <3 x i32> [[TMP2]], <3 x i32> poison, <2 x i32>
 ; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]]
 ;
   %vec = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> %src)
@@ -439,8 +434,7 @@ define <3 x i32> @extract_elt124_v8i32_readfirstlane(<8 x i32> %src) {
 ; CHECK-SAME: <8 x i32> [[SRC:%.*]]) #[[ATTR0]] {
 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[SRC]], <8 x i32> poison, <4 x i32>
 ; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[TMP1]])
-; CHECK-NEXT: [[VEC:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32>
-; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> poison, <3 x i32>
+; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <3 x i32>
 ; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]]
 ;
   %vec = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> %src)
diff --git a/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll b/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll
index 8bc915e695aa7..5dba85b0b2452 100644
--- a/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/insert-extract-shuffle-inseltpoison.ll
@@ -86,8 +86,8 @@ define <8 x float> @widen_extract4(<8 x float> %ins, <2 x float> %ext) {
 
 define <8 x i16> @pr26015(<4 x i16> %t0) {
 ; CHECK-LABEL: @pr26015(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[T0:%.*]], <4 x i16> poison, <8 x i32>
-; CHECK-NEXT: [[T5:%.*]] = shufflevector <8 x i16> , <8 x i16> [[TMP1]], <8 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[T0:%.*]], <4 x i16> poison, <6 x i32>
+; CHECK-NEXT: [[T5:%.*]] = shufflevector <6 x i16> zeroinitializer, <6 x i16> [[TMP1]], <8 x i32>
 ; CHECK-NEXT: ret <8 x i16> [[T5]]
 ;
   %t1 = extractelement <4 x i16> %t0, i32 2
diff --git a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll
index 470d6be88672b..5d98e20cb7b01 100644
--- a/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll
@@ -86,8 +86,8 @@ define <8 x float> @widen_extract4(<8 x float> %ins, <2 x float> %ext) {
 
 define <8 x i16> @pr26015(<4 x i16> %t0) {
 ; CHECK-LABEL: @pr26015(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[T0:%.*]], <4 x i16> poison, <8 x i32>
-; CHECK-NEXT: [[T5:%.*]] = shufflevector <8 x i16> , <8 x i16> [[TMP1]], <8 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[T0:%.*]], <4 x i16> poison, <6 x i32>
+; CHECK-NEXT: [[T5:%.*]] = shufflevector <6 x i16> zeroinitializer, <6 x i16> [[TMP1]], <8 x i32>
 ; CHECK-NEXT: ret <8 x i16> [[T5]]
 ;
   %t1 = extractelement <4 x i16> %t0, i32 2
diff --git a/llvm/test/Transforms/InstCombine/shufflevec-compact-operands.ll b/llvm/test/Transforms/InstCombine/shufflevec-compact-operands.ll
new file mode 100644
index 0000000000000..d9c6b31c1f072
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/shufflevec-compact-operands.ll
@@ -0,0 +1,180 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=instcombine -S %s | FileCheck %s
+
+; Interleaving splat shuffle with constant operand - SHOULD compact
+define <8 x i8> @interleave_splat_constant(i8 %x) {
+; CHECK-LABEL: @interleave_splat_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i8> poison, i8 [[X:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> , <8 x i32>
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+  %1 = insertelement <4 x i8> poison, i8 %x, i32 0
+  %2 = shufflevector <4 x i8> %1, <4 x i8> poison, <4 x i32> zeroinitializer
+  %3 = shufflevector <4 x i8> %2, <4 x i8> poison, <8 x i32>
+  %4 = shufflevector <8 x i8> , <8 x i8> %3, <8 x i32>
+  ret <8 x i8> %4
+}
+
+; Interleaving constant with splat shuffle operand - SHOULD compact
+define <8 x i8> @interleave_constant_splat(i8 %x) {
+; CHECK-LABEL: @interleave_constant_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i8> poison, i8 [[X:%.*]], i64 0
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> , <4 x i8> [[TMP2]], <8 x i32>
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+  %1 = insertelement <4 x i8> poison, i8 %x, i32 0
+  %2 = shufflevector <4 x i8> %1, <4 x i8> poison, <4 x i32> zeroinitializer
+  %3 = shufflevector <4 x i8> %2, <4 x i8> poison, <8 x i32>
+  %4 = shufflevector <8 x i8> , <8 x i8> %3, <8 x i32>
+  ret <8 x i8> %4
+}
+
+; Interleaving random shuffle with constant operand - SHOULD compact
+define <8 x i8> @interleave_shuffle_constant(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: @interleave_shuffle_constant(
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> , <8 x i32>
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+  %1 = shufflevector <4 x i8> %x, <4 x i8> %y, <8 x i32>
+  %2 = shufflevector <8 x i8> %1, <8 x i8> , <8 x i32>
+  ret <8 x i8> %2
+}
+
+; Interleaving constant with random shuffle - SHOULD compact
+define <8 x i8> @interleave_constant_shuffle(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: @interleave_constant_shuffle(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> , <4 x i8> [[TMP1]], <8 x i32>
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+  %1 = shufflevector <4 x i8> %x, <4 x i8> %y, <8 x i32>
+  %2 = shufflevector <8 x i8> , <8 x i8> %1, <8 x i32>
+  ret <8 x i8> %2
+}
+
+; Randomly shuffle random shuffle with constant operand - SHOULD compact
+define <8 x i8> @shuffle_shuffle_constant(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: @shuffle_shuffle_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <5 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <5 x i8> [[TMP1]], <5 x i8> , <8 x i32>
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+  %1 = shufflevector <4 x i8> %x, <4 x i8> %y, <8 x i32>
+  %2 = shufflevector <8 x i8> %1, <8 x i8> , <8 x i32>
+  ret <8 x i8> %2
+}
+
+; Randomly shuffle constant with random shuffle - SHOULD compact
+define <8 x i8> @shuffle_constant_shuffle(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: @shuffle_constant_shuffle(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <5 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <5 x i8> , <5 x i8> [[TMP1]], <8 x i32>
+; CHECK-NEXT: ret <8 x i8> [[TMP2]]
+;
+  %1 = shufflevector <4 x i8> %x, <4 x i8> %y, <8 x i32>
+  %2 = shufflevector <8 x i8> , <8 x i8> %1, <8 x i32>
+  ret <8 x i8> %2
+}
+
+; Both operands are shuffles - does NOT compact
+define <8 x i32> @interleave_shuffle_shuffle(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: @interleave_shuffle_shuffle(
+; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[C:%.*]], <8 x i32>
+; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[C]], <8 x i32>
+; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <8 x i32> [[S1]], <8 x i32> [[S2]], <8 x i32>
+; CHECK-NEXT: ret <8 x i32> [[RESULT]]
+;
+  %s1 = shufflevector <4 x i32> %a, <4 x i32> %c, <8 x i32>
+  %s2 = shufflevector <4 x i32> %b, <4 x i32> %c, <8 x i32>
+  %result = shufflevector <8 x i32> %s1, <8 x i32> %s2, <8 x i32>
+  ret <8 x i32> %result
+}
+
+; Multiple uses of LHS (shufflevector) operand - does NOT compact
+define <8 x i8> @shuffle_multiple_users_shuffle_constant(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: @shuffle_multiple_users_shuffle_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <8 x i32>
+; CHECK-NEXT: call void @use_vec(<8 x i8> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> , <8 x i32>
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+  %1 = shufflevector <4 x i8> %x, <4 x i8> %y, <8 x i32>
+  call void @use_vec(<8 x i8> %1)
+  %2 = shufflevector <8 x i8> %1, <8 x i8> , <8 x i32>
+  ret <8 x i8> %2
+}
+
+; Multiple uses of RHS (constant) operand - does NOT compact
+define <8 x i8> @shuffle_shuffle_multiple_users_constant(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: @shuffle_shuffle_multiple_users_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <8 x i32>
+; CHECK-NEXT: call void @use_vec(<8 x i8> )
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> , <8 x i32>
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+  %1 = shufflevector <4 x i8> %x, <4 x i8> %y, <8 x i32>
+  call void @use_vec(<8 x i8> )
+  %2 = shufflevector <8 x i8> %1, <8 x i8> , <8 x i32>
+  ret <8 x i8> %2
+}
+
+; Interleaving non-compactible operand with constant operand - does NOT compact
+define <8 x i8> @interleave_argument_constant(<8 x i8> %x) {
+; CHECK-LABEL: @interleave_argument_constant(
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i8> [[X:%.*]], <8 x i8> , <8 x i32>
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+  %1 = shufflevector <8 x i8> %x, <8 x i8> , <8 x i32>
+  ret <8 x i8> %1
+}
+
+; Interleaving constant with non-compactible operand - does NOT compact
+define <8 x i8> @interleave_constant_argument(<8 x i8> %x) {
+; CHECK-LABEL: @interleave_constant_argument(
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i8> , <8 x i8> [[X:%.*]], <8 x i32>
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+  %1 = shufflevector <8 x i8> , <8 x i8> %x, <8 x i32>
+  ret <8 x i8> %1
+}
+
+; Different element type (f32) - SHOULD compact
+define <8 x float> @shuffle_shuffle_constant_float(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: @shuffle_shuffle_constant_float(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <5 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <5 x float> [[TMP1]], <5 x float> , <8 x i32>
+; CHECK-NEXT: ret <8 x float> [[TMP2]]
+;
+  %1 = shufflevector <4 x float> %x, <4 x float> %y, <8 x i32>
+  %2 = shufflevector <8 x float> %1, <8 x float> , <8 x i32>
+  ret <8 x float> %2
+}
+
+; Values from the operands are duplicated by the shuffle - SHOULD compact
+define <16 x i8> @shuffle_shuffle_constant_repeated(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: @shuffle_shuffle_constant_repeated(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <7 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <7 x i8> [[TMP1]], <7 x i8> , <16 x i32>
+; CHECK-NEXT: ret <16 x i8> [[TMP2]]
+;
+  %1 = shufflevector <4 x i8> %x, <4 x i8> %y, <8 x i32>
+  %2 = shufflevector <8 x i8> %1, <8 x i8> , <16 x i32>
+  ret <16 x i8> %2
+}
+
+; Values from the operands are duplicated by the shuffle - SHOULD compact
+define <16 x i8> @shuffle_constant_shuffle_repeated(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: @shuffle_constant_shuffle_repeated(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <7 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <7 x i8> , <7 x i8> [[TMP1]], <16 x i32>
+; CHECK-NEXT: ret <16 x i8> [[TMP2]]
+;
+  %1 = shufflevector <4 x i8> %x, <4 x i8> %y, <8 x i32>
+  %2 = shufflevector <8 x i8> , <8 x i8> %1, <16 x i32>
+  ret <16 x i8> %2
+}
+
+declare void @use_vec(<8 x i8>)
diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll b/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll
index 86fc5bbf72e7b..b9b552035fcbc 100644
--- a/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle-inseltpoison.ll
@@ -132,8 +132,8 @@ define <4 x i8> @test9(<16 x i8> %t6) {
 
 define <4 x i8> @test9a(<16 x i8> %t6) {
 ; CHECK-LABEL: @test9a(
-; CHECK-NEXT: [[T7:%.*]] = shufflevector <16 x i8> [[T6:%.*]], <16 x i8> poison, <4 x i32>
-; CHECK-NEXT: [[T9:%.*]] = shufflevector <4 x i8> [[T7]], <4 x i8> poison, <4 x i32>
+; CHECK-NEXT: [[T7:%.*]] = shufflevector <16 x i8> [[T6:%.*]], <16 x i8> poison, <3 x i32>
+; CHECK-NEXT: [[T9:%.*]] = shufflevector <3 x i8> [[T7]], <3 x i8> poison, <4 x i32>
 ; CHECK-NEXT: ret <4 x i8> [[T9]]
 ;
   %t7 = shufflevector <16 x i8> %t6, <16 x i8> poison, <4 x i32> < i32 undef, i32 9, i32 4, i32 8 >
diff --git a/llvm/test/Transforms/InstCombine/vec_shuffle.ll b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
index 39f76f18b13ca..a937d98d4a660 100644
--- a/llvm/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/llvm/test/Transforms/InstCombine/vec_shuffle.ll
@@ -122,8 +122,8 @@ define <4 x i8> @test9(<16 x i8> %t6) {
 
 define <4 x i8> @test9a(<16 x i8> %t6) {
 ; CHECK-LABEL: @test9a(
-; CHECK-NEXT: [[T7:%.*]] = shufflevector <16 x i8> [[T6:%.*]], <16 x i8> poison, <4 x i32>
-; CHECK-NEXT: [[T9:%.*]] = shufflevector <4 x i8> [[T7]], <4 x i8> poison, <4 x i32>
+; CHECK-NEXT: [[T7:%.*]] = shufflevector <16 x i8> [[T6:%.*]], <16 x i8> poison, <3 x i32>
+; CHECK-NEXT: [[T9:%.*]] = shufflevector <3 x i8> [[T7]], <3 x i8> poison, <4 x i32>
 ; CHECK-NEXT: ret <4 x i8> [[T9]]
 ;
   %t7 = shufflevector <16 x i8> %t6, <16 x i8> undef, <4 x i32> < i32 undef, i32 9, i32 4, i32 8 >
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/addsub.ll b/llvm/test/Transforms/PhaseOrdering/X86/addsub.ll
index de64bf2657f72..c7ee4cb165e0b 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/addsub.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/addsub.ll
@@ -334,8 +334,7 @@ define <4 x float> @test_addsub_v4f32_partial_23(<4 x float> %A, <4 x float> %B)
 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <2 x i32>
 ; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x float> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x float> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP4]], <4 x i32>
-; CHECK-NEXT: [[VECINSERT21:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> , <4 x i32>
+; CHECK-NEXT: [[VECINSERT21:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP4]], <4 x i32>
 ; CHECK-NEXT: ret <4 x float> [[VECINSERT21]]
 ;
   %1 = extractelement <4 x float> %A, i32 2
@@ -344,7 +343,7 @@ define <4 x float> @test_addsub_v4f32_partial_23(<4 x float> %A, <4 x float> %B)
   %3 = extractelement <4 x float> %A, i32 3
   %4 = extractelement <4 x float> %B, i32 3
   %add2 = fadd float %3, %4
-  %vecinsert1 = insertelement <4 x float> undef, float %sub2, i32 2
+  %vecinsert1 = insertelement <4 x float> poison, float %sub2, i32 2
   %vecinsert2 = insertelement <4 x float> %vecinsert1, float %add2, i32 3
   ret <4 x float> %vecinsert2
 }
@@ -374,8 +373,7 @@ define <4 x float> @test_addsub_v4f32_partial_12(<4 x float> %A, <4 x float> %B)
 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <2 x i32>
 ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x float> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x float> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP4]], <4 x i32>
-; CHECK-NEXT: [[VECINSERT21:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> , <4 x i32>
+; CHECK-NEXT: [[VECINSERT21:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP4]], <4 x i32>
 ; CHECK-NEXT: ret <4 x float> [[VECINSERT21]]
 ;
   %1 = extractelement <4 x float> %A, i32 2
@@ -384,7 +382,7 @@ define <4 x float> @test_addsub_v4f32_partial_12(<4 x float> %A, <4 x float> %B)
   %3 = extractelement <4 x float> %A, i32 1
   %4 = extractelement <4 x float> %B, i32 1
   %add = fadd float %3, %4
-  %vecinsert1 = insertelement <4 x float> undef, float %sub, i32 2
+  %vecinsert1 = insertelement <4 x float> poison, float %sub, i32 2
   %vecinsert2 = insertelement <4 x float> %vecinsert1, float %add, i32 1
   ret <4 x float> %vecinsert2
 }
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll b/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll
index 63f8250b5f3de..1985fce6c3706 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/hadd.ll
@@ -132,8 +132,8 @@ define <8 x i16> @add_v8i16_76u43210(<8 x i16> %a, <8 x i16> %b) {
 ; SSE2-NEXT: [[TMP6:%.*]] = add <8 x i16> [[TMP4]], [[TMP5]]
 ; SSE2-NEXT: [[HADD41:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP6]], <8 x i32>
 ; SSE2-NEXT: [[HADD6:%.*]] = shufflevector <8 x i16> [[HADD41]], <8 x i16> [[TMP2]], <8 x i32>
-; SSE2-NEXT: [[HADD7:%.*]] = shufflevector <8 x i16> [[HADD6]], <8 x i16> [[TMP3]], <8 x i32>
-; SSE2-NEXT: [[RESULT:%.*]] = shufflevector <8 x i16> [[HADD7]], <8 x i16> poison, <8 x i32>
+; SSE2-NEXT: [[HADD7:%.*]] = shufflevector <8 x i16> [[HADD6]], <8 x i16> [[TMP3]], <7 x i32>
+; SSE2-NEXT: [[RESULT:%.*]] = shufflevector <7 x i16> [[HADD7]], <7 x i16> poison, <8 x i32>
 ; SSE2-NEXT: ret <8 x i16> [[RESULT]]
 ;
 ; SSE4-LABEL: @add_v8i16_76u43210(
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/hsub.ll b/llvm/test/Transforms/PhaseOrdering/X86/hsub.ll
index bbfe844400b0c..5389587379d23 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/hsub.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/hsub.ll
@@ -132,8 +132,8 @@ define <8 x i16> @sub_v8i16_76u43210(<8 x i16> %a, <8 x i16> %b) {
 ; SSE2-NEXT: [[TMP6:%.*]] = sub <8 x i16> [[TMP4]], [[TMP5]]
 ; SSE2-NEXT: [[HSUB41:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> [[TMP6]], <8 x i32>
 ; SSE2-NEXT: [[HSUB6:%.*]] = shufflevector <8 x i16> [[HSUB41]], <8 x i16> [[TMP2]], <8 x i32>
-; SSE2-NEXT: [[HSUB7:%.*]] = shufflevector <8 x i16> [[HSUB6]], <8 x i16> [[TMP3]], <8 x i32>
-; SSE2-NEXT: [[RESULT:%.*]] = shufflevector <8 x i16> [[HSUB7]], <8 x i16> poison, <8 x i32>
+; SSE2-NEXT: [[HSUB7:%.*]] = shufflevector <8 x i16> [[HSUB6]], <8 x i16> [[TMP3]], <7 x i32>
+; SSE2-NEXT: [[RESULT:%.*]] = shufflevector <7 x i16> [[HSUB7]], <7 x i16> poison, <8 x i32>
 ; SSE2-NEXT: ret <8 x i16> [[RESULT]]
 ;
 ; SSE4-LABEL: @sub_v8i16_76u43210(