diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
index 3e06f74fa5c65..47ebe2ca24340 100644
--- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
+++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp
@@ -143,6 +143,7 @@ class VectorCombine {
   bool foldShufflesOfLengthChangingShuffles(Instruction &I);
   bool foldShuffleOfIntrinsics(Instruction &I);
   bool foldShuffleToIdentity(Instruction &I);
+  bool compactShuffleOperands(Instruction &I);
   bool foldShuffleFromReductions(Instruction &I);
   bool foldShuffleChainsToReduce(Instruction &I);
   bool foldCastFromReductions(Instruction &I);
@@ -2762,6 +2763,239 @@ bool VectorCombine::foldShuffleOfCastops(Instruction &I) {
   return true;
 }
 
+/// Describes whether and how a shuffle operand can be compacted.
+struct ShuffleOperandCompaction {
+  /// The cost difference between the compacted and the original operand. Used
+  /// to avoid compactions that increase cost. Zero if compaction cannot be
+  /// applied, but note that valid compactions may also have zero cost.
+  InstructionCost Cost;
+  /// The minimal width required for the compacted vector.
+  unsigned CompactedWidth;
+  /// Function to create the compacted operand, or nullptr if no compaction can
+  /// be applied.
+  std::function<Value *(unsigned, IRBuilderBase &)> Apply;
+};
+
+/// Attempt to narrow/compact a constant vector used in a shuffle by removing
+/// elements that are not referenced by the shuffle mask.
+static ShuffleOperandCompaction
+compactShuffleOperand(Constant *ShuffleInput,
+                      MutableArrayRef<int> UserShuffleMask, int IndexStart) {
+  auto *VecTy = cast<FixedVectorType>(ShuffleInput->getType());
+  unsigned Width = VecTy->getNumElements();
+
+  // Collect only the constant elements that are actually used.
+  SmallVector<Constant *> CompactedElts;
+  // Map from original element index to compacted index.
+  SmallVector<int> IndexRemap(Width, -1);
+
+  // Track whether used elements are already compacted at the front. Even if
+  // true, we may still shrink this operand by not re-adding trailing poison.
+  bool AlreadyCompacted = true;
+
+  // This modifies UserShuffleMask in place, so we cannot back out of
+  // transforming this operand once compactShuffleOperands proceeds on the
+  // instruction.
+  for (int &MaskElt : UserShuffleMask) {
+    if (MaskElt >= IndexStart && MaskElt < IndexStart + (int)Width) {
+      int RelMaskElt = MaskElt - IndexStart;
+      if (IndexRemap[RelMaskElt] < 0) {
+        IndexRemap[RelMaskElt] = CompactedElts.size() + IndexStart;
+        CompactedElts.push_back(ShuffleInput->getAggregateElement(RelMaskElt));
+      }
+      if (IndexRemap[RelMaskElt] != MaskElt) {
+        AlreadyCompacted = false;
+        MaskElt = IndexRemap[RelMaskElt];
+      }
+    }
+  }
+
+  unsigned CompactedWidth = CompactedElts.size();
+
+  // To determine the eventual width (between CompactedWidth and Width), we
+  // have to consider the other operand. Hence, we return a functor here to
+  // delay constructing the new operand.
+  return {0, CompactedWidth,
+          [ShuffleInput, AlreadyCompacted, Width, VecTy,
+           CompactedElts = std::move(CompactedElts)](
+              unsigned PaddedWidth,
+              IRBuilderBase &Builder) -> Value * {
+            // Return original if unchanged to guarantee fixpoint termination.
+            if (AlreadyCompacted && Width == PaddedWidth)
+              return ShuffleInput;
+
+            // Pad with poison to reach the requested width.
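+            // Both operands of the user shuffle must share one vector type,
+            // so the operand is padded to the common PaddedWidth chosen by
+            // the caller rather than to its own minimal CompactedWidth.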
+            SmallVector<Constant *> PaddedElts(CompactedElts);
+            while (PaddedElts.size() < PaddedWidth)
+              PaddedElts.push_back(PoisonValue::get(VecTy->getElementType()));
+
+            return ConstantVector::get(PaddedElts);
+          }};
+}
+
+/// Attempt to narrow/compact a shuffle instruction used in a shuffle by
+/// removing elements that are not referenced by the shuffle mask.
+static ShuffleOperandCompaction
+compactShuffleOperand(ShuffleVectorInst *ShuffleInput,
+                      MutableArrayRef<int> UserShuffleMask, int IndexStart,
+                      const TargetTransformInfo &TTI,
+                      TTI::TargetCostKind CostKind) {
+  auto *VecTy = cast<FixedVectorType>(ShuffleInput->getType());
+  unsigned Width = VecTy->getNumElements();
+
+  // Collect only the shuffle mask elements that are actually used.
+  SmallVector<int> CompactedMask;
+  // Map from original element index to compacted index.
+  SmallVector<int> IndexRemap(Width, -1);
+
+  // Track whether used elements are already compacted at the front. Even if
+  // true, we may still shrink this operand by not re-adding trailing poison.
+  bool AlreadyCompacted = true;
+
+  // This modifies UserShuffleMask in place, so we cannot back out of
+  // transforming this operand once compactShuffleOperands proceeds on the
+  // instruction.
+  for (int &MaskElt : UserShuffleMask) {
+    if (MaskElt >= IndexStart && MaskElt < IndexStart + (int)Width) {
+      int RelMaskElt = MaskElt - IndexStart;
+      if (IndexRemap[RelMaskElt] < 0) {
+        IndexRemap[RelMaskElt] = CompactedMask.size() + IndexStart;
+        CompactedMask.push_back(ShuffleInput->getMaskValue(RelMaskElt));
+      }
+      if (IndexRemap[RelMaskElt] != MaskElt) {
+        AlreadyCompacted = false;
+        MaskElt = IndexRemap[RelMaskElt];
+      }
+    }
+  }
+
+  unsigned CompactedWidth = CompactedMask.size();
+
+  // Check if the compacted shuffle would be more expensive than the original.
+  InstructionCost CompactionCost(0);
+  if (!AlreadyCompacted) {
+    ArrayRef<int> OriginalMask = ShuffleInput->getShuffleMask();
+    auto *OriginalSrcTy =
+        cast<FixedVectorType>(ShuffleInput->getOperand(0)->getType());
+
+    InstructionCost OriginalCost =
+        TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, VecTy,
+                           OriginalSrcTy, OriginalMask, CostKind);
+
+    // Create a type for the compacted shuffle result.
+    auto *CompactedDstTy =
+        FixedVectorType::get(VecTy->getElementType(), CompactedWidth);
+
+    InstructionCost CompactedCost = TTI.getShuffleCost(
+        TargetTransformInfo::SK_PermuteTwoSrc, CompactedDstTy, OriginalSrcTy,
+        CompactedMask, CostKind);
+
+    CompactionCost = CompactedCost - OriginalCost;
+  }
+
+  // To determine the eventual width (between CompactedWidth and Width), we
+  // have to consider the other operand. Hence, we return a functor here to
+  // delay constructing the new operand.
+  return {CompactionCost, CompactedWidth,
+          [ShuffleInput, AlreadyCompacted, Width,
+           CompactedMask = std::move(CompactedMask)](
+              unsigned PaddedWidth,
+              IRBuilderBase &Builder) -> Value * {
+            // Return original if unchanged to guarantee fixpoint termination.
+            if (AlreadyCompacted && Width == PaddedWidth)
+              return ShuffleInput;
+
+            // Pad with poison mask elements to reach the requested width.
+            SmallVector<int> PaddedMask(CompactedMask);
+            while (PaddedMask.size() < PaddedWidth)
+              PaddedMask.push_back(PoisonMaskElem);
+
+            return Builder.CreateShuffleVector(ShuffleInput->getOperand(0),
+                                               ShuffleInput->getOperand(1),
+                                               PaddedMask);
+          }};
+}
+
+/// Try to narrow/compact a shuffle operand by eliminating elements that are
+/// not used by the shuffle mask. This updates the shuffle mask in-place to
+/// reflect the compaction. Returns information about whether compaction is
+/// possible and a lambda to apply the compaction if beneficial.
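+///
+/// Illustrative example (masks invented for exposition, not taken from the
+/// tests): if only lane 1 of a constant RHS is referenced,
+///   shufflevector <4 x i8> %x, <4 x i8> <i8 0, i8 1, i8 2, i8 3>,
+///                 <2 x i32> <i32 1, i32 5>
+/// the used element is moved to the front of the constant and the user mask
+/// is remapped to the compacted lane:
+///   shufflevector <4 x i8> %x,
+///                 <4 x i8> <i8 1, i8 poison, i8 poison, i8 poison>,
+///                 <2 x i32> <i32 1, i32 4>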
+static ShuffleOperandCompaction
+compactShuffleOperand(Value *ShuffleInput, MutableArrayRef<int> ShuffleMask,
+                      int IndexStart, const TargetTransformInfo &TTI,
+                      TTI::TargetCostKind CostKind) {
+  auto *VecTy = cast<FixedVectorType>(ShuffleInput->getType());
+  unsigned Width = VecTy->getNumElements();
+  if (ShuffleInput->getNumUses() > 1)
+    return {0, Width, nullptr};
+
+  if (auto *C = dyn_cast<Constant>(ShuffleInput))
+    return compactShuffleOperand(C, ShuffleMask, IndexStart);
+  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(ShuffleInput))
+    return compactShuffleOperand(Shuf, ShuffleMask, IndexStart, TTI, CostKind);
+
+  return {0, Width, nullptr};
+}
+
+/// Try to narrow the shuffle by eliminating unused elements from the operands.
+bool VectorCombine::compactShuffleOperands(Instruction &I) {
+  Value *LHS, *RHS;
+  ArrayRef<int> Mask;
+  if (!match(&I, m_Shuffle(m_Value(LHS), m_Value(RHS), m_Mask(Mask))))
+    return false;
+
+  // Require at least one constant operand to ensure profitability.
+  if (!isa<Constant>(LHS) && !isa<Constant>(RHS))
+    return false;
+
+  auto *LHSTy = dyn_cast<FixedVectorType>(LHS->getType());
+  if (!LHSTy)
+    return false;
+
+  // Analyze both operands. This updates NewMask in-place to reflect the
+  // compaction.
+  unsigned LHSWidth = LHSTy->getNumElements();
+  SmallVector<int> NewMask(Mask.begin(), Mask.end());
+  ShuffleOperandCompaction LHSCompact =
+      compactShuffleOperand(LHS, NewMask, 0, TTI, CostKind);
+  ShuffleOperandCompaction RHSCompact =
+      compactShuffleOperand(RHS, NewMask, LHSWidth, TTI, CostKind);
+
+  unsigned CompactedWidth =
+      std::max(LHSCompact.CompactedWidth, RHSCompact.CompactedWidth);
+
+  // Check the total cost: compacting the operands plus the change to the
+  // outer shuffle.
+  if (LHSCompact.Apply || RHSCompact.Apply) {
+    auto *ShuffleDstTy = cast<FixedVectorType>(I.getType());
+    InstructionCost CostBefore =
+        TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, ShuffleDstTy,
+                           LHSTy, Mask, CostKind, 0, nullptr, {LHS, RHS}, &I);
+
+    InstructionCost CostAfter =
+        TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, ShuffleDstTy,
+                           LHSTy, NewMask, CostKind);
+
+    InstructionCost OuterCost = CostAfter - CostBefore;
+
+    if (OuterCost + LHSCompact.Cost + RHSCompact.Cost > 0)
+      return false;
+  } else if (CompactedWidth == LHSWidth)
+    return false;
+
+  Value *NewLHS =
+      LHSCompact.Apply ? LHSCompact.Apply(CompactedWidth, Builder) : LHS;
+  Value *NewRHS =
+      RHSCompact.Apply ? RHSCompact.Apply(CompactedWidth, Builder) : RHS;
+
+  // Ensure we eventually terminate the optimization fixpoint loop.
+  if (LHS == NewLHS && RHS == NewRHS)
+    return false;
+
+  // Adjust RHS indices in the mask to account for the new LHS width.
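+  // E.g. if the LHS shrank from 8 lanes to a CompactedWidth of 5, a mask
+  // element of 8 (the first RHS lane) is rewritten to 5.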
+  for (int &MaskElt : NewMask)
+    if (MaskElt >= (int)LHSWidth)
+      MaskElt = MaskElt - LHSWidth + CompactedWidth;
+
+  Value *NewShuf = Builder.CreateShuffleVector(NewLHS, NewRHS, NewMask);
+  replaceValue(I, *NewShuf);
+  return true;
+}
+
 /// Try to convert any of:
 /// "shuffle (shuffle x, y), (shuffle y, x)"
 /// "shuffle (shuffle x, undef), (shuffle y, undef)"
@@ -5034,6 +5268,8 @@ bool VectorCombine::run() {
         return true;
       if (foldShuffleToIdentity(I))
         return true;
+      if (compactShuffleOperands(I))
+        return true;
       break;
     case Instruction::Load:
       if (shrinkLoadForShuffles(I))
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/addsub.ll b/llvm/test/Transforms/PhaseOrdering/X86/addsub.ll
index de64bf2657f72..e3c1318278d38 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/addsub.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/addsub.ll
@@ -334,8 +334,7 @@ define <4 x float> @test_addsub_v4f32_partial_23(<4 x float> %A, <4 x float> %B)
 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <2 x i32>
 ; CHECK-NEXT: [[TMP3:%.*]] = fsub <2 x float> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x float> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP4]], <4 x i32>
-; CHECK-NEXT: [[VECINSERT21:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> , <4 x i32>
+; CHECK-NEXT: [[VECINSERT21:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP4]], <4 x i32>
 ; CHECK-NEXT: ret <4 x float> [[VECINSERT21]]
 ;
 %1 = extractelement <4 x float> %A, i32 2
@@ -344,7 +343,7 @@ define <4 x float> @test_addsub_v4f32_partial_23(<4 x float> %A, <4 x float> %B)
 %3 = extractelement <4 x float> %A, i32 3
 %4 = extractelement <4 x float> %B, i32 3
 %add2 = fadd float %3, %4
- %vecinsert1 = insertelement <4 x float> undef, float %sub2, i32 2
+ %vecinsert1 = insertelement <4 x float> poison, float %sub2, i32 2
 %vecinsert2 = insertelement <4 x float> %vecinsert1, float %add2, i32 3
 ret <4 x float> %vecinsert2
 }
@@ -353,8 +352,7 @@ define <4 x float> @test_addsub_v4f32_partial_03(<4 x float> %A, <4 x float> %B)
 ; CHECK-LABEL: @test_addsub_v4f32_partial_03(
 ; CHECK-NEXT: [[FOLDEXTEXTBINOP:%.*]] = fsub <4 x float> [[A:%.*]], [[B:%.*]]
 ; CHECK-NEXT: [[FOLDEXTEXTBINOP2:%.*]] = fadd <4 x float> [[A]], [[B]]
-; CHECK-NEXT: [[VECINSERT1:%.*]] = shufflevector <4 x float> [[FOLDEXTEXTBINOP]], <4 x float> , <4 x i32>
-; CHECK-NEXT: [[VECINSERT2:%.*]] = shufflevector <4 x float> [[VECINSERT1]], <4 x float> [[FOLDEXTEXTBINOP2]], <4 x i32>
+; CHECK-NEXT: [[VECINSERT2:%.*]] = shufflevector <4 x float> [[FOLDEXTEXTBINOP]], <4 x float> [[FOLDEXTEXTBINOP2]], <4 x i32>
 ; CHECK-NEXT: ret <4 x float> [[VECINSERT2]]
 ;
 %1 = extractelement <4 x float> %A, i32 0
@@ -363,7 +361,7 @@ define <4 x float> @test_addsub_v4f32_partial_03(<4 x float> %A, <4 x float> %B)
 %3 = extractelement <4 x float> %A, i32 3
 %4 = extractelement <4 x float> %B, i32 3
 %add = fadd float %4, %3
- %vecinsert1 = insertelement <4 x float> undef, float %sub, i32 0
+ %vecinsert1 = insertelement <4 x float> poison, float %sub, i32 0
 %vecinsert2 = insertelement <4 x float> %vecinsert1, float %add, i32 3
 ret <4 x float> %vecinsert2
 }
@@ -374,8 +372,7 @@ define <4 x float> @test_addsub_v4f32_partial_12(<4 x float> %A, <4 x float> %B)
 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <2 x i32>
 ; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x float> [[TMP1]], [[TMP2]]
 ; CHECK-NEXT: [[TMP4:%.*]] = fsub <2 x float> [[TMP1]], [[TMP2]]
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP4]], <4 x i32>
-; CHECK-NEXT: [[VECINSERT21:%.*]] = shufflevector <4 x float> [[TMP5]], <4 x float> , <4 x i32>
+; CHECK-NEXT: [[VECINSERT21:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> [[TMP4]], <4 x i32>
 ; CHECK-NEXT: ret <4 x float> [[VECINSERT21]]
 ;
 %1 = extractelement <4 x float> %A, i32 2
@@ -384,7 +381,7 @@ define <4 x float> @test_addsub_v4f32_partial_12(<4 x float> %A, <4 x float> %B)
 %3 = extractelement <4 x float> %A, i32 1
 %4 = extractelement <4 x float> %B, i32 1
 %add = fadd float %3, %4
- %vecinsert1 = insertelement <4 x float> undef, float %sub, i32 2
+ %vecinsert1 = insertelement <4 x float> poison, float %sub, i32 2
 %vecinsert2 = insertelement <4 x float> %vecinsert1, float %add, i32 1
 ret <4 x float> %vecinsert2
 }
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll b/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll
index c5f56d3644c5f..6370e9ccb50db 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/fmaddsub.ll
@@ -419,11 +419,11 @@ define <8 x double> @buildvector_mul_addsub_pd512_partial(<8 x double> %C, <8 x
 ; SSE-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[TMP3]], <8 x double> poison, <2 x i32>
 ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <6 x i32>
 ; SSE-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <6 x i32>
-; SSE-NEXT: [[TMP7:%.*]] = shufflevector <6 x double> [[TMP5]], <6 x double> [[TMP6]], <6 x i32>
 ; SSE-NEXT: [[A7:%.*]] = extractelement <8 x double> [[A]], i64 7
 ; SSE-NEXT: [[B7:%.*]] = extractelement <8 x double> [[B]], i64 7
 ; SSE-NEXT: [[ADD7:%.*]] = fadd double [[A7]], [[B7]]
-; SSE-NEXT: [[TMP8:%.*]] = shufflevector <6 x double> [[TMP7]], <6 x double> , <8 x i32>
+; SSE-NEXT: [[TMP7:%.*]] = shufflevector <6 x double> [[TMP5]], <6 x double> [[TMP6]], <6 x i32>
+; SSE-NEXT: [[TMP8:%.*]] = shufflevector <6 x double> [[TMP7]], <6 x double> , <8 x i32>
 ; SSE-NEXT: [[VECINSERT8:%.*]] = insertelement <8 x double> [[TMP8]], double [[ADD7]], i64 7
 ; SSE-NEXT: ret <8 x double> [[VECINSERT8]]
 ;
@@ -934,11 +934,11 @@ define <8 x double> @buildvector_mul_subadd_pd512_partial(<8 x double> %C, <8 x
 ; SSE-NEXT: [[TMP4:%.*]] = shufflevector <8 x double> [[TMP3]], <8 x double> poison, <2 x i32>
 ; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> poison, <6 x i32>
 ; SSE-NEXT: [[TMP6:%.*]] = shufflevector <2 x double> [[TMP4]], <2 x double> poison, <6 x i32>
-; SSE-NEXT: [[TMP7:%.*]] = shufflevector <6 x double> [[TMP5]], <6 x double> [[TMP6]], <6 x i32>
 ; SSE-NEXT: [[A7:%.*]] = extractelement <8 x double> [[A]], i64 7
 ; SSE-NEXT: [[B7:%.*]] = extractelement <8 x double> [[B]], i64 7
 ; SSE-NEXT: [[ADD7:%.*]] = fsub double [[A7]], [[B7]]
-; SSE-NEXT: [[TMP8:%.*]] = shufflevector <6 x double> [[TMP7]], <6 x double> , <8 x i32>
+; SSE-NEXT: [[TMP7:%.*]] = shufflevector <6 x double> [[TMP5]], <6 x double> [[TMP6]], <6 x i32>
+; SSE-NEXT: [[TMP8:%.*]] = shufflevector <6 x double> [[TMP7]], <6 x double> , <8 x i32>
 ; SSE-NEXT: [[VECINSERT8:%.*]] = insertelement <8 x double> [[TMP8]], double [[ADD7]], i64 7
 ; SSE-NEXT: ret <8 x double> [[VECINSERT8]]
 ;
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
index 7ffd0d29b4f05..5de2bb6515e15 100644
--- a/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shuffletoidentity.ll
@@ -1026,9 +1026,8 @@ define <4 x i64> @bitcast_smax_v8i32_v4i32(<4 x i64> %a, <4 x i64> %b) {
 define void @bitcast_srcty_mismatch() {
 ; CHECK-LABEL: @bitcast_srcty_mismatch(
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[SHUFFLE_I_I:%.*]] = shufflevector <2 x i64> zeroinitializer, <2 x i64> zeroinitializer, <2 x i32>
 ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> zeroinitializer to <4 x float>
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SHUFFLE_I_I]] to <4 x float>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> zeroinitializer to <4 x float>
 ; CHECK-NEXT: [[SHUFP_I196:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> [[TMP1]], <4 x i32>
 ; CHECK-NEXT: store <4 x float> [[SHUFP_I196]], ptr null, align 16
 ; CHECK-NEXT: ret void
@@ -1064,8 +1063,8 @@ entry:
 define <16 x i64> @operandbundles(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c) {
 ; CHECK-LABEL: @operandbundles(
 ; CHECK-NEXT: [[CALL:%.*]] = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> [[A:%.*]], <4 x i64> [[B:%.*]], <4 x i64> [[C:%.*]]) [ "jl_roots"(ptr addrspace(10) null, ptr addrspace(10) null) ]
-; CHECK-NEXT: [[SHUFFLEVECTOR:%.*]] = shufflevector <4 x i64> [[CALL]], <4 x i64> poison, <16 x i32>
-; CHECK-NEXT: [[SHUFFLEVECTOR1:%.*]] = shufflevector <16 x i64> [[SHUFFLEVECTOR]], <16 x i64> undef, <16 x i32>
+; CHECK-NEXT: [[SHUFFLEVECTOR:%.*]] = shufflevector <4 x i64> [[CALL]], <4 x i64> poison, <12 x i32>
+; CHECK-NEXT: [[SHUFFLEVECTOR1:%.*]] = shufflevector <12 x i64> [[SHUFFLEVECTOR]], <12 x i64> undef, <16 x i32>
 ; CHECK-NEXT: ret <16 x i64> [[SHUFFLEVECTOR1]]
 ;
 %call = call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %a, <4 x i64> %b, <4 x i64> %c) [ "jl_roots"(ptr addrspace(10) null, ptr addrspace(10) null) ]
diff --git a/llvm/test/Transforms/VectorCombine/AArch64/shufflevec-compact-operands.ll b/llvm/test/Transforms/VectorCombine/AArch64/shufflevec-compact-operands.ll
new file mode 100644
index 0000000000000..7141808658ad1
--- /dev/null
+++ b/llvm/test/Transforms/VectorCombine/AArch64/shufflevec-compact-operands.ll
@@ -0,0 +1,212 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=vector-combine %s -S -o - | FileCheck %s
+
+target triple = "aarch64"
+
+; Interleaving splat shuffle with constant operand - SHOULD compact
+define <8 x i8> @interleave_splat_constant(i8 %x) {
+; CHECK-LABEL: @interleave_splat_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i8> poison, i8 [[X:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> , <4 x i8> [[TMP2]], <8 x i32>
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %1 = insertelement <4 x i8> poison, i8 %x, i32 0
+ %2 = shufflevector <4 x i8> %1, <4 x i8> poison, <4 x i32> zeroinitializer
+ %3 = shufflevector <4 x i8> %2, <4 x i8> poison, <8 x i32>
+ %4 = shufflevector <8 x i8> , <8 x i8> %3, <8 x i32>
+ ret <8 x i8> %4
+}
+
+; Interleaving constant with splat shuffle operand - SHOULD compact
+define <8 x i8> @interleave_constant_splat(i8 %x) {
+; CHECK-LABEL: @interleave_constant_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i8> poison, i8 [[X:%.*]], i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> , <4 x i8> [[TMP2]], <8 x i32>
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %1 = insertelement <4 x i8> poison, i8 %x, i32 0
+ %2 = shufflevector <4 x i8> %1, <4 x i8> poison, <4 x i32> zeroinitializer
+ %3 = shufflevector <4 x i8> %2, <4 x i8> poison, <8 x i32>
+ %4 = shufflevector <8 x i8> , <8 x i8> %3, <8 x i32>
+ ret <8 x i8> %4
+}
+
+; Interleaving random shuffle with constant operand - SHOULD compact
+define <8 x i8> @interleave_shuffle_constant(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: @interleave_shuffle_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> [[TMP1]], <4 x i8> , <8 x i32>
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %1 = shufflevector <4 x i8> %x, <4 x i8> %y, <8 x i32>
+ %2 = shufflevector <8 x i8> %1, <8 x i8> , <8 x i32>
+ ret <8 x i8> %2
+}
+
+; Interleaving constant operand with random shuffle - SHOULD compact
+define <8 x i8> @interleave_constant_shuffle(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: @interleave_constant_shuffle(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i8> , <4 x i8> [[TMP1]], <8 x i32>
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %1 = shufflevector <4 x i8> %x, <4 x i8> %y, <8 x i32>
+ %2 = shufflevector <8 x i8> , <8 x i8> %1, <8 x i32>
+ ret <8 x i8> %2
+}
+
+; Randomly shuffle random shuffle with constant operand - SHOULD compact
+define <8 x i8> @shuffle_shuffle_constant(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: @shuffle_shuffle_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <5 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <5 x i8> [[TMP1]], <5 x i8> , <8 x i32>
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %1 = shufflevector <4 x i8> %x, <4 x i8> %y, <8 x i32>
+ %2 = shufflevector <8 x i8> %1, <8 x i8> , <8 x i32>
+ ret <8 x i8> %2
+}
+
+; Randomly shuffle constant operand with random shuffle - SHOULD compact
+define <8 x i8> @shuffle_constant_shuffle(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: @shuffle_constant_shuffle(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <5 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <5 x i8> , <5 x i8> [[TMP1]], <8 x i32>
+; CHECK-NEXT: ret <8 x i8> [[TMP2]]
+;
+ %1 = shufflevector <4 x i8> %x, <4 x i8> %y, <8 x i32>
+ %2 = shufflevector <8 x i8> , <8 x i8> %1, <8 x i32>
+ ret <8 x i8> %2
+}
+
+; Randomly shuffle interleave shuffle with constant operand - does NOT compact
+define <8 x i8> @shuffle_interleave_constant(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: @shuffle_interleave_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[X:%.*]], <8 x i8> [[Y:%.*]], <8 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> , <8 x i32>
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %1 = shufflevector <8 x i8> %x, <8 x i8> %y, <8 x i32>
+ %2 = shufflevector <8 x i8> %1, <8 x i8> , <8 x i32>
+ ret <8 x i8> %2
+}
+
+; Randomly shuffle constant operand with interleave shuffle - does NOT compact
+define <8 x i8> @shuffle_constant_interleave(<8 x i8> %x, <8 x i8> %y) {
+; CHECK-LABEL: @shuffle_constant_interleave(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i8> [[X:%.*]], <8 x i8> [[Y:%.*]], <8 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <8 x i8> , <8 x i8> [[TMP1]], <8 x i32>
+; CHECK-NEXT: ret <8 x i8> [[TMP2]]
+;
+ %1 = shufflevector <8 x i8> %x, <8 x i8> %y, <8 x i32>
+ %2 = shufflevector <8 x i8> , <8 x i8> %1, <8 x i32>
+ ret <8 x i8> %2
+}
+
+; Both operands are shuffles - does NOT compact
+define <8 x i32> @interleave_shuffle_shuffle(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
+; CHECK-LABEL: @interleave_shuffle_shuffle(
+; CHECK-NEXT: [[S1:%.*]] = shufflevector <4 x i32> [[A:%.*]], <4 x i32> [[C:%.*]], <8 x i32>
+; CHECK-NEXT: [[S2:%.*]] = shufflevector <4 x i32> [[B:%.*]], <4 x i32> [[C]], <8 x i32>
+; CHECK-NEXT: [[RESULT:%.*]] = shufflevector <8 x i32> [[S1]], <8 x i32> [[S2]], <8 x i32>
+; CHECK-NEXT: ret <8 x i32> [[RESULT]]
+;
+ %s1 = shufflevector <4 x i32> %a, <4 x i32> %c, <8 x i32>
+ %s2 = shufflevector <4 x i32> %b, <4 x i32> %c, <8 x i32>
+ %result = shufflevector <8 x i32> %s1, <8 x i32> %s2, <8 x i32>
+ ret <8 x i32> %result
+}
+
+; Multiple uses of LHS (shufflevector) operand - does NOT compact
+define <8 x i8> @shuffle_multiple_users_shuffle_constant(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: @shuffle_multiple_users_shuffle_constant(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <8 x i32>
+; CHECK-NEXT: call void @use_vec(<8 x i8> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i8> [[TMP1]], <8 x i8> , <8 x i32>
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %1 = shufflevector <4 x i8> %x, <4 x i8> %y, <8 x i32>
+ call void @use_vec(<8 x i8> %1)
+ %2 = shufflevector <8 x i8> %1, <8 x i8> , <8 x i32>
+ ret <8 x i8> %2
+}
+
+; Interleaving non-compactible operand with constant operand - does NOT compact
+define <8 x i8> @interleave_argument_constant(<8 x i8> %x) {
+; CHECK-LABEL: @interleave_argument_constant(
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i8> [[X:%.*]], <8 x i8> , <8 x i32>
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %1 = shufflevector <8 x i8> %x, <8 x i8> , <8 x i32>
+ ret <8 x i8> %1
+}
+
+; Interleaving constant operand with non-compactible operand - does NOT compact
+define <8 x i8> @interleave_constant_argument(<8 x i8> %x) {
+; CHECK-LABEL: @interleave_constant_argument(
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i8> , <8 x i8> [[X:%.*]], <8 x i32>
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %1 = shufflevector <8 x i8> , <8 x i8> %x, <8 x i32>
+ ret <8 x i8> %1
+}
+
+; Randomly shuffle non-compactible operand with constant operand - SHOULD compact
+define <8 x i8> @shuffle_argument_constant(<8 x i8> %x) {
+; CHECK-LABEL: @shuffle_argument_constant(
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i8> [[X:%.*]], <8 x i8> , <8 x i32>
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %1 = shufflevector <8 x i8> %x, <8 x i8> , <8 x i32>
+ ret <8 x i8> %1
+}
+
+; Randomly shuffle constant operand with non-compactible operand - SHOULD compact
+define <8 x i8> @shuffle_constant_argument(<8 x i8> %x) {
+; CHECK-LABEL: @shuffle_constant_argument(
+; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i8> , <8 x i8> [[X:%.*]], <8 x i32>
+; CHECK-NEXT: ret <8 x i8> [[TMP3]]
+;
+ %1 = shufflevector <8 x i8> , <8 x i8> %x, <8 x i32>
+ ret <8 x i8> %1
+}
+
+; Different element type (f32) - SHOULD compact
+define <8 x float> @shuffle_shuffle_constant_float(<4 x float> %x, <4 x float> %y) {
+; CHECK-LABEL: @shuffle_shuffle_constant_float(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]], <5 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <5 x float> [[TMP1]], <5 x float> , <8 x i32>
+; CHECK-NEXT: ret <8 x float> [[TMP2]]
+;
+ %1 = shufflevector <4 x float> %x, <4 x float> %y, <8 x i32>
+ %2 = shufflevector <8 x float> %1, <8 x float> , <8 x i32>
+ ret <8 x float> %2
+}
+
+; Values from the operands are duplicated by the shuffle - SHOULD compact
+define <16 x i8> @shuffle_shuffle_constant_repeated(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: @shuffle_shuffle_constant_repeated(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <7 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <7 x i8> [[TMP1]], <7 x i8> , <16 x i32>
+; CHECK-NEXT: ret <16 x i8> [[TMP2]]
+;
+ %1 = shufflevector <4 x i8> %x, <4 x i8> %y, <8 x i32>
+ %2 = shufflevector <8 x i8> %1, <8 x i8> , <16 x i32>
+ ret <16 x i8> %2
+}
+
+; Values from the operands are duplicated by the shuffle - SHOULD compact
+define <16 x i8> @shuffle_constant_shuffle_repeated(<4 x i8> %x, <4 x i8> %y) {
+; CHECK-LABEL: @shuffle_constant_shuffle_repeated(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i8> [[X:%.*]], <4 x i8> [[Y:%.*]], <7 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <7 x i8> , <7 x i8> [[TMP1]], <16 x i32>
+; CHECK-NEXT: ret <16 x i8> [[TMP2]]
+;
+ %1 = shufflevector <4 x i8> %x, <4 x i8> %y, <8 x i32>
+ %2 = shufflevector <8 x i8> , <8 x i8> %1, <16 x i32>
+ ret <16 x i8> %2
+}
+
+declare void @use_vec(<8 x i8>)
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll b/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
index 4c1ca82b2bd06..c3513c478f065 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-binop.ll
@@ -450,16 +450,10 @@ define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) {
 ; CHECK-LABEL: @PR34724(
 ; CHECK-NEXT: [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
 ; CHECK-NEXT: [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
-; CHECK-NEXT: [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32>
-; CHECK-NEXT: [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
-; CHECK-NEXT: [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32>
-; CHECK-NEXT: [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
-; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32>
+; CHECK-NEXT: [[SHIFT2:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B1:%.*]], <4 x i32>
+; CHECK-NEXT: [[B:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B1]], <4 x i32>
 ; CHECK-NEXT: [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
-; CHECK-NEXT: [[V1:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32>
-; CHECK-NEXT: [[V2:%.*]] = shufflevector <4 x float> [[V1]], <4 x float> [[TMP2]], <4 x i32>
-; CHECK-NEXT: [[V3:%.*]] = shufflevector <4 x float> [[V2]], <4 x float> [[TMP3]], <4 x i32>
-; CHECK-NEXT: ret <4 x float> [[V3]]
+; CHECK-NEXT: ret <4 x float> [[TMP3]]
 ;
 %a0 = extractelement <4 x float> %a, i32 0
 %a1 = extractelement <4 x float> %a, i32 1
@@ -475,7 +469,7 @@ define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) {
 %b01 = fadd float %b0, %b1
 %b23 = fadd float %b2, %b3
- %v1 = insertelement <4 x float> undef, float %a23, i32 1
+ %v1 = insertelement <4 x float> poison, float %a23, i32 1
 %v2 = insertelement <4 x float> %v1, float %b01, i32 2
 %v3 = insertelement <4 x float> %v2, float %b23, i32 3
 ret <4 x float> %v3
diff --git a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
index 228f161698bb2..51d608096398e 100644
--- a/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/extract-insert-poison.ll
@@ -67,10 +67,14 @@ define <4 x double> @src_ins0_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b)
 }
 
 define <4 x double> @src_ins1_v4f64_ext1_v2f64(<4 x double> %a, <2 x double> %b) #0 {
-; CHECK-LABEL: @src_ins1_v4f64_ext1_v2f64(
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32>
-; CHECK-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32>
-; CHECK-NEXT: ret <4 x double> [[INS]]
+; SSE-LABEL: @src_ins1_v4f64_ext1_v2f64(
+; SSE-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32>
+; SSE-NEXT: [[INS:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> poison, <4 x i32>
+; SSE-NEXT: ret <4 x double> [[INS]]
+;
+; AVX-LABEL: @src_ins1_v4f64_ext1_v2f64(
+; AVX-NEXT: [[INS:%.*]] = shufflevector <2 x double> [[B:%.*]], <2 x double> poison, <4 x i32>
+; AVX-NEXT: ret <4 x double> [[INS]]
 ;
 %ext = extractelement <2 x double> %b, i32 1
 %ins = insertelement <4 x double> poison, double %ext, i32 1
diff --git a/llvm/test/Transforms/VectorCombine/X86/permute-of-binops.ll b/llvm/test/Transforms/VectorCombine/X86/permute-of-binops.ll
index 5373f6c07be31..fd099957791e3 100644
--- a/llvm/test/Transforms/VectorCombine/X86/permute-of-binops.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/permute-of-binops.ll
@@ -54,7 +54,7 @@ define <4 x double> @fadd_v4f64_mixed_types(<4 x double> %a, <2 x double> %b) {
 define <4 x float> @fadd_v4f32_mixed_types(<4 x float> %a0) {
 ; CHECK-LABEL: define <4 x float> @fadd_v4f32_mixed_types(
 ; CHECK-SAME: <4 x float> [[A0:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> zeroinitializer, <4 x i32>
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0]], <4 x float> , <4 x i32>
 ; CHECK-NEXT: [[POST:%.*]] = fmul <4 x float> [[TMP1]],
 ; CHECK-NEXT: ret <4 x float> [[POST]]
 ;
diff --git a/llvm/test/Transforms/VectorCombine/X86/reduction-two-vecs-combine.ll b/llvm/test/Transforms/VectorCombine/X86/reduction-two-vecs-combine.ll
index a0945ab81b0f7..102fc898a8b3d 100644
--- a/llvm/test/Transforms/VectorCombine/X86/reduction-two-vecs-combine.ll
+++ b/llvm/test/Transforms/VectorCombine/X86/reduction-two-vecs-combine.ll
@@ -5,8 +5,7 @@ define i16 @test_spill_mixed() {
 ; CHECK-LABEL: define i16 @test_spill_mixed() {
 ; CHECK-NEXT: entry:
-; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, <4 x i32>
-; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP0]])
+; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> zeroinitializer)
 ; CHECK-NEXT: ret i16 0
 ;
 entry: