Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
145 changes: 142 additions & 3 deletions llvm/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2893,6 +2893,145 @@ Instruction *InstCombinerImpl::simplifyBinOpSplats(ShuffleVectorInst &SVI) {
return new ShuffleVectorInst(NewBO, SVI.getShuffleMask());
}

/// Describes whether and how a shuffle operand can be compacted, i.e. rebuilt
/// with only the lanes that the user shuffle's mask actually references.
/// The analysis and the rewrite are kept apart: the plain fields report the
/// outcome of the analysis, and Apply performs the rewrite when it is called.
struct ShuffleOperandCompaction {
/// Whether this operand can be compacted at all (it is a constant or another
/// shuffle instruction and passed the checks in compactShuffleOperand).
/// The "cannot compact" results built in this file use false here together
/// with a zero CompactedWidth and a null Apply.
bool CanCompact;
/// Conservative heuristic: whether this operand's compaction justifies
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd suggest implementing this in VectorCombine, as the vector length is changed. In VectorCombine, you can use the cost model for a better heuristic.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1

/// the overall transformation (true for constants; false for shuffles).
bool ShouldCompact;
/// The minimal width required for the compacted vector.
unsigned CompactedWidth;
/// Function to create the compacted operand if the transformation applies.
std::function<Value *(unsigned, InstCombiner::BuilderTy &)> Apply;
};

/// Attempt to narrow/compact a constant vector used in a shuffle by removing
/// elements that are not referenced by the shuffle mask.
///
/// \param ShuffleInput    Fixed-width constant vector feeding the shuffle.
/// \param UserShuffleMask Mask of the user shuffle. On success, every entry
///                        that selects a lane of this operand is rewritten in
///                        place to the lane's compacted position (still
///                        offset by \p IndexStart). On failure the mask is
///                        left untouched.
/// \param IndexStart      Mask index that corresponds to lane 0 of this
///                        operand.
/// \returns a descriptor holding the number of distinct lanes referenced and
///          a callback that materializes the compacted constant, padded with
///          poison up to the requested width; or a "cannot compact"
///          descriptor when a referenced lane cannot be extracted from the
///          constant.
static ShuffleOperandCompaction
compactShuffleOperand(Constant *ShuffleInput,
                      MutableArrayRef<int> UserShuffleMask, int IndexStart) {
  auto *VecTy = cast<FixedVectorType>(ShuffleInput->getType());
  const unsigned Width = VecTy->getNumElements();
  const int IndexEnd = IndexStart + static_cast<int>(Width);
  const auto SelectsFromInput = [IndexStart, IndexEnd](int MaskElt) {
    return MaskElt >= IndexStart && MaskElt < IndexEnd;
  };

  // Collect only the constant elements that are actually used.
  SmallVector<Constant *, 16> CompactedElts;
  // Map from original element index to compacted (IndexStart-based) index.
  SmallVector<int, 16> IndexRemap(Width, -1);

  // Pass 1: gather the referenced lanes without modifying the mask, so that a
  // bail-out leaves the caller's mask exactly as it was.
  for (int MaskElt : UserShuffleMask) {
    if (!SelectsFromInput(MaskElt))
      continue;
    const int RelMaskElt = MaskElt - IndexStart;
    if (IndexRemap[RelMaskElt] >= 0)
      continue;
    // getAggregateElement returns null when the lane cannot be determined
    // (for instance when the constant is an expression); such an operand
    // cannot be rebuilt element by element.
    Constant *Elt = ShuffleInput->getAggregateElement(RelMaskElt);
    if (!Elt)
      return {false, false, 0, nullptr};
    IndexRemap[RelMaskElt] =
        static_cast<int>(CompactedElts.size()) + IndexStart;
    CompactedElts.push_back(Elt);
  }

  // Pass 2: every referenced lane is known to be extractable, so commit the
  // remapped indices to the mask.
  for (int &MaskElt : UserShuffleMask)
    if (SelectsFromInput(MaskElt))
      MaskElt = IndexRemap[MaskElt - IndexStart];

  return {true, true, static_cast<unsigned>(CompactedElts.size()),
          [CompactedElts = std::move(CompactedElts),
           VecTy](unsigned PaddedWidth,
                  InstCombiner::BuilderTy &) -> Value * {
            // Pad with poison to reach the requested width.
            SmallVector<Constant *, 16> PaddedElts(CompactedElts);
            Constant *Poison = PoisonValue::get(VecTy->getElementType());
            while (PaddedElts.size() < PaddedWidth)
              PaddedElts.push_back(Poison);

            return ConstantVector::get(PaddedElts);
          }};
}

/// Attempt to narrow/compact a shuffle instruction that feeds another shuffle
/// by dropping the result lanes the user's mask never selects.
///
/// Entries of \p UserShuffleMask that fall inside this operand's index range
/// (starting at \p IndexStart) are rewritten in place to the lane's position
/// in the compacted operand, still offset by \p IndexStart. The returned
/// descriptor carries the number of distinct lanes kept and a callback that
/// emits the narrowed shuffle, padded with poison mask elements up to the
/// width it is asked for.
static ShuffleOperandCompaction
compactShuffleOperand(ShuffleVectorInst *ShuffleInput,
                      MutableArrayRef<int> UserShuffleMask, int IndexStart) {
  const unsigned NumLanes =
      cast<FixedVectorType>(ShuffleInput->getType())->getNumElements();
  const int IndexEnd = IndexStart + (int)NumLanes;

  // For each original lane: its compacted index, or -1 if not seen yet.
  SmallVector<int, 16> NewLaneFor(NumLanes, -1);
  // Inner-shuffle mask entries of the lanes that survive, in first-use order.
  SmallVector<int, 16> KeptMask;

  for (int &UserElt : UserShuffleMask) {
    // Entries outside this operand's range (including poison) are not ours.
    if (UserElt < IndexStart || UserElt >= IndexEnd)
      continue;

    const int Lane = UserElt - IndexStart;
    int &Slot = NewLaneFor[Lane];
    if (Slot < 0) {
      Slot = KeptMask.size() + IndexStart;
      KeptMask.push_back(ShuffleInput->getMaskValue(Lane));
    }
    UserElt = Slot;
  }

  const unsigned KeptWidth = static_cast<unsigned>(KeptMask.size());
  auto EmitNarrowShuffle =
      [KeptMask = std::move(KeptMask),
       ShuffleInput](unsigned PaddedWidth,
                     InstCombiner::BuilderTy &Builder) -> Value * {
    // Extend the mask with poison entries until it has the requested width.
    SmallVector<int, 16> FullMask(KeptMask);
    while (FullMask.size() < PaddedWidth)
      FullMask.push_back(PoisonMaskElem);

    Value *Op0 = ShuffleInput->getOperand(0);
    Value *Op1 = ShuffleInput->getOperand(1);
    return Builder.CreateShuffleVector(Op0, Op1, FullMask);
  };

  return {true, false, KeptWidth, std::move(EmitNarrowShuffle)};
}

/// Try to narrow/compact a shuffle operand by eliminating elements that are
/// not used by the shuffle mask. This updates the shuffle mask in-place to
/// reflect the compaction. Returns information about whether compaction is
/// possible and a lambda to apply the compaction if beneficial.
///
/// \param ShuffleInput Operand of the user shuffle to analyse.
/// \param ShuffleMask  Mask of the user shuffle; entries selecting from this
///                     operand may be rewritten.
/// \param IndexStart   Mask index that corresponds to lane 0 of this operand.
/// \returns a descriptor with CanCompact set when the operand is a constant
///          or a single-use shuffle instruction; otherwise a descriptor with
///          CanCompact cleared and a null Apply.
static ShuffleOperandCompaction
compactShuffleOperand(Value *ShuffleInput, MutableArrayRef<int> ShuffleMask,
                      int IndexStart) {
  // Constants are handled without a use-count check: a constant is shared by
  // every user in the module (and its uses may not be tracked at all), so its
  // use count depends on unrelated code and says nothing about whether
  // rebuilding a narrower constant is profitable.
  if (auto *C = dyn_cast<Constant>(ShuffleInput))
    return compactShuffleOperand(C, ShuffleMask, IndexStart);

  // A shuffle is only worth narrowing when the user shuffle is its sole
  // user; otherwise the wide shuffle stays alive and a second one is added.
  // hasOneUse() answers this without walking the whole use list.
  if (auto *Shuf = dyn_cast<ShuffleVectorInst>(ShuffleInput))
    if (Shuf->hasOneUse())
      return compactShuffleOperand(Shuf, ShuffleMask, IndexStart);

  return {false, false, 0, nullptr};
}

/// Try to narrow the shuffle by eliminating unused elements from the operands.
///
/// Both operands are analysed against a scratch copy of the mask. The rewrite
/// is performed only if both operands can be compacted, at least one of them
/// makes the change worthwhile (ShouldCompact), and the common compacted
/// width is non-zero and strictly smaller than the current operand width.
///
/// \param SVI Shuffle whose operands and mask are updated in place.
/// \param IC  Combiner providing the IR builder and operand replacement.
/// \returns \p SVI if it was modified, nullptr otherwise.
static Instruction *tryCompactShuffleOperands(ShuffleVectorInst &SVI,
                                              InstCombinerImpl &IC) {
  Value *LHS = SVI.getOperand(0);
  Value *RHS = SVI.getOperand(1);
  ArrayRef<int> Mask = SVI.getShuffleMask();
  const unsigned LHSWidth =
      cast<FixedVectorType>(LHS->getType())->getNumElements();

  // Work on a copy so the original mask survives if we give up.
  SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());

  ShuffleOperandCompaction LHSCompact = compactShuffleOperand(LHS, NewMask, 0);
  if (!LHSCompact.CanCompact)
    return nullptr;

  ShuffleOperandCompaction RHSCompact =
      compactShuffleOperand(RHS, NewMask, static_cast<int>(LHSWidth));
  if (!RHSCompact.CanCompact)
    return nullptr;

  if (!LHSCompact.ShouldCompact && !RHSCompact.ShouldCompact)
    return nullptr;

  // Both operands must end up with the same width, so use the larger need.
  const unsigned CompactWidth =
      std::max(LHSCompact.CompactedWidth, RHSCompact.CompactedWidth);
  // A zero width means no lane of either operand is referenced; there is no
  // valid zero-element vector to build, so leave that case to other folds.
  // A width that does not shrink the operands is not an improvement.
  if (CompactWidth == 0 || CompactWidth >= LHSWidth)
    return nullptr;

  IC.replaceOperand(SVI, 0, LHSCompact.Apply(CompactWidth, IC.Builder));
  IC.replaceOperand(SVI, 1, RHSCompact.Apply(CompactWidth, IC.Builder));

  // Adjust RHS indices in the mask to account for the new LHS width: they
  // are still based at the old LHS width and must now start at CompactWidth.
  const int OldBase = static_cast<int>(LHSWidth);
  const int NewBase = static_cast<int>(CompactWidth);
  for (int &MaskElt : NewMask)
    if (MaskElt >= OldBase)
      MaskElt = MaskElt - OldBase + NewBase;

  SVI.setShuffleMask(NewMask);
  return &SVI;
}

Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
Value *LHS = SVI.getOperand(0);
Value *RHS = SVI.getOperand(1);
Expand Down Expand Up @@ -3172,7 +3311,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (!match(RHSShuffle->getOperand(1), m_Poison()))
RHSShuffle = nullptr;
if (!LHSShuffle && !RHSShuffle)
return MadeChange ? &SVI : nullptr;
return MadeChange ? &SVI : tryCompactShuffleOperands(SVI, *this);

Value* LHSOp0 = nullptr;
Value* LHSOp1 = nullptr;
Expand Down Expand Up @@ -3212,7 +3351,7 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
}

if (newLHS == LHS && newRHS == RHS)
return MadeChange ? &SVI : nullptr;
return MadeChange ? &SVI : tryCompactShuffleOperands(SVI, *this);

ArrayRef<int> LHSMask;
ArrayRef<int> RHSMask;
Expand Down Expand Up @@ -3294,5 +3433,5 @@ Instruction *InstCombinerImpl::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
return new ShuffleVectorInst(newLHS, newRHS, newMask);
}

return MadeChange ? &SVI : nullptr;
return MadeChange ? &SVI : tryCompactShuffleOperands(SVI, *this);
}
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,7 @@ define <2 x i16> @extract_elt32_v4i16_readfirstlane(<4 x i16> %src) {
; CHECK-SAME: <4 x i16> [[SRC:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[SRC]], <4 x i16> poison, <2 x i32> <i32 2, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.amdgcn.readfirstlane.v2i16(<2 x i16> [[TMP1]])
; CHECK-NEXT: [[VEC:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <4 x i32> <i32 poison, i32 poison, i32 0, i32 1>
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i16> [[VEC]], <4 x i16> poison, <2 x i32> <i32 3, i32 2>
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32> <i32 1, i32 0>
; CHECK-NEXT: ret <2 x i16> [[SHUFFLE]]
;
%vec = call <4 x i16> @llvm.amdgcn.readfirstlane.v4i16(<4 x i16> %src)
Expand Down Expand Up @@ -287,8 +286,7 @@ define <2 x i32> @extract_elt13_v4i32_readfirstlane(<4 x i32> %src) {
; CHECK-SAME: <4 x i32> [[SRC:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[SRC]], <4 x i32> poison, <3 x i32> <i32 1, i32 poison, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]])
; CHECK-NEXT: [[VEC:%.*]] = shufflevector <3 x i32> [[TMP2]], <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <3 x i32> [[TMP2]], <3 x i32> poison, <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]]
;
%vec = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> %src)
Expand Down Expand Up @@ -328,8 +326,7 @@ define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1(i32 %src0,
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 0>
; CHECK-NEXT: [[TMP3:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP2]])
; CHECK-NEXT: [[VEC:%.*]] = shufflevector <3 x i32> [[TMP3]], <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <3 x i32> [[TMP3]], <3 x i32> poison, <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]]
;
%ins.0 = insertelement <4 x i32> poison, i32 %src0, i32 1
Expand Down Expand Up @@ -372,8 +369,7 @@ define < 2 x i32> @extract_elt13_v4i32_readfirstlane_source_simplify1_convergenc
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> poison, i32 [[SRC0]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 poison, i32 0>
; CHECK-NEXT: [[TMP3:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP2]]) [ "convergencectrl"(token [[T]]) ]
; CHECK-NEXT: [[VEC:%.*]] = shufflevector <3 x i32> [[TMP3]], <3 x i32> poison, <4 x i32> <i32 poison, i32 0, i32 poison, i32 2>
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[VEC]], <4 x i32> poison, <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <3 x i32> [[TMP3]], <3 x i32> poison, <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]]
;
%t = call token @llvm.experimental.convergence.entry()
Expand Down Expand Up @@ -413,8 +409,7 @@ define <2 x i32> @extract_elt13_v8i32_readfirstlane(<8 x i32> %src) {
; CHECK-SAME: <8 x i32> [[SRC:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[SRC]], <8 x i32> poison, <3 x i32> <i32 1, i32 poison, i32 3>
; CHECK-NEXT: [[TMP2:%.*]] = call <3 x i32> @llvm.amdgcn.readfirstlane.v3i32(<3 x i32> [[TMP1]])
; CHECK-NEXT: [[VEC:%.*]] = shufflevector <3 x i32> [[TMP2]], <3 x i32> poison, <8 x i32> <i32 poison, i32 0, i32 poison, i32 2, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> poison, <2 x i32> <i32 1, i32 3>
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <3 x i32> [[TMP2]], <3 x i32> poison, <2 x i32> <i32 0, i32 2>
; CHECK-NEXT: ret <2 x i32> [[SHUFFLE]]
;
%vec = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> %src)
Expand All @@ -439,8 +434,7 @@ define <3 x i32> @extract_elt124_v8i32_readfirstlane(<8 x i32> %src) {
; CHECK-SAME: <8 x i32> [[SRC:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[SRC]], <8 x i32> poison, <4 x i32> <i32 1, i32 2, i32 poison, i32 4>
; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.amdgcn.readfirstlane.v4i32(<4 x i32> [[TMP1]])
; CHECK-NEXT: [[VEC:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> <i32 poison, i32 0, i32 1, i32 poison, i32 3, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i32> [[VEC]], <8 x i32> poison, <3 x i32> <i32 1, i32 2, i32 4>
; CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 3>
; CHECK-NEXT: ret <3 x i32> [[SHUFFLE]]
;
%vec = call <8 x i32> @llvm.amdgcn.readfirstlane.v8i32(<8 x i32> %src)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ define <8 x float> @widen_extract4(<8 x float> %ins, <2 x float> %ext) {

define <8 x i16> @pr26015(<4 x i16> %t0) {
; CHECK-LABEL: @pr26015(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[T0:%.*]], <4 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[T5:%.*]] = shufflevector <8 x i16> <i16 0, i16 0, i16 0, i16 poison, i16 0, i16 0, i16 0, i16 poison>, <8 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 10, i32 4, i32 5, i32 6, i32 11>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[T0:%.*]], <4 x i16> poison, <6 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[T5:%.*]] = shufflevector <6 x i16> zeroinitializer, <6 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 6, i32 3, i32 4, i32 5, i32 7>
; CHECK-NEXT: ret <8 x i16> [[T5]]
;
%t1 = extractelement <4 x i16> %t0, i32 2
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/Transforms/InstCombine/insert-extract-shuffle.ll
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,8 @@ define <8 x float> @widen_extract4(<8 x float> %ins, <2 x float> %ext) {

define <8 x i16> @pr26015(<4 x i16> %t0) {
; CHECK-LABEL: @pr26015(
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[T0:%.*]], <4 x i16> poison, <8 x i32> <i32 poison, i32 poison, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[T5:%.*]] = shufflevector <8 x i16> <i16 0, i16 0, i16 0, i16 poison, i16 0, i16 0, i16 0, i16 poison>, <8 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 10, i32 4, i32 5, i32 6, i32 11>
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i16> [[T0:%.*]], <4 x i16> poison, <6 x i32> <i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT: [[T5:%.*]] = shufflevector <6 x i16> zeroinitializer, <6 x i16> [[TMP1]], <8 x i32> <i32 0, i32 1, i32 2, i32 6, i32 3, i32 4, i32 5, i32 7>
; CHECK-NEXT: ret <8 x i16> [[T5]]
;
%t1 = extractelement <4 x i16> %t0, i32 2
Expand Down
Loading