Skip to content

Commit 96ed7cb

Browse files
committed
[InstCombine] VectorCombine Pass
1 parent 96ec17d commit 96ed7cb

File tree

2 files changed

+69
-19
lines changed

2 files changed

+69
-19
lines changed

llvm/lib/Transforms/Vectorize/VectorCombine.cpp

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,7 @@ class VectorCombine {
111111
bool foldInsExtFNeg(Instruction &I);
112112
bool foldInsExtBinop(Instruction &I);
113113
bool foldInsExtVectorToShuffle(Instruction &I);
114+
bool foldBitOpOfBitcasts(Instruction &I);
114115
bool foldBitcastShuffle(Instruction &I);
115116
bool scalarizeBinopOrCmp(Instruction &I);
116117
bool scalarizeVPIntrinsic(Instruction &I);
@@ -801,6 +802,58 @@ bool VectorCombine::foldInsExtBinop(Instruction &I) {
801802
return true;
802803
}
803804

805+
/// Try to fold a bitwise logic op whose operands are both bitcasts into a
/// single bitcast of the logic op applied to the original sources:
///   bitop(bitcast(x), bitcast(y)) -> bitcast(bitop(x, y))
///
/// The fold only fires when:
///  - \p I is an and/or/xor binary operator,
///  - both operands are bitcast instructions from the same source type,
///  - source and destination are fixed vectors of the same total bit width,
///  - the source vector has integer elements (bitwise ops are not defined on
///    floating-point vectors), and
///  - the source element width is not larger than the destination's.
///
/// \returns true if \p I was replaced, false if the IR was left untouched.
bool VectorCombine::foldBitOpOfBitcasts(Instruction &I) {
  auto *BinOp = dyn_cast<BinaryOperator>(&I);
  if (!BinOp || !BinOp->isBitwiseLogicOp())
    return false;

  // Both operands must be bitcasts.
  auto *LHSCast = dyn_cast<BitCastInst>(BinOp->getOperand(0));
  auto *RHSCast = dyn_cast<BitCastInst>(BinOp->getOperand(1));
  if (!LHSCast || !RHSCast)
    return false;

  Value *LHSSrc = LHSCast->getOperand(0);
  Value *RHSSrc = RHSCast->getOperand(0);

  // Source types must match so the new op has well-typed operands.
  if (LHSSrc->getType() != RHSSrc->getType())
    return false;

  // Only handle fixed-width vector types.
  auto *SrcVecTy = dyn_cast<FixedVectorType>(LHSSrc->getType());
  auto *DstVecTy = dyn_cast<FixedVectorType>(I.getType());
  if (!SrcVecTy || !DstVecTy)
    return false;

  // and/or/xor require integer (vector) operands. A bitcast source may be a
  // floating-point vector (e.g. <4 x float> -> <4 x i32>); hoisting the logic
  // op above such a cast would create invalid IR, so reject it here.
  if (!SrcVecTy->getElementType()->isIntegerTy())
    return false;

  // Same total bit width.
  if (SrcVecTy->getPrimitiveSizeInBits() != DstVecTy->getPrimitiveSizeInBits())
    return false;

  // Heuristic (not a target cost query): only rewrite when the source element
  // type is no wider than the destination element type, i.e. prefer the op on
  // the type with more, narrower elements.
  unsigned SrcEltBits = SrcVecTy->getScalarSizeInBits();
  unsigned DstEltBits = DstVecTy->getScalarSizeInBits();
  if (SrcEltBits > DstEltBits)
    return false;

  // Create the operation on the source type.
  Value *NewOp = Builder.CreateBinOp(BinOp->getOpcode(), LHSSrc, RHSSrc,
                                     BinOp->getName() + ".inner");
  // The builder may have folded the op to a non-instruction; only copy flags
  // when a real binary operator was produced.
  if (auto *NewBinOp = dyn_cast<BinaryOperator>(NewOp))
    NewBinOp->copyIRFlags(BinOp);

  // Bitcast the result back to the original type and replace all uses.
  Value *Result = Builder.CreateBitCast(NewOp, I.getType());
  replaceValue(I, *Result);
  return true;
}
856+
804857
/// If this is a bitcast of a shuffle, try to bitcast the source vector to the
805858
/// destination type followed by shuffle. This can enable further transforms by
806859
/// moving bitcasts or shuffles together.
@@ -3562,6 +3615,11 @@ bool VectorCombine::run() {
35623615
case Instruction::BitCast:
35633616
MadeChange |= foldBitcastShuffle(I);
35643617
break;
3618+
case Instruction::And:
3619+
case Instruction::Or:
3620+
case Instruction::Xor:
3621+
MadeChange |= foldBitOpOfBitcasts(I);
3622+
break;
35653623
default:
35663624
MadeChange |= shrinkType(I);
35673625
break;

llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -477,30 +477,22 @@ define <2 x i64> @PR66513(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %s
477477
; CHECK-LABEL: @PR66513(
478478
; CHECK-NEXT: [[I:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32>
479479
; CHECK-NEXT: [[CMP_I23:%.*]] = icmp sgt <4 x i32> [[I]], zeroinitializer
480-
; CHECK-NEXT: [[SEXT_I24:%.*]] = sext <4 x i1> [[CMP_I23]] to <4 x i32>
481-
; CHECK-NEXT: [[I1:%.*]] = bitcast <4 x i32> [[SEXT_I24]] to <2 x i64>
482480
; CHECK-NEXT: [[I2:%.*]] = bitcast <2 x i64> [[B:%.*]] to <4 x i32>
483481
; CHECK-NEXT: [[CMP_I21:%.*]] = icmp sgt <4 x i32> [[I2]], zeroinitializer
484-
; CHECK-NEXT: [[SEXT_I22:%.*]] = sext <4 x i1> [[CMP_I21]] to <4 x i32>
485-
; CHECK-NEXT: [[I3:%.*]] = bitcast <4 x i32> [[SEXT_I22]] to <2 x i64>
486482
; CHECK-NEXT: [[I4:%.*]] = bitcast <2 x i64> [[C:%.*]] to <4 x i32>
487483
; CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <4 x i32> [[I4]], zeroinitializer
488-
; CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
484+
; CHECK-NEXT: [[NARROW:%.*]] = select <4 x i1> [[CMP_I21]], <4 x i1> [[CMP_I23]], <4 x i1> zeroinitializer
485+
; CHECK-NEXT: [[XOR_I_INNER1:%.*]] = xor <4 x i1> [[NARROW]], [[CMP_I]]
486+
; CHECK-NEXT: [[NARROW3:%.*]] = select <4 x i1> [[CMP_I23]], <4 x i1> [[XOR_I_INNER1]], <4 x i1> zeroinitializer
487+
; CHECK-NEXT: [[AND_I25_INNER2:%.*]] = and <4 x i1> [[XOR_I_INNER1]], [[CMP_I21]]
488+
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SRC:%.*]] to <4 x i32>
489+
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[NARROW]], <4 x i32> [[TMP1]], <4 x i32> zeroinitializer
490+
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[A]] to <4 x i32>
491+
; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[NARROW3]], <4 x i32> [[TMP3]], <4 x i32> [[TMP2]]
492+
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[B]] to <4 x i32>
493+
; CHECK-NEXT: [[SEXT_I:%.*]] = select <4 x i1> [[AND_I25_INNER2]], <4 x i32> [[TMP5]], <4 x i32> [[TMP4]]
489494
; CHECK-NEXT: [[I5:%.*]] = bitcast <4 x i32> [[SEXT_I]] to <2 x i64>
490-
; CHECK-NEXT: [[AND_I27:%.*]] = and <2 x i64> [[I3]], [[I1]]
491-
; CHECK-NEXT: [[XOR_I:%.*]] = xor <2 x i64> [[AND_I27]], [[I5]]
492-
; CHECK-NEXT: [[AND_I26:%.*]] = and <2 x i64> [[XOR_I]], [[I1]]
493-
; CHECK-NEXT: [[AND_I25:%.*]] = and <2 x i64> [[XOR_I]], [[I3]]
494-
; CHECK-NEXT: [[AND_I:%.*]] = and <2 x i64> [[AND_I27]], [[SRC:%.*]]
495-
; CHECK-NEXT: [[I6:%.*]] = bitcast <2 x i64> [[AND_I]] to <16 x i8>
496-
; CHECK-NEXT: [[I7:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
497-
; CHECK-NEXT: [[I8:%.*]] = bitcast <2 x i64> [[AND_I26]] to <16 x i8>
498-
; CHECK-NEXT: [[I9:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[I6]], <16 x i8> [[I7]], <16 x i8> [[I8]])
499-
; CHECK-NEXT: [[I12:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
500-
; CHECK-NEXT: [[I13:%.*]] = bitcast <2 x i64> [[AND_I25]] to <16 x i8>
501-
; CHECK-NEXT: [[I14:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[I9]], <16 x i8> [[I12]], <16 x i8> [[I13]])
502-
; CHECK-NEXT: [[I15:%.*]] = bitcast <16 x i8> [[I14]] to <2 x i64>
503-
; CHECK-NEXT: ret <2 x i64> [[I15]]
495+
; CHECK-NEXT: ret <2 x i64> [[I5]]
504496
;
505497
%i = bitcast <2 x i64> %a to <4 x i32>
506498
%cmp.i23 = icmp sgt <4 x i32> %i, zeroinitializer

0 commit comments

Comments
 (0)