Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions llvm/lib/Transforms/Vectorize/VectorCombine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ class VectorCombine {
bool foldInsExtFNeg(Instruction &I);
bool foldInsExtBinop(Instruction &I);
bool foldInsExtVectorToShuffle(Instruction &I);
bool foldBitOpOfBitcasts(Instruction &I);
bool foldBitcastShuffle(Instruction &I);
bool scalarizeBinopOrCmp(Instruction &I);
bool scalarizeVPIntrinsic(Instruction &I);
Expand Down Expand Up @@ -801,6 +802,58 @@ bool VectorCombine::foldInsExtBinop(Instruction &I) {
return true;
}

bool VectorCombine::foldBitOpOfBitcasts(Instruction &I) {
// Match: bitop(bitcast(x), bitcast(y)) -> bitcast(bitop(x, y))
auto *BinOp = dyn_cast<BinaryOperator>(&I);
if (!BinOp || !BinOp->isBitwiseLogicOp())
return false;

Value *LHS = BinOp->getOperand(0);
Value *RHS = BinOp->getOperand(1);

// Both operands must be bitcasts
auto *LHSCast = dyn_cast<BitCastInst>(LHS);
auto *RHSCast = dyn_cast<BitCastInst>(RHS);
if (!LHSCast || !RHSCast)
return false;

Value *LHSSrc = LHSCast->getOperand(0);
Value *RHSSrc = RHSCast->getOperand(0);

// Source types must match
if (LHSSrc->getType() != RHSSrc->getType())
return false;

// Only handle vector types
auto *SrcVecTy = dyn_cast<FixedVectorType>(LHSSrc->getType());
auto *DstVecTy = dyn_cast<FixedVectorType>(I.getType());
if (!SrcVecTy || !DstVecTy)
return false;

// Same total bit width
if (SrcVecTy->getPrimitiveSizeInBits() != DstVecTy->getPrimitiveSizeInBits())
return false;

// Cost check: prefer operations on narrower element types
unsigned SrcEltBits = SrcVecTy->getScalarSizeInBits();
unsigned DstEltBits = DstVecTy->getScalarSizeInBits();

// Prefer smaller element sizes (more elements, finer granularity)
if (SrcEltBits > DstEltBits)
return false;

// Create the operation on the source type
Value *NewOp = Builder.CreateBinOp(BinOp->getOpcode(), LHSSrc, RHSSrc,
BinOp->getName() + ".inner");
if (auto *NewBinOp = dyn_cast<BinaryOperator>(NewOp))
NewBinOp->copyIRFlags(BinOp);

// Bitcast the result back
Value *Result = Builder.CreateBitCast(NewOp, I.getType());
replaceValue(I, *Result);
return true;
}

/// If this is a bitcast of a shuffle, try to bitcast the source vector to the
/// destination type followed by shuffle. This can enable further transforms by
/// moving bitcasts or shuffles together.
Expand Down Expand Up @@ -3562,6 +3615,11 @@ bool VectorCombine::run() {
case Instruction::BitCast:
MadeChange |= foldBitcastShuffle(I);
break;
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
MadeChange |= foldBitOpOfBitcasts(I);
break;
default:
MadeChange |= shrinkType(I);
break;
Expand Down
30 changes: 11 additions & 19 deletions llvm/test/Transforms/PhaseOrdering/X86/blendv-select.ll
Original file line number Diff line number Diff line change
Expand Up @@ -477,30 +477,22 @@ define <2 x i64> @PR66513(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %s
; CHECK-LABEL: @PR66513(
; CHECK-NEXT: [[I:%.*]] = bitcast <2 x i64> [[A:%.*]] to <4 x i32>
; CHECK-NEXT: [[CMP_I23:%.*]] = icmp sgt <4 x i32> [[I]], zeroinitializer
; CHECK-NEXT: [[SEXT_I24:%.*]] = sext <4 x i1> [[CMP_I23]] to <4 x i32>
; CHECK-NEXT: [[I1:%.*]] = bitcast <4 x i32> [[SEXT_I24]] to <2 x i64>
; CHECK-NEXT: [[I2:%.*]] = bitcast <2 x i64> [[B:%.*]] to <4 x i32>
; CHECK-NEXT: [[CMP_I21:%.*]] = icmp sgt <4 x i32> [[I2]], zeroinitializer
; CHECK-NEXT: [[SEXT_I22:%.*]] = sext <4 x i1> [[CMP_I21]] to <4 x i32>
; CHECK-NEXT: [[I3:%.*]] = bitcast <4 x i32> [[SEXT_I22]] to <2 x i64>
; CHECK-NEXT: [[I4:%.*]] = bitcast <2 x i64> [[C:%.*]] to <4 x i32>
; CHECK-NEXT: [[CMP_I:%.*]] = icmp sgt <4 x i32> [[I4]], zeroinitializer
; CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32>
; CHECK-NEXT: [[NARROW:%.*]] = select <4 x i1> [[CMP_I21]], <4 x i1> [[CMP_I23]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[XOR_I_INNER1:%.*]] = xor <4 x i1> [[NARROW]], [[CMP_I]]
; CHECK-NEXT: [[NARROW3:%.*]] = select <4 x i1> [[CMP_I23]], <4 x i1> [[XOR_I_INNER1]], <4 x i1> zeroinitializer
; CHECK-NEXT: [[AND_I25_INNER2:%.*]] = and <4 x i1> [[XOR_I_INNER1]], [[CMP_I21]]
; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SRC:%.*]] to <4 x i32>
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[NARROW]], <4 x i32> [[TMP1]], <4 x i32> zeroinitializer
; CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i64> [[A]] to <4 x i32>
; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[NARROW3]], <4 x i32> [[TMP3]], <4 x i32> [[TMP2]]
; CHECK-NEXT: [[TMP5:%.*]] = bitcast <2 x i64> [[B]] to <4 x i32>
; CHECK-NEXT: [[SEXT_I:%.*]] = select <4 x i1> [[AND_I25_INNER2]], <4 x i32> [[TMP5]], <4 x i32> [[TMP4]]
; CHECK-NEXT: [[I5:%.*]] = bitcast <4 x i32> [[SEXT_I]] to <2 x i64>
; CHECK-NEXT: [[AND_I27:%.*]] = and <2 x i64> [[I3]], [[I1]]
; CHECK-NEXT: [[XOR_I:%.*]] = xor <2 x i64> [[AND_I27]], [[I5]]
; CHECK-NEXT: [[AND_I26:%.*]] = and <2 x i64> [[XOR_I]], [[I1]]
; CHECK-NEXT: [[AND_I25:%.*]] = and <2 x i64> [[XOR_I]], [[I3]]
; CHECK-NEXT: [[AND_I:%.*]] = and <2 x i64> [[AND_I27]], [[SRC:%.*]]
; CHECK-NEXT: [[I6:%.*]] = bitcast <2 x i64> [[AND_I]] to <16 x i8>
; CHECK-NEXT: [[I7:%.*]] = bitcast <2 x i64> [[A]] to <16 x i8>
; CHECK-NEXT: [[I8:%.*]] = bitcast <2 x i64> [[AND_I26]] to <16 x i8>
; CHECK-NEXT: [[I9:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[I6]], <16 x i8> [[I7]], <16 x i8> [[I8]])
; CHECK-NEXT: [[I12:%.*]] = bitcast <2 x i64> [[B]] to <16 x i8>
; CHECK-NEXT: [[I13:%.*]] = bitcast <2 x i64> [[AND_I25]] to <16 x i8>
; CHECK-NEXT: [[I14:%.*]] = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> [[I9]], <16 x i8> [[I12]], <16 x i8> [[I13]])
; CHECK-NEXT: [[I15:%.*]] = bitcast <16 x i8> [[I14]] to <2 x i64>
; CHECK-NEXT: ret <2 x i64> [[I15]]
; CHECK-NEXT: ret <2 x i64> [[I5]]
;
%i = bitcast <2 x i64> %a to <4 x i32>
%cmp.i23 = icmp sgt <4 x i32> %i, zeroinitializer
Expand Down
Loading