-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[IA] Use a single callback for lowerDeinterleaveIntrinsic [nfc] #148978
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -234,53 +234,100 @@ bool RISCVTargetLowering::lowerInterleavedStore(StoreInst *SI, | |
| return true; | ||
| } | ||
|
|
||
| static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) { | ||
| assert(N); | ||
| if (N == 1) | ||
| return true; | ||
|
|
||
| using namespace PatternMatch; | ||
| // Right now we're only recognizing the simplest pattern. | ||
| uint64_t C; | ||
| if (match(V, m_CombineOr(m_ConstantInt(C), | ||
| m_c_Mul(m_Value(), m_ConstantInt(C)))) && | ||
| C && C % N == 0) | ||
| return true; | ||
|
|
||
| if (isPowerOf2_32(N)) { | ||
| KnownBits KB = llvm::computeKnownBits(V, DL); | ||
| return KB.countMinTrailingZeros() >= Log2_32(N); | ||
| } | ||
|
|
||
| return false; | ||
| } | ||
|
|
||
| bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad( | ||
| LoadInst *LI, ArrayRef<Value *> DeinterleaveValues) const { | ||
| Instruction *Load, Value *Mask, | ||
| ArrayRef<Value *> DeinterleaveValues) const { | ||
| const unsigned Factor = DeinterleaveValues.size(); | ||
| if (Factor > 8) | ||
| return false; | ||
|
|
||
| assert(LI->isSimple()); | ||
| IRBuilder<> Builder(LI); | ||
| IRBuilder<> Builder(Load); | ||
|
|
||
| Value *FirstActive = | ||
| *llvm::find_if(DeinterleaveValues, [](Value *V) { return V != nullptr; }); | ||
| VectorType *ResVTy = cast<VectorType>(FirstActive->getType()); | ||
|
|
||
| const DataLayout &DL = LI->getDataLayout(); | ||
| const DataLayout &DL = Load->getDataLayout(); | ||
| auto *XLenTy = Type::getIntNTy(Load->getContext(), Subtarget.getXLen()); | ||
|
|
||
| if (!isLegalInterleavedAccessType(ResVTy, Factor, LI->getAlign(), | ||
| LI->getPointerAddressSpace(), DL)) | ||
| Value *Ptr, *VL; | ||
| Align Alignment; | ||
| if (auto *LI = dyn_cast<LoadInst>(Load)) { | ||
| assert(LI->isSimple()); | ||
| Ptr = LI->getPointerOperand(); | ||
| Alignment = LI->getAlign(); | ||
| assert(!Mask && "Unexpected mask on a load\n"); | ||
| Mask = Builder.getAllOnesMask(ResVTy->getElementCount()); | ||
| VL = isa<FixedVectorType>(ResVTy) | ||
| ? Builder.CreateElementCount(XLenTy, ResVTy->getElementCount()) | ||
| : Constant::getAllOnesValue(XLenTy); | ||
| } else { | ||
| auto *VPLoad = cast<VPIntrinsic>(Load); | ||
| assert(VPLoad->getIntrinsicID() == Intrinsic::vp_load && | ||
| "Unexpected intrinsic"); | ||
| Ptr = VPLoad->getMemoryPointerParam(); | ||
| Alignment = VPLoad->getPointerAlignment().value_or( | ||
| DL.getABITypeAlign(ResVTy->getElementType())); | ||
|
|
||
| assert(Mask && "vp.load needs a mask!"); | ||
|
|
||
| Value *WideEVL = VPLoad->getVectorLengthParam(); | ||
| // Conservatively check if EVL is a multiple of factor, otherwise some | ||
| // (trailing) elements might be lost after the transformation. | ||
| if (!isMultipleOfN(WideEVL, Load->getDataLayout(), Factor)) | ||
| return false; | ||
|
|
||
| VL = Builder.CreateZExt( | ||
| Builder.CreateUDiv(WideEVL, | ||
| ConstantInt::get(WideEVL->getType(), Factor)), | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should this be an exact udiv?
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It probably should be, but doing this revealed a deeper problem in the EVL recognition. We're not checking for overflow in the multiply check, and I believe that to be unsound. I am going to return to this, but want to get a few things off my queue first. |
||
| XLenTy); | ||
| } | ||
|
|
||
| Type *PtrTy = Ptr->getType(); | ||
| unsigned AS = PtrTy->getPointerAddressSpace(); | ||
| if (!isLegalInterleavedAccessType(ResVTy, Factor, Alignment, AS, DL)) | ||
| return false; | ||
|
|
||
| Value *Return; | ||
| Type *PtrTy = LI->getPointerOperandType(); | ||
| Type *XLenTy = Type::getIntNTy(LI->getContext(), Subtarget.getXLen()); | ||
|
|
||
| if (isa<FixedVectorType>(ResVTy)) { | ||
| Value *VL = Builder.CreateElementCount(XLenTy, ResVTy->getElementCount()); | ||
| Value *Mask = Builder.getAllOnesMask(ResVTy->getElementCount()); | ||
| Return = Builder.CreateIntrinsic(FixedVlsegIntrIds[Factor - 2], | ||
| {ResVTy, PtrTy, XLenTy}, | ||
| {LI->getPointerOperand(), Mask, VL}); | ||
| {ResVTy, PtrTy, XLenTy}, {Ptr, Mask, VL}); | ||
| } else { | ||
| unsigned SEW = DL.getTypeSizeInBits(ResVTy->getElementType()); | ||
| unsigned NumElts = ResVTy->getElementCount().getKnownMinValue(); | ||
| Type *VecTupTy = TargetExtType::get( | ||
| LI->getContext(), "riscv.vector.tuple", | ||
| ScalableVectorType::get(Type::getInt8Ty(LI->getContext()), | ||
| Load->getContext(), "riscv.vector.tuple", | ||
| ScalableVectorType::get(Type::getInt8Ty(Load->getContext()), | ||
| NumElts * SEW / 8), | ||
| Factor); | ||
| Value *VL = Constant::getAllOnesValue(XLenTy); | ||
| Value *Mask = Builder.getAllOnesMask(ResVTy->getElementCount()); | ||
|
|
||
| Function *VlsegNFunc = Intrinsic::getOrInsertDeclaration( | ||
| LI->getModule(), ScalableVlsegIntrIds[Factor - 2], | ||
| Load->getModule(), ScalableVlsegIntrIds[Factor - 2], | ||
| {VecTupTy, PtrTy, Mask->getType(), VL->getType()}); | ||
|
|
||
| Value *Operands[] = { | ||
| PoisonValue::get(VecTupTy), | ||
| LI->getPointerOperand(), | ||
| Ptr, | ||
| Mask, | ||
| VL, | ||
| ConstantInt::get(XLenTy, | ||
|
|
@@ -290,7 +337,7 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad( | |
| CallInst *Vlseg = Builder.CreateCall(VlsegNFunc, Operands); | ||
|
|
||
| SmallVector<Type *, 2> AggrTypes{Factor, ResVTy}; | ||
| Return = PoisonValue::get(StructType::get(LI->getContext(), AggrTypes)); | ||
| Return = PoisonValue::get(StructType::get(Load->getContext(), AggrTypes)); | ||
| for (unsigned i = 0; i < Factor; ++i) { | ||
| Value *VecExtract = Builder.CreateIntrinsic( | ||
| Intrinsic::riscv_tuple_extract, {ResVTy, VecTupTy}, | ||
|
|
@@ -370,27 +417,6 @@ bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore( | |
| return true; | ||
| } | ||
|
|
||
| static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) { | ||
| assert(N); | ||
| if (N == 1) | ||
| return true; | ||
|
|
||
| using namespace PatternMatch; | ||
| // Right now we're only recognizing the simplest pattern. | ||
| uint64_t C; | ||
| if (match(V, m_CombineOr(m_ConstantInt(C), | ||
| m_c_Mul(m_Value(), m_ConstantInt(C)))) && | ||
| C && C % N == 0) | ||
| return true; | ||
|
|
||
| if (isPowerOf2_32(N)) { | ||
| KnownBits KB = llvm::computeKnownBits(V, DL); | ||
| return KB.countMinTrailingZeros() >= Log2_32(N); | ||
| } | ||
|
|
||
| return false; | ||
| } | ||
|
|
||
| /// Lower an interleaved vp.load into a vlsegN intrinsic. | ||
| /// | ||
| /// E.g. Lower an interleaved vp.load (Factor = 2): | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not related to this patch since this was just moved from another location. Do we need to use m_c_Mul instead of m_MuL here? Constants should have been canonicalized to the right hand side by this point.