2121#include " llvm/ADT/SmallPtrSet.h"
2222#include " llvm/ADT/SmallSet.h"
2323#include " llvm/ADT/SmallVector.h"
24+ #include " llvm/ADT/Statistic.h"
2425#include " llvm/Analysis/AliasAnalysis.h"
2526#include " llvm/Analysis/AliasSetTracker.h"
2627#include " llvm/Analysis/LoopAnalysisManager.h"
@@ -70,6 +71,8 @@ using namespace llvm::PatternMatch;
7071
7172#define DEBUG_TYPE " loop-accesses"
7273
74+ STATISTIC (HistogramsDetected, " Number of Histograms detected" );
75+
7376static cl::opt<unsigned , true >
7477VectorizationFactor (" force-vector-width" , cl::Hidden,
7578 cl::desc (" Sets the SIMD width. Zero is autoselect." ),
@@ -731,6 +734,23 @@ class AccessAnalysis {
731734 return UnderlyingObjects;
732735 }
733736
737+ // / Find Histogram counts that match high-level code in loops:
738+ // / \code
739+ // / buckets[indices[i]]+=step;
740+ // / \endcode
741+ // /
742+ // / It matches a pattern starting from \p HSt, which Stores to the 'buckets'
743+ // / array the computed histogram. It uses a BinOp to sum all counts, storing
744+ // / them using a loop-variant index Load from the 'indices' input array.
745+ // /
746+ // / On successful matches it updates the STATISTIC 'HistogramsDetected',
747+ // / regardless of hardware support, records the Load/BinOp/Store
748+ // / instructions of the update in \p Histograms, and adds the pointer
749+ // / used to update the histogram to \p HistogramPtrs.
750+ void findHistograms (StoreInst *HSt,
751+ SmallVectorImpl<HistogramInfo> &Histograms,
752+ SmallPtrSetImpl<const Value *> &HistogramPtrs);
753+
734754private:
735755 typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1 >> PtrAccessMap;
736756
@@ -1948,7 +1968,8 @@ getDependenceDistanceStrideAndSize(
19481968 const AccessAnalysis::MemAccessInfo &B, Instruction *BInst,
19491969 const DenseMap<Value *, const SCEV *> &Strides,
19501970 const DenseMap<Value *, SmallVector<const Value *, 16 >> &UnderlyingObjects,
1951- PredicatedScalarEvolution &PSE, const Loop *InnermostLoop) {
1971+ PredicatedScalarEvolution &PSE, const Loop *InnermostLoop,
1972+ const SmallPtrSetImpl<const Value *> &HistogramPtrs) {
19521973 auto &DL = InnermostLoop->getHeader ()->getModule ()->getDataLayout ();
19531974 auto &SE = *PSE.getSE ();
19541975 auto [APtr, AIsWrite] = A;
@@ -1966,6 +1987,15 @@ getDependenceDistanceStrideAndSize(
19661987 BPtr->getType ()->getPointerAddressSpace ())
19671988 return MemoryDepChecker::Dependence::Unknown;
19681989
1990+ // Ignore Histogram count updates as they are handled by the Intrinsic. This
1991+ // happens when the same pointer is first used to read from and then is used
1992+ // to write to.
1993+ if (!AIsWrite && BIsWrite && APtr == BPtr && HistogramPtrs.contains (APtr)) {
1994+ LLVM_DEBUG (dbgs () << " LAA: Histogram: Update is safely ignored. Pointer: "
1995+ << *APtr);
1996+ return MemoryDepChecker::Dependence::NoDep;
1997+ }
1998+
19691999 int64_t StrideAPtr =
19702000 getPtrStride (PSE, ATy, APtr, InnermostLoop, Strides, true ).value_or (0 );
19712001 int64_t StrideBPtr =
@@ -2022,15 +2052,15 @@ getDependenceDistanceStrideAndSize(
20222052MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent (
20232053 const MemAccessInfo &A, unsigned AIdx, const MemAccessInfo &B,
20242054 unsigned BIdx, const DenseMap<Value *, const SCEV *> &Strides,
2025- const DenseMap<Value *, SmallVector<const Value *, 16 >>
2026- &UnderlyingObjects ) {
2055+ const DenseMap<Value *, SmallVector<const Value *, 16 >> &UnderlyingObjects,
2056+ const SmallPtrSetImpl< const Value *> &HistogramPtrs ) {
20272057 assert (AIdx < BIdx && " Must pass arguments in program order" );
20282058
20292059 // Get the dependence distance, stride, type size and what access writes for
20302060 // the dependence between A and B.
20312061 auto Res = getDependenceDistanceStrideAndSize (
20322062 A, InstMap[AIdx], B, InstMap[BIdx], Strides, UnderlyingObjects, PSE,
2033- InnermostLoop);
2063+ InnermostLoop, HistogramPtrs );
20342064 if (std::holds_alternative<Dependence::DepType>(Res))
20352065 return std::get<Dependence::DepType>(Res);
20362066
@@ -2266,8 +2296,8 @@ MemoryDepChecker::Dependence::DepType MemoryDepChecker::isDependent(
22662296bool MemoryDepChecker::areDepsSafe (
22672297 DepCandidates &AccessSets, MemAccessInfoList &CheckDeps,
22682298 const DenseMap<Value *, const SCEV *> &Strides,
2269- const DenseMap<Value *, SmallVector<const Value *, 16 >>
2270- &UnderlyingObjects ) {
2299+ const DenseMap<Value *, SmallVector<const Value *, 16 >> &UnderlyingObjects,
2300+ const SmallPtrSetImpl< const Value *> &HistogramPtrs ) {
22712301
22722302 MinDepDistBytes = -1 ;
22732303 SmallPtrSet<MemAccessInfo, 8 > Visited;
@@ -2312,7 +2342,7 @@ bool MemoryDepChecker::areDepsSafe(
23122342
23132343 Dependence::DepType Type =
23142344 isDependent (*A.first , A.second , *B.first , B.second , Strides,
2315- UnderlyingObjects);
2345+ UnderlyingObjects, HistogramPtrs );
23162346 mergeInStatus (Dependence::isSafeForVectorization (Type));
23172347
23182348 // Gather dependences unless we accumulated MaxDependences
@@ -2648,6 +2678,9 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
26482678 // check.
26492679 Accesses.buildDependenceSets ();
26502680
2681+ for (StoreInst *ST : Stores)
2682+ Accesses.findHistograms (ST, Histograms, HistogramPtrs);
2683+
26512684 // Find pointers with computable bounds. We are going to use this information
26522685 // to place a runtime bound check.
26532686 Value *UncomputablePtr = nullptr ;
@@ -2672,7 +2705,7 @@ void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
26722705 LLVM_DEBUG (dbgs () << " LAA: Checking memory dependencies\n " );
26732706 CanVecMem = DepChecker->areDepsSafe (
26742707 DependentAccesses, Accesses.getDependenciesToCheck (), SymbolicStrides,
2675- Accesses.getUnderlyingObjects ());
2708+ Accesses.getUnderlyingObjects (), HistogramPtrs );
26762709
26772710 if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck ()) {
26782711 LLVM_DEBUG (dbgs () << " LAA: Retrying with memory checks\n " );
@@ -3127,6 +3160,99 @@ const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L) {
31273160 return *I.first ->second ;
31283161}
31293162
3163+ void AccessAnalysis::findHistograms (
3164+ StoreInst *HSt, SmallVectorImpl<HistogramInfo> &Histograms,
3165+ SmallPtrSetImpl<const Value *> &HistogramPtrs) {
3166+ LLVM_DEBUG (dbgs () << " LAA: Attempting to match histogram from " << *HSt
3167+ << " \n " );
3168+ // Store value must come from a Binary Operation.
3169+ Instruction *HPtrInstr = nullptr ;
3170+ BinaryOperator *HBinOp = nullptr ;
3171+ if (!match (HSt, m_Store (m_BinOp (HBinOp), m_Instruction (HPtrInstr)))) {
3172+ LLVM_DEBUG (dbgs () << " \t No BinOp\n " );
3173+ return ;
3174+ }
3175+
3176+ // BinOp must be an Add or a Sub operating modifying the bucket value by a
3177+ // loop invariant amount.
3178+ // FIXME: We assume the loop invariant term is on the RHS.
3179+ // Fine for an immediate/constant, but maybe not a generic value?
3180+ Value *HIncVal = nullptr ;
3181+ if (!match (HBinOp, m_Add (m_Load (m_Specific (HPtrInstr)), m_Value (HIncVal))) &&
3182+ !match (HBinOp, m_Sub (m_Load (m_Specific (HPtrInstr)), m_Value (HIncVal)))) {
3183+ LLVM_DEBUG (dbgs () << " \t No matching load\n " );
3184+ return ;
3185+ }
3186+ Instruction *IndexedLoad = cast<Instruction>(HBinOp->getOperand (0 ));
3187+
3188+ // The address to store is calculated through a GEP Instruction.
3189+ // FIXME: Support GEPs with more operands.
3190+ GetElementPtrInst *HPtr = dyn_cast<GetElementPtrInst>(HPtrInstr);
3191+ if (!HPtr || HPtr->getNumOperands () > 2 ) {
3192+ LLVM_DEBUG (dbgs () << " \t Too many GEP operands\n " );
3193+ return ;
3194+ }
3195+
3196+ // Check that the index is calculated by loading from another array. Ignore
3197+ // any extensions.
3198+ // FIXME: Support indices from other sources that a linear load from memory?
3199+ Value *HIdx = HPtr->getOperand (1 );
3200+ Instruction *IdxInst = nullptr ;
3201+ // FIXME: Can this fail? Maybe if IdxInst isn't an instruction. Just need to
3202+ // look through extensions, find another way?
3203+ if (!match (HIdx, m_ZExtOrSExtOrSelf (m_Instruction (IdxInst))))
3204+ return ;
3205+
3206+ // Currently restricting this to linear addressing when loading indices.
3207+ LoadInst *VLoad = dyn_cast<LoadInst>(IdxInst);
3208+ Value *VPtrVal;
3209+ if (!VLoad || !match (VLoad, m_Load (m_Value (VPtrVal)))) {
3210+ LLVM_DEBUG (dbgs () << " \t Bad Index Load\n " );
3211+ return ;
3212+ }
3213+
3214+ if (!isa<SCEVAddRecExpr>(PSE.getSCEV (VPtrVal))) {
3215+ LLVM_DEBUG (dbgs () << " \t Cannot determine index load stride\n " );
3216+ return ;
3217+ }
3218+
3219+ // FIXME: support smaller types of input arrays. Integers can be promoted
3220+ // for codegen.
3221+ Type *VLoadTy = VLoad->getType ();
3222+ if (!VLoadTy->isIntegerTy () || (VLoadTy->getScalarSizeInBits () != 32 &&
3223+ VLoadTy->getScalarSizeInBits () != 64 )) {
3224+ LLVM_DEBUG (dbgs () << " \t Unsupported bucket type: " << *VLoadTy << " \n " );
3225+ return ;
3226+ }
3227+
3228+ // Ensure we'll have the same mask by checking that all parts of the histogram
3229+ // are in the same block.
3230+ // FIXME: Could use dominance checks instead?
3231+ if (IndexedLoad->getParent () != HBinOp->getParent () ||
3232+ IndexedLoad->getParent () != HSt->getParent ()) {
3233+ LLVM_DEBUG (dbgs () << " \t Different parent blocks\n " );
3234+ return ;
3235+ }
3236+
3237+ // A histogram pointer may only alias to itself, and must only have two uses,
3238+ // the load and the store.
3239+ for (AliasSet &AS : AST)
3240+ if (AS.isMustAlias () || AS.isMayAlias ())
3241+ if ((is_contained (AS.getPointers (), HPtr) && AS.size () > 1 ) ||
3242+ HPtr->getNumUses () != 2 ) {
3243+ LLVM_DEBUG (dbgs () << " \t Aliasing problem\n " );
3244+ return ;
3245+ }
3246+
3247+ LLVM_DEBUG (dbgs () << " LAA: Found Histogram Operation: " << *HBinOp << " \n " );
3248+ HistogramsDetected++;
3249+
3250+ // Store the operations that make up the histogram.
3251+ Histograms.emplace_back (IndexedLoad, HBinOp, HSt);
3252+ // Store pointers used to write those counts in the computed histogram.
3253+ HistogramPtrs.insert (HPtr);
3254+ }
3255+
31303256bool LoopAccessInfoManager::invalidate (
31313257 Function &F, const PreservedAnalyses &PA,
31323258 FunctionAnalysisManager::Invalidator &Inv) {
0 commit comments