Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions llvm/include/llvm/Analysis/LoopAccessAnalysis.h
Original file line number Diff line number Diff line change
Expand Up @@ -491,11 +491,12 @@ struct PointerDiffInfo {
const SCEV *SinkStart;
unsigned AccessSize;
bool NeedsFreeze;
Align AccessAlign;

PointerDiffInfo(const SCEV *SrcStart, const SCEV *SinkStart,
unsigned AccessSize, bool NeedsFreeze)
unsigned AccessSize, bool NeedsFreeze, Align AccessAlign)
: SrcStart(SrcStart), SinkStart(SinkStart), AccessSize(AccessSize),
NeedsFreeze(NeedsFreeze) {}
NeedsFreeze(NeedsFreeze), AccessAlign(AccessAlign) {}
};

/// Holds information about the memory runtime legality checks to verify
Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/Analysis/TargetTransformInfoImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -905,6 +905,9 @@ class TargetTransformInfoImplBase {
switch (ICA.getID()) {
default:
break;
case Intrinsic::loop_dependence_raw_mask:
case Intrinsic::loop_dependence_war_mask:
return 10;
case Intrinsic::allow_runtime_check:
case Intrinsic::allow_ubsan_check:
case Intrinsic::annotation:
Expand Down
3 changes: 2 additions & 1 deletion llvm/include/llvm/Transforms/Utils/LoopUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -634,7 +634,8 @@ addRuntimeChecks(Instruction *Loc, Loop *TheLoop,

LLVM_ABI Value *addDiffRuntimeChecks(
Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander,
function_ref<Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC);
ElementCount VF, unsigned IC,
function_ref<bool(unsigned)> UsesLoopDependenceMaskForAccessSize);

/// Struct to hold information about a partially invariant condition.
struct IVConditionInfo {
Expand Down
7 changes: 6 additions & 1 deletion llvm/lib/Analysis/LoopAccessAnalysis.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -508,11 +508,16 @@ bool RuntimePointerChecking::tryToCreateDiffCheck(
}
}

// Find the minimum common alignment of all accesses.
Align AccessAlign = getLoadStoreAlignment(SrcInsts[0]);
for (Instruction *Inst : concat<Instruction *>(SrcInsts, SinkInsts))
AccessAlign = std::min(AccessAlign, getLoadStoreAlignment(Inst));

LLVM_DEBUG(dbgs() << "LAA: Creating diff runtime check for:\n"
<< "SrcStart: " << *SrcStartInt << '\n'
<< "SinkStartInt: " << *SinkStartInt << '\n');
DiffChecks.emplace_back(SrcStartInt, SinkStartInt, AllocSize,
Src->NeedsFreeze || Sink->NeedsFreeze);
Src->NeedsFreeze || Sink->NeedsFreeze, AccessAlign);
return true;
}

Expand Down
62 changes: 46 additions & 16 deletions llvm/lib/Transforms/Utils/LoopUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2193,40 +2193,70 @@ struct SCEVPtrToAddrRewriter : SCEVRewriteVisitor<SCEVPtrToAddrRewriter> {

Value *llvm::addDiffRuntimeChecks(
Instruction *Loc, ArrayRef<PointerDiffInfo> Checks, SCEVExpander &Expander,
function_ref<Value *(IRBuilderBase &, unsigned)> GetVF, unsigned IC) {

ElementCount VF, unsigned IC,
function_ref<bool(unsigned)> UsesLoopDependenceMaskForAccessSize) {
LLVMContext &Ctx = Loc->getContext();
IRBuilder ChkBuilder(Ctx, InstSimplifyFolder(Loc->getDataLayout()));
ChkBuilder.SetInsertPoint(Loc);
Value *RuntimeVF = nullptr;
// Our instructions might fold to a constant.
Value *MemoryRuntimeCheck = nullptr;

auto &SE = *Expander.getSE();
const DataLayout &DL = Loc->getDataLayout();
SCEVPtrToAddrRewriter Rewriter(SE, DL);
// Map to keep track of created compares. The key is the pair of operands for
// the compare, to allow detecting and re-using redundant compares.
DenseMap<std::pair<Value *, Value *>, Value *> SeenCompares;
for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze] : Checks) {
for (const auto &[SrcStart, SinkStart, AccessSize, NeedsFreeze, AccessAlign] :
Checks) {
Value *IsConflict;
Type *Ty = SinkStart->getType();
// Compute VF * IC * AccessSize.
auto *VFTimesICTimesSize =
ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
ConstantInt::get(Ty, IC * AccessSize));
Type *CheckTy = ChkBuilder.getIntNTy(Ty->getScalarSizeInBits());
const SCEV *SinkStartRewritten = Rewriter.visit(SinkStart);
const SCEV *SrcStartRewritten = Rewriter.visit(SrcStart);
Value *Diff = Expander.expandCodeFor(
SE.getMinusSCEV(SinkStartRewritten, SrcStartRewritten), Ty, Loc);

// Check if the same compare has already been created earlier. In that case,
// there is no need to check it again.
Value *IsConflict = SeenCompares.lookup({Diff, VFTimesICTimesSize});
if (IsConflict)
continue;
VectorType *MaskTy = VectorType::get(ChkBuilder.getInt1Ty(), VF * IC);
if (!UsesLoopDependenceMaskForAccessSize(AccessSize) ||
commonAlignment(AccessAlign, AccessSize) < AccessSize) {
// Compute VF * IC * AccessSize.
if (!RuntimeVF)
RuntimeVF = ChkBuilder.CreateElementCount(CheckTy, VF);

auto *VFTimesICTimesSize = ChkBuilder.CreateMul(
RuntimeVF, ConstantInt::get(Ty, IC * AccessSize));
// Check if the same compare has already been created earlier. In that
// case, there is no need to check it again.
if (SeenCompares.contains({Diff, VFTimesICTimesSize}))
continue;

IsConflict =
ChkBuilder.CreateICmpULT(Diff, VFTimesICTimesSize, "diff.check");
SeenCompares.insert({{Diff, VFTimesICTimesSize}, IsConflict});
IsConflict =
ChkBuilder.CreateICmpULT(Diff, VFTimesICTimesSize, "diff.check");
SeenCompares.insert({{Diff, VFTimesICTimesSize}, IsConflict});
} else {
Value *LoopAccessSize = ChkBuilder.getInt64(AccessSize);
if (SeenCompares.contains({Diff, LoopAccessSize}))
continue;

// Note: This creates loop.dependence.war.mask(ptr null, ptr %diff). This
// allows SCEV to remove common offsets and avoids creating duplicate
// checks. If %diff is a sub, it can be folded into the mask.
Value *SrcPtr = ConstantPointerNull::get(PointerType::getUnqual(Ctx));
Value *SinkPtr = ChkBuilder.CreateIntToPtr(Diff, ChkBuilder.getPtrTy());
Value *Mask = ChkBuilder.CreateIntrinsic(
MaskTy, Intrinsic::loop_dependence_war_mask,
{SrcPtr, SinkPtr, LoopAccessSize}, {}, "loop.dep.mask");

Value *LastLaneIdx = ChkBuilder.CreateSub(
ChkBuilder.CreateElementCount(CheckTy, MaskTy->getElementCount()),
ChkBuilder.getIntN(Ty->getScalarSizeInBits(), 1));
Value *NoConflict =
ChkBuilder.CreateExtractElement(Mask, LastLaneIdx, "no.conflict");

IsConflict = ChkBuilder.CreateNot(NoConflict, "is.conflict");
SeenCompares.insert({{Diff, LoopAccessSize}, IsConflict});
}
if (NeedsFreeze)
IsConflict =
ChkBuilder.CreateFreeze(IsConflict, IsConflict->getName() + ".fr");
Expand Down
31 changes: 22 additions & 9 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1887,15 +1887,13 @@ class GeneratedRTChecks {

auto DiffChecks = RtPtrChecking.getDiffChecks();
if (DiffChecks) {
Value *RuntimeVF = nullptr;
MemRuntimeCheckCond = addDiffRuntimeChecks(
MemCheckBlock->getTerminator(), *DiffChecks, MemCheckExp,
[VF, &RuntimeVF](IRBuilderBase &B, unsigned Bits) {
if (!RuntimeVF)
RuntimeVF = getRuntimeVF(B, B.getIntNTy(Bits), VF);
return RuntimeVF;
},
IC);
LLVMContext &Ctx = MemCheckBlock->getContext();
auto UseLoopDependenceMask = [&](unsigned AccessSize) {
return isLoopDependenceMaskCheap(Ctx, VF, IC, AccessSize);
};
MemRuntimeCheckCond =
addDiffRuntimeChecks(MemCheckBlock->getTerminator(), *DiffChecks,
MemCheckExp, VF, IC, UseLoopDependenceMask);
} else {
MemRuntimeCheckCond = addRuntimeChecks(
MemCheckBlock->getTerminator(), L, RtPtrChecking.getChecks(),
Expand Down Expand Up @@ -1947,6 +1945,21 @@ class GeneratedRTChecks {
OuterLoop = L->getParentLoop();
}

/// Decide whether a loop.dependence.war.mask producing VF * IC i1 lanes for
/// accesses of \p AccessSize is cheap, i.e. has a cost of at most 1.
///
/// If ForceTargetInstructionCost was given explicitly, the answer is derived
/// from that value alone; otherwise TTI is queried for the intrinsic's cost
/// using the current CostKind. An invalid cost is never considered cheap.
bool isLoopDependenceMaskCheap(LLVMContext &Ctx, ElementCount VF, unsigned IC,
                               unsigned AccessSize) {
  const bool CostIsForced = ForceTargetInstructionCost.getNumOccurrences() > 0;
  if (CostIsForced)
    return ForceTargetInstructionCost <= 1;

  // Build placeholder operands for the cost query. The pointer values should
  // not change the cost, so null pointers are used for both. The (constant)
  // access size is needed by targets to cost the mask.
  VectorType *LaneMaskTy = VectorType::get(Type::getInt1Ty(Ctx), VF * IC);
  Value *SizeArg = ConstantInt::get(Type::getInt64Ty(Ctx), AccessSize);
  Value *DummyPtr = ConstantPointerNull::get(PointerType::getUnqual(Ctx));
  IntrinsicCostAttributes MaskAttrs(Intrinsic::loop_dependence_war_mask,
                                    LaneMaskTy, {DummyPtr, DummyPtr, SizeArg});

  InstructionCost MaskCost = TTI->getIntrinsicInstrCost(MaskAttrs, CostKind);
  if (!MaskCost.isValid())
    return false;
  return MaskCost <= 1;
}

InstructionCost getCost() {
if (SCEVCheckBlock || MemCheckBlock)
LLVM_DEBUG(dbgs() << "Calculating cost of runtime checks:\n");
Expand Down
47 changes: 47 additions & 0 deletions llvm/test/Analysis/CostModel/loop-dep-mask-no_info.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size %s -S -o - | FileCheck %s --check-prefix=CHECK-SIZE
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=throughput %s -S -o - | FileCheck %s --check-prefix=CHECK-THROUGHPUT

; Cost-model coverage for llvm.loop.dependence.war.mask: four fixed-width mask
; types (<16 x i1>, <8 x i1>, <4 x i1>, <2 x i1>) paired with access sizes
; 1, 2, 4 and 8. The CHECK lines below expect a cost of 10 for every call
; under both the code-size and throughput cost kinds.
define void @loop_dependence_war_mask(ptr %a, ptr %b) {
; CHECK-SIZE-LABEL: 'loop_dependence_war_mask'
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 1)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 2)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 4)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 8)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-THROUGHPUT-LABEL: 'loop_dependence_war_mask'
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 1)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 2)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 4)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 8)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%res1 = call <16 x i1> @llvm.loop.dependence.war.mask.v16i1(ptr %a, ptr %b, i64 1)
%res2 = call <8 x i1> @llvm.loop.dependence.war.mask.v8i1(ptr %a, ptr %b, i64 2)
%res3 = call <4 x i1> @llvm.loop.dependence.war.mask.v4i1(ptr %a, ptr %b, i64 4)
%res4 = call <2 x i1> @llvm.loop.dependence.war.mask.v2i1(ptr %a, ptr %b, i64 8)
ret void
}

; Cost-model coverage for llvm.loop.dependence.raw.mask: four fixed-width mask
; types (<16 x i1>, <8 x i1>, <4 x i1>, <2 x i1>) paired with access sizes
; 1, 2, 4 and 8. The CHECK lines below expect a cost of 10 for every call
; under both the code-size and throughput cost kinds.
define void @loop_dependence_raw_mask(ptr %a, ptr %b) {
; CHECK-SIZE-LABEL: 'loop_dependence_raw_mask'
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 1)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 2)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 4)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 8)
; CHECK-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; CHECK-THROUGHPUT-LABEL: 'loop_dependence_raw_mask'
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 1)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 2)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 4)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 8)
; CHECK-THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
%res1 = call <16 x i1> @llvm.loop.dependence.raw.mask.v16i1(ptr %a, ptr %b, i64 1)
%res2 = call <8 x i1> @llvm.loop.dependence.raw.mask.v8i1(ptr %a, ptr %b, i64 2)
%res3 = call <4 x i1> @llvm.loop.dependence.raw.mask.v4i1(ptr %a, ptr %b, i64 4)
%res4 = call <2 x i1> @llvm.loop.dependence.raw.mask.v2i1(ptr %a, ptr %b, i64 8)
ret void
}
Loading
Loading