Skip to content
This repository was archived by the owner on Feb 5, 2019. It is now read-only.

Commit 51d3739

Browse files
committed
[PGOMemOPSize] Preserve the DominatorTree
Summary: PGOMemOPSize only modifies the CFG in a couple of places; thus we can preserve the DominatorTree with little effort. When optimizing SQLite with -O3, this patch reduces the number of nodes traversed by DFS by 3.8% and the number of times DominatorTreeBase::recalculation is called by 5.7%. Reviewers: kuhar, davide, dmgreen Reviewed By: dmgreen Subscribers: mzolotukhin, vsk, llvm-commits Differential Revision: https://reviews.llvm.org/D48914 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@336522 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 7388807 commit 51d3739

File tree

6 files changed

+36
-17
lines changed

6 files changed

+36
-17
lines changed

lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
#include "llvm/IR/BasicBlock.h"
2626
#include "llvm/IR/CallSite.h"
2727
#include "llvm/IR/DerivedTypes.h"
28+
#include "llvm/IR/DomTreeUpdater.h"
29+
#include "llvm/IR/Dominators.h"
2830
#include "llvm/IR/Function.h"
2931
#include "llvm/IR/IRBuilder.h"
3032
#include "llvm/IR/InstVisitor.h"
@@ -112,6 +114,7 @@ class PGOMemOPSizeOptLegacyPass : public FunctionPass {
112114
AU.addRequired<BlockFrequencyInfoWrapperPass>();
113115
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
114116
AU.addPreserved<GlobalsAAWrapperPass>();
117+
AU.addPreserved<DominatorTreeWrapperPass>();
115118
}
116119
};
117120
} // end anonymous namespace
@@ -133,8 +136,8 @@ namespace {
133136
class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> {
134137
public:
135138
MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI,
136-
OptimizationRemarkEmitter &ORE)
137-
: Func(Func), BFI(BFI), ORE(ORE), Changed(false) {
139+
OptimizationRemarkEmitter &ORE, DominatorTree *DT)
140+
: Func(Func), BFI(BFI), ORE(ORE), DT(DT), Changed(false) {
138141
ValueDataArray =
139142
llvm::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2);
140143
// Get the MemOPSize range information from option MemOPSizeRange,
@@ -170,6 +173,7 @@ class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> {
170173
Function &Func;
171174
BlockFrequencyInfo &BFI;
172175
OptimizationRemarkEmitter &ORE;
176+
DominatorTree *DT;
173177
bool Changed;
174178
std::vector<MemIntrinsic *> WorkList;
175179
// Start of the precise range.
@@ -336,15 +340,16 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
336340
LLVM_DEBUG(dbgs() << *BB << "\n");
337341
auto OrigBBFreq = BFI.getBlockFreq(BB);
338342

339-
BasicBlock *DefaultBB = SplitBlock(BB, MI);
343+
BasicBlock *DefaultBB = SplitBlock(BB, MI, DT);
340344
BasicBlock::iterator It(*MI);
341345
++It;
342346
assert(It != DefaultBB->end());
343-
BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It));
347+
BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It), DT);
344348
MergeBB->setName("MemOP.Merge");
345349
BFI.setBlockFreq(MergeBB, OrigBBFreq.getFrequency());
346350
DefaultBB->setName("MemOP.Default");
347351

352+
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
348353
auto &Ctx = Func.getContext();
349354
IRBuilder<> IRB(BB);
350355
BB->getTerminator()->eraseFromParent();
@@ -361,6 +366,10 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
361366

362367
LLVM_DEBUG(dbgs() << "\n\n== Basic Block After==\n");
363368

369+
std::vector<DominatorTree::UpdateType> Updates;
370+
if (DT)
371+
Updates.reserve(2 * SizeIds.size());
372+
364373
for (uint64_t SizeId : SizeIds) {
365374
BasicBlock *CaseBB = BasicBlock::Create(
366375
Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB);
@@ -375,8 +384,15 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
375384
IRBuilder<> IRBCase(CaseBB);
376385
IRBCase.CreateBr(MergeBB);
377386
SI->addCase(CaseSizeId, CaseBB);
387+
if (DT) {
388+
Updates.push_back({DominatorTree::Insert, CaseBB, MergeBB});
389+
Updates.push_back({DominatorTree::Insert, BB, CaseBB});
390+
}
378391
LLVM_DEBUG(dbgs() << *CaseBB << "\n");
379392
}
393+
DTU.applyUpdates(Updates);
394+
Updates.clear();
395+
380396
setProfMetadata(Func.getParent(), SI, CaseCounts, MaxCount);
381397

382398
LLVM_DEBUG(dbgs() << *BB << "\n");
@@ -397,13 +413,14 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
397413
} // namespace
398414

399415
static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI,
400-
OptimizationRemarkEmitter &ORE) {
416+
OptimizationRemarkEmitter &ORE,
417+
DominatorTree *DT) {
401418
if (DisableMemOPOPT)
402419
return false;
403420

404421
if (F.hasFnAttribute(Attribute::OptimizeForSize))
405422
return false;
406-
MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE);
423+
MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE, DT);
407424
MemOPSizeOpt.perform();
408425
return MemOPSizeOpt.isChanged();
409426
}
@@ -412,7 +429,9 @@ bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) {
412429
BlockFrequencyInfo &BFI =
413430
getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
414431
auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
415-
return PGOMemOPSizeOptImpl(F, BFI, ORE);
432+
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
433+
DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
434+
return PGOMemOPSizeOptImpl(F, BFI, ORE, DT);
416435
}
417436

418437
namespace llvm {
@@ -422,11 +441,13 @@ PreservedAnalyses PGOMemOPSizeOpt::run(Function &F,
422441
FunctionAnalysisManager &FAM) {
423442
auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
424443
auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
425-
bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE);
444+
auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
445+
bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE, DT);
426446
if (!Changed)
427447
return PreservedAnalyses::all();
428448
auto PA = PreservedAnalyses();
429449
PA.preserve<GlobalsAA>();
450+
PA.preserve<DominatorTreeAnalysis>();
430451
return PA;
431452
}
432453
} // namespace llvm

test/Other/opt-O2-pipeline.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,6 @@
8080
; CHECK-NEXT: Lazy Block Frequency Analysis
8181
; CHECK-NEXT: Optimization Remark Emitter
8282
; CHECK-NEXT: PGOMemOPSize
83-
; CHECK-NEXT: Dominator Tree Construction
8483
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
8584
; CHECK-NEXT: Function Alias Analysis Results
8685
; CHECK-NEXT: Natural Loop Information

test/Other/opt-O3-pipeline.ll

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,6 @@
8484
; CHECK-NEXT: Lazy Block Frequency Analysis
8585
; CHECK-NEXT: Optimization Remark Emitter
8686
; CHECK-NEXT: PGOMemOPSize
87-
; CHECK-NEXT: Dominator Tree Construction
8887
; CHECK-NEXT: Basic Alias Analysis (stateless AA impl)
8988
; CHECK-NEXT: Function Alias Analysis Results
9089
; CHECK-NEXT: Natural Loop Information

test/Transforms/PGOProfile/memop_clone.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: opt < %s -pgo-memop-opt -S | FileCheck %s
1+
; RUN: opt < %s -pgo-memop-opt -verify-dom-info -S | FileCheck %s
22

33
define i32 @test(i8* %a, i8* %b) !prof !1 {
44
; CHECK_LABEL: test

test/Transforms/PGOProfile/memop_size_opt.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
1-
; RUN: opt < %s -pgo-memop-opt -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT
2-
; RUN: opt < %s -passes=pgo-memop-opt -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT
3-
; RUN: opt < %s -pgo-memop-opt -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT
1+
; RUN: opt < %s -pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT
2+
; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT
3+
; RUN: opt < %s -pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT
44
; RUN: FileCheck %s -input-file=%t.opt.yaml --check-prefix=YAML
5-
; RUN: opt < %s -passes=pgo-memop-opt -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT
5+
; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT
66
; RUN: FileCheck %s -input-file=%t.opt.yaml --check-prefix=YAML
77

88

test/Transforms/PGOProfile/memop_size_opt_zero.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; Test to ensure the pgo memop optimization pass doesn't try to scale
22
; up a value profile with a 0 count, which would lead to divide by 0.
3-
; RUN: opt < %s -passes=pgo-memop-opt -pgo-memop-count-threshold=1 -S | FileCheck %s --check-prefix=MEMOP_OPT
4-
; RUN: opt < %s -pgo-memop-opt -pgo-memop-count-threshold=1 -S | FileCheck %s --check-prefix=MEMOP_OPT
3+
; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=1 -S | FileCheck %s --check-prefix=MEMOP_OPT
4+
; RUN: opt < %s -pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=1 -S | FileCheck %s --check-prefix=MEMOP_OPT
55

66
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
77
target triple = "x86_64-unknown-linux-gnu"

0 commit comments

Comments
 (0)