diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 6110bda02406d..9fe7d590eb6f5 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -223,6 +223,8 @@ extern cl::opt<bool> EnableMatrix; extern cl::opt<bool> DisablePreInliner; extern cl::opt<int> PreInlineThreshold; + +extern cl::opt<bool> SYCLOptimizationMode; } // namespace llvm void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM, @@ -271,78 +273,88 @@ PassBuilder::buildO1FunctionSimplificationPipeline(OptimizationLevel Level, // Form canonically associated expression trees, and simplify the trees using // basic mathematical properties. For example, this will form (nearly) // minimal multiplication trees. - FPM.addPass(ReassociatePass()); - - // Add the primary loop simplification pipeline. - // FIXME: Currently this is split into two loop pass pipelines because we run - // some function passes in between them. These can and should be removed - // and/or replaced by scheduling the loop pass equivalents in the correct - // positions. But those equivalent passes aren't powerful enough yet. - // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still - // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to - // fully replace `SimplifyCFGPass`, and the closest to the other we have is - // `LoopInstSimplify`. - LoopPassManager LPM1, LPM2; - - // Simplify the loop body. We do this initially to clean up after other loop - // passes run, either when iterating on a loop or on inner loops with - // implications on the outer loop. - LPM1.addPass(LoopInstSimplifyPass()); - LPM1.addPass(LoopSimplifyCFGPass()); - - // Try to remove as much code from the loop header as possible, - // to reduce amount of IR that will have to be duplicated. - // TODO: Investigate promotion cap for O1. 
- LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); - - LPM1.addPass(LoopRotatePass(/* Disable header duplication */ true, - isLTOPreLink(Phase))); - // TODO: Investigate promotion cap for O1. - LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); - LPM1.addPass(SimpleLoopUnswitchPass()); - if (EnableLoopFlatten) - LPM1.addPass(LoopFlattenPass()); - - LPM2.addPass(LoopIdiomRecognizePass()); - LPM2.addPass(IndVarSimplifyPass()); - - for (auto &C : LateLoopOptimizationsEPCallbacks) - C(LPM2, Level); - - LPM2.addPass(LoopDeletionPass()); - - if (EnableLoopInterchange) - LPM2.addPass(LoopInterchangePass()); - - // Do not enable unrolling in PreLinkThinLTO phase during sample PGO - // because it changes IR to makes profile annotation in back compile - // inaccurate. The normal unroller doesn't pay attention to forced full unroll - // attributes so we need to make sure and allow the full unroll pass to pay - // attention to it. - if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || - PGOOpt->Action != PGOOptions::SampleUse) - LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), - /* OnlyWhenForced= */ !PTO.LoopUnrolling, - PTO.ForgetAllSCEVInLoopUnroll)); - - for (auto &C : LoopOptimizerEndEPCallbacks) - C(LPM2, Level); - - // We provide the opt remark emitter pass for LICM to use. We only need to do - // this once as it is immutable. - FPM.addPass( - RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), - /*UseMemorySSA=*/true, - /*UseBlockFrequencyInfo=*/true)); - FPM.addPass(SimplifyCFGPass()); - FPM.addPass(InstCombinePass()); - // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA. - // *All* loop passes must preserve it, in order to be able to use it. 
- FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), - /*UseMemorySSA=*/false, - /*UseBlockFrequencyInfo=*/false)); + if (!SYCLOptimizationMode) { + // FIXME: re-association increases variable liveness and therefore register + // pressure. + FPM.addPass(ReassociatePass()); + + // Do not run loop pass pipeline in "SYCL Optimization Mode". Loop + // optimizations rely on TTI, which is not accurate for SPIR target. + + // Add the primary loop simplification pipeline. + // FIXME: Currently this is split into two loop pass pipelines because we + // run some function passes in between them. These can and should be removed + // and/or replaced by scheduling the loop pass equivalents in the correct + // positions. But those equivalent passes aren't powerful enough yet. + // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still + // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet + // to fully replace `SimplifyCFGPass`, and the closest to the other we have + // is `LoopInstSimplify`. + LoopPassManager LPM1, LPM2; + + // Simplify the loop body. We do this initially to clean up after other loop + // passes run, either when iterating on a loop or on inner loops with + // implications on the outer loop. + LPM1.addPass(LoopInstSimplifyPass()); + LPM1.addPass(LoopSimplifyCFGPass()); + + // Try to remove as much code from the loop header as possible, + // to reduce amount of IR that will have to be duplicated. + // TODO: Investigate promotion cap for O1. 
+ LPM1.addPass( + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + LPM1.addPass(SimpleLoopUnswitchPass()); + if (EnableLoopFlatten) + LPM1.addPass(LoopFlattenPass()); + + LPM2.addPass(LoopIdiomRecognizePass()); + LPM2.addPass(IndVarSimplifyPass()); + + for (auto &C : LateLoopOptimizationsEPCallbacks) + C(LPM2, Level); + LPM2.addPass(LoopDeletionPass()); + + if (EnableLoopInterchange) + LPM2.addPass(LoopInterchangePass()); + + // Do not enable unrolling in PreLinkThinLTO phase during sample PGO + // because it changes IR to makes profile annotation in back compile + // inaccurate. The normal unroller doesn't pay attention to forced full + // unroll attributes so we need to make sure and allow the full unroll pass + // to pay attention to it. + if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || + PGOOpt->Action != PGOOptions::SampleUse) + LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), + /* OnlyWhenForced= */ !PTO.LoopUnrolling, + PTO.ForgetAllSCEVInLoopUnroll)); + + for (auto &C : LoopOptimizerEndEPCallbacks) + C(LPM2, Level); + + // We provide the opt remark emitter pass for LICM to use. We only need to + // do this once as it is immutable. + FPM.addPass( + RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); + FPM.addPass( + createFunctionToLoopPassAdaptor(std::move(LPM1), + /*UseMemorySSA=*/true, + /*UseBlockFrequencyInfo=*/true)); + FPM.addPass(SimplifyCFGPass()); + FPM.addPass(InstCombinePass()); + // The loop passes in LPM2 (LoopFullUnrollPass) do not preserve MemorySSA. + // *All* loop passes must preserve it, in order to be able to use it. + FPM.addPass( + createFunctionToLoopPassAdaptor(std::move(LPM2), + /*UseMemorySSA=*/false, + /*UseBlockFrequencyInfo=*/false)); + } // Delete small array after loop unroll. FPM.addPass(SROAPass()); @@ -443,81 +455,92 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, // Form canonically associated expression trees, and simplify the trees using // basic mathematical properties. 
For example, this will form (nearly) // minimal multiplication trees. - FPM.addPass(ReassociatePass()); - - // Add the primary loop simplification pipeline. - // FIXME: Currently this is split into two loop pass pipelines because we run - // some function passes in between them. These can and should be removed - // and/or replaced by scheduling the loop pass equivalents in the correct - // positions. But those equivalent passes aren't powerful enough yet. - // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still - // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet to - // fully replace `SimplifyCFGPass`, and the closest to the other we have is - // `LoopInstSimplify`. - LoopPassManager LPM1, LPM2; - - // Simplify the loop body. We do this initially to clean up after other loop - // passes run, either when iterating on a loop or on inner loops with - // implications on the outer loop. - LPM1.addPass(LoopInstSimplifyPass()); - LPM1.addPass(LoopSimplifyCFGPass()); - - // Try to remove as much code from the loop header as possible, - // to reduce amount of IR that will have to be duplicated. - // TODO: Investigate promotion cap for O1. - LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + if (!SYCLOptimizationMode) { + // FIXME: re-association increases variable liveness and therefore register + // pressure. + FPM.addPass(ReassociatePass()); + + // Do not run loop pass pipeline in "SYCL Optimization Mode". Loop + // optimizations rely on TTI, which is not accurate for SPIR target. + + // Add the primary loop simplification pipeline. + // FIXME: Currently this is split into two loop pass pipelines because we + // run some function passes in between them. These can and should be removed + // and/or replaced by scheduling the loop pass equivalents in the correct + // positions. But those equivalent passes aren't powerful enough yet. 
+ // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still + // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough yet + // to fully replace `SimplifyCFGPass`, and the closest to the other we have + // is `LoopInstSimplify`. + LoopPassManager LPM1, LPM2; + + // Simplify the loop body. We do this initially to clean up after other loop + // passes run, either when iterating on a loop or on inner loops with + // implications on the outer loop. + LPM1.addPass(LoopInstSimplifyPass()); + LPM1.addPass(LoopSimplifyCFGPass()); + + // Try to remove as much code from the loop header as possible, + // to reduce amount of IR that will have to be duplicated. + // TODO: Investigate promotion cap for O1. + LPM1.addPass( + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + + // Disable header duplication in loop rotation at -Oz. + LPM1.addPass( + LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase))); + // TODO: Investigate promotion cap for O1. + LPM1.addPass( + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); + LPM1.addPass(SimpleLoopUnswitchPass(/* NonTrivial */ Level == + OptimizationLevel::O3 && + EnableO3NonTrivialUnswitching)); + if (EnableLoopFlatten) + LPM1.addPass(LoopFlattenPass()); + + LPM2.addPass(LoopIdiomRecognizePass()); + LPM2.addPass(IndVarSimplifyPass()); - // Disable header duplication in loop rotation at -Oz. - LPM1.addPass( - LoopRotatePass(Level != OptimizationLevel::Oz, isLTOPreLink(Phase))); - // TODO: Investigate promotion cap for O1. 
- LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap)); - LPM1.addPass( - SimpleLoopUnswitchPass(/* NonTrivial */ Level == OptimizationLevel::O3 && - EnableO3NonTrivialUnswitching)); - if (EnableLoopFlatten) - LPM1.addPass(LoopFlattenPass()); - - LPM2.addPass(LoopIdiomRecognizePass()); - LPM2.addPass(IndVarSimplifyPass()); - - for (auto &C : LateLoopOptimizationsEPCallbacks) - C(LPM2, Level); - - LPM2.addPass(LoopDeletionPass()); - - if (EnableLoopInterchange) - LPM2.addPass(LoopInterchangePass()); - - // Do not enable unrolling in PreLinkThinLTO phase during sample PGO - // because it changes IR to makes profile annotation in back compile - // inaccurate. The normal unroller doesn't pay attention to forced full unroll - // attributes so we need to make sure and allow the full unroll pass to pay - // attention to it. - if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || - PGOOpt->Action != PGOOptions::SampleUse) - LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), - /* OnlyWhenForced= */ !PTO.LoopUnrolling, - PTO.ForgetAllSCEVInLoopUnroll)); - - for (auto &C : LoopOptimizerEndEPCallbacks) - C(LPM2, Level); - - // We provide the opt remark emitter pass for LICM to use. We only need to do - // this once as it is immutable. - FPM.addPass( - RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); - FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), - /*UseMemorySSA=*/true, - /*UseBlockFrequencyInfo=*/true)); - FPM.addPass(SimplifyCFGPass()); - FPM.addPass(InstCombinePass()); - // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass, - // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA. - // *All* loop passes must preserve it, in order to be able to use it. 
- FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), - /*UseMemorySSA=*/false, - /*UseBlockFrequencyInfo=*/false)); + for (auto &C : LateLoopOptimizationsEPCallbacks) + C(LPM2, Level); + + LPM2.addPass(LoopDeletionPass()); + + if (EnableLoopInterchange) + LPM2.addPass(LoopInterchangePass()); + + // Do not enable unrolling in PreLinkThinLTO phase during sample PGO + // because it changes IR to makes profile annotation in back compile + // inaccurate. The normal unroller doesn't pay attention to forced full + // unroll attributes so we need to make sure and allow the full unroll pass + // to pay attention to it. + if (Phase != ThinOrFullLTOPhase::ThinLTOPreLink || !PGOOpt || + PGOOpt->Action != PGOOptions::SampleUse) + LPM2.addPass(LoopFullUnrollPass(Level.getSpeedupLevel(), + /* OnlyWhenForced= */ !PTO.LoopUnrolling, + PTO.ForgetAllSCEVInLoopUnroll)); + + for (auto &C : LoopOptimizerEndEPCallbacks) + C(LPM2, Level); + + // We provide the opt remark emitter pass for LICM to use. We only need to + // do this once as it is immutable. + FPM.addPass( + RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); + FPM.addPass( + createFunctionToLoopPassAdaptor(std::move(LPM1), + /*UseMemorySSA=*/true, + /*UseBlockFrequencyInfo=*/true)); + FPM.addPass(SimplifyCFGPass()); + FPM.addPass(InstCombinePass()); + // The loop passes in LPM2 (LoopIdiomRecognizePass, IndVarSimplifyPass, + // LoopDeletionPass and LoopFullUnrollPass) do not preserve MemorySSA. + // *All* loop passes must preserve it, in order to be able to use it. + FPM.addPass( + createFunctionToLoopPassAdaptor(std::move(LPM2), + /*UseMemorySSA=*/false, + /*UseBlockFrequencyInfo=*/false)); + } // Delete small array after loop unroll. FPM.addPass(SROAPass()); @@ -1162,29 +1185,32 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level, for (auto &C : VectorizerStartEPCallbacks) C(OptimizePM, Level); - LoopPassManager LPM; - // First rotate loops that may have been un-rotated by prior passes. 
- // Disable header duplication at -Oz. - LPM.addPass(LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink)); - // Some loops may have become dead by now. Try to delete them. - // FIXME: see discussion in https://reviews.llvm.org/D112851, - // this may need to be revisited once we run GVN before loop deletion - // in the simplification pipeline. - LPM.addPass(LoopDeletionPass()); - OptimizePM.addPass(createFunctionToLoopPassAdaptor( - std::move(LPM), /*UseMemorySSA=*/false, /*UseBlockFrequencyInfo=*/false)); - - // Distribute loops to allow partial vectorization. I.e. isolate dependences - // into separate loop that would otherwise inhibit vectorization. This is - // currently only performed for loops marked with the metadata - // llvm.loop.distribute=true or when -enable-loop-distribute is specified. - OptimizePM.addPass(LoopDistributePass()); - - // Populates the VFABI attribute with the scalar-to-vector mappings - // from the TargetLibraryInfo. - OptimizePM.addPass(InjectTLIMappings()); - - addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false); + if (!SYCLOptimizationMode) { + LoopPassManager LPM; + // First rotate loops that may have been un-rotated by prior passes. + // Disable header duplication at -Oz. + LPM.addPass(LoopRotatePass(Level != OptimizationLevel::Oz, LTOPreLink)); + // Some loops may have become dead by now. Try to delete them. + // FIXME: see discussion in https://reviews.llvm.org/D112851, + // this may need to be revisited once we run GVN before loop deletion + // in the simplification pipeline. + LPM.addPass(LoopDeletionPass()); + OptimizePM.addPass( + createFunctionToLoopPassAdaptor(std::move(LPM), /*UseMemorySSA=*/false, + /*UseBlockFrequencyInfo=*/false)); + + // Distribute loops to allow partial vectorization. I.e. isolate dependences + // into separate loop that would otherwise inhibit vectorization. 
This is + // currently only performed for loops marked with the metadata + // llvm.loop.distribute=true or when -enable-loop-distribute is specified. + OptimizePM.addPass(LoopDistributePass()); + + // Populates the VFABI attribute with the scalar-to-vector mappings + // from the TargetLibraryInfo. + OptimizePM.addPass(InjectTLIMappings()); + + addVectorPasses(Level, OptimizePM, /* IsFullLTO */ false); + } // LoopSink pass sinks instructions hoisted by LICM, which serves as a // canonicalization pass that enables other optimizations. As a result, diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 1819c3c720092..e4431520151ef 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -72,9 +72,8 @@ static cl::opt<bool> RunLoopRerolling("reroll-loops", cl::Hidden, cl::desc("Run the loop rerolling pass")); -static cl::opt<bool> - SYCLOptimizationMode("sycl-opt", cl::init(false), cl::Hidden, - cl::desc("Enable SYCL optimization mode.")); +cl::opt<bool> SYCLOptimizationMode("sycl-opt", cl::init(false), cl::Hidden, + cl::desc("Enable SYCL optimization mode.")); cl::opt<bool> RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass")); diff --git a/sycl/test/esimd/lower-external-funcs.cpp b/sycl/test/esimd/lower-external-funcs.cpp index c3c5c82d45f76..8ba5a9362af18 100644 --- a/sycl/test/esimd/lower-external-funcs.cpp +++ b/sycl/test/esimd/lower-external-funcs.cpp @@ -1,4 +1,8 @@ -// RUN: %clangxx -fsycl -fsycl-device-only -S -emit-llvm -x c++ %s -o %t +// RUN: %clangxx -fsycl -fsycl-device-only -flegacy-pass-manager -S -emit-llvm -x c++ %s -o %t-lgcy +// RUN: sycl-post-link -split-esimd -lower-esimd -O2 -S %t-lgcy -o %t-lgcy.table +// RUN: FileCheck %s -input-file=%t-lgcy_esimd_0.ll + +// RUN: %clangxx -fsycl -fsycl-device-only -fno-legacy-pass-manager -S -emit-llvm -x c++ %s -o %t // RUN: sycl-post-link -split-esimd -lower-esimd -O2 -S %t -o 
%t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll diff --git a/sycl/test/esimd/spirv_intrins_trans.cpp b/sycl/test/esimd/spirv_intrins_trans.cpp index 8aa6dc17d8d0a..fa5e1b0a11ab9 100644 --- a/sycl/test/esimd/spirv_intrins_trans.cpp +++ b/sycl/test/esimd/spirv_intrins_trans.cpp @@ -1,4 +1,8 @@ -// RUN: %clangxx -fsycl -fsycl-device-only -S -emit-llvm -x c++ %s -o %t +// RUN: %clangxx -fsycl -fsycl-device-only -flegacy-pass-manager -S -emit-llvm -x c++ %s -o %t-lgcy +// RUN: sycl-post-link -split-esimd -lower-esimd -O0 -S %t-lgcy -o %t-lgcy.table +// RUN: FileCheck %s -input-file=%t-lgcy_esimd_0.ll + +// RUN: %clangxx -fsycl -fsycl-device-only -fno-legacy-pass-manager -S -emit-llvm -x c++ %s -o %t // RUN: sycl-post-link -split-esimd -lower-esimd -O0 -S %t -o %t.table // RUN: FileCheck %s -input-file=%t_esimd_0.ll diff --git a/sycl/test/esimd/vadd.cpp b/sycl/test/esimd/vadd.cpp index 90dcaf9ab3421..be5025ab2dd2a 100644 --- a/sycl/test/esimd/vadd.cpp +++ b/sycl/test/esimd/vadd.cpp @@ -1,11 +1,20 @@ -// RUN: %clangxx -fsycl %s -o %t.out +// RUN: %clangxx -fsycl -flegacy-pass-manager %s -o %t-lgcy.out +// RUN: %RUN_ON_HOST %t-lgcy.out + +// RUN: %clangxx -fsycl -fno-legacy-pass-manager %s -o %t.out // RUN: %RUN_ON_HOST %t.out -// Check that the code compiles with -O0 and -g -// RUN: %clangxx -I %sycl_include %s -o %t.out -fsycl -O0 -// RUN: %clangxx -I %sycl_include %s -o %t.out -fsycl -O0 -g +// Check that the code compiles with -O0 and -g on both legacy and new Pass +// Managers +// RUN: %clangxx -I %sycl_include %s -o %t.out -fsycl -fno-legacy-pass-manager -O0 +// RUN: %clangxx -I %sycl_include %s -o %t.out -fsycl -flegacy-pass-manager -O0 +// RUN: %clangxx -I %sycl_include %s -o %t.out -fsycl -fno-legacy-pass-manager -O0 -g +// RUN: %clangxx -I %sycl_include %s -o %t.out -fsycl -flegacy-pass-manager -O0 -g + // Check that the code compiles with device code instrumentation enabled -// RUN: %clangxx -I %sycl_include %s -o %t.out -fsycl \ +// RUN: %clangxx -I 
%sycl_include %s -o %t.out -fsycl -fno-legacy-pass-manager \ +// RUN: -fsycl-instrument-device-code +// RUN: %clangxx -I %sycl_include %s -o %t.out -fsycl -flegacy-pass-manager \ // RUN: -fsycl-instrument-device-code #include