From c9fa4aaa1566a50405966693e5547012cc12db88 Mon Sep 17 00:00:00 2001 From: Maksim Sabianin Date: Tue, 11 Jan 2022 17:33:06 +0300 Subject: [PATCH 1/6] [SYCL] Turn off SimplifyCFG pass in SYCL mode. --- clang/test/CodeGenSYCL/simplifycfg.cpp | 35 +++++++++++++++++++ .../lib/Transforms/IPO/PassManagerBuilder.cpp | 7 ++-- 2 files changed, 40 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGenSYCL/simplifycfg.cpp diff --git a/clang/test/CodeGenSYCL/simplifycfg.cpp b/clang/test/CodeGenSYCL/simplifycfg.cpp new file mode 100644 index 0000000000000..18409fefb769c --- /dev/null +++ b/clang/test/CodeGenSYCL/simplifycfg.cpp @@ -0,0 +1,35 @@ +// RUN: %clangxx -fsycl -fsycl-device-only %s -O3 -S -o - | FileCheck %s +// +// This test checks that shift_group_left (which is _Z32__spirv_SubgroupShuffleDownINTELIiET_S0_S0_j in SPIRV) +// is called twice after O3 optimizations. +// +// Usually clang with SimplifyCFG pass optimizes constructs like: +// if (i % 2 == 0) +// func(); +// else +// func(); +// +// into one simple func() invocation. +// This behaviour might be wrong in cases when func's behaviour depends on +// a place where it is written. +// There is a relevant discussion about introducing +// a reliable tool for such cases: https://reviews.llvm.org/D85603 + +// CHECK: {{.*}} call spir_func i32 @_Z32__spirv_SubgroupShuffleDownINTELIiET_S0_S0_j +// CHECK: {{.*}} call spir_func i32 @_Z32__spirv_SubgroupShuffleDownINTELIiET_S0_S0_j + + +#include + +int main() { + sycl::queue q; + int* output = sycl::malloc_shared(1, q); + q.parallel_for(sycl::nd_range<1>{1, 1}, [=](sycl::nd_item<1> it){ + int i = it.get_global_id(0); + if (i % 2 == 0) { + output[0] = sycl::shift_group_left(it.get_sub_group(), 1, 1); + } else { + output[0] = sycl::shift_group_left(it.get_sub_group(), 1, 1); + } + }).wait(); +} diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index 1819c3c720092..dbfa243745f73 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -542,8 +542,11 @@ void PassManagerBuilder::addFunctionSimplificationPasses( MPM.add(createLoopRerollPass()); // Merge & remove BBs and sink & hoist common instructions. - MPM.add(createCFGSimplificationPass( - SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true))); + if (SYCLOptimizationMode) + MPM.add(createCFGSimplificationPass()); + else + MPM.add(createCFGSimplificationPass( + SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true))); // Clean up after everything. MPM.add(createInstructionCombiningPass()); addExtensionsToPM(EP_Peephole, MPM); From 80b1377e15bb71b3d4d7a0f5051efb04d06d3c62 Mon Sep 17 00:00:00 2001 From: Maksim Sabianin Date: Wed, 26 Jan 2022 13:51:03 +0300 Subject: [PATCH 2/6] simplify test. reduce the number of turned off passes. add turn off for new PM --- clang/test/CodeGenSYCL/simplifycfg.cpp | 31 ++++++++----------- llvm/lib/Passes/PassBuilderPipelines.cpp | 11 +++++-- .../lib/Transforms/IPO/PassManagerBuilder.cpp | 19 ++++++------ 3 files changed, 32 insertions(+), 29 deletions(-) diff --git a/clang/test/CodeGenSYCL/simplifycfg.cpp b/clang/test/CodeGenSYCL/simplifycfg.cpp index 18409fefb769c..a0623212b9600 100644 --- a/clang/test/CodeGenSYCL/simplifycfg.cpp +++ b/clang/test/CodeGenSYCL/simplifycfg.cpp @@ -1,7 +1,7 @@ -// RUN: %clangxx -fsycl -fsycl-device-only %s -O3 -S -o - | FileCheck %s +// RUN: %clangxx -fsycl -fsycl-device-only -flegacy-pass-manager %s -O3 -S -o - | FileCheck %s +// RUN: %clangxx -fsycl -fsycl-device-only -fno-legacy-pass-manager %s -O3 -S -o - | FileCheck %s // -// This test checks that shift_group_left (which is _Z32__spirv_SubgroupShuffleDownINTELIiET_S0_S0_j in SPIRV) -// is called twice after O3 optimizations. +// This test checks that foo (which is @_Z3foov) is called twice after O3 optimizations. // // Usually clang with SimplifyCFG pass optimizes constructs like: // if (i % 2 == 0) @@ -15,21 +15,16 @@ // There is a relevant discussion about introducing // a reliable tool for such cases: https://reviews.llvm.org/D85603 -// CHECK: {{.*}} call spir_func i32 @_Z32__spirv_SubgroupShuffleDownINTELIiET_S0_S0_j -// CHECK: {{.*}} call spir_func i32 @_Z32__spirv_SubgroupShuffleDownINTELIiET_S0_S0_j +// CHECK: tail call spir_func void @_Z3foov() +// CHECK: tail call spir_func void @_Z3foov() +SYCL_EXTERNAL void foo(); -#include - -int main() { - sycl::queue q; - int* output = sycl::malloc_shared(1, q); - q.parallel_for(sycl::nd_range<1>{1, 1}, [=](sycl::nd_item<1> it){ - int i = it.get_global_id(0); - if (i % 2 == 0) { - output[0] = sycl::shift_group_left(it.get_sub_group(), 1, 1); - } else { - output[0] = sycl::shift_group_left(it.get_sub_group(), 1, 1); - } - }).wait(); +SYCL_EXTERNAL void bar(int i) { + if (i % 2 == 0) { + foo(); + } else { + foo(); + } } + diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 6110bda02406d..77095e1fbd548 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -223,6 +223,8 @@ extern cl::opt EnableMatrix; extern cl::opt DisablePreInliner; extern cl::opt PreInlineThreshold; + +extern cl::opt SYCLOptimizationMode; } // namespace llvm void PassBuilder::invokePeepholeEPCallbacks(FunctionPassManager &FPM, @@ -575,8 +577,12 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, for (auto &C : ScalarOptimizerLateEPCallbacks) C(FPM, Level); - FPM.addPass(SimplifyCFGPass( + if (SYCLOptimizationMode) + FPM.addPass(SimplifyCFGPass()); + else + FPM.addPass(SimplifyCFGPass( SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true))); + FPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(FPM, Level); @@ -1029,7 +1035,8 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level, // convert to more optimized IR using more aggressive simplify CFG options. // The extra sinking transform can create larger basic blocks, so do this // before SLP vectorization. - FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions() + if (!SYCLOptimizationMode) + FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions() .forwardSwitchCondToPhi(true) .convertSwitchToLookupTable(true) .needCanonicalLoops(false) diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index dbfa243745f73..f0f998130dbda 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -72,9 +72,8 @@ static cl::opt RunLoopRerolling("reroll-loops", cl::Hidden, cl::desc("Run the loop rerolling pass")); -static cl::opt - SYCLOptimizationMode("sycl-opt", cl::init(false), cl::Hidden, - cl::desc("Enable SYCL optimization mode.")); +cl::opt SYCLOptimizationMode("sycl-opt", cl::init(false), cl::Hidden, + cl::desc("Enable SYCL optimization mode.")); cl::opt RunNewGVN("enable-newgvn", cl::init(false), cl::Hidden, cl::desc("Run the NewGVN pass")); @@ -547,6 +546,7 @@ void PassManagerBuilder::addFunctionSimplificationPasses( else MPM.add(createCFGSimplificationPass( SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true))); + // Clean up after everything. MPM.add(createInstructionCombiningPass()); addExtensionsToPM(EP_Peephole, MPM); @@ -610,12 +610,13 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM, // convert to more optimized IR using more aggressive simplify CFG options. // The extra sinking transform can create larger basic blocks, so do this // before SLP vectorization. - PM.add(createCFGSimplificationPass(SimplifyCFGOptions() - .forwardSwitchCondToPhi(true) - .convertSwitchToLookupTable(true) - .needCanonicalLoops(false) - .hoistCommonInsts(true) - .sinkCommonInsts(true))); + if (!SYCLOptimizationMode) + PM.add(createCFGSimplificationPass(SimplifyCFGOptions() + .forwardSwitchCondToPhi(true) + .convertSwitchToLookupTable(true) + .needCanonicalLoops(false) + .hoistCommonInsts(true) + .sinkCommonInsts(true))); if (IsFullLTO) { PM.add(createSCCPPass()); // Propagate exposed constants From e8dd5fd20d516addb9c5069540894ee1356721cf Mon Sep 17 00:00:00 2001 From: Maksim Sabianin Date: Thu, 27 Jan 2022 12:41:05 +0300 Subject: [PATCH 3/6] apply clang-format --- clang/test/CodeGenSYCL/simplifycfg.cpp | 1 - llvm/lib/Passes/PassBuilderPipelines.cpp | 12 ++++++------ 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/clang/test/CodeGenSYCL/simplifycfg.cpp b/clang/test/CodeGenSYCL/simplifycfg.cpp index a0623212b9600..4ddd82f1039e3 100644 --- a/clang/test/CodeGenSYCL/simplifycfg.cpp +++ b/clang/test/CodeGenSYCL/simplifycfg.cpp @@ -27,4 +27,3 @@ SYCL_EXTERNAL void bar(int i) { foo(); } } - diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 77095e1fbd548..553fd8e5cc483 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -581,7 +581,7 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, FPM.addPass(SimplifyCFGPass()); else FPM.addPass(SimplifyCFGPass( - SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true))); + SimplifyCFGOptions().hoistCommonInsts(true).sinkCommonInsts(true))); FPM.addPass(InstCombinePass()); invokePeepholeEPCallbacks(FPM, Level); @@ -1037,11 +1037,11 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level, // before SLP vectorization. if (!SYCLOptimizationMode) FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions() - .forwardSwitchCondToPhi(true) - .convertSwitchToLookupTable(true) - .needCanonicalLoops(false) - .hoistCommonInsts(true) - .sinkCommonInsts(true))); + .forwardSwitchCondToPhi(true) + .convertSwitchToLookupTable(true) + .needCanonicalLoops(false) + .hoistCommonInsts(true) + .sinkCommonInsts(true))); if (IsFullLTO) { FPM.addPass(SCCPPass()); From c60f3f984814a2bb1c771eda3b0d83b43becc3a9 Mon Sep 17 00:00:00 2001 From: Maksim Sabianin Date: Thu, 27 Jan 2022 15:17:45 +0300 Subject: [PATCH 4/6] swtich clangxx to clang_cc1 in simplifycfg.cpp --- clang/test/CodeGenSYCL/simplifycfg.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/test/CodeGenSYCL/simplifycfg.cpp b/clang/test/CodeGenSYCL/simplifycfg.cpp index 4ddd82f1039e3..ffa208119d3ed 100644 --- a/clang/test/CodeGenSYCL/simplifycfg.cpp +++ b/clang/test/CodeGenSYCL/simplifycfg.cpp @@ -1,5 +1,5 @@ -// RUN: %clangxx -fsycl -fsycl-device-only -flegacy-pass-manager %s -O3 -S -o - | FileCheck %s -// RUN: %clangxx -fsycl -fsycl-device-only -fno-legacy-pass-manager %s -O3 -S -o - | FileCheck %s +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -flegacy-pass-manager -mllvm -sycl-opt %s -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 -fsycl-is-device -triple spir64-unknown-unknown -fno-legacy-pass-manager -mllvm -sycl-opt %s -emit-llvm -o - | FileCheck %s // // This test checks that foo (which is @_Z3foov) is called twice after O3 optimizations. // From 43680ab365082563cbfe3cc74ecb6870ac5bb17f Mon Sep 17 00:00:00 2001 From: Maksim Sabianin Date: Mon, 31 Jan 2022 15:13:31 +0300 Subject: [PATCH 5/6] remove unnecessary modification --- llvm/lib/Passes/PassBuilderPipelines.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Passes/PassBuilderPipelines.cpp b/llvm/lib/Passes/PassBuilderPipelines.cpp index 5ee0df93c1593..ccc601d94a7b6 100644 --- a/llvm/lib/Passes/PassBuilderPipelines.cpp +++ b/llvm/lib/Passes/PassBuilderPipelines.cpp @@ -1056,13 +1056,12 @@ void PassBuilder::addVectorPasses(OptimizationLevel Level, // convert to more optimized IR using more aggressive simplify CFG options. // The extra sinking transform can create larger basic blocks, so do this // before SLP vectorization. - if (!SYCLOptimizationMode) - FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions() - .forwardSwitchCondToPhi(true) - .convertSwitchToLookupTable(true) - .needCanonicalLoops(false) - .hoistCommonInsts(true) - .sinkCommonInsts(true))); + FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions() + .forwardSwitchCondToPhi(true) + .convertSwitchToLookupTable(true) + .needCanonicalLoops(false) + .hoistCommonInsts(true) + .sinkCommonInsts(true))); if (IsFullLTO) { FPM.addPass(SCCPPass()); From 632e7a20208cddb6a4a6d56066800973bc4d4459 Mon Sep 17 00:00:00 2001 From: Maksim Sabianin Date: Mon, 31 Jan 2022 15:48:28 +0300 Subject: [PATCH 6/6] remove unnecessary modifications --- llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp index f0f998130dbda..e4d08feae9ed6 100644 --- a/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/llvm/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -610,13 +610,12 @@ void PassManagerBuilder::addVectorPasses(legacy::PassManagerBase &PM, // convert to more optimized IR using more aggressive simplify CFG options. // The extra sinking transform can create larger basic blocks, so do this // before SLP vectorization. - if (!SYCLOptimizationMode) - PM.add(createCFGSimplificationPass(SimplifyCFGOptions() - .forwardSwitchCondToPhi(true) - .convertSwitchToLookupTable(true) - .needCanonicalLoops(false) - .hoistCommonInsts(true) - .sinkCommonInsts(true))); + PM.add(createCFGSimplificationPass(SimplifyCFGOptions() + .forwardSwitchCondToPhi(true) + .convertSwitchToLookupTable(true) + .needCanonicalLoops(false) + .hoistCommonInsts(true) + .sinkCommonInsts(true))); if (IsFullLTO) { PM.add(createSCCPPass()); // Propagate exposed constants