@@ -337,6 +337,10 @@ static cl::opt<bool> EnableStructurizerWorkarounds(
337337 cl::desc (" Enable workarounds for the StructurizeCFG pass" ), cl::init(true ),
338338 cl::Hidden);
339339
// Hidden boolean command-line option, initialized to true. Elsewhere in this
// file it guards adding the software LDS lowering pass (AMDGPUSwLowerLDSPass
// in the pass-builder callback, and the legacy pass in addIRPasses).
340+ static cl::opt<bool > EnableSwLowerLDS (" amdgpu-enable-sw-lower-lds" ,
341+ cl::desc (" Enable sw lower lds pass" ),
342+ cl::init(true ), cl::Hidden);
343+
340344static cl::opt<bool , true > EnableLowerModuleLDS (
341345 " amdgpu-enable-lower-module-lds" , cl::desc(" Enable lower module lds pass" ),
342346 cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true ),
@@ -736,6 +740,8 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
736740 // We want to support the -lto-partitions=N option as "best effort".
737741 // For that, we need to lower LDS earlier in the pipeline before the
738742 // module is partitioned for codegen.
743+ if (EnableSwLowerLDS)
744+ PM.addPass (AMDGPUSwLowerLDSPass (*this ));
739745 if (EnableLowerModuleLDS)
740746 PM.addPass (AMDGPULowerModuleLDSPass (*this ));
741747 });
@@ -1032,6 +1038,10 @@ void AMDGPUPassConfig::addIRPasses() {
10321038 // Replace OpenCL enqueued block function pointers with global variables.
10331039 addPass (createAMDGPUOpenCLEnqueuedBlockLoweringPass ());
10341040
1041+ // Add the SW LDS lowering pass, which lowers LDS accesses to global memory if address sanitizer is enabled.
1042+ if (EnableSwLowerLDS)
1043+ addPass (createAMDGPUSwLowerLDSLegacyPass (&TM));
1044+
10351045 // Runs before PromoteAlloca so the latter can account for function uses
10361046 if (EnableLowerModuleLDS) {
10371047 addPass (createAMDGPULowerModuleLDSLegacyPass (&TM));
0 commit comments