@@ -2075,6 +2075,15 @@ void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D,
20752075 if (const auto *C = D.getSingleClause <OMPOrderClause>())
20762076 if (C->getKind () == OMPC_ORDER_concurrent)
20772077 LoopStack.setParallel (/* Enable=*/ true );
2078+ if ((D.getDirectiveKind () == OMPD_simd ||
2079+ (getLangOpts ().OpenMPSimd &&
2080+ isOpenMPSimdDirective (D.getDirectiveKind ()))) &&
2081+ llvm::any_of (D.getClausesOfKind <OMPReductionClause>(),
2082+ [](const OMPReductionClause *C) {
2083+ return C->getModifier () == OMPC_REDUCTION_inscan;
2084+ }))
2085+ // Disable parallel access in case of prefix sum.
2086+ LoopStack.setParallel (/* Enable=*/ false );
20782087}
20792088
20802089void CodeGenFunction::EmitOMPSimdFinal (
@@ -2270,6 +2279,8 @@ static void emitOMPSimdRegion(CodeGenFunction &CGF, const OMPLoopDirective &S,
22702279}
22712280
22722281void CodeGenFunction::EmitOMPSimdDirective (const OMPSimdDirective &S) {
2282+ ParentLoopDirectiveForScanRegion ScanRegion (*this , S);
2283+ OMPFirstScanLoop = true ;
22732284 auto &&CodeGen = [&S](CodeGenFunction &CGF, PrePostActionTy &Action) {
22742285 emitOMPSimdRegion (CGF, S, Action);
22752286 };
@@ -4191,14 +4202,15 @@ void CodeGenFunction::EmitOMPDepobjDirective(const OMPDepobjDirective &S) {
41914202}
41924203
41934204void CodeGenFunction::EmitOMPScanDirective (const OMPScanDirective &S) {
4194- // Do not emit code for non-simd directives in simd-only mode.
4195- if (getLangOpts ().OpenMPSimd && !OMPParentLoopDirectiveForScan)
4205+ if (!OMPParentLoopDirectiveForScan)
41964206 return ;
41974207 const OMPExecutableDirective &ParentDir = *OMPParentLoopDirectiveForScan;
4208+ bool IsInclusive = S.hasClausesOfKind <OMPInclusiveClause>();
41984209 SmallVector<const Expr *, 4 > Shareds;
41994210 SmallVector<const Expr *, 4 > Privates;
42004211 SmallVector<const Expr *, 4 > LHSs;
42014212 SmallVector<const Expr *, 4 > RHSs;
4213+ SmallVector<const Expr *, 4 > ReductionOps;
42024214 SmallVector<const Expr *, 4 > CopyOps;
42034215 SmallVector<const Expr *, 4 > CopyArrayTemps;
42044216 SmallVector<const Expr *, 4 > CopyArrayElems;
@@ -4209,13 +4221,109 @@ void CodeGenFunction::EmitOMPScanDirective(const OMPScanDirective &S) {
42094221 Privates.append (C->privates ().begin (), C->privates ().end ());
42104222 LHSs.append (C->lhs_exprs ().begin (), C->lhs_exprs ().end ());
42114223 RHSs.append (C->rhs_exprs ().begin (), C->rhs_exprs ().end ());
4224+ ReductionOps.append (C->reduction_ops ().begin (), C->reduction_ops ().end ());
42124225 CopyOps.append (C->copy_ops ().begin (), C->copy_ops ().end ());
42134226 CopyArrayTemps.append (C->copy_array_temps ().begin (),
42144227 C->copy_array_temps ().end ());
42154228 CopyArrayElems.append (C->copy_array_elems ().begin (),
42164229 C->copy_array_elems ().end ());
42174230 }
4218- bool IsInclusive = S.hasClausesOfKind <OMPInclusiveClause>();
4231+ if (ParentDir.getDirectiveKind () == OMPD_simd ||
4232+ (getLangOpts ().OpenMPSimd &&
4233+ isOpenMPSimdDirective (ParentDir.getDirectiveKind ()))) {
4234+ // For simd directive and simd-based directives in simd only mode, use the
4235+ // following codegen:
4236+ // int x = 0;
4237+ // #pragma omp simd reduction(inscan, +: x)
4238+ // for (..) {
4239+ // <first part>
4240+ // #pragma omp scan inclusive(x)
4241+ // <second part>
4242+ // }
4243+ // is transformed to:
4244+ // int x = 0;
4245+ // for (..) {
4246+ // int x_priv = 0;
4247+ // <first part>
4248+ // x = x_priv + x;
4249+ // x_priv = x;
4250+ // <second part>
4251+ // }
4252+ // and
4253+ // int x = 0;
4254+ // #pragma omp simd reduction(inscan, +: x)
4255+ // for (..) {
4256+ // <first part>
4257+ // #pragma omp scan exclusive(x)
4258+ // <second part>
4259+ // }
4260+ // to
4261+ // int x = 0;
4262+ // for (..) {
4263+ // int x_priv = 0;
4264+ // <second part>
4265+ // int temp = x;
4266+ // x = x_priv + x;
4267+ // x_priv = temp;
4268+ // <first part>
4269+ // }
4270+ llvm::BasicBlock *OMPScanReduce = createBasicBlock (" omp.inscan.reduce" );
4271+ EmitBranch (IsInclusive
4272+ ? OMPScanReduce
4273+ : BreakContinueStack.back ().ContinueBlock .getBlock ());
4274+ EmitBlock (OMPScanDispatch);
4275+ {
4276+ // New scope for correct construction/destruction of temp variables for
4277+ // exclusive scan.
4278+ LexicalScope Scope (*this , S.getSourceRange ());
4279+ EmitBranch (IsInclusive ? OMPBeforeScanBlock : OMPAfterScanBlock);
4280+ EmitBlock (OMPScanReduce);
4281+ if (!IsInclusive) {
4282+ // Create temp var and copy LHS value to this temp value.
4283+ // TMP = LHS;
4284+ for (unsigned I = 0 , E = CopyArrayElems.size (); I < E; ++I) {
4285+ const Expr *PrivateExpr = Privates[I];
4286+ const Expr *TempExpr = CopyArrayTemps[I];
4287+ EmitAutoVarDecl (
4288+ *cast<VarDecl>(cast<DeclRefExpr>(TempExpr)->getDecl ()));
4289+ LValue DestLVal = EmitLValue (TempExpr);
4290+ LValue SrcLVal = EmitLValue (LHSs[I]);
4291+ EmitOMPCopy (PrivateExpr->getType (), DestLVal.getAddress (*this ),
4292+ SrcLVal.getAddress (*this ),
4293+ cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl ()),
4294+ cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl ()),
4295+ CopyOps[I]);
4296+ }
4297+ }
4298+ CGM.getOpenMPRuntime ().emitReduction (
4299+ *this , ParentDir.getEndLoc (), Privates, LHSs, RHSs, ReductionOps,
4300+ {/* WithNowait=*/ true , /* SimpleReduction=*/ true , OMPD_simd});
4301+ for (unsigned I = 0 , E = CopyArrayElems.size (); I < E; ++I) {
4302+ const Expr *PrivateExpr = Privates[I];
4303+ LValue DestLVal;
4304+ LValue SrcLVal;
4305+ if (IsInclusive) {
4306+ DestLVal = EmitLValue (RHSs[I]);
4307+ SrcLVal = EmitLValue (LHSs[I]);
4308+ } else {
4309+ const Expr *TempExpr = CopyArrayTemps[I];
4310+ DestLVal = EmitLValue (RHSs[I]);
4311+ SrcLVal = EmitLValue (TempExpr);
4312+ }
4313+ EmitOMPCopy (PrivateExpr->getType (), DestLVal.getAddress (*this ),
4314+ SrcLVal.getAddress (*this ),
4315+ cast<VarDecl>(cast<DeclRefExpr>(LHSs[I])->getDecl ()),
4316+ cast<VarDecl>(cast<DeclRefExpr>(RHSs[I])->getDecl ()),
4317+ CopyOps[I]);
4318+ }
4319+ }
4320+ EmitBranch (IsInclusive ? OMPAfterScanBlock : OMPBeforeScanBlock);
4321+ OMPScanExitBlock = IsInclusive
4322+ ? BreakContinueStack.back ().ContinueBlock .getBlock ()
4323+ : OMPScanReduce;
4324+ EmitBlock (OMPAfterScanBlock);
4325+ return ;
4326+ }
42194327 if (!IsInclusive) {
42204328 EmitBranch (BreakContinueStack.back ().ContinueBlock .getBlock ());
42214329 EmitBlock (OMPScanExitBlock);
@@ -6313,6 +6421,7 @@ void CodeGenFunction::EmitSimpleOMPExecutableDirective(
63136421 }
63146422 if (isOpenMPSimdDirective (D.getDirectiveKind ())) {
63156423 (void )GlobalsScope.Privatize ();
6424+ ParentLoopDirectiveForScanRegion ScanRegion (CGF, D);
63166425 emitOMPSimdRegion (CGF, cast<OMPLoopDirective>(D), Action);
63176426 } else {
63186427 if (const auto *LD = dyn_cast<OMPLoopDirective>(&D)) {
0 commit comments