diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 55b00b50d0411..d5ab575f50417 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1975,6 +1975,21 @@ def : MutualExclusions<[SYCLIntelFPGAIVDep, def : MutualExclusions<[SYCLIntelFPGAMaxConcurrency, SYCLIntelFPGADisableLoopPipelining]>; +def SYCLIntelFPGALoopCount : StmtAttr { + let Spellings = [CXX11<"intel", "loop_count_min">, + CXX11<"intel", "loop_count_max">, + CXX11<"intel", "loop_count_avg">]; + let Subjects = SubjectList<[ForStmt, CXXForRangeStmt, WhileStmt, DoStmt], + ErrorDiag, "'for', 'while', and 'do' statements">; + let Accessors = [Accessor<"isMin", [CXX11<"intel", "loop_count_min">]>, + Accessor<"isMax", [CXX11<"intel", "loop_count_max">]>, + Accessor<"isAvg", [CXX11<"intel", "loop_count_avg">]>]; + let Args = [ExprArgument<"NTripCount">]; + let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; + let IsStmtDependent = 1; + let Documentation = [SYCLIntelFPGALoopCountAttrDocs]; +} + def : MutualExclusions<[SYCLIntelFPGAMaxConcurrency, SYCLIntelFPGADisableLoopPipelining]>; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 73cd65eb7aa4f..c1c5a97ef4a85 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -2951,6 +2951,41 @@ or ``ivdep``. }]; } +def SYCLIntelFPGALoopCountAttrDocs : Documentation { + let Category = DocCatVariable; + let Heading = "intel::loop_count_min, intel::loop_count_max, intel::loop_count_avg"; + let Content = [{ +The loop count attributes specify the minimum, maximum, or average number of +iterations for a ``for`` loop. These are hints that the user specify that can be +used by some of the loop optimization to make decisions such as if the loop +should be unrolled. It is a way for the user to provide some information without +using PGO. + +.. code-block:: c++ + + void foo(int *array, size_t n) { + [[intel::loop_count_min(4)]] for (int i = 0; i < n; ++i) array[i] = 0; + } + + void zoo(int *array, size_t n) { + [[intel::loop_count_max(10)]] for (int i = 0; i < n; ++i) array[i] = 0; + } + + void goo(int *array, size_t n) { + [[intel::loop_count_min(3)]] + [[intel::loop_count_max(10)]] + [[intel::loop_count_avg(5)]] + for (int i = 0; i < n; ++i) array[i] = 0; + } + + template + void bar() { + [[intel::loop_count_avg(N)]] for(;;) { } + } + + }]; +} + def SYCLIntelFPGAMaxInterleavingAttrDocs : Documentation { let Category = DocCatVariable; let Heading = "intel::max_interleaving"; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index f1414c3af6e6a..e0f699c808605 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -2121,6 +2121,9 @@ class Sema final { OpenCLUnrollHintAttr * BuildOpenCLLoopUnrollHintAttr(const AttributeCommonInfo &A, Expr *E); + SYCLIntelFPGALoopCountAttr * + BuildSYCLIntelFPGALoopCount(const AttributeCommonInfo &CI, Expr *E); + bool CheckQualifiedFunctionForTypeId(QualType T, SourceLocation Loc); bool CheckFunctionReturnType(QualType T, SourceLocation Loc); @@ -13439,7 +13442,8 @@ FPGALoopAttrT *Sema::BuildSYCLIntelFPGALoopAttr(const AttributeCommonInfo &A, A.getParsedKind() == ParsedAttr::AT_SYCLIntelFPGAMaxInterleaving || A.getParsedKind() == - ParsedAttr::AT_SYCLIntelFPGASpeculatedIterations) { + ParsedAttr::AT_SYCLIntelFPGASpeculatedIterations || + A.getParsedKind() == ParsedAttr::AT_SYCLIntelFPGALoopCount) { if (Val < 0) { Diag(E->getExprLoc(), diag::err_attribute_requires_positive_integer) << A.getAttrName() << /* non-negative */ 1; diff --git a/clang/lib/CodeGen/CGLoopInfo.cpp b/clang/lib/CodeGen/CGLoopInfo.cpp index fb3fd46ca293a..ea63a29868263 100644 --- a/clang/lib/CodeGen/CGLoopInfo.cpp +++ b/clang/lib/CodeGen/CGLoopInfo.cpp @@ -605,6 +605,12 @@ MDNode *LoopInfo::createMetadata( LoopProperties.push_back(MDNode::get(Ctx, Vals)); } + for (auto &VC : Attrs.SYCLIntelFPGAVariantCount) { + Metadata *Vals[] = {MDString::get(Ctx, VC.first), + ConstantAsMetadata::get(ConstantInt::get( + llvm::Type::getInt32Ty(Ctx), VC.second))}; + LoopProperties.push_back(MDNode::get(Ctx, Vals)); + } LoopProperties.insert(LoopProperties.end(), AdditionalLoopProperties.begin(), AdditionalLoopProperties.end()); return createFullUnrollMetadata(Attrs, LoopProperties, HasUserTransforms); @@ -621,10 +627,11 @@ LoopAttributes::LoopAttributes(bool IsParallel) SYCLLoopCoalesceNLevels(0), SYCLLoopPipeliningDisable(false), SYCLMaxInterleavingEnable(false), SYCLMaxInterleavingNInvocations(0), SYCLSpeculatedIterationsEnable(false), - SYCLSpeculatedIterationsNIterations(0), UnrollCount(0), - UnrollAndJamCount(0), DistributeEnable(LoopAttributes::Unspecified), - PipelineDisabled(false), PipelineInitiationInterval(0), - SYCLNofusionEnable(false), MustProgress(false) {} + SYCLSpeculatedIterationsNIterations(0), SYCLIntelFPGAVariantCount(false), + UnrollCount(0), UnrollAndJamCount(0), + DistributeEnable(LoopAttributes::Unspecified), PipelineDisabled(false), + PipelineInitiationInterval(0), SYCLNofusionEnable(false), + MustProgress(false) {} void LoopAttributes::clear() { IsParallel = false; @@ -643,6 +650,7 @@ void LoopAttributes::clear() { SYCLMaxInterleavingNInvocations = 0; SYCLSpeculatedIterationsEnable = false; SYCLSpeculatedIterationsNIterations = 0; + SYCLIntelFPGAVariantCount.clear(); UnrollCount = 0; UnrollAndJamCount = 0; VectorizeEnable = LoopAttributes::Unspecified; @@ -680,8 +688,9 @@ LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs, Attrs.SYCLMaxInterleavingNInvocations == 0 && Attrs.SYCLSpeculatedIterationsEnable == false && Attrs.SYCLSpeculatedIterationsNIterations == 0 && - Attrs.UnrollCount == 0 && Attrs.UnrollAndJamCount == 0 && - !Attrs.PipelineDisabled && Attrs.PipelineInitiationInterval == 0 && + Attrs.SYCLIntelFPGAVariantCount.empty() && Attrs.UnrollCount == 0 && + Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled && + Attrs.PipelineInitiationInterval == 0 && Attrs.VectorizePredicateEnable == LoopAttributes::Unspecified && Attrs.VectorizeEnable == LoopAttributes::Unspecified && Attrs.UnrollEnable == LoopAttributes::Unspecified && @@ -1030,6 +1039,19 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, ->getSExtValue()); } + if (const auto *IntelFPGALoopCountAvg = + dyn_cast(A)) { + unsigned int Count = IntelFPGALoopCountAvg->getNTripCount() + ->getIntegerConstantExpr(Ctx) + ->getSExtValue(); + const char *Var = IntelFPGALoopCountAvg->isMax() + ? "llvm.loop.intel.loopcount_max" + : IntelFPGALoopCountAvg->isMin() + ? "llvm.loop.intel.loopcount_min" + : "llvm.loop.intel.loopcount_avg"; + setSYCLIntelFPGAVariantCount(Var, Count); + } + if (const auto *IntelFPGALoopCoalesce = dyn_cast(A)) { if (auto *LCE = IntelFPGALoopCoalesce->getNExpr()) diff --git a/clang/lib/CodeGen/CGLoopInfo.h b/clang/lib/CodeGen/CGLoopInfo.h index 2aa04eb5974a2..317972a34ebeb 100644 --- a/clang/lib/CodeGen/CGLoopInfo.h +++ b/clang/lib/CodeGen/CGLoopInfo.h @@ -117,6 +117,10 @@ struct LoopAttributes { /// Value for llvm.loop.max_concurrency.count metadata. unsigned SYCLMaxConcurrencyNThreads; + /// Value for count variant (min/max/avg) and count metadata. + llvm::SmallVector, 2> + SYCLIntelFPGAVariantCount; + /// Flag for llvm.loop.coalesce metadata. bool SYCLLoopCoalesceEnable; @@ -404,6 +408,11 @@ class LoopInfoStack { StagedAttrs.SYCLSpeculatedIterationsNIterations = C; } + /// Set value of variant and loop count for the next loop pushed. + void setSYCLIntelFPGAVariantCount(const char *Var, unsigned int Count) { + StagedAttrs.SYCLIntelFPGAVariantCount.push_back({Var, Count}); + } + /// Set the unroll count for the next loop pushed. void setUnrollCount(unsigned C) { StagedAttrs.UnrollCount = C; } diff --git a/clang/lib/Sema/SemaStmtAttr.cpp b/clang/lib/Sema/SemaStmtAttr.cpp index 8b7b9e04db1f0..fad4c2533c638 100644 --- a/clang/lib/Sema/SemaStmtAttr.cpp +++ b/clang/lib/Sema/SemaStmtAttr.cpp @@ -243,6 +243,56 @@ static Attr *handleIntelFPGAIVDepAttr(Sema &S, Stmt *St, const ParsedAttr &A) { NumArgs == 2 ? A.getArgAsExpr(1) : nullptr); } +static void +CheckForDuplicateSYCLIntelLoopCountAttrs(Sema &S, + ArrayRef Attrs) { + // Create a list of SYCLIntelFPGALoopCount attributes only. + SmallVector OnlyLoopCountAttrs; + llvm::transform( + Attrs, std::back_inserter(OnlyLoopCountAttrs), [](const Attr *A) { + return dyn_cast_or_null(A); + }); + OnlyLoopCountAttrs.erase( + std::remove(OnlyLoopCountAttrs.begin(), OnlyLoopCountAttrs.end(), + static_cast(nullptr)), + OnlyLoopCountAttrs.end()); + if (OnlyLoopCountAttrs.empty()) + return; + + unsigned int MinCount = 0; + unsigned int MaxCount = 0; + unsigned int AvgCount = 0; + for (const auto *A : OnlyLoopCountAttrs) { + const auto *At = dyn_cast(A); + At->isMin() ? MinCount++ : At->isMax() ? MaxCount++ : AvgCount++; + if (MinCount > 1 || MaxCount > 1 || AvgCount > 1) + S.Diag(A->getLocation(), diag::err_sycl_loop_attr_duplication) << 1 << A; + } +} + +static SYCLIntelFPGALoopCountAttr * +handleIntelFPGALoopCountAttr(Sema &S, Stmt *St, const ParsedAttr &A) { + Expr *E = A.getArgAsExpr(0); + if (E && !E->isInstantiationDependent()) { + Optional ArgVal = + E->getIntegerConstantExpr(S.getASTContext()); + + if (!ArgVal) { + S.Diag(E->getExprLoc(), diag::err_attribute_argument_type) + << A << AANT_ArgumentIntegerConstant << E->getSourceRange(); + return nullptr; + } + + if (ArgVal->getSExtValue() < 0) { + S.Diag(E->getExprLoc(), diag::err_attribute_requires_positive_integer) + << A << /* non-negative */ 1; + return nullptr; + } + } + return new (S.Context) + SYCLIntelFPGALoopCountAttr(S.Context, A, A.getArgAsExpr(0)); +} + static Attr *handleIntelFPGANofusionAttr(Sema &S, Stmt *St, const ParsedAttr &A) { return new (S.Context) SYCLIntelFPGANofusionAttr(S.Context, A); @@ -558,6 +608,7 @@ static void CheckForIncompatibleSYCLLoopAttributes( Attrs); CheckForDuplicationSYCLLoopAttribute( S, Attrs); + CheckForDuplicateSYCLIntelLoopCountAttrs(S, Attrs); CheckForDuplicationSYCLLoopAttribute(S, Attrs, false); CheckRedundantSYCLIntelFPGAIVDepAttrs(S, Attrs); CheckForDuplicationSYCLLoopAttribute(S, Attrs); @@ -687,6 +738,8 @@ static Attr *ProcessStmtAttribute(Sema &S, Stmt *St, const ParsedAttr &A, case ParsedAttr::AT_SYCLIntelFPGASpeculatedIterations: return handleIntelFPGALoopAttr(S, St, A); + case ParsedAttr::AT_SYCLIntelFPGALoopCount: + return handleIntelFPGALoopCountAttr(S, St, A); case ParsedAttr::AT_OpenCLUnrollHint: case ParsedAttr::AT_LoopUnrollHint: return handleLoopUnrollHint(S, St, A, Range); diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 6f6fbf1960c04..ba407993e90e9 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -1093,6 +1093,8 @@ namespace { const SYCLIntelFPGASpeculatedIterationsAttr * TransformSYCLIntelFPGASpeculatedIterationsAttr( const SYCLIntelFPGASpeculatedIterationsAttr *SI); + const SYCLIntelFPGALoopCountAttr * + TransformSYCLIntelFPGALoopCountAttr(const SYCLIntelFPGALoopCountAttr *SI); ExprResult TransformPredefinedExpr(PredefinedExpr *E); ExprResult TransformDeclRefExpr(DeclRefExpr *E); @@ -1618,6 +1620,15 @@ TemplateInstantiator::TransformSYCLIntelFPGASpeculatedIterationsAttr( *SI, TransformedExpr); } +const SYCLIntelFPGALoopCountAttr * +TemplateInstantiator::TransformSYCLIntelFPGALoopCountAttr( + const SYCLIntelFPGALoopCountAttr *LCA) { + Expr *TransformedExpr = + getDerived().TransformExpr(LCA->getNTripCount()).get(); + return getSema().BuildSYCLIntelFPGALoopAttr( + *LCA, TransformedExpr); +} + const LoopUnrollHintAttr *TemplateInstantiator::TransformLoopUnrollHintAttr( const LoopUnrollHintAttr *LU) { Expr *TransformedExpr = diff --git a/clang/test/CodeGenSYCL/intel-fpga-loops.cpp b/clang/test/CodeGenSYCL/intel-fpga-loops.cpp index 8fb40e31ada3d..a0dfafdbcd92f 100644 --- a/clang/test/CodeGenSYCL/intel-fpga-loops.cpp +++ b/clang/test/CodeGenSYCL/intel-fpga-loops.cpp @@ -14,6 +14,9 @@ // CHECK: br label %for.cond2, !llvm.loop ![[MD_MI_2:[0-9]+]] // CHECK: br label %for.cond, !llvm.loop ![[MD_SI:[0-9]+]] // CHECK: br label %for.cond2, !llvm.loop ![[MD_SI_2:[0-9]+]] +// CHECK: br label %for.cond, !llvm.loop ![[MD_LCA:[0-9]+]] +// CHECK: br label %for.cond2, !llvm.loop ![[MD_LCA_1:[0-9]+]] +// CHECK: br label %for.cond13, !llvm.loop ![[MD_LCA_2:[0-9]+]] void disable_loop_pipelining() { int a[10]; @@ -109,6 +112,25 @@ void speculated_iterations() { a[i] = 0; } +template +void loop_count_control() { + int a[10]; + // CHECK: ![[MD_LCA]] = distinct !{![[MD_LCA]], ![[MP:[0-9]+]], ![[MD_loop_count_avg:[0-9]+]]} + // CHECK-NEXT: ![[MD_loop_count_avg]] = !{!"llvm.loop.intel.loopcount_avg", i32 12} + [[intel::loop_count_avg(A)]] for (int i = 0; i != 10; ++i) + a[i] = 0; + // CHECK: ![[MD_LCA_1]] = distinct !{![[MD_LCA_1]], ![[MP:[0-9]+]], ![[MD_loop_count_max:[0-9]+]]} + // CHECK-NEXT: ![[MD_loop_count_max]] = !{!"llvm.loop.intel.loopcount_max", i32 4} + [[intel::loop_count_max(4)]] for (int i = 0; i != 10; ++i) + a[i] = 0; + // CHECK: ![[MD_LCA_2]] = distinct !{![[MD_LCA_2]], ![[MP:[0-9]+]], ![[MD_loop_count_min:[0-9]+]], ![[MD_loop_count_max_1:[0-9]+]], ![[MD_loop_count_avg_1:[0-9]+]]} + // CHECK: ![[MD_loop_count_min]] = !{!"llvm.loop.intel.loopcount_min", i32 4} + // CHECK: ![[MD_loop_count_max_1]] = !{!"llvm.loop.intel.loopcount_max", i32 40} + // CHECK-NEXT: ![[MD_loop_count_avg_1]] = !{!"llvm.loop.intel.loopcount_avg", i32 21} + [[intel::loop_count_min(4)]] [[intel::loop_count_max(40)]] [[intel::loop_count_avg(21)]] for (int i = 0; i != 10; ++i) + a[i] = 0; +} + template __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { kernelFunc(); @@ -123,6 +145,7 @@ int main() { loop_coalesce<2>(); max_interleaving<3>(); speculated_iterations<4>(); + loop_count_control<12>(); }); return 0; } diff --git a/clang/test/SemaSYCL/intel-fpga-loops.cpp b/clang/test/SemaSYCL/intel-fpga-loops.cpp index 6bec33ff00b7a..ce26f4d977780 100644 --- a/clang/test/SemaSYCL/intel-fpga-loops.cpp +++ b/clang/test/SemaSYCL/intel-fpga-loops.cpp @@ -22,6 +22,8 @@ void foo() { [[intel::speculated_iterations(6)]] int j[10]; // expected-error@+1 {{'nofusion' attribute cannot be applied to a declaration}} [[intel::nofusion]] int k[10]; + // expected-error@+1{{'loop_count_avg' attribute cannot be applied to a declaration}} + [[intel::loop_count_avg(6)]] int p[10]; } // Test for deprecated spelling of Intel FPGA loop attributes @@ -119,6 +121,9 @@ void boo() { // expected-error@+1 {{'nofusion' attribute takes no arguments}} [[intel::nofusion(0)]] for (int i = 0; i != 10; ++i) a[i] = 0; + // expected-error@+1 {{'loop_count_avg' attribute takes one argument}} + [[intel::loop_count_avg(3, 6)]] for (int i = 0; i != 10; ++i) + a[i] = 0; } // Test for incorrect argument value for Intel FPGA loop attributes @@ -196,6 +201,15 @@ void goo() { // no diagnostics are expected [[intel::nofusion]] for (int i = 0; i != 10; ++i) a[i] = 0; + + [[intel::loop_count_avg(0)]] for (int i = 0; i != 10; ++i) + a[i] = 0; + // expected-error@+1 {{'loop_count_avg' attribute requires a non-negative integral compile time constant expression}} + [[intel::loop_count_avg(-1)]] for (int i = 0; i != 10; ++i) + a[i] = 0; + // expected-error@+1 {{'loop_count_avg' attribute requires an integer constant}} + [[intel::loop_count_avg("abc")]] for (int i = 0; i != 10; ++i) + a[i] = 0; } // Test for Intel FPGA loop attributes duplication @@ -304,6 +318,11 @@ void zoo() { // expected-error@+1 {{duplicate Intel FPGA loop attribute 'nofusion'}} [[intel::nofusion]] for (int i = 0; i != 10; ++i) a[i] = 0; + + [[intel::loop_count_avg(2)]] + // expected-error@+1{{duplicate Intel FPGA loop attribute 'loop_count_avg'}} + [[intel::loop_count_avg(2)]] for (int i = 0; i != 10; ++i) + a[i] = 0; } // Test for Intel FPGA loop attributes compatibility @@ -342,6 +361,17 @@ void loop_attrs_compatibility() { [[intel::disable_loop_pipelining]] [[intel::nofusion]] for (int i = 0; i != 10; ++i) a[i] = 0; + // no diagnostics are expected + [[intel::disable_loop_pipelining]] + [[intel::loop_count_avg(8)]] + for (int i = 0; i != 10; ++i) + a[i] = 0; + [[intel::loop_count_min(8)]] + for (int i = 0; i != 10; ++i) + a[i] = 0; + [[intel::loop_count_max(8)]] + for (int i = 0; i != 10; ++i) + a[i] = 0; } template @@ -402,6 +432,42 @@ void max_concurrency_dependent() { a[i] = 0; } +template +void loop_count_control_dependent() { + int a[10]; + + //expected-error@+1{{'loop_count_avg' attribute requires a non-negative integral compile time constant expression}} + [[intel::loop_count_avg(C)]] + for (int i = 0; i != 10; ++i) + a[i] = 0; + + //expected-error@+1{{'loop_count_min' attribute requires a non-negative integral compile time constant expression}} + [[intel::loop_count_min(C)]] + for (int i = 0; i != 10; ++i) + a[i] = 0; + + //expected-error@+1{{'loop_count_max' attribute requires a non-negative integral compile time constant expression}} + [[intel::loop_count_max(C)]] + for (int i = 0; i != 10; ++i) + a[i] = 0; + + [[intel::loop_count_avg(A)]] + //expected-error@+1{{duplicate Intel FPGA loop attribute 'loop_count_avg'}} + [[intel::loop_count_avg(B)]] for (int i = 0; i != 10; ++i) + a[i] = 0; + + [[intel::loop_count_min(A)]] + //expected-error@+1{{duplicate Intel FPGA loop attribute 'loop_count_min'}} + [[intel::loop_count_min(B)]] for (int i = 0; i != 10; ++i) + a[i] = 0; + + [[intel::loop_count_max(A)]] + //expected-error@+1{{duplicate Intel FPGA loop attribute 'loop_count_max'}} + [[intel::loop_count_max(B)]] for (int i = 0; i != 10; ++i) + a[i] = 0; + +} + int main() { deviceQueue.submit([&](sycl::handler &h) { h.single_task([]() { @@ -419,7 +485,10 @@ int main() { //expected-note@-1 +{{in instantiation of function template specialization}} max_concurrency_dependent<1, 4, -2>(); //expected-note@-1 +{{in instantiation of function template specialization}} - }); + + loop_count_control_dependent<3, 2, -1>(); + //expected-note@-1{{in instantiation of function template specialization 'loop_count_control_dependent<3, 2, -1>' requested here}} +}); }); return 0;