@@ -2219,6 +2219,15 @@ program is ill-formed and no diagnostic is required.
22192219
22202220The ``intel::kernel_args_restrict`` attribute has an effect when applied to a
22212221function, and no effect otherwise.
2222+
2223+ .. code-block:: c++
2224+
2225+ [[intel::kernel_args_restrict]] void func() {}
2226+
2227+ struct bar {
2228+ [[intel::kernel_args_restrict]] void operator()() const {}
2229+ };
2230+
22222231 }];
22232232}
22242233
@@ -2230,6 +2239,25 @@ Applies to a device function/lambda function. Indicates the number of work
22302239items that should be processed in parallel. Valid values are positive integers.
22312240If ``intel::num_simd_work_items`` is applied to a function called from a
22322241device kernel, the attribute is not ignored and it is propagated to the kernel.
2242+
2243+ .. code-block:: c++
2244+
2245+ [[intel::num_simd_work_items(4)]] void foo() {}
2246+
2247+ template<int N>
2248+ [[intel::num_simd_work_items(N)]] void bar() {}
2249+
2250+ class Foo {
2251+ public:
2252+ [[intel::num_simd_work_items(6)]] void operator()() const {}
2253+ };
2254+
2255+ template <int N>
2256+ class Functor {
2257+ public:
2258+ [[intel::num_simd_work_items(N)]] void operator()() const {}
2259+ };
2260+
22332261 }];
22342262}
22352263
@@ -2285,14 +2313,64 @@ those device functions, such that the kernel attributes are the sum of all
22852313attributes of all device functions called in this kernel.
22862314See section 6.7 Attributes for more details.
22872315
2288- As Intel extension, ``[[intel::reqd_work_group_size(X, Y, Z)]]`` spelling is allowed
2289- which features optional arguments `Y` and `Z`, those simplifies its usage if
2290- only 1- or 2-dimensional ND-range is assumed by a programmer. These arguments
2291- defaults to ``1``.
2316+ .. code-block:: c++
2317+
2318+ [[cl::reqd_work_group_size(4, 4, 4)]] void foo() {}
2319+
2320+ class Foo {
2321+ public:
2322+ [[cl::reqd_work_group_size(2, 2, 2)]] void operator()() const {}
2323+ };
2324+
2325+ template <int N, int N1, int N2>
2326+ class Functor {
2327+ public:
2328+ [[cl::reqd_work_group_size(N, N1, N2)]] void operator()() const {}
2329+ };
2330+
2331+ template <int N, int N1, int N2>
2332+ [[cl::reqd_work_group_size(N, N1, N2)]] void func() {}
2333+
2334+ As an Intel extension, the ``[[intel::reqd_work_group_size(X, Y, Z)]]``
2335+ spelling is supported. This spelling allows the Y and Z arguments to be
2336+ optional. If not provided by the user, the values of Y and Z default to 1.
2337+ This simplifies usage of the attribute when a 1- or 2-dimensional ND-range
2338+ is assumed.
2339+
2340+ .. code-block:: c++
2341+
2342+ [[intel::reqd_work_group_size(5)]]
2343+ // identical to [[intel::reqd_work_group_size(5, 1, 1)]]
2344+ void quux() {}
2345+
2346+ [[intel::reqd_work_group_size(5, 5)]]
2347+ // identical to [[intel::reqd_work_group_size(5, 5, 1)]]
2348+ void qux() {}
2349+
2350+ [[intel::reqd_work_group_size(4, 4, 4)]] void foo() {}
2351+
2352+ class Foo {
2353+ public:
2354+ [[intel::reqd_work_group_size(2, 2, 2)]] void operator()() const {}
2355+ };
2356+
2357+ template <int X, int Y, int Z>
2358+ class Functor {
2359+ public:
2360+ [[intel::reqd_work_group_size(X, Y, Z)]] void operator()() const {}
2361+ };
2362+
2363+ template <int X, int Y, int Z>
2364+ [[intel::reqd_work_group_size(X, Y, Z)]] void func() {}
22922365
22932366In OpenCL C, this attribute is available in the GNU spelling
22942367(``__attribute__((reqd_work_group_size(X, Y, Z)))``); see section
229523686.7.2 Optional Attribute Qualifiers of the OpenCL 1.2 specification for details.
2369+
2370+ .. code-block:: c++
2371+
2372+ __kernel __attribute__((reqd_work_group_size(8, 16, 32))) void test() {}
2373+
22962374 }];
22972375}
22982376
@@ -2306,6 +2384,25 @@ reqd_work_group_size, but allows work groups that are smaller or equal to the
23062384specified sizes.
23072385If ``intel::max_work_group_size`` is applied to a function called from a
23082386device kernel, the attribute is not ignored and it is propagated to the kernel.
2387+
2388+ .. code-block:: c++
2389+
2390+ [[intel::max_work_group_size(4, 4, 4)]] void foo() {}
2391+
2392+ class Foo {
2393+ public:
2394+ [[intel::max_work_group_size(2, 2, 2)]] void operator()() const {}
2395+ };
2396+
2397+ template <int N, int N1, int N2>
2398+ class Functor {
2399+ public:
2400+ [[intel::max_work_group_size(N, N1, N2)]] void operator()() const {}
2401+ };
2402+
2403+ template <int N, int N1, int N2>
2404+ [[intel::max_work_group_size(N, N1, N2)]] void func() {}
2405+
23092406 }];
23102407}
23112408
@@ -2316,12 +2413,42 @@ def SYCLIntelMaxGlobalWorkDimAttrDocs : Documentation {
23162413Applies to a device function/lambda function or function call operator (of a
23172414function object). Indicates the largest valid global work dimension that will be
23182415accepted when running the kernel on a device. Valid values are integers in a
2319- range of [0, 3]. A kernel with max_global_work_dim(0) must be invoked with a
2416+ range of [0, 3].
2417+ If ``intel::max_global_work_dim`` is applied to a function called from a
2418+ device kernel, the attribute is not ignored and it is propagated to the kernel.
2419+
2420+ .. code-block:: c++
2421+
2422+ [[intel::max_global_work_dim(1)]] void foo() {}
2423+
2424+ template<int N>
2425+ [[intel::max_global_work_dim(N)]] void bar() {}
2426+
2427+ class Foo {
2428+ public:
2429+ [[intel::max_global_work_dim(1)]] void operator()() const {}
2430+ };
2431+
2432+ template <int N>
2433+ class Functor {
2434+ public:
2435+ [[intel::max_global_work_dim(N)]] void operator()() const {}
2436+ };
2437+
2438+ A kernel with ``intel::max_global_work_dim(0)`` must be invoked with
23202439'single_task'. If ``intel::max_work_group_size`` or
23212440``cl::reqd_work_group_size`` is also applied to the kernel, its arguments
23222441shall be (1, 1, 1).
2323- If ``intel::max_global_work_dim`` is applied to a function called from a
2324- device kernel, the attribute is not ignored and it is propagated to the kernel.
2442+
2443+ .. code-block:: c++
2444+
2445+ struct TRIFuncObjGood {
2446+ [[intel::max_global_work_dim(0)]]
2447+ [[intel::max_work_group_size(1, 1, 1)]]
2448+ [[cl::reqd_work_group_size(1, 1, 1)]]
2449+ void operator()() const {}
2450+ };
2451+
23252452 }];
23262453}
23272454
@@ -2344,6 +2471,25 @@ This attribute enables communication of the desired maximum frequency of the
23442471device operation, guiding the FPGA backend to insert the appropriate number of
23452472registers to break-up the combinational logic circuit, and thereby controlling
23462473the length of the longest combinational path.
2474+
2475+ .. code-block:: c++
2476+
2477+ [[intel::scheduler_target_fmax_mhz(4)]] void foo() {}
2478+
2479+ template<int N>
2480+ [[intel::scheduler_target_fmax_mhz(N)]] void bar() {}
2481+
2482+ class Foo {
2483+ public:
2484+ [[intel::scheduler_target_fmax_mhz(6)]] void operator()() const {}
2485+ };
2486+
2487+ template <int N>
2488+ class Functor {
2489+ public:
2490+ [[intel::scheduler_target_fmax_mhz(N)]] void operator()() const {}
2491+ };
2492+
23472493 }];
23482494}
23492495
@@ -2355,6 +2501,29 @@ Applies to a device function/lambda function or function call operator (of a
23552501function object). If 1, the compiler doesn't use the global work offset values
23562502for the device function. Valid values are 0 and 1. If used without an argument,
23572503a value of 1 is set implicitly.
2504+
2505+ .. code-block:: c++
2506+
2507+ [[intel::no_global_work_offset]]
2508+ // identical to [[intel::no_global_work_offset(1)]]
2509+ void quux() {}
2510+
2511+ [[intel::no_global_work_offset(0)]] void foo() {}
2512+
2513+ class Foo {
2514+ public:
2515+ [[intel::no_global_work_offset(1)]] void operator()() const {}
2516+ };
2517+
2518+ template <int N>
2519+ class Functor {
2520+ public:
2521+ [[intel::no_global_work_offset(N)]] void operator()() const {}
2522+ };
2523+
2524+ template <int N>
2525+ [[intel::no_global_work_offset(N)]] void func() {}
2526+
23582527 }];
23592528}
23602529
@@ -2645,6 +2814,15 @@ optimization.
26452814This attribute allows passing the name and address of the function to a special
26462815``cl::sycl::intel::get_device_func_ptr`` API call, which extracts the device
26472816function pointer for the specified function.
2817+
2818+ .. code-block:: c++
2819+
2820+ [[intel::device_indirectly_callable]] int func3() {}
2821+
2822+ class A {
2823+ [[intel::device_indirectly_callable]] A() {}
2824+ };
2825+
26482826 }];
26492827}
26502828
0 commit comments