From 7e6d39bf692a2867aab16e9781ad52f30b5fafad Mon Sep 17 00:00:00 2001
From: Hartmut Kaiser <hartmut.kaiser@gmail.com>
Date: Mon, 3 Oct 2022 16:23:47 -0500
Subject: [PATCH 1/2] Expose available cache sizes from topology object

---
 .../examples/system_characteristics.hpp       |  13 ++
 .../include/hpx/topology/topology.hpp         |   9 ++
 libs/core/topology/src/topology.cpp           | 129 ++++++++++++++----
 3 files changed, 121 insertions(+), 30 deletions(-)

diff --git a/libs/core/resource_partitioner/examples/system_characteristics.hpp b/libs/core/resource_partitioner/examples/system_characteristics.hpp
index 70e150f4c0dc..f4d16b0ea7f6 100644
--- a/libs/core/resource_partitioner/examples/system_characteristics.hpp
+++ b/libs/core/resource_partitioner/examples/system_characteristics.hpp
@@ -50,4 +50,17 @@ void print_system_characteristics()
 
     //! -------------------------------------- topology
     topo.print_hwloc(std::cout);
+
+    //! -------------------------------------- cache sizes
+    hpx::threads::mask_type core0 = topo.get_core_affinity_mask(0);
+    std::cout << "[System Cache sizes (core 0)]\n"
+              << "L1 Cache: " << topo.get_cache_size(core0, 1) << "\n"
+              << "L2 Cache: " << topo.get_cache_size(core0, 2) << "\n"
+              << "L3 Cache: " << topo.get_cache_size(core0, 3) << "\n\n";
+
+    hpx::threads::mask_type machine = topo.get_machine_affinity_mask();
+    std::cout << "[System Cache sizes (all available cores)]\n"
+              << "L1 Cache: " << topo.get_cache_size(machine, 1) << "\n"
+              << "L2 Cache: " << topo.get_cache_size(machine, 2) << "\n"
+              << "L3 Cache: " << topo.get_cache_size(machine, 3) << "\n\n";
 }
diff --git a/libs/core/topology/include/hpx/topology/topology.hpp b/libs/core/topology/include/hpx/topology/topology.hpp
index a68c53b72ee2..64af890487a4 100644
--- a/libs/core/topology/include/hpx/topology/topology.hpp
+++ b/libs/core/topology/include/hpx/topology/topology.hpp
@@ -266,6 +266,9 @@ namespace hpx { namespace threads {
         std::size_t get_pu_number(std::size_t num_core, std::size_t num_pu,
             error_code& ec = throws) const;
 
+        /// Return the size of the cache associated with the given mask.
+        std::size_t get_cache_size(mask_type mask, int level) const;
+
         mask_type get_cpubind_mask(error_code& ec = throws) const;
         mask_type get_cpubind_mask(
             std::thread& handle, error_code& ec = throws) const;
@@ -339,8 +342,12 @@ namespace hpx { namespace threads {
 
         void extract_node_mask(hwloc_obj_t parent, mask_type& mask) const;
 
+        std::size_t get_number_of_core_pus_locked(std::size_t core) const;
+
         std::size_t extract_node_count(
             hwloc_obj_t parent, hwloc_obj_type_t type, std::size_t count) const;
+        std::size_t extract_node_count_locked(
+            hwloc_obj_t parent, hwloc_obj_type_t type, std::size_t count) const;
 
         mask_type init_machine_affinity_mask() const;
         mask_type init_socket_affinity_mask(std::size_t num_thread) const
@@ -364,6 +371,8 @@ namespace hpx { namespace threads {
 
         void init_num_of_pus();
 
+        hwloc_obj_t get_pu_obj(std::size_t num_core) const;
+
         hwloc_topology_t topo;
 
         // We need to define a constant pu offset.
diff --git a/libs/core/topology/src/topology.cpp b/libs/core/topology/src/topology.cpp
index bd6d1bad6ffd..07afe94a666c 100644
--- a/libs/core/topology/src/topology.cpp
+++ b/libs/core/topology/src/topology.cpp
@@ -691,11 +691,9 @@ namespace hpx { namespace threads {
         }
     }    // }}}
 
-    std::size_t topology::extract_node_count(
+    std::size_t topology::extract_node_count_locked(
         hwloc_obj_t parent, hwloc_obj_type_t type, std::size_t count) const
     {    // {{{
-        hwloc_obj_t obj;
-
         if (parent == nullptr)
         {
             return count;
@@ -706,37 +704,29 @@ namespace hpx { namespace threads {
             return count;
         }
 
-        {
-            std::unique_lock<mutex_type> lk(topo_mtx);
-            obj = hwloc_get_next_child(topo, parent, nullptr);
-        }
+        hwloc_obj_t obj = hwloc_get_next_child(topo, parent, nullptr);
 
         while (obj)
         {
             if (hwloc_compare_types(type, obj->type) == 0)
             {
-                /*
-                do {
-                    ++count;
-                    {
-                        std::unique_lock<mutex_type> lk(topo_mtx);
-                        obj = hwloc_get_next_child(topo, parent, obj);
-                    }
-                } while (obj != nullptr && hwloc_compare_types(type, obj->type) == 0);
-                return count;
-                */
                 ++count;
             }
 
-            count = extract_node_count(obj, type, count);
-
-            std::unique_lock<mutex_type> lk(topo_mtx);
+            count = extract_node_count_locked(obj, type, count);
             obj = hwloc_get_next_child(topo, parent, obj);
         }
 
         return count;
     }    // }}}
 
+    std::size_t topology::extract_node_count(
+        hwloc_obj_t parent, hwloc_obj_type_t type, std::size_t count) const
+    {    // {{{
+        std::unique_lock<mutex_type> lk(topo_mtx);
+        return extract_node_count_locked(parent, type, count);
+    }    // }}}
+
     std::size_t topology::get_number_of_sockets() const
     {
         int nobjs = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_SOCKET);
@@ -844,26 +834,27 @@ namespace hpx { namespace threads {
         return num_of_pus_;
     }
 
-    std::size_t topology::get_number_of_core_pus(std::size_t core) const
+    std::size_t topology::get_number_of_core_pus_locked(std::size_t core) const
     {
-        hwloc_obj_t core_obj = nullptr;
-
-        {
-            std::unique_lock<mutex_type> lk(topo_mtx);
-            core_obj = hwloc_get_obj_by_type(
-                topo, HWLOC_OBJ_CORE, static_cast<unsigned>(core));
-        }
+        hwloc_obj_t core_obj = hwloc_get_obj_by_type(
+            topo, HWLOC_OBJ_CORE, static_cast<unsigned>(core));
 
         if (!use_pus_as_cores_ && core_obj)
         {
             HPX_ASSERT(core == detail::get_index(core_obj));
             std::size_t pu_count = 0;
-            return extract_node_count(core_obj, HWLOC_OBJ_PU, pu_count);
+            return extract_node_count_locked(core_obj, HWLOC_OBJ_PU, pu_count);
         }
 
         return std::size_t(1);
     }
 
+    std::size_t topology::get_number_of_core_pus(std::size_t core) const
+    {
+        std::unique_lock<mutex_type> lk(topo_mtx);
+        return get_number_of_core_pus_locked(core);
+    }
+
     std::size_t topology::get_number_of_socket_cores(
         std::size_t num_socket) const
     {
@@ -1456,12 +1447,90 @@ namespace hpx { namespace threads {
 #endif
     }
 
-    /// Free memory that was previously allocated by allocate
+    // Free memory that was previously allocated by allocate
     void topology::deallocate(void* addr, std::size_t len) const noexcept
     {
         hwloc_free(topo, addr, len);
     }
 
+    ////////////////////////////////////////////////////////////////////////////
+    hwloc_obj_t topology::get_pu_obj(std::size_t num_pu) const
+    {
+        hwloc_obj_t pu_obj = hwloc_get_obj_by_type(
+            topo, HWLOC_OBJ_CORE, static_cast<unsigned>(num_pu));
+
+        if (pu_obj == nullptr)
+        {
+            HPX_THROW_EXCEPTION(no_success, "topology::get_core_obj",
+                "Couldn't find required object representing the given core in "
+                "topology");
+        }
+
+        return pu_obj;
+    }
+
+    template <typename F>
+    static void iterate(hwloc_bitmap_t cpuset, F&& f)
+    {
+        for (auto id = hwloc_bitmap_first(cpuset);
+             (unsigned) id != (unsigned) -1; id = hwloc_bitmap_next(cpuset, id))
+        {
+            if (hwloc_bitmap_isset(cpuset, id))
+            {
+                f(id);
+            }
+        }
+    }
+
+    static auto num_set_bits(hwloc_bitmap_t cpuset)
+    {
+        std::size_t count = 0;
+        iterate(cpuset, [&](auto) { ++count; });
+        return count;
+    }
+
+    // Return the size of the given cache level available to the PUs in mask.
+    std::size_t topology::get_cache_size(mask_type mask, int level) const
+    {
+        if (level < 1 || level > 5)
+        {
+            return 0;
+        }
+
+        std::unique_lock<mutex_type> lk(topo_mtx);
+
+        hwloc_bitmap_t cpuset = mask_to_bitmap(mask, HWLOC_OBJ_PU);
+        std::size_t cache_size = 0;
+
+        iterate(cpuset, [&](auto num_pu) {
+            hwloc_obj_t pu_obj = hwloc_get_obj_by_type(
+                topo, HWLOC_OBJ_PU, static_cast<unsigned>(num_pu));
+            if (pu_obj == nullptr)
+                return;
+
+            hwloc_obj_type_t type = HWLOC_OBJ_L1CACHE;
+            if (level == 2)
+                type = HWLOC_OBJ_L2CACHE;
+            else if (level == 3)
+                type = HWLOC_OBJ_L3CACHE;
+            else if (level == 4)
+                type = HWLOC_OBJ_L4CACHE;
+            else if (level == 5)
+                type = HWLOC_OBJ_L5CACHE;
+
+            hwloc_obj_t cache_obj =
+                hwloc_get_ancestor_obj_by_type(topo, type, pu_obj);
+            if (cache_obj == nullptr)
+                return;
+
+            cache_size += std::size_t(cache_obj->attr->cache.size) /
+                num_set_bits(cache_obj->cpuset);
+        });
+
+        hwloc_bitmap_free(cpuset);
+        return cache_size;
+    }
+
     ///////////////////////////////////////////////////////////////////////////
     hwloc_bitmap_t topology::mask_to_bitmap(
         mask_cref_type mask, hwloc_obj_type_t htype) const

From 37a248d553f36abcc303f1253d20f601373d5ece Mon Sep 17 00:00:00 2001
From: Hartmut Kaiser <hartmut.kaiser@gmail.com>
Date: Tue, 4 Oct 2022 11:49:05 -0500
Subject: [PATCH 2/2] Adding get_processing_units_mask scheduling property

- adding support for this to all executors, where possible
---
 examples/1d_stencil/CMakeLists.txt            |  2 +-
 .../include/hpx/affinity/affinity_data.hpp    | 29 ++++++----
 libs/core/affinity/src/affinity_data.cpp      |  2 +-
 libs/core/asio/include/hpx/asio/asio_util.hpp |  3 +-
 .../asio/include/hpx/asio/map_hostnames.hpp   |  3 +-
 libs/core/asio/src/asio_util.cpp              |  4 +-
 .../hpx/async_base/scheduling_properties.hpp  | 47 +++++++++++++++
 libs/core/compute_local/CMakeLists.txt        |  1 +
 .../host/block_fork_join_executor.hpp         | 10 +++-
 .../datastructures/detail/dynamic_bitset.hpp  |  3 +
 .../hpx/executors/annotating_executor.hpp     | 20 +++++--
 .../executors/explicit_scheduler_executor.hpp | 20 +++++--
 .../hpx/executors/fork_join_executor.hpp      | 17 +++++-
 .../hpx/executors/parallel_executor.hpp       | 37 ++++++++++--
 .../restricted_thread_pool_executor.hpp       | 35 +++++++++---
 .../hpx/executors/scheduler_executor.hpp      | 19 +++++--
 .../hpx/executors/sequenced_executor.hpp      | 18 ++++++
 .../hpx/executors/thread_pool_scheduler.hpp   | 28 ++++++++-
 .../tests/unit/fork_join_executor.cpp         | 23 +++++++-
 .../tests/unit/parallel_executor.cpp          | 26 ++++++++-
 .../detail/partitioner.hpp                    |  4 +-
 .../src/detail_partitioner.cpp                |  8 ++-
 .../include/hpx/synchronization/once.hpp      |  2 +-
 .../hpx/threading_base/thread_pool_base.hpp   |  9 ++-
 .../threading_base/src/thread_pool_base.cpp   | 40 +++++++++++--
 libs/core/timed_execution/CMakeLists.txt      |  3 +-
 .../hpx/timed_execution/timed_executors.hpp   | 19 +++++--
 libs/core/topology/CMakeLists.txt             |  8 ++-
 .../hpx/topology/scheduling_properties.hpp    | 57 +++++++++++++++++++
 libs/core/topology/src/topology.cpp           | 50 ++++++++++++----
 30 files changed, 465 insertions(+), 82 deletions(-)
 create mode 100644 libs/core/topology/include/hpx/topology/scheduling_properties.hpp

diff --git a/examples/1d_stencil/CMakeLists.txt b/examples/1d_stencil/CMakeLists.txt
index 856dd3685e79..bf535bf00716 100644
--- a/examples/1d_stencil/CMakeLists.txt
+++ b/examples/1d_stencil/CMakeLists.txt
@@ -58,7 +58,7 @@ foreach(example_program ${example_programs})
   add_hpx_executable(
     ${example_program} INTERNAL_FLAGS
     SOURCES ${sources} ${${example_program}_FLAGS}
-    FOLDER "Examples/1D Stencil/${example_program}"
+    FOLDER "Examples/1D Stencil"
   )
 
   add_hpx_example_target_dependencies("1d_stencil" ${example_program})
diff --git a/libs/core/affinity/include/hpx/affinity/affinity_data.hpp b/libs/core/affinity/include/hpx/affinity/affinity_data.hpp
index e452087139a0..a221c0bbe7c0 100644
--- a/libs/core/affinity/include/hpx/affinity/affinity_data.hpp
+++ b/libs/core/affinity/include/hpx/affinity/affinity_data.hpp
@@ -1,4 +1,4 @@
-//  Copyright (c) 2007-2017 Hartmut Kaiser
+//  Copyright (c) 2007-2022 Hartmut Kaiser
 //
 //  SPDX-License-Identifier: BSL-1.0
 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
@@ -89,21 +89,28 @@ namespace hpx { namespace threads { namespace policies { namespace detail {
             std::size_t num_thread, std::size_t hardware_concurrency) const;
 
     private:
-        std::size_t num_threads_;    ///< number of processing units managed
-        std::size_t
-            pu_offset_;          ///< offset of the first processing unit to use
-        std::size_t pu_step_;    ///< step between used processing units
+        /// number of processing units managed
+        std::size_t num_threads_;
+
+        /// offset of the first processing unit to use
+        std::size_t pu_offset_;
+
+        /// step between used processing units
+        std::size_t pu_step_;
         std::size_t used_cores_;
         std::string affinity_domain_;
         std::vector<mask_type> affinity_masks_;
         std::vector<std::size_t> pu_nums_;
-        mask_type
-            no_affinity_;    ///< mask of processing units which have no affinity
-        bool
-            use_process_mask_;    ///< use the process CPU mask to limit available PUs
+
+        /// mask of processing units which have no affinity
+        mask_type no_affinity_;
+
+        /// use the process CPU mask to limit available PUs
+        bool use_process_mask_;
         std::size_t num_pus_needed_;
-        static std::atomic<int>
-            instance_number_counter_;    ///< counter for instance numbers
+
+        /// counter for instance numbers
+        static std::atomic<int> instance_number_counter_;
     };
 }}}}    // namespace hpx::threads::policies::detail
 
diff --git a/libs/core/affinity/src/affinity_data.cpp b/libs/core/affinity/src/affinity_data.cpp
index 2a3bde4ba7c3..2bb3f2f79f20 100644
--- a/libs/core/affinity/src/affinity_data.cpp
+++ b/libs/core/affinity/src/affinity_data.cpp
@@ -1,4 +1,4 @@
-//  Copyright (c) 2007-2017 Hartmut Kaiser
+//  Copyright (c) 2007-2022 Hartmut Kaiser
 //
 //  SPDX-License-Identifier: BSL-1.0
 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
diff --git a/libs/core/asio/include/hpx/asio/asio_util.hpp b/libs/core/asio/include/hpx/asio/asio_util.hpp
index 23b372f50745..e03f8da80f36 100644
--- a/libs/core/asio/include/hpx/asio/asio_util.hpp
+++ b/libs/core/asio/include/hpx/asio/asio_util.hpp
@@ -27,7 +27,8 @@ namespace hpx { namespace util {
 
     ///////////////////////////////////////////////////////////////////////////
     HPX_CORE_EXPORT bool get_endpoint(std::string const& addr,
-        std::uint16_t port, asio::ip::tcp::endpoint& ep, bool force_ipv4 = false);
+        std::uint16_t port, asio::ip::tcp::endpoint& ep,
+        bool force_ipv4 = false);
 
     HPX_CORE_EXPORT std::string get_endpoint_name(
         asio::ip::tcp::endpoint const& ep);
diff --git a/libs/core/asio/include/hpx/asio/map_hostnames.hpp b/libs/core/asio/include/hpx/asio/map_hostnames.hpp
index e5f66a786d02..e42311a26578 100644
--- a/libs/core/asio/include/hpx/asio/map_hostnames.hpp
+++ b/libs/core/asio/include/hpx/asio/map_hostnames.hpp
@@ -27,7 +27,8 @@ namespace hpx { namespace util {
             transform_function_type;
 
         map_hostnames(bool debug = false)
-          : ipv4_(false), debug_(debug)
+          : ipv4_(false)
+          , debug_(debug)
         {
         }
 
diff --git a/libs/core/asio/src/asio_util.cpp b/libs/core/asio/src/asio_util.cpp
index 89af33ca1ece..e9293b5de1db 100644
--- a/libs/core/asio/src/asio_util.cpp
+++ b/libs/core/asio/src/asio_util.cpp
@@ -112,8 +112,8 @@ namespace hpx { namespace util {
 
             asio::ip::tcp::resolver::iterator it = resolver.resolve(query);
 
-            while (force_ipv4 &&
-                it != tcp::resolver::iterator() && !it->endpoint().address().is_v4())
+            while (force_ipv4 && it != tcp::resolver::iterator() &&
+                !it->endpoint().address().is_v4())
             {
                 ++it;
             }
diff --git a/libs/core/async_base/include/hpx/async_base/scheduling_properties.hpp b/libs/core/async_base/include/hpx/async_base/scheduling_properties.hpp
index 6c1cb5ee7e61..8d60563294f3 100644
--- a/libs/core/async_base/include/hpx/async_base/scheduling_properties.hpp
+++ b/libs/core/async_base/include/hpx/async_base/scheduling_properties.hpp
@@ -1,4 +1,5 @@
 //  Copyright (c) 2020 ETH Zurich
+//  Copyright (c) 2022 Hartmut Kaiser
 //
 //  SPDX-License-Identifier: BSL-1.0
 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
@@ -10,8 +11,21 @@
 #include <hpx/coroutines/thread_enums.hpp>
 #include <hpx/functional/detail/tag_fallback_invoke.hpp>
 
+#include <type_traits>
+
 namespace hpx { namespace execution { namespace experimental {
 
+    ///////////////////////////////////////////////////////////////////////////
+    template <typename Property, typename Enable = void>
+    struct is_scheduling_property : std::false_type
+    {
+    };
+
+    template <typename Property>
+    inline constexpr bool is_scheduling_property_v =
+        is_scheduling_property<Property>::value;
+
+    ///////////////////////////////////////////////////////////////////////////
     namespace detail {
 
         template <typename Tag, typename... Args>
@@ -41,6 +55,16 @@ namespace hpx { namespace execution { namespace experimental {
         };
     }    // namespace detail
 
+    ///////////////////////////////////////////////////////////////////////////
+    template <typename Property>
+    struct is_scheduling_property<Property,
+        std::enable_if_t<
+            std::is_base_of_v<detail::property_base<Property>, Property>>>
+      : std::true_type
+    {
+    };
+
+    ///////////////////////////////////////////////////////////////////////////
     inline constexpr struct with_priority_t final
       : detail::property_base<with_priority_t>
     {
@@ -59,6 +83,12 @@ namespace hpx { namespace execution { namespace experimental {
         }
     } get_priority{};
 
+    template <>
+    struct is_scheduling_property<get_priority_t> : std::true_type
+    {
+    };
+
+    ///////////////////////////////////////////////////////////////////////////
     inline constexpr struct with_stacksize_t final
       : detail::property_base<with_stacksize_t>
     {
@@ -77,6 +107,12 @@ namespace hpx { namespace execution { namespace experimental {
         }
     } get_stacksize{};
 
+    template <>
+    struct is_scheduling_property<get_stacksize_t> : std::true_type
+    {
+    };
+
+    ///////////////////////////////////////////////////////////////////////////
     inline constexpr struct with_hint_t final
       : detail::property_base<with_hint_t>
     {
@@ -95,6 +131,12 @@ namespace hpx { namespace execution { namespace experimental {
         }
     } get_hint{};
 
+    template <>
+    struct is_scheduling_property<get_hint_t> : std::true_type
+    {
+    };
+
+    ///////////////////////////////////////////////////////////////////////////
     inline constexpr struct with_annotation_t final
       : detail::property_base<with_annotation_t>
     {
@@ -112,4 +154,9 @@ namespace hpx { namespace execution { namespace experimental {
             return nullptr;
         }
     } get_annotation{};
+
+    template <>
+    struct is_scheduling_property<get_annotation_t> : std::true_type
+    {
+    };
 }}}    // namespace hpx::execution::experimental
diff --git a/libs/core/compute_local/CMakeLists.txt b/libs/core/compute_local/CMakeLists.txt
index 74f548f1f70d..5fff3e86b037 100644
--- a/libs/core/compute_local/CMakeLists.txt
+++ b/libs/core/compute_local/CMakeLists.txt
@@ -48,6 +48,7 @@ add_hpx_module(
     hpx_algorithms
     hpx_allocator_support
     hpx_async_combinators
+    hpx_concepts
     hpx_config
     hpx_datastructures
     hpx_execution
diff --git a/libs/core/compute_local/include/hpx/compute_local/host/block_fork_join_executor.hpp b/libs/core/compute_local/include/hpx/compute_local/host/block_fork_join_executor.hpp
index 55f0c9c3a0c6..c1fd8553d647 100644
--- a/libs/core/compute_local/include/hpx/compute_local/host/block_fork_join_executor.hpp
+++ b/libs/core/compute_local/include/hpx/compute_local/host/block_fork_join_executor.hpp
@@ -20,8 +20,9 @@
 #include <hpx/executors/fork_join_executor.hpp>
 #include <hpx/iterator_support/counting_shape.hpp>
 #include <hpx/iterator_support/iterator_range.hpp>
+#include <hpx/modules/concepts.hpp>
+#include <hpx/modules/topology.hpp>
 #include <hpx/resource_partitioner/detail/partitioner.hpp>
-#include <hpx/topology/cpu_mask.hpp>
 
 #include <chrono>
 #include <cstddef>
@@ -273,6 +274,7 @@ namespace hpx::execution::experimental {
         // clang-format off
         template <typename Tag, typename Property,
             HPX_CONCEPT_REQUIRES_(
+                hpx::execution::experimental::is_scheduling_property_v<Tag> &&
                 hpx::functional::is_tag_invocable_v<
                     Tag, fork_join_executor, Property>
             )>
@@ -281,20 +283,22 @@ namespace hpx::execution::experimental {
             block_fork_join_executor const& exec, Property&& prop) noexcept
         {
             auto exec_with_prop = exec;
-            exec_with_prop.exec_ = tag(exec.exec_, HPX_FORWARD(Property, prop));
+            exec_with_prop.exec_ = hpx::functional::tag_invoke(
+                tag, exec.exec_, HPX_FORWARD(Property, prop));
             return exec_with_prop;
         }
 
         // clang-format off
         template <typename Tag,
             HPX_CONCEPT_REQUIRES_(
+                hpx::execution::experimental::is_scheduling_property_v<Tag> &&
                 hpx::functional::is_tag_invocable_v<Tag, fork_join_executor>
             )>
         // clang-format on
         friend decltype(auto) tag_invoke(
             Tag tag, block_fork_join_executor const& exec) noexcept
         {
-            return tag(exec.exec_);
+            return hpx::functional::tag_invoke(tag, exec.exec_);
         }
 
     private:
diff --git a/libs/core/datastructures/include/hpx/datastructures/detail/dynamic_bitset.hpp b/libs/core/datastructures/include/hpx/datastructures/detail/dynamic_bitset.hpp
index da699248ea78..a9f220b14563 100644
--- a/libs/core/datastructures/include/hpx/datastructures/detail/dynamic_bitset.hpp
+++ b/libs/core/datastructures/include/hpx/datastructures/detail/dynamic_bitset.hpp
@@ -2160,7 +2160,10 @@ namespace hpx::detail {
         block_width_type const extra_bits = count_extra_bits();
 
         if (extra_bits != 0)
+        {
+            // NOLINTNEXTLINE(stringop-overflow=)
             highest_block() &= (Block(1) << extra_bits) - 1;
+        }
     }
 
     // check class invariants
diff --git a/libs/core/executors/include/hpx/executors/annotating_executor.hpp b/libs/core/executors/include/hpx/executors/annotating_executor.hpp
index 03a0cf8ecfb1..1eff3a180680 100644
--- a/libs/core/executors/include/hpx/executors/annotating_executor.hpp
+++ b/libs/core/executors/include/hpx/executors/annotating_executor.hpp
@@ -14,6 +14,8 @@
 #include <hpx/execution_base/execution.hpp>
 #include <hpx/execution_base/traits/is_executor.hpp>
 #include <hpx/functional/tag_invoke.hpp>
+#include <hpx/modules/concepts.hpp>
+#include <hpx/modules/topology.hpp>
 #include <hpx/threading_base/annotated_function.hpp>
 #include <hpx/type_support/always_void.hpp>
 
@@ -188,9 +190,13 @@ namespace hpx { namespace execution { namespace experimental {
         }
 
         // support all properties exposed by the wrapped executor
+        // clang-format off
         template <typename Tag, typename Property,
-            typename Enable = std::enable_if_t<hpx::functional::
-                    is_tag_invocable_v<Tag, BaseExecutor, Property>>>
+            HPX_CONCEPT_REQUIRES_(
+                hpx::execution::experimental::is_scheduling_property_v<Tag> &&
+                hpx::functional::is_tag_invocable_v<Tag, BaseExecutor, Property>
+            )>
+        // clang-format on
         friend annotating_executor tag_invoke(
             Tag tag, annotating_executor const& exec, Property&& prop)
         {
@@ -198,13 +204,17 @@ namespace hpx { namespace execution { namespace experimental {
                 tag, exec.exec_, HPX_FORWARD(Property, prop)));
         }
 
+        // clang-format off
         template <typename Tag,
-            typename Enable = std::enable_if_t<
-                hpx::functional::is_tag_invocable_v<Tag, BaseExecutor>>>
+            HPX_CONCEPT_REQUIRES_(
+                hpx::execution::experimental::is_scheduling_property_v<Tag> &&
+                hpx::functional::is_tag_invocable_v<Tag, BaseExecutor>
+            )>
+        // clang-format on
         friend decltype(auto) tag_invoke(
             Tag tag, annotating_executor const& exec)
         {
-            return hpx::functional::tag_invoke(tag, exec.policy_);
+            return hpx::functional::tag_invoke(tag, exec.exec_);
         }
 
     private:
diff --git a/libs/core/executors/include/hpx/executors/explicit_scheduler_executor.hpp b/libs/core/executors/include/hpx/executors/explicit_scheduler_executor.hpp
index 76cfc1d3561f..b453dc1755d9 100644
--- a/libs/core/executors/include/hpx/executors/explicit_scheduler_executor.hpp
+++ b/libs/core/executors/include/hpx/executors/explicit_scheduler_executor.hpp
@@ -9,7 +9,6 @@
 #pragma once
 
 #include <hpx/config.hpp>
-#include <hpx/concepts/concepts.hpp>
 #include <hpx/datastructures/tuple.hpp>
 #include <hpx/execution/algorithms/bulk.hpp>
 #include <hpx/execution/algorithms/keep_future.hpp>
@@ -28,6 +27,8 @@
 #include <hpx/functional/deferred_call.hpp>
 #include <hpx/functional/invoke_fused.hpp>
 #include <hpx/functional/tag_invoke.hpp>
+#include <hpx/modules/concepts.hpp>
+#include <hpx/modules/topology.hpp>
 
 #include <cstddef>
 #include <exception>
@@ -92,9 +93,14 @@ namespace hpx::execution::experimental {
         }
 
         // support all properties exposed by the wrapped scheduler
+        // clang-format off
         template <typename Tag, typename Property,
-            typename Enable = std::enable_if_t<hpx::functional::
-                    is_tag_invocable_v<Tag, BaseScheduler, Property>>>
+            HPX_CONCEPT_REQUIRES_(
+                hpx::execution::experimental::is_scheduling_property_v<Tag> &&
+                hpx::functional::is_tag_invocable_v<
+                    Tag, BaseScheduler, Property>
+            )>
+        // clang-format on
         friend explicit_scheduler_executor tag_invoke(
             Tag tag, explicit_scheduler_executor const& exec, Property&& prop)
         {
@@ -102,9 +108,13 @@ namespace hpx::execution::experimental {
                 tag, exec.sched_, HPX_FORWARD(Property, prop)));
         }
 
+        // clang-format off
         template <typename Tag,
-            typename Enable = std::enable_if_t<
-                hpx::functional::is_tag_invocable_v<Tag, BaseScheduler>>>
+            HPX_CONCEPT_REQUIRES_(
+                hpx::execution::experimental::is_scheduling_property_v<Tag> &&
+                hpx::functional::is_tag_invocable_v<Tag, BaseScheduler>
+            )>
+        // clang-format on
         friend decltype(auto) tag_invoke(
             Tag tag, explicit_scheduler_executor const& exec)
         {
diff --git a/libs/core/executors/include/hpx/executors/fork_join_executor.hpp b/libs/core/executors/include/hpx/executors/fork_join_executor.hpp
index d3c00ac30579..92506c2d9aee 100644
--- a/libs/core/executors/include/hpx/executors/fork_join_executor.hpp
+++ b/libs/core/executors/include/hpx/executors/fork_join_executor.hpp
@@ -1,4 +1,5 @@
 //  Copyright (c) 2020 ETH Zurich
+//  Copyright (c) 2022 Hartmut Kaiser
 //
 //  SPDX-License-Identifier: BSL-1.0
 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
@@ -26,12 +27,11 @@
 #include <hpx/modules/format.hpp>
 #include <hpx/modules/hardware.hpp>
 #include <hpx/modules/itt_notify.hpp>
+#include <hpx/modules/topology.hpp>
 #include <hpx/resource_partitioner/detail/partitioner.hpp>
 #include <hpx/synchronization/spinlock.hpp>
 #include <hpx/threading/thread.hpp>
 #include <hpx/threading_base/annotated_function.hpp>
-#include <hpx/topology/cpu_mask.hpp>
-#include <hpx/topology/topology.hpp>
 
 #include <atomic>
 #include <chrono>
@@ -858,6 +858,19 @@ namespace hpx { namespace execution { namespace experimental {
             return exec.shared_data_->annotation_;
         }
 
+        friend auto tag_invoke(
+            hpx::execution::experimental::get_processing_units_mask_t,
+            fork_join_executor const& exec) noexcept
+        {
+            return exec.shared_data_->pu_mask_;
+        }
+
+        friend auto tag_invoke(hpx::execution::experimental::get_cores_mask_t,
+            fork_join_executor const& exec) noexcept
+        {
+            return exec.shared_data_->pu_mask_;
+        }
+
         /// \cond NOINTERNAL
         enum class init_mode
         {
diff --git a/libs/core/executors/include/hpx/executors/parallel_executor.hpp b/libs/core/executors/include/hpx/executors/parallel_executor.hpp
index a49c43d2f989..ca6fcb7e0405 100644
--- a/libs/core/executors/include/hpx/executors/parallel_executor.hpp
+++ b/libs/core/executors/include/hpx/executors/parallel_executor.hpp
@@ -30,6 +30,8 @@
 #include <hpx/futures/future.hpp>
 #include <hpx/futures/traits/future_traits.hpp>
 #include <hpx/iterator_support/range.hpp>
+#include <hpx/modules/concepts.hpp>
+#include <hpx/modules/topology.hpp>
 #include <hpx/serialization/serialize.hpp>
 #include <hpx/threading_base/annotated_function.hpp>
 #include <hpx/threading_base/scheduler_base.hpp>
@@ -164,9 +166,13 @@ namespace hpx { namespace execution {
         // property implementations
 
         // support all properties exposed by the embedded policy
+        // clang-format off
         template <typename Tag, typename Property,
-            typename Enable = std::enable_if_t<
-                hpx::functional::is_tag_invocable_v<Tag, Policy, Property>>>
+            HPX_CONCEPT_REQUIRES_(
+                hpx::execution::experimental::is_scheduling_property_v<Tag> &&
+                hpx::functional::is_tag_invocable_v<Tag, Policy, Property>
+            )>
+        // clang-format on
         friend parallel_policy_executor tag_invoke(
             Tag tag, parallel_policy_executor const& exec, Property&& prop)
         {
@@ -176,9 +182,13 @@ namespace hpx { namespace execution {
             return exec_with_prop;
         }
 
+        // clang-format off
         template <typename Tag,
-            typename Enable = std::enable_if_t<
-                hpx::functional::is_tag_invocable_v<Tag, Policy>>>
+            HPX_CONCEPT_REQUIRES_(
+                hpx::execution::experimental::is_scheduling_property_v<Tag> &&
+                hpx::functional::is_tag_invocable_v<Tag, Policy>
+            )>
+        // clang-format on
         friend decltype(auto) tag_invoke(
             Tag tag, parallel_policy_executor const& exec)
         {
@@ -229,6 +239,31 @@ namespace hpx { namespace execution {
             return exec.get_num_cores();
         }
 
+        // Implements the get_processing_units_mask property: asks the
+        // executor's pool (the self/default pool if exec.pool_ is not set)
+        // for the mask of PUs used by get_num_cores() of its threads.
+        friend auto tag_invoke(
+            hpx::execution::experimental::get_processing_units_mask_t,
+            parallel_policy_executor const& exec)
+        {
+            auto pool = exec.pool_ ?
+                exec.pool_ :
+                threads::detail::get_self_or_default_pool();
+            return pool->get_used_processing_units(exec.get_num_cores(), false);
+        }
+
+        // Implements the get_cores_mask property: same query as for
+        // get_processing_units_mask, but with full_cores == true, i.e. the
+        // pool reports core affinity masks instead of per-thread PU masks.
+        friend auto tag_invoke(hpx::execution::experimental::get_cores_mask_t,
+            parallel_policy_executor const& exec)
+        {
+            auto pool = exec.pool_ ?
+                exec.pool_ :
+                threads::detail::get_self_or_default_pool();
+            return pool->get_used_processing_units(exec.get_num_cores(), true);
+        }
+
     public:
         // backwards compatibility support, will be removed in the future
         template <typename Parameters>
diff --git a/libs/core/executors/include/hpx/executors/restricted_thread_pool_executor.hpp b/libs/core/executors/include/hpx/executors/restricted_thread_pool_executor.hpp
index 7c421809bf80..73c7e08bf21c 100644
--- a/libs/core/executors/include/hpx/executors/restricted_thread_pool_executor.hpp
+++ b/libs/core/executors/include/hpx/executors/restricted_thread_pool_executor.hpp
@@ -14,6 +14,7 @@
 #include <hpx/execution/execution.hpp>
 #include <hpx/execution/executors/execution_parameters.hpp>
 #include <hpx/executors/parallel_executor.hpp>
+#include <hpx/modules/concepts.hpp>
 
 #include <atomic>
 #include <cstddef>
@@ -97,6 +98,16 @@ namespace hpx::parallel::execution {
                     hpx::parallel::execution::processing_units_count(exec_)));
         }
 
+        // Returns first_thread_ plus the current value of os_thread_; note
+        // that os_thread_ is post-incremented by every call.
+        // NOTE(review): the sum is not reduced modulo anything here and is
+        // narrowed to a signed 16 bit value, while generate_executor() takes
+        // a std::uint16_t - confirm that both are intended.
+        std::int16_t get_current_thread_num() const
+        {
+            return static_cast<std::int16_t>(first_thread_ + os_thread_++);
+        }
+
         embedded_executor generate_executor(std::uint16_t thread_num) const
         {
             return hpx::execution::experimental::with_hint(
@@ -107,25 +113,40 @@ namespace hpx::parallel::execution {
         // property implementations
 
         // support all properties exposed by the embedded executor
+        // clang-format off
         template <typename Tag, typename Property,
-            typename Enable = std::enable_if_t<hpx::functional::
-                    is_tag_invocable_v<Tag, embedded_executor, Property>>>
+            HPX_CONCEPT_REQUIRES_(
+                hpx::execution::experimental::is_scheduling_property_v<Tag> &&
+                hpx::functional::is_tag_invocable_v<
+                    Tag, embedded_executor, Property>
+            )>
+        // clang-format on
         friend restricted_policy_executor tag_invoke(
-            Tag, restricted_policy_executor const& exec, Property&& prop)
+            Tag tag, restricted_policy_executor const& exec, Property&& prop)
         {
             auto exec_with_prop = exec;
-            exec_with_prop.exec_ =
-                Tag{}(exec.exec_, HPX_FORWARD(Property, prop));
+            // apply the property to an executor generated for the thread
+            // number returned by get_current_thread_num()
+            exec_with_prop.exec_ = hpx::functional::tag_invoke(tag,
+                exec.generate_executor(exec.get_current_thread_num()),
+                HPX_FORWARD(Property, prop));
             return exec_with_prop;
         }
 
+        // clang-format off
         template <typename Tag,
-            typename Enable = std::enable_if_t<
-                hpx::functional::is_tag_invocable_v<Tag, embedded_executor>>>
+            HPX_CONCEPT_REQUIRES_(
+                hpx::execution::experimental::is_scheduling_property_v<Tag> &&
+                hpx::functional::is_tag_invocable_v<Tag, embedded_executor>
+            )>
+        // clang-format on
         friend decltype(auto) tag_invoke(
-            Tag, restricted_policy_executor const& exec)
+            Tag tag, restricted_policy_executor const& exec)
         {
-            return Tag{}(exec.exec_);
+            // forward the query to an executor generated for the thread
+            // number returned by get_current_thread_num()
+            return hpx::functional::tag_invoke(
+                tag, exec.generate_executor(exec.get_current_thread_num()));
         }
 
         // executor API
diff --git a/libs/core/executors/include/hpx/executors/scheduler_executor.hpp b/libs/core/executors/include/hpx/executors/scheduler_executor.hpp
index 3c6fd853ed13..5112f471ed8b 100644
--- a/libs/core/executors/include/hpx/executors/scheduler_executor.hpp
+++ b/libs/core/executors/include/hpx/executors/scheduler_executor.hpp
@@ -27,6 +27,8 @@
 #include <hpx/functional/deferred_call.hpp>
 #include <hpx/functional/invoke_fused.hpp>
 #include <hpx/functional/tag_invoke.hpp>
+#include <hpx/modules/concepts.hpp>
+#include <hpx/modules/topology.hpp>
 
 #include <exception>
 #include <string>
@@ -108,9 +110,14 @@ namespace hpx::execution::experimental {
         }
 
         // support all properties exposed by the wrapped scheduler
+        // clang-format off
         template <typename Tag, typename Property,
-            typename Enable = std::enable_if_t<hpx::functional::
-                    is_tag_invocable_v<Tag, BaseScheduler, Property>>>
+            HPX_CONCEPT_REQUIRES_(
+                hpx::execution::experimental::is_scheduling_property_v<Tag> &&
+                hpx::functional::is_tag_invocable_v<
+                    Tag, BaseScheduler, Property>
+            )>
+        // clang-format on
         friend scheduler_executor tag_invoke(
             Tag tag, scheduler_executor const& exec, Property&& prop)
         {
@@ -118,9 +125,13 @@ namespace hpx::execution::experimental {
                 tag, exec.sched_, HPX_FORWARD(Property, prop)));
         }
 
+        // clang-format off
         template <typename Tag,
-            typename Enable = std::enable_if_t<
-                hpx::functional::is_tag_invocable_v<Tag, BaseScheduler>>>
+            HPX_CONCEPT_REQUIRES_(
+                hpx::execution::experimental::is_scheduling_property_v<Tag> &&
+                hpx::functional::is_tag_invocable_v<Tag, BaseScheduler>
+            )>
+        // clang-format on
         friend decltype(auto) tag_invoke(
             Tag tag, scheduler_executor const& exec)
         {
diff --git a/libs/core/executors/include/hpx/executors/sequenced_executor.hpp b/libs/core/executors/include/hpx/executors/sequenced_executor.hpp
index 90ac4af1701f..5bff64141a3f 100644
--- a/libs/core/executors/include/hpx/executors/sequenced_executor.hpp
+++ b/libs/core/executors/include/hpx/executors/sequenced_executor.hpp
@@ -18,10 +18,13 @@
 #include <hpx/functional/deferred_call.hpp>
 #include <hpx/functional/invoke.hpp>
 #include <hpx/futures/future.hpp>
+#include <hpx/modules/topology.hpp>
 #include <hpx/pack_traversal/unwrap.hpp>
 #include <hpx/serialization/serialize.hpp>
 #include <hpx/threading_base/annotated_function.hpp>
+#include <hpx/threading_base/detail/get_default_pool.hpp>
 #include <hpx/threading_base/thread_description.hpp>
+#include <hpx/threading_base/thread_num_tss.hpp>
 #include <hpx/type_support/unused.hpp>
 
 #include <cstddef>
@@ -218,6 +221,30 @@ namespace hpx { namespace execution {
             return 1;
         }
 
+        // Implements the get_processing_units_mask property: returns the PU
+        // mask the self/default pool reports for the calling worker thread.
+        // NOTE(review): get_used_processing_unit() adds the pool's thread
+        // offset to the number passed in - confirm that
+        // hpx::get_worker_thread_num() is pool-relative.
+        friend auto tag_invoke(
+            hpx::execution::experimental::get_processing_units_mask_t,
+            sequenced_executor const&)
+        {
+            return threads::detail::get_self_or_default_pool()
+                ->get_used_processing_unit(hpx::get_worker_thread_num(), false);
+        }
+
+        // Implements the get_cores_mask property: as above, but with
+        // full_cores == true so that the mask of the whole core is returned.
+        // NOTE(review): the thread-offset question raised for
+        // get_processing_units_mask applies here as well.
+        friend auto tag_invoke(hpx::execution::experimental::get_cores_mask_t,
+            sequenced_executor const&)
+        {
+            return threads::detail::get_self_or_default_pool()
+                ->get_used_processing_unit(hpx::get_worker_thread_num(), true);
+        }
+
     private:
         friend class hpx::serialization::access;
 
diff --git a/libs/core/executors/include/hpx/executors/thread_pool_scheduler.hpp b/libs/core/executors/include/hpx/executors/thread_pool_scheduler.hpp
index 8e5b1cf47711..8c6ce12db7a9 100644
--- a/libs/core/executors/include/hpx/executors/thread_pool_scheduler.hpp
+++ b/libs/core/executors/include/hpx/executors/thread_pool_scheduler.hpp
@@ -20,6 +20,7 @@
 #include <hpx/execution_base/completion_signatures.hpp>
 #include <hpx/execution_base/receiver.hpp>
 #include <hpx/execution_base/sender.hpp>
+#include <hpx/modules/topology.hpp>
 #include <hpx/threading_base/annotated_function.hpp>
 #include <hpx/threading_base/register_thread.hpp>
 
@@ -101,6 +102,7 @@ namespace hpx::execution::experimental {
         // clang-format off
         template <typename Tag, typename Property,
             HPX_CONCEPT_REQUIRES_(
+                hpx::execution::experimental::is_scheduling_property_v<Tag> &&
                 hpx::functional::is_tag_invocable_v<Tag, Policy, Property>
             )>
         // clang-format on
@@ -108,21 +110,22 @@ namespace hpx::execution::experimental {
             thread_pool_policy_scheduler const& scheduler, Property&& prop)
         {
             auto scheduler_with_prop = scheduler;
-            scheduler_with_prop.policy_ =
-                tag(scheduler.policy_, HPX_FORWARD(Property, prop));
+            scheduler_with_prop.policy_ = hpx::functional::tag_invoke(
+                tag, scheduler.policy_, HPX_FORWARD(Property, prop));
             return scheduler_with_prop;
         }
 
         // clang-format off
         template <typename Tag,
             HPX_CONCEPT_REQUIRES_(
+                hpx::execution::experimental::is_scheduling_property_v<Tag> &&
                 hpx::functional::is_tag_invocable_v<Tag, Policy>
             )>
         // clang-format on
         friend decltype(auto) tag_invoke(
             Tag tag, thread_pool_policy_scheduler const& scheduler)
         {
-            return tag(scheduler.policy_);
+            return hpx::functional::tag_invoke(tag, scheduler.policy_);
         }
 
         friend constexpr thread_pool_policy_scheduler tag_invoke(
@@ -174,6 +177,31 @@ namespace hpx::execution::experimental {
         }
 #endif
 
+        // Implements the get_processing_units_mask property: asks the
+        // scheduler's pool (the self/default pool if exec.pool_ is not set)
+        // for the mask of PUs used by get_num_cores() of its threads.
+        friend auto tag_invoke(
+            hpx::execution::experimental::get_processing_units_mask_t,
+            thread_pool_policy_scheduler const& exec)
+        {
+            auto pool = exec.pool_ ?
+                exec.pool_ :
+                threads::detail::get_self_or_default_pool();
+            return pool->get_used_processing_units(exec.get_num_cores(), false);
+        }
+
+        // Implements the get_cores_mask property: same query as for
+        // get_processing_units_mask, but with full_cores == true, i.e. the
+        // pool reports core affinity masks instead of per-thread PU masks.
+        friend auto tag_invoke(hpx::execution::experimental::get_cores_mask_t,
+            thread_pool_policy_scheduler const& exec)
+        {
+            auto pool = exec.pool_ ?
+                exec.pool_ :
+                threads::detail::get_self_or_default_pool();
+            return pool->get_used_processing_units(exec.get_num_cores(), true);
+        }
+
         template <typename F>
         void execute(F&& f, Policy const& policy) const
         {
diff --git a/libs/core/executors/tests/unit/fork_join_executor.cpp b/libs/core/executors/tests/unit/fork_join_executor.cpp
index 53a4ed652837..c572643f42f3 100644
--- a/libs/core/executors/tests/unit/fork_join_executor.cpp
+++ b/libs/core/executors/tests/unit/fork_join_executor.cpp
@@ -1,5 +1,5 @@
 //  Copyright (c)      2020 ETH Zurich
-//  Copyright (c) 2007-2016 Hartmut Kaiser
+//  Copyright (c) 2007-2022 Hartmut Kaiser
 //
 //  SPDX-License-Identifier: BSL-1.0
 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
@@ -27,6 +27,28 @@ using hpx::execution::experimental::fork_join_executor;
 
 static std::atomic<std::size_t> count{0};
 
+///////////////////////////////////////////////////////////////////////////////
+// Constructs a fork_join_executor from the PU mask the resource partitioner
+// reports for hpx::get_worker_thread_num() and verifies that the executor
+// returns that same mask for both mask properties.
+template <typename... ExecutorArgs>
+void test_processing_mask(ExecutorArgs&&... args)
+{
+    std::cerr << "test_processing_mask\n";
+
+    auto& rp = hpx::resource::get_partitioner();
+    auto const& expected_mask =
+        rp.get_used_pus_mask(hpx::get_worker_thread_num());
+
+    fork_join_executor exec{expected_mask, std::forward<ExecutorArgs>(args)...};
+    auto pus_mask =
+        hpx::execution::experimental::get_processing_units_mask(exec);
+    HPX_TEST(pus_mask == expected_mask);
+
+    auto cores_mask = hpx::execution::experimental::get_cores_mask(exec);
+    HPX_TEST(cores_mask == expected_mask);
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 void bulk_test(int, int passed_through)    //-V813
 {
@@ -173,6 +192,8 @@ void test_executor(hpx::threads::thread_priority priority,
     test_bulk_async(priority, stacksize, schedule);
     test_bulk_sync_exception(priority, stacksize, schedule);
     test_bulk_async_exception(priority, stacksize, schedule);
+
+    test_processing_mask(priority, stacksize, schedule);
 }
 
 ///////////////////////////////////////////////////////////////////////////////
diff --git a/libs/core/executors/tests/unit/parallel_executor.cpp b/libs/core/executors/tests/unit/parallel_executor.cpp
index 21e2b6b57354..49ab699e561c 100644
--- a/libs/core/executors/tests/unit/parallel_executor.cpp
+++ b/libs/core/executors/tests/unit/parallel_executor.cpp
@@ -1,4 +1,4 @@
-//  Copyright (c) 2007-2016 Hartmut Kaiser
+//  Copyright (c) 2007-2022 Hartmut Kaiser
 //
 //  SPDX-License-Identifier: BSL-1.0
 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
@@ -162,6 +162,30 @@ void static_check_executor()
         "is_bulk_two_way_executor_v<executor>");
 }
 
+// Checks that a default constructed parallel_executor reports the same PU and
+// core masks as the self/default pool does for all of its OS threads.
+void test_processing_mask()
+{
+    hpx::execution::parallel_executor exec;
+
+    {
+        auto pool = hpx::threads::detail::get_self_or_default_pool();
+        auto expected_mask =
+            pool->get_used_processing_units(pool->get_os_thread_count(), false);
+        auto mask =
+            hpx::execution::experimental::get_processing_units_mask(exec);
+        HPX_TEST(mask == expected_mask);
+    }
+
+    {
+        auto pool = hpx::threads::detail::get_self_or_default_pool();
+        auto expected_mask =
+            pool->get_used_processing_units(pool->get_os_thread_count(), true);
+        auto mask = hpx::execution::experimental::get_cores_mask(exec);
+        HPX_TEST(mask == expected_mask);
+    }
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 int hpx_main()
 {
@@ -175,6 +197,8 @@ int hpx_main()
     test_bulk_async();
     test_bulk_then();
 
+    test_processing_mask();
+
     return hpx::local::finalize();
 }
 
diff --git a/libs/core/resource_partitioner/include/hpx/resource_partitioner/detail/partitioner.hpp b/libs/core/resource_partitioner/include/hpx/resource_partitioner/detail/partitioner.hpp
index 50e5a464ada9..34648a316819 100644
--- a/libs/core/resource_partitioner/include/hpx/resource_partitioner/detail/partitioner.hpp
+++ b/libs/core/resource_partitioner/include/hpx/resource_partitioner/detail/partitioner.hpp
@@ -1,4 +1,5 @@
-//  Copyright (c)      2017 Shoshana Jakobovits
+//  Copyright (c) 2017 Shoshana Jakobovits
+//  Copyright (c) 2017-2022 Hartmut Kaiser
 //
 //  SPDX-License-Identifier: BSL-1.0
 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
@@ -155,6 +156,8 @@ namespace hpx { namespace resource { namespace detail {
         threads::mask_cref_type get_pu_mask(
             std::size_t global_thread_num) const;
         std::size_t get_thread_occupancy(std::size_t pu_num) const;
+        // forwards to affinity_data::get_used_pus_mask() for the given PU
+        threads::mask_type get_used_pus_mask(std::size_t pu_num) const;
 
         void init(resource::partitioner_mode rpmode, hpx::util::section cfg,
             hpx::threads::policies::detail::affinity_data affinity_data);
diff --git a/libs/core/resource_partitioner/src/detail_partitioner.cpp b/libs/core/resource_partitioner/src/detail_partitioner.cpp
index a158c048b1d9..0cb712ba4c18 100644
--- a/libs/core/resource_partitioner/src/detail_partitioner.cpp
+++ b/libs/core/resource_partitioner/src/detail_partitioner.cpp
@@ -1,4 +1,5 @@
-//  Copyright (c)      2017 Shoshana Jakobovits
+//  Copyright (c) 2017 Shoshana Jakobovits
+//  Copyright (c) 2017-2022 Hartmut Kaiser
 //
 //  SPDX-License-Identifier: BSL-1.0
 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
@@ -868,6 +869,13 @@ namespace hpx { namespace resource { namespace detail {
         return affinity_data_.get_thread_occupancy(topo_, pu_num);
     }
 
+    // Returns the used-PUs mask that affinity_data_ computes for pu_num on
+    // the partitioner's topology.
+    threads::mask_type partitioner::get_used_pus_mask(std::size_t pu_num) const
+    {
+        return affinity_data_.get_used_pus_mask(topo_, pu_num);
+    }
+
     threads::mask_cref_type partitioner::get_pu_mask(
         std::size_t global_thread_num) const
     {
diff --git a/libs/core/synchronization/include/hpx/synchronization/once.hpp b/libs/core/synchronization/include/hpx/synchronization/once.hpp
index c813ff6f9e5b..29659398159b 100644
--- a/libs/core/synchronization/include/hpx/synchronization/once.hpp
+++ b/libs/core/synchronization/include/hpx/synchronization/once.hpp
@@ -79,7 +79,7 @@ namespace hpx {
     ///          active call, with no additional synchronization.
     ///
     /// \param flag    an object, for which exactly one function gets executed
-    /// \param f	   Callable object to invoke
+    /// \param f       Callable object to invoke
     /// \param args... arguments to pass to the function
     ///
     /// \throws std::system_error if any condition prevents calls to \a call_once
diff --git a/libs/core/threading_base/include/hpx/threading_base/thread_pool_base.hpp b/libs/core/threading_base/include/hpx/threading_base/thread_pool_base.hpp
index d1cf2c8f58c8..d757500016ae 100644
--- a/libs/core/threading_base/include/hpx/threading_base/thread_pool_base.hpp
+++ b/libs/core/threading_base/include/hpx/threading_base/thread_pool_base.hpp
@@ -1,5 +1,5 @@
 //  Copyright (c)      2018 Mikael Simberg
-//  Copyright (c) 2007-2017 Hartmut Kaiser
+//  Copyright (c) 2007-2022 Hartmut Kaiser
 //
 //  SPDX-License-Identifier: BSL-1.0
 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
@@ -217,7 +217,18 @@ namespace hpx { namespace threads {
             return nullptr;
         }
 
-        mask_type get_used_processing_units() const;
+        // Mask of the processing units used by the threads of this pool. If
+        // full_cores is true, whole-core affinity masks are collected.
+        mask_type get_used_processing_units(bool full_cores = false) const;
+        // Same as above, but stops after num_cores threads were accounted
+        // for.
+        mask_type get_used_processing_units(
+            std::size_t num_cores, bool full_cores = false) const;
+        // Mask for the single thread thread_num (relative to this pool's
+        // thread offset).
+        mask_type get_used_processing_unit(
+            std::size_t thread_num, bool full_cores = false) const;
+
         hwloc_bitmap_ptr get_numa_domain_bitmap() const;
 
         // performance counters
diff --git a/libs/core/threading_base/src/thread_pool_base.cpp b/libs/core/threading_base/src/thread_pool_base.cpp
index 04a364abf38e..d9404fca0f1e 100644
--- a/libs/core/threading_base/src/thread_pool_base.cpp
+++ b/libs/core/threading_base/src/thread_pool_base.cpp
@@ -1,4 +1,4 @@
-//  Copyright (c) 2007-2017 Hartmut Kaiser
+//  Copyright (c) 2007-2022 Hartmut Kaiser
 //
 //  SPDX-License-Identifier: BSL-1.0
 //  Distributed under the Boost Software License, Version 1.0. (See accompanying
@@ -36,7 +36,8 @@ namespace hpx { namespace threads {
     }
 
     ///////////////////////////////////////////////////////////////////////////
-    mask_type thread_pool_base::get_used_processing_units() const
+    mask_type thread_pool_base::get_used_processing_units(
+        std::size_t num_cores, bool full_cores) const
     {
         auto const& topo = create_topology();
         auto const sched = get_scheduler();
@@ -44,19 +45,52 @@ namespace hpx { namespace threads {
         mask_type used_processing_units = mask_type();
         threads::resize(used_processing_units, hardware_concurrency());
 
-        for (std::size_t thread_num = 0; thread_num < get_os_thread_count();
-             ++thread_num)
+        // visit the pool's threads in order until either all of them or
+        // num_cores threads in a state <= suspended have been accounted for
+        std::size_t max_cores = get_os_thread_count();
+        for (std::size_t thread_num = 0;
+             thread_num != max_cores && num_cores != 0; ++thread_num)
         {
             if (sched->get_state(thread_num).load() <= hpx::state::suspended)
             {
-                used_processing_units |= affinity_data_.get_pu_mask(
-                    topo, thread_num + get_thread_offset());
+                if (!full_cores)
+                {
+                    used_processing_units |= affinity_data_.get_pu_mask(
+                        topo, thread_num + get_thread_offset());
+                }
+                else
+                {
+                    used_processing_units |= topo.get_core_affinity_mask(
+                        thread_num + get_thread_offset());
+                }
+                --num_cores;
             }
         }
 
         return used_processing_units;
     }
 
+    // Convenience overload: considers all OS threads of the pool.
+    mask_type thread_pool_base::get_used_processing_units(bool full_cores) const
+    {
+        return get_used_processing_units(get_os_thread_count(), full_cores);
+    }
+
+    // Returns the mask for one thread of the pool: the thread's PU mask from
+    // affinity_data_, or the affinity mask of its core if full_cores is
+    // true. No thread state check is performed here.
+    mask_type thread_pool_base::get_used_processing_unit(
+        std::size_t thread_num, bool full_cores) const
+    {
+        auto const& topo = create_topology();
+        if (!full_cores)
+        {
+            return affinity_data_.get_pu_mask(
+                topo, thread_num + get_thread_offset());
+        }
+        return topo.get_core_affinity_mask(thread_num + get_thread_offset());
+    }
+
     hwloc_bitmap_ptr thread_pool_base::get_numa_domain_bitmap() const
     {
         auto const& topo = create_topology();
diff --git a/libs/core/timed_execution/CMakeLists.txt b/libs/core/timed_execution/CMakeLists.txt
index b6b761f34061..3e79351cb2f3 100644
--- a/libs/core/timed_execution/CMakeLists.txt
+++ b/libs/core/timed_execution/CMakeLists.txt
@@ -28,6 +28,7 @@ add_hpx_module(
   GLOBAL_HEADER_GEN ON
   HEADERS ${timed_execution_headers}
   COMPAT_HEADERS ${timed_execution_compat_headers}
-  MODULE_DEPENDENCIES hpx_execution hpx_executors hpx_threading hpx_timing
+  MODULE_DEPENDENCIES hpx_concepts hpx_execution hpx_executors hpx_threading
+                      hpx_timing hpx_topology
   CMAKE_SUBDIRS examples tests
 )
diff --git a/libs/core/timed_execution/include/hpx/timed_execution/timed_executors.hpp b/libs/core/timed_execution/include/hpx/timed_execution/timed_executors.hpp
index 76da1d5f6694..1dd67650c9c8 100644
--- a/libs/core/timed_execution/include/hpx/timed_execution/timed_executors.hpp
+++ b/libs/core/timed_execution/include/hpx/timed_execution/timed_executors.hpp
@@ -16,7 +16,9 @@
 #include <hpx/executors/sequenced_executor.hpp>
 #include <hpx/functional/bind.hpp>
 #include <hpx/futures/future.hpp>
+#include <hpx/modules/concepts.hpp>
 #include <hpx/modules/threading.hpp>
+#include <hpx/modules/topology.hpp>
 #include <hpx/timed_execution/timed_execution.hpp>
 #include <hpx/timing/steady_clock.hpp>
 #include <hpx/type_support/detail/wrap_int.hpp>
@@ -543,9 +545,14 @@ namespace hpx { namespace parallel { namespace execution {
         }
 
         // support all properties exposed by the wrapped executor
+        // clang-format off
         template <typename Tag, typename Property,
-            typename Enable = std::enable_if_t<hpx::functional::
-                    is_tag_invocable_v<Tag, BaseExecutor, Property>>>
+            HPX_CONCEPT_REQUIRES_(
+                hpx::execution::experimental::is_scheduling_property_v<Tag> &&
+                hpx::functional::is_tag_invocable_v<
+                    Tag, BaseExecutor, Property>
+            )>
+        // clang-format on
         friend timed_executor tag_invoke(
             Tag tag, timed_executor const& exec, Property&& prop)
         {
@@ -553,9 +560,13 @@ namespace hpx { namespace parallel { namespace execution {
                 tag, exec.exec_, HPX_FORWARD(Property, prop)));
         }
 
+        // clang-format off
         template <typename Tag,
-            typename Enable = std::enable_if_t<
-                hpx::functional::is_tag_invocable_v<Tag, BaseExecutor>>>
+            HPX_CONCEPT_REQUIRES_(
+                hpx::execution::experimental::is_scheduling_property_v<Tag> &&
+                hpx::functional::is_tag_invocable_v<Tag, BaseExecutor>
+            )>
+        // clang-format on
         friend decltype(auto) tag_invoke(Tag tag, timed_executor const& exec)
         {
             return hpx::functional::tag_invoke(tag, exec.exec_);
diff --git a/libs/core/topology/CMakeLists.txt b/libs/core/topology/CMakeLists.txt
index 804428dfc077..6e955f0b6d1b 100644
--- a/libs/core/topology/CMakeLists.txt
+++ b/libs/core/topology/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2021 The STE||AR-Group
+# Copyright (c) 2019-2022 The STE||AR-Group
 #
 # SPDX-License-Identifier: BSL-1.0
 # Distributed under the Boost Software License, Version 1.0. (See accompanying
@@ -29,7 +29,10 @@ if(HPX_TOPOLOGY_WITH_ADDITIONAL_HWLOC_TESTING)
 endif()
 
 # Default location is $HPX_ROOT/libs/topology/include
-set(topology_headers hpx/topology/cpu_mask.hpp hpx/topology/topology.hpp)
+set(topology_headers
+    hpx/topology/cpu_mask.hpp hpx/topology/scheduling_properties.hpp
+    hpx/topology/topology.hpp
+)
 
 # Default location is $HPX_ROOT/libs/topology/include_compatibility
 # cmake-format: off
@@ -53,6 +56,7 @@ add_hpx_module(
   COMPAT_HEADERS ${topology_compat_headers}
   MODULE_DEPENDENCIES
     hpx_assertion
+    hpx_async_base
     hpx_config
     hpx_concurrency
     hpx_errors
diff --git a/libs/core/topology/include/hpx/topology/scheduling_properties.hpp b/libs/core/topology/include/hpx/topology/scheduling_properties.hpp
new file mode 100644
index 000000000000..bc4f88d4d368
--- /dev/null
+++ b/libs/core/topology/include/hpx/topology/scheduling_properties.hpp
@@ -0,0 +1,65 @@
+//  Copyright (c) 2022 Hartmut Kaiser
+//
+//  SPDX-License-Identifier: BSL-1.0
+//  Distributed under the Boost Software License, Version 1.0. (See accompanying
+//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#pragma once
+
+#include <hpx/config.hpp>
+#include <hpx/async_base/scheduling_properties.hpp>
+#include <hpx/functional/detail/tag_fallback_invoke.hpp>
+#include <hpx/topology/topology.hpp>
+
+#include <type_traits>
+
+namespace hpx::execution::experimental {
+
+    ///////////////////////////////////////////////////////////////////////////
+    // Customization point that queries a target (e.g. an executor or a
+    // scheduler) for the mask of processing units it uses. Targets customize
+    // it by providing a matching tag_invoke overload.
+    inline constexpr struct get_processing_units_mask_t final
+      : hpx::functional::detail::tag_fallback<get_processing_units_mask_t>
+    {
+    private:
+        // simply return machine affinity mask if get_processing_units_mask is
+        // not supported
+        template <typename Target>
+        friend HPX_FORCEINLINE decltype(auto) tag_fallback_invoke(
+            get_processing_units_mask_t, Target&&) noexcept
+        {
+            return hpx::threads::create_topology().get_machine_affinity_mask();
+        }
+    } get_processing_units_mask{};
+
+    // mark get_processing_units_mask_t as a scheduling property
+    template <>
+    struct is_scheduling_property<get_processing_units_mask_t> : std::true_type
+    {
+    };
+
+    ///////////////////////////////////////////////////////////////////////////
+    // Customization point that queries a target (e.g. an executor or a
+    // scheduler) for the mask of the cores it uses. Targets customize it by
+    // providing a matching tag_invoke overload.
+    inline constexpr struct get_cores_mask_t final
+      : hpx::functional::detail::tag_fallback<get_cores_mask_t>
+    {
+    private:
+        // simply return machine affinity mask if get_cores_mask is not
+        // supported
+        template <typename Target>
+        friend HPX_FORCEINLINE decltype(auto) tag_fallback_invoke(
+            get_cores_mask_t, Target&&) noexcept
+        {
+            return hpx::threads::create_topology().get_machine_affinity_mask();
+        }
+    } get_cores_mask{};
+
+    // mark get_cores_mask_t as a scheduling property
+    template <>
+    struct is_scheduling_property<get_cores_mask_t> : std::true_type
+    {
+    };
+}    // namespace hpx::execution::experimental
diff --git a/libs/core/topology/src/topology.cpp b/libs/core/topology/src/topology.cpp
index 07afe94a666c..b16e270f93e6 100644
--- a/libs/core/topology/src/topology.cpp
+++ b/libs/core/topology/src/topology.cpp
@@ -1502,22 +1502,39 @@ namespace hpx { namespace threads {
         hwloc_bitmap_t cpuset = mask_to_bitmap(mask, HWLOC_OBJ_PU);
         std::size_t cache_size = 0;
 
+#if HWLOC_API_VERSION >= 0x00020000
+        hwloc_obj_type_t type = HWLOC_OBJ_L1CACHE;
+        switch (level)
+        {
+        case 2:
+            type = HWLOC_OBJ_L2CACHE;
+            break;
+
+        case 3:
+            type = HWLOC_OBJ_L3CACHE;
+            break;
+
+        case 4:
+            type = HWLOC_OBJ_L4CACHE;
+            break;
+
+        case 5:
+            type = HWLOC_OBJ_L5CACHE;
+            break;
+
+        default:
+            break;
+        }
+#endif
+
         iterate(cpuset, [&](auto num_pu) {
             hwloc_obj_t pu_obj = hwloc_get_obj_by_type(
                 topo, HWLOC_OBJ_PU, static_cast<unsigned>(num_pu));
+
+#if HWLOC_API_VERSION >= 0x00020000
             if (pu_obj == nullptr)
                 return;
 
-            hwloc_obj_type_t type = HWLOC_OBJ_L1CACHE;
-            if (level == 2)
-                type = HWLOC_OBJ_L2CACHE;
-            else if (level == 3)
-                type = HWLOC_OBJ_L3CACHE;
-            else if (level == 4)
-                type = HWLOC_OBJ_L4CACHE;
-            else if (level == 5)
-                type = HWLOC_OBJ_L5CACHE;
-
             hwloc_obj_t cache_obj =
                 hwloc_get_ancestor_obj_by_type(topo, type, pu_obj);
             if (cache_obj == nullptr)
@@ -1525,6 +1542,19 @@ namespace hpx { namespace threads {
 
             cache_size += std::size_t(cache_obj->attr->cache.size) /
                 num_set_bits(cache_obj->cpuset);
+#else
+            // traverse up until the requested cache level is found
+            int levels = 0;
+            for (hwloc_obj_t obj = pu_obj; obj != nullptr && levels < level;
+                 obj = obj->parent)
+            {
+                if (obj->type != HWLOC_OBJ_CACHE || ++levels != level)
+                    continue;
+
+                cache_size += std::size_t(obj->attr->cache.size) /
+                    num_set_bits(obj->cpuset);
+            }
+#endif
         });
 
         hwloc_bitmap_free(cpuset);