From 7e6d39bf692a2867aab16e9781ad52f30b5fafad Mon Sep 17 00:00:00 2001 From: Hartmut Kaiser Date: Mon, 3 Oct 2022 16:23:47 -0500 Subject: [PATCH 1/2] Expose available cache sizes from topology object --- .../examples/system_characteristics.hpp | 13 ++ .../include/hpx/topology/topology.hpp | 9 ++ libs/core/topology/src/topology.cpp | 129 ++++++++++++++---- 3 files changed, 121 insertions(+), 30 deletions(-) diff --git a/libs/core/resource_partitioner/examples/system_characteristics.hpp b/libs/core/resource_partitioner/examples/system_characteristics.hpp index 70e150f4c0dc..f4d16b0ea7f6 100644 --- a/libs/core/resource_partitioner/examples/system_characteristics.hpp +++ b/libs/core/resource_partitioner/examples/system_characteristics.hpp @@ -50,4 +50,17 @@ void print_system_characteristics() //! -------------------------------------- topology topo.print_hwloc(std::cout); + + //! -------------------------------------- cache sizes + hpx::threads::mask_type core0 = topo.get_core_affinity_mask(0); + std::cout << "[System Cache sizes (core 0)]\n" + << "L1 Cache: " << topo.get_cache_size(core0, 1) << "\n" + << "L2 Cache: " << topo.get_cache_size(core0, 2) << "\n" + << "L3 Cache: " << topo.get_cache_size(core0, 3) << "\n\n"; + + hpx::threads::mask_type machine = topo.get_machine_affinity_mask(); + std::cout << "[System Cache sizes (all available cores)]\n" + << "L1 Cache: " << topo.get_cache_size(machine, 1) << "\n" + << "L2 Cache: " << topo.get_cache_size(machine, 2) << "\n" + << "L3 Cache: " << topo.get_cache_size(machine, 3) << "\n\n"; } diff --git a/libs/core/topology/include/hpx/topology/topology.hpp b/libs/core/topology/include/hpx/topology/topology.hpp index a68c53b72ee2..64af890487a4 100644 --- a/libs/core/topology/include/hpx/topology/topology.hpp +++ b/libs/core/topology/include/hpx/topology/topology.hpp @@ -266,6 +266,9 @@ namespace hpx { namespace threads { std::size_t get_pu_number(std::size_t num_core, std::size_t num_pu, error_code& ec = throws) const; + /// Return the size of the cache associated with the given mask. + std::size_t get_cache_size(mask_type mask, int level) const; + mask_type get_cpubind_mask(error_code& ec = throws) const; mask_type get_cpubind_mask( std::thread& handle, error_code& ec = throws) const; @@ -339,8 +342,12 @@ namespace hpx { namespace threads { void extract_node_mask(hwloc_obj_t parent, mask_type& mask) const; + std::size_t get_number_of_core_pus_locked(std::size_t core) const; + std::size_t extract_node_count( hwloc_obj_t parent, hwloc_obj_type_t type, std::size_t count) const; + std::size_t extract_node_count_locked( + hwloc_obj_t parent, hwloc_obj_type_t type, std::size_t count) const; mask_type init_machine_affinity_mask() const; mask_type init_socket_affinity_mask(std::size_t num_thread) const @@ -364,6 +371,8 @@ namespace hpx { namespace threads { void init_num_of_pus(); + hwloc_obj_t get_pu_obj(std::size_t num_core) const; + hwloc_topology_t topo; // We need to define a constant pu offset. diff --git a/libs/core/topology/src/topology.cpp b/libs/core/topology/src/topology.cpp index bd6d1bad6ffd..07afe94a666c 100644 --- a/libs/core/topology/src/topology.cpp +++ b/libs/core/topology/src/topology.cpp @@ -691,11 +691,9 @@ namespace hpx { namespace threads { } } // }}} - std::size_t topology::extract_node_count( + std::size_t topology::extract_node_count_locked( hwloc_obj_t parent, hwloc_obj_type_t type, std::size_t count) const { // {{{ - hwloc_obj_t obj; - if (parent == nullptr) { return count; @@ -706,37 +704,29 @@ namespace hpx { namespace threads { return count; } - { - std::unique_lock lk(topo_mtx); - obj = hwloc_get_next_child(topo, parent, nullptr); - } + hwloc_obj_t obj = hwloc_get_next_child(topo, parent, nullptr); while (obj) { if (hwloc_compare_types(type, obj->type) == 0) { - /* - do { - ++count; - { - std::unique_lock lk(topo_mtx); - obj = hwloc_get_next_child(topo, parent, obj); - } - } while (obj != nullptr && hwloc_compare_types(type, obj->type) == 0); - return count; - */ ++count; } - count = extract_node_count(obj, type, count); - - std::unique_lock lk(topo_mtx); + count = extract_node_count_locked(obj, type, count); obj = hwloc_get_next_child(topo, parent, obj); } return count; } // }}} + std::size_t topology::extract_node_count( + hwloc_obj_t parent, hwloc_obj_type_t type, std::size_t count) const + { // {{{ + std::unique_lock lk(topo_mtx); + return extract_node_count_locked(parent, type, count); + } // }}} + std::size_t topology::get_number_of_sockets() const { int nobjs = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_SOCKET); @@ -844,26 +834,27 @@ namespace hpx { namespace threads { return num_of_pus_; } - std::size_t topology::get_number_of_core_pus(std::size_t core) const + std::size_t topology::get_number_of_core_pus_locked(std::size_t core) const { - hwloc_obj_t core_obj = nullptr; - - { - std::unique_lock lk(topo_mtx); - core_obj = hwloc_get_obj_by_type( - topo, HWLOC_OBJ_CORE, static_cast(core)); - } + hwloc_obj_t core_obj = hwloc_get_obj_by_type( + topo, HWLOC_OBJ_CORE, static_cast(core)); if (!use_pus_as_cores_ && core_obj) { HPX_ASSERT(core == detail::get_index(core_obj)); std::size_t pu_count = 0; - return extract_node_count(core_obj, HWLOC_OBJ_PU, pu_count); + return extract_node_count_locked(core_obj, HWLOC_OBJ_PU, pu_count); } return std::size_t(1); } + std::size_t topology::get_number_of_core_pus(std::size_t core) const + { + std::unique_lock lk(topo_mtx); + return get_number_of_core_pus_locked(core); + } + std::size_t topology::get_number_of_socket_cores( std::size_t num_socket) const { @@ -1456,12 +1447,90 @@ namespace hpx { namespace threads { #endif } - /// Free memory that was previously allocated by allocate + // Free memory that was previously allocated by allocate void topology::deallocate(void* addr, std::size_t len) const noexcept { hwloc_free(topo, addr, len); } + //////////////////////////////////////////////////////////////////////////// + hwloc_obj_t topology::get_pu_obj(std::size_t num_pu) const + { + hwloc_obj_t pu_obj = hwloc_get_obj_by_type( + topo, HWLOC_OBJ_CORE, static_cast(num_pu)); + + if (pu_obj == nullptr) + { + HPX_THROW_EXCEPTION(no_success, "topology::get_core_obj", + "Couldn't find required object representing the given core in " + "topology"); + } + + return pu_obj; + } + + template + static void iterate(hwloc_bitmap_t cpuset, F&& f) + { + for (auto id = hwloc_bitmap_first(cpuset); + (unsigned) id != (unsigned) -1; id = hwloc_bitmap_next(cpuset, id)) + { + if (hwloc_bitmap_isset(cpuset, id)) + { + f(id); + } + } + } + + static auto num_set_bits(hwloc_bitmap_t cpuset) + { + std::size_t count = 0; + iterate(cpuset, [&](auto) { ++count; }); + return count; + } + + // Return the size of the cache associated with the given cpuset. + std::size_t topology::get_cache_size(mask_type mask, int level) const + { + if (level < 1 || level > 5) + { + return 0; + } + + std::unique_lock lk(topo_mtx); + + hwloc_bitmap_t cpuset = mask_to_bitmap(mask, HWLOC_OBJ_PU); + std::size_t cache_size = 0; + + iterate(cpuset, [&](auto num_pu) { + hwloc_obj_t pu_obj = hwloc_get_obj_by_type( + topo, HWLOC_OBJ_PU, static_cast(num_pu)); + if (pu_obj == nullptr) + return; + + hwloc_obj_type_t type = HWLOC_OBJ_L1CACHE; + if (level == 2) + type = HWLOC_OBJ_L2CACHE; + else if (level == 3) + type = HWLOC_OBJ_L3CACHE; + else if (level == 4) + type = HWLOC_OBJ_L4CACHE; + else if (level == 5) + type = HWLOC_OBJ_L5CACHE; + + hwloc_obj_t cache_obj = + hwloc_get_ancestor_obj_by_type(topo, type, pu_obj); + if (cache_obj == nullptr) + return; + + cache_size += std::size_t(cache_obj->attr->cache.size) / + num_set_bits(cache_obj->cpuset); + }); + + hwloc_bitmap_free(cpuset); + return cache_size; + } + /////////////////////////////////////////////////////////////////////////// hwloc_bitmap_t topology::mask_to_bitmap( mask_cref_type mask, hwloc_obj_type_t htype) const From 37a248d553f36abcc303f1253d20f601373d5ece Mon Sep 17 00:00:00 2001 From: Hartmut Kaiser Date: Tue, 4 Oct 2022 11:49:05 -0500 Subject: [PATCH 2/2] Adding get_processing_units_mask scheduling property - adding support for this to all executors, where possible --- examples/1d_stencil/CMakeLists.txt | 2 +- .../include/hpx/affinity/affinity_data.hpp | 29 ++++++---- libs/core/affinity/src/affinity_data.cpp | 2 +- libs/core/asio/include/hpx/asio/asio_util.hpp | 3 +- .../asio/include/hpx/asio/map_hostnames.hpp | 3 +- libs/core/asio/src/asio_util.cpp | 4 +- .../hpx/async_base/scheduling_properties.hpp | 47 +++++++++++++++ libs/core/compute_local/CMakeLists.txt | 1 + .../host/block_fork_join_executor.hpp | 10 +++- .../datastructures/detail/dynamic_bitset.hpp | 3 + .../hpx/executors/annotating_executor.hpp | 20 +++++-- .../executors/explicit_scheduler_executor.hpp | 20 +++++-- .../hpx/executors/fork_join_executor.hpp | 17 +++++- .../hpx/executors/parallel_executor.hpp | 37 ++++++++++-- .../restricted_thread_pool_executor.hpp | 35 +++++++++--- .../hpx/executors/scheduler_executor.hpp | 19 +++++-- .../hpx/executors/sequenced_executor.hpp | 18 ++++++ .../hpx/executors/thread_pool_scheduler.hpp | 28 ++++++++- .../tests/unit/fork_join_executor.cpp | 23 +++++++- .../tests/unit/parallel_executor.cpp | 26 ++++++++- .../detail/partitioner.hpp | 4 +- .../src/detail_partitioner.cpp | 8 ++- .../include/hpx/synchronization/once.hpp | 2 +- .../hpx/threading_base/thread_pool_base.hpp | 9 ++- .../threading_base/src/thread_pool_base.cpp | 40 +++++++++++-- libs/core/timed_execution/CMakeLists.txt | 3 +- .../hpx/timed_execution/timed_executors.hpp | 19 +++++-- libs/core/topology/CMakeLists.txt | 8 ++- .../hpx/topology/scheduling_properties.hpp | 57 +++++++++++++++++++ libs/core/topology/src/topology.cpp | 50 ++++++++++++---- 30 files changed, 465 insertions(+), 82 deletions(-) create mode 100644 libs/core/topology/include/hpx/topology/scheduling_properties.hpp diff --git a/examples/1d_stencil/CMakeLists.txt b/examples/1d_stencil/CMakeLists.txt index 856dd3685e79..bf535bf00716 100644 --- a/examples/1d_stencil/CMakeLists.txt +++ b/examples/1d_stencil/CMakeLists.txt @@ -58,7 +58,7 @@ foreach(example_program ${example_programs}) add_hpx_executable( ${example_program} INTERNAL_FLAGS SOURCES ${sources} ${${example_program}_FLAGS} - FOLDER "Examples/1D Stencil/${example_program}" + FOLDER "Examples/1D Stencil" ) add_hpx_example_target_dependencies("1d_stencil" ${example_program}) diff --git a/libs/core/affinity/include/hpx/affinity/affinity_data.hpp b/libs/core/affinity/include/hpx/affinity/affinity_data.hpp index e452087139a0..a221c0bbe7c0 100644 --- a/libs/core/affinity/include/hpx/affinity/affinity_data.hpp +++ b/libs/core/affinity/include/hpx/affinity/affinity_data.hpp @@ -1,4 +1,4 @@ -// Copyright (c) 2007-2017 Hartmut Kaiser +// Copyright (c) 2007-2022 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -89,21 +89,28 @@ namespace hpx { namespace threads { namespace policies { namespace detail { std::size_t num_thread, std::size_t hardware_concurrency) const; private: - std::size_t num_threads_; ///< number of processing units managed - std::size_t - pu_offset_; ///< offset of the first processing unit to use - std::size_t pu_step_; ///< step between used processing units + ///< number of processing units managed + std::size_t num_threads_; + + ///< offset of the first processing unit to use + std::size_t pu_offset_; + + ///< step between used processing units + std::size_t pu_step_; std::size_t used_cores_; std::string affinity_domain_; std::vector affinity_masks_; std::vector pu_nums_; - mask_type - no_affinity_; ///< mask of processing units which have no affinity - bool - use_process_mask_; ///< use the process CPU mask to limit available PUs + + ///< mask of processing units which have no affinity + mask_type no_affinity_; + + ///< use the process CPU mask to limit available PUs + bool use_process_mask_; std::size_t num_pus_needed_; - static std::atomic - instance_number_counter_; ///< counter for instance numbers + + ///< counter for instance numbers + static std::atomic instance_number_counter_; }; }}}} // namespace hpx::threads::policies::detail diff --git a/libs/core/affinity/src/affinity_data.cpp b/libs/core/affinity/src/affinity_data.cpp index 2a3bde4ba7c3..2bb3f2f79f20 100644 --- a/libs/core/affinity/src/affinity_data.cpp +++ b/libs/core/affinity/src/affinity_data.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2007-2017 Hartmut Kaiser +// Copyright (c) 2007-2022 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying diff --git a/libs/core/asio/include/hpx/asio/asio_util.hpp b/libs/core/asio/include/hpx/asio/asio_util.hpp index 23b372f50745..e03f8da80f36 100644 --- a/libs/core/asio/include/hpx/asio/asio_util.hpp +++ b/libs/core/asio/include/hpx/asio/asio_util.hpp @@ -27,7 +27,8 @@ namespace hpx { namespace util { /////////////////////////////////////////////////////////////////////////// HPX_CORE_EXPORT bool get_endpoint(std::string const& addr, - std::uint16_t port, asio::ip::tcp::endpoint& ep, bool force_ipv4 = false); + std::uint16_t port, asio::ip::tcp::endpoint& ep, + bool force_ipv4 = false); HPX_CORE_EXPORT std::string get_endpoint_name( asio::ip::tcp::endpoint const& ep); diff --git a/libs/core/asio/include/hpx/asio/map_hostnames.hpp b/libs/core/asio/include/hpx/asio/map_hostnames.hpp index e5f66a786d02..e42311a26578 100644 --- a/libs/core/asio/include/hpx/asio/map_hostnames.hpp +++ b/libs/core/asio/include/hpx/asio/map_hostnames.hpp @@ -27,7 +27,8 @@ namespace hpx { namespace util { transform_function_type; map_hostnames(bool debug = false) - : ipv4_(false), debug_(debug) + : ipv4_(false) + , debug_(debug) { } diff --git a/libs/core/asio/src/asio_util.cpp b/libs/core/asio/src/asio_util.cpp index 89af33ca1ece..e9293b5de1db 100644 --- a/libs/core/asio/src/asio_util.cpp +++ b/libs/core/asio/src/asio_util.cpp @@ -112,8 +112,8 @@ namespace hpx { namespace util { asio::ip::tcp::resolver::iterator it = resolver.resolve(query); - while (force_ipv4 && - it != tcp::resolver::iterator() && !it->endpoint().address().is_v4()) + while (force_ipv4 && it != tcp::resolver::iterator() && + !it->endpoint().address().is_v4()) { ++it; } diff --git a/libs/core/async_base/include/hpx/async_base/scheduling_properties.hpp b/libs/core/async_base/include/hpx/async_base/scheduling_properties.hpp index 6c1cb5ee7e61..8d60563294f3 100644 --- a/libs/core/async_base/include/hpx/async_base/scheduling_properties.hpp +++ b/libs/core/async_base/include/hpx/async_base/scheduling_properties.hpp @@ -1,4 +1,5 @@ // Copyright (c) 2020 ETH Zurich +// Copyright (c) 2022 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -10,8 +11,21 @@ #include #include +#include + namespace hpx { namespace execution { namespace experimental { + /////////////////////////////////////////////////////////////////////////// + template + struct is_scheduling_property : std::false_type + { + }; + + template + inline constexpr bool is_scheduling_property_v = + is_scheduling_property::value; + + /////////////////////////////////////////////////////////////////////////// namespace detail { template @@ -41,6 +55,16 @@ namespace hpx { namespace execution { namespace experimental { }; } // namespace detail + /////////////////////////////////////////////////////////////////////////// + template + struct is_scheduling_property, Property>>> + : std::true_type + { + }; + + /////////////////////////////////////////////////////////////////////////// inline constexpr struct with_priority_t final : detail::property_base { @@ -59,6 +83,12 @@ namespace hpx { namespace execution { namespace experimental { } } get_priority{}; + template <> + struct is_scheduling_property : std::true_type + { + }; + + /////////////////////////////////////////////////////////////////////////// inline constexpr struct with_stacksize_t final : detail::property_base { @@ -77,6 +107,12 @@ namespace hpx { namespace execution { namespace experimental { } } get_stacksize{}; + template <> + struct is_scheduling_property : std::true_type + { + }; + + /////////////////////////////////////////////////////////////////////////// inline constexpr struct with_hint_t final : detail::property_base { @@ -95,6 +131,12 @@ namespace hpx { namespace execution { namespace experimental { } } get_hint{}; + template <> + struct is_scheduling_property : std::true_type + { + }; + + /////////////////////////////////////////////////////////////////////////// inline constexpr struct with_annotation_t final : detail::property_base { @@ -112,4 +154,9 @@ namespace hpx { namespace execution { namespace experimental { return nullptr; } } get_annotation{}; + + template <> + struct is_scheduling_property : std::true_type + { + }; }}} // namespace hpx::execution::experimental diff --git a/libs/core/compute_local/CMakeLists.txt b/libs/core/compute_local/CMakeLists.txt index 74f548f1f70d..5fff3e86b037 100644 --- a/libs/core/compute_local/CMakeLists.txt +++ b/libs/core/compute_local/CMakeLists.txt @@ -48,6 +48,7 @@ add_hpx_module( hpx_algorithms hpx_allocator_support hpx_async_combinators + hpx_concepts hpx_config hpx_datastructures hpx_execution diff --git a/libs/core/compute_local/include/hpx/compute_local/host/block_fork_join_executor.hpp b/libs/core/compute_local/include/hpx/compute_local/host/block_fork_join_executor.hpp index 55f0c9c3a0c6..c1fd8553d647 100644 --- a/libs/core/compute_local/include/hpx/compute_local/host/block_fork_join_executor.hpp +++ b/libs/core/compute_local/include/hpx/compute_local/host/block_fork_join_executor.hpp @@ -20,8 +20,9 @@ #include #include #include +#include +#include #include -#include #include #include @@ -273,6 +274,7 @@ namespace hpx::execution::experimental { // clang-format off template && hpx::functional::is_tag_invocable_v< Tag, fork_join_executor, Property> )> @@ -281,20 +283,22 @@ namespace hpx::execution::experimental { block_fork_join_executor const& exec, Property&& prop) noexcept { auto exec_with_prop = exec; - exec_with_prop.exec_ = tag(exec.exec_, HPX_FORWARD(Property, prop)); + exec_with_prop.exec_ = hpx::functional::tag_invoke( + tag, exec.exec_, HPX_FORWARD(Property, prop)); return exec_with_prop; } // clang-format off template && hpx::functional::is_tag_invocable_v )> // clang-format on friend decltype(auto) tag_invoke( Tag tag, block_fork_join_executor const& exec) noexcept { - return tag(exec.exec_); + return hpx::functional::tag_invoke(tag, exec.exec_); } private: diff --git a/libs/core/datastructures/include/hpx/datastructures/detail/dynamic_bitset.hpp b/libs/core/datastructures/include/hpx/datastructures/detail/dynamic_bitset.hpp index da699248ea78..a9f220b14563 100644 --- a/libs/core/datastructures/include/hpx/datastructures/detail/dynamic_bitset.hpp +++ b/libs/core/datastructures/include/hpx/datastructures/detail/dynamic_bitset.hpp @@ -2160,7 +2160,10 @@ namespace hpx::detail { block_width_type const extra_bits = count_extra_bits(); if (extra_bits != 0) + { + // NOLINTNEXTLINE(stringop-overflow=) highest_block() &= (Block(1) << extra_bits) - 1; + } } // check class invariants diff --git a/libs/core/executors/include/hpx/executors/annotating_executor.hpp b/libs/core/executors/include/hpx/executors/annotating_executor.hpp index 03a0cf8ecfb1..1eff3a180680 100644 --- a/libs/core/executors/include/hpx/executors/annotating_executor.hpp +++ b/libs/core/executors/include/hpx/executors/annotating_executor.hpp @@ -14,6 +14,8 @@ #include #include #include +#include +#include #include #include @@ -188,9 +190,13 @@ namespace hpx { namespace execution { namespace experimental { } // support all properties exposed by the wrapped executor + // clang-format off template >> + HPX_CONCEPT_REQUIRES_( + hpx::execution::experimental::is_scheduling_property_v && + hpx::functional::is_tag_invocable_v + )> + // clang-format on friend annotating_executor tag_invoke( Tag tag, annotating_executor const& exec, Property&& prop) { @@ -198,13 +204,17 @@ namespace hpx { namespace execution { namespace experimental { tag, exec.exec_, HPX_FORWARD(Property, prop))); } + // clang-format off template >> + HPX_CONCEPT_REQUIRES_( + hpx::execution::experimental::is_scheduling_property_v && + hpx::functional::is_tag_invocable_v + )> + // clang-format on friend decltype(auto) tag_invoke( Tag tag, annotating_executor const& exec) { - return hpx::functional::tag_invoke(tag, exec.policy_); + return hpx::functional::tag_invoke(tag, exec.exec_); } private: diff --git a/libs/core/executors/include/hpx/executors/explicit_scheduler_executor.hpp b/libs/core/executors/include/hpx/executors/explicit_scheduler_executor.hpp index 76cfc1d3561f..b453dc1755d9 100644 --- a/libs/core/executors/include/hpx/executors/explicit_scheduler_executor.hpp +++ b/libs/core/executors/include/hpx/executors/explicit_scheduler_executor.hpp @@ -9,7 +9,6 @@ #pragma once #include -#include #include #include #include @@ -28,6 +27,8 @@ #include #include #include +#include +#include #include #include @@ -92,9 +93,14 @@ namespace hpx::execution::experimental { } // support all properties exposed by the wrapped scheduler + // clang-format off template >> + HPX_CONCEPT_REQUIRES_( + hpx::execution::experimental::is_scheduling_property_v && + hpx::functional::is_tag_invocable_v< + Tag, BaseScheduler, Property> + )> + // clang-format on friend explicit_scheduler_executor tag_invoke( Tag tag, explicit_scheduler_executor const& exec, Property&& prop) { @@ -102,9 +108,13 @@ namespace hpx::execution::experimental { tag, exec.sched_, HPX_FORWARD(Property, prop))); } + // clang-format off template >> + HPX_CONCEPT_REQUIRES_( + hpx::execution::experimental::is_scheduling_property_v && + hpx::functional::is_tag_invocable_v + )> + // clang-format on friend decltype(auto) tag_invoke( Tag tag, explicit_scheduler_executor const& exec) { diff --git a/libs/core/executors/include/hpx/executors/fork_join_executor.hpp b/libs/core/executors/include/hpx/executors/fork_join_executor.hpp index d3c00ac30579..92506c2d9aee 100644 --- a/libs/core/executors/include/hpx/executors/fork_join_executor.hpp +++ b/libs/core/executors/include/hpx/executors/fork_join_executor.hpp @@ -1,4 +1,5 @@ // Copyright (c) 2020 ETH Zurich +// Copyright (c) 2022 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -26,12 +27,11 @@ #include #include #include +#include #include #include #include #include -#include -#include #include #include @@ -858,6 +858,19 @@ namespace hpx { namespace execution { namespace experimental { return exec.shared_data_->annotation_; } + friend auto tag_invoke( + hpx::execution::experimental::get_processing_units_mask_t, + fork_join_executor const& exec) noexcept + { + return exec.shared_data_->pu_mask_; + } + + friend auto tag_invoke(hpx::execution::experimental::get_cores_mask_t, + fork_join_executor const& exec) noexcept + { + return exec.shared_data_->pu_mask_; + } + /// \cond NOINTERNAL enum class init_mode { diff --git a/libs/core/executors/include/hpx/executors/parallel_executor.hpp b/libs/core/executors/include/hpx/executors/parallel_executor.hpp index a49c43d2f989..ca6fcb7e0405 100644 --- a/libs/core/executors/include/hpx/executors/parallel_executor.hpp +++ b/libs/core/executors/include/hpx/executors/parallel_executor.hpp @@ -30,6 +30,8 @@ #include #include #include +#include +#include #include #include #include @@ -164,9 +166,13 @@ namespace hpx { namespace execution { // property implementations // support all properties exposed by the embedded policy + // clang-format off template >> + HPX_CONCEPT_REQUIRES_( + hpx::execution::experimental::is_scheduling_property_v && + hpx::functional::is_tag_invocable_v + )> + // clang-format on friend parallel_policy_executor tag_invoke( Tag tag, parallel_policy_executor const& exec, Property&& prop) { @@ -176,9 +182,13 @@ namespace hpx { namespace execution { return exec_with_prop; } + // clang-format off template >> + HPX_CONCEPT_REQUIRES_( + hpx::execution::experimental::is_scheduling_property_v && + hpx::functional::is_tag_invocable_v + )> + // clang-format on friend decltype(auto) tag_invoke( Tag tag, parallel_policy_executor const& exec) { @@ -229,6 +239,25 @@ namespace hpx { namespace execution { return exec.get_num_cores(); } + friend auto tag_invoke( + hpx::execution::experimental::get_processing_units_mask_t, + parallel_policy_executor const& exec) + { + auto pool = exec.pool_ ? + exec.pool_ : + threads::detail::get_self_or_default_pool(); + return pool->get_used_processing_units(exec.get_num_cores(), false); + } + + friend auto tag_invoke(hpx::execution::experimental::get_cores_mask_t, + parallel_policy_executor const& exec) + { + auto pool = exec.pool_ ? + exec.pool_ : + threads::detail::get_self_or_default_pool(); + return pool->get_used_processing_units(exec.get_num_cores(), true); + } + public: // backwards compatibility support, will be removed in the future template diff --git a/libs/core/executors/include/hpx/executors/restricted_thread_pool_executor.hpp b/libs/core/executors/include/hpx/executors/restricted_thread_pool_executor.hpp index 7c421809bf80..73c7e08bf21c 100644 --- a/libs/core/executors/include/hpx/executors/restricted_thread_pool_executor.hpp +++ b/libs/core/executors/include/hpx/executors/restricted_thread_pool_executor.hpp @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -97,6 +98,11 @@ namespace hpx::parallel::execution { hpx::parallel::execution::processing_units_count(exec_))); } + std::int16_t get_current_thread_num() const + { + return static_cast(first_thread_ + os_thread_++); + } + embedded_executor generate_executor(std::uint16_t thread_num) const { return hpx::execution::experimental::with_hint( @@ -107,25 +113,36 @@ namespace hpx::parallel::execution { // property implementations // support all properties exposed by the embedded executor + // clang-format off template >> + HPX_CONCEPT_REQUIRES_( + hpx::execution::experimental::is_scheduling_property_v && + hpx::functional::is_tag_invocable_v< + Tag, embedded_executor, Property> + )> + // clang-format on friend restricted_policy_executor tag_invoke( - Tag, restricted_policy_executor const& exec, Property&& prop) + Tag tag, restricted_policy_executor const& exec, Property&& prop) { auto exec_with_prop = exec; - exec_with_prop.exec_ = - Tag{}(exec.exec_, HPX_FORWARD(Property, prop)); + exec_with_prop.exec_ = hpx::functional::tag_invoke(tag, + exec.generate_executor(exec.get_current_thread_num()), + HPX_FORWARD(Property, prop)); return exec_with_prop; } + // clang-format off template >> + HPX_CONCEPT_REQUIRES_( + hpx::execution::experimental::is_scheduling_property_v && + hpx::functional::is_tag_invocable_v + )> + // clang-format on friend decltype(auto) tag_invoke( - Tag, restricted_policy_executor const& exec) + Tag tag, restricted_policy_executor const& exec) { - return Tag{}(exec.exec_); + return hpx::functional::tag_invoke( + tag, exec.generate_executor(exec.get_current_thread_num())); } // executor API diff --git a/libs/core/executors/include/hpx/executors/scheduler_executor.hpp b/libs/core/executors/include/hpx/executors/scheduler_executor.hpp index 3c6fd853ed13..5112f471ed8b 100644 --- a/libs/core/executors/include/hpx/executors/scheduler_executor.hpp +++ b/libs/core/executors/include/hpx/executors/scheduler_executor.hpp @@ -27,6 +27,8 @@ #include #include #include +#include +#include #include #include @@ -108,9 +110,14 @@ namespace hpx::execution::experimental { } // support all properties exposed by the wrapped scheduler + // clang-format off template >> + HPX_CONCEPT_REQUIRES_( + hpx::execution::experimental::is_scheduling_property_v && + hpx::functional::is_tag_invocable_v< + Tag, BaseScheduler, Property> + )> + // clang-format on friend scheduler_executor tag_invoke( Tag tag, scheduler_executor const& exec, Property&& prop) { @@ -118,9 +125,13 @@ namespace hpx::execution::experimental { tag, exec.sched_, HPX_FORWARD(Property, prop))); } + // clang-format off template >> + HPX_CONCEPT_REQUIRES_( + hpx::execution::experimental::is_scheduling_property_v && + hpx::functional::is_tag_invocable_v + )> + // clang-format on friend decltype(auto) tag_invoke( Tag tag, scheduler_executor const& exec) { diff --git a/libs/core/executors/include/hpx/executors/sequenced_executor.hpp b/libs/core/executors/include/hpx/executors/sequenced_executor.hpp index 90ac4af1701f..5bff64141a3f 100644 --- a/libs/core/executors/include/hpx/executors/sequenced_executor.hpp +++ b/libs/core/executors/include/hpx/executors/sequenced_executor.hpp @@ -18,10 +18,13 @@ #include #include #include +#include #include #include #include +#include #include +#include #include #include @@ -218,6 +221,21 @@ namespace hpx { namespace execution { return 1; } + friend auto tag_invoke( + hpx::execution::experimental::get_processing_units_mask_t, + sequenced_executor const&) + { + return threads::detail::get_self_or_default_pool() + ->get_used_processing_unit(hpx::get_worker_thread_num(), false); + } + + friend auto tag_invoke(hpx::execution::experimental::get_cores_mask_t, + sequenced_executor const&) + { + return threads::detail::get_self_or_default_pool() + ->get_used_processing_unit(hpx::get_worker_thread_num(), true); + } + private: friend class hpx::serialization::access; diff --git a/libs/core/executors/include/hpx/executors/thread_pool_scheduler.hpp b/libs/core/executors/include/hpx/executors/thread_pool_scheduler.hpp index 8e5b1cf47711..8c6ce12db7a9 100644 --- a/libs/core/executors/include/hpx/executors/thread_pool_scheduler.hpp +++ b/libs/core/executors/include/hpx/executors/thread_pool_scheduler.hpp @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -101,6 +102,7 @@ namespace hpx::execution::experimental { // clang-format off template && hpx::functional::is_tag_invocable_v )> // clang-format on @@ -108,21 +110,22 @@ namespace hpx::execution::experimental { thread_pool_policy_scheduler const& scheduler, Property&& prop) { auto scheduler_with_prop = scheduler; - scheduler_with_prop.policy_ = - tag(scheduler.policy_, HPX_FORWARD(Property, prop)); + scheduler_with_prop.policy_ = hpx::functional::tag_invoke( + tag, scheduler.policy_, HPX_FORWARD(Property, prop)); return scheduler_with_prop; } // clang-format off template && hpx::functional::is_tag_invocable_v )> // clang-format on friend decltype(auto) tag_invoke( Tag tag, thread_pool_policy_scheduler const& scheduler) { - return tag(scheduler.policy_); + return hpx::functional::tag_invoke(tag, scheduler.policy_); } friend constexpr thread_pool_policy_scheduler tag_invoke( @@ -174,6 +177,25 @@ namespace hpx::execution::experimental { } #endif + friend auto tag_invoke( + hpx::execution::experimental::get_processing_units_mask_t, + thread_pool_policy_scheduler const& exec) + { + auto pool = exec.pool_ ? + exec.pool_ : + threads::detail::get_self_or_default_pool(); + return pool->get_used_processing_units(exec.get_num_cores(), false); + } + + friend auto tag_invoke(hpx::execution::experimental::get_cores_mask_t, + thread_pool_policy_scheduler const& exec) + { + auto pool = exec.pool_ ? + exec.pool_ : + threads::detail::get_self_or_default_pool(); + return pool->get_used_processing_units(exec.get_num_cores(), true); + } + template void execute(F&& f, Policy const& policy) const { diff --git a/libs/core/executors/tests/unit/fork_join_executor.cpp b/libs/core/executors/tests/unit/fork_join_executor.cpp index 53a4ed652837..c572643f42f3 100644 --- a/libs/core/executors/tests/unit/fork_join_executor.cpp +++ b/libs/core/executors/tests/unit/fork_join_executor.cpp @@ -1,5 +1,5 @@ // Copyright (c) 2020 ETH Zurich -// Copyright (c) 2007-2016 Hartmut Kaiser +// Copyright (c) 2007-2022 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -27,6 +27,25 @@ using hpx::execution::experimental::fork_join_executor; static std::atomic count{0}; +/////////////////////////////////////////////////////////////////////////////// +template +void test_processing_mask(ExecutorArgs&&... args) +{ + std::cerr << "test_processing_mask\n"; + + auto& rp = hpx::resource::get_partitioner(); + auto const& expected_mask = + rp.get_used_pus_mask(hpx::get_worker_thread_num()); + + fork_join_executor exec{expected_mask, std::forward(args)...}; + auto pus_mask = + hpx::execution::experimental::get_processing_units_mask(exec); + HPX_TEST(pus_mask == expected_mask); + + auto cores_mask = hpx::execution::experimental::get_cores_mask(exec); + HPX_TEST(cores_mask == expected_mask); +} + /////////////////////////////////////////////////////////////////////////////// void bulk_test(int, int passed_through) //-V813 { @@ -173,6 +192,8 @@ void test_executor(hpx::threads::thread_priority priority, test_bulk_async(priority, stacksize, schedule); test_bulk_sync_exception(priority, stacksize, schedule); test_bulk_async_exception(priority, stacksize, schedule); + + test_processing_mask(priority, stacksize, schedule); } /////////////////////////////////////////////////////////////////////////////// diff --git a/libs/core/executors/tests/unit/parallel_executor.cpp b/libs/core/executors/tests/unit/parallel_executor.cpp index 21e2b6b57354..49ab699e561c 100644 --- a/libs/core/executors/tests/unit/parallel_executor.cpp +++ b/libs/core/executors/tests/unit/parallel_executor.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2007-2016 Hartmut Kaiser +// Copyright (c) 2007-2022 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -162,6 +162,28 @@ void static_check_executor() "is_bulk_two_way_executor_v"); } +void test_processing_mask() +{ + hpx::execution::parallel_executor exec; + + { + auto pool = hpx::threads::detail::get_self_or_default_pool(); + auto expected_mask = + pool->get_used_processing_units(pool->get_os_thread_count(), false); + auto mask = + hpx::execution::experimental::get_processing_units_mask(exec); + HPX_TEST(mask == expected_mask); + } + + { + auto pool = hpx::threads::detail::get_self_or_default_pool(); + auto expected_mask = + pool->get_used_processing_units(pool->get_os_thread_count(), true); + auto mask = hpx::execution::experimental::get_cores_mask(exec); + HPX_TEST(mask == expected_mask); + } +} + /////////////////////////////////////////////////////////////////////////////// int hpx_main() { @@ -175,6 +197,8 @@ int hpx_main() test_bulk_async(); test_bulk_then(); + test_processing_mask(); + return hpx::local::finalize(); } diff --git a/libs/core/resource_partitioner/include/hpx/resource_partitioner/detail/partitioner.hpp b/libs/core/resource_partitioner/include/hpx/resource_partitioner/detail/partitioner.hpp index 50e5a464ada9..34648a316819 100644 --- a/libs/core/resource_partitioner/include/hpx/resource_partitioner/detail/partitioner.hpp +++ b/libs/core/resource_partitioner/include/hpx/resource_partitioner/detail/partitioner.hpp @@ -1,4 +1,5 @@ -// Copyright (c) 2017 Shoshana Jakobovits +// Copyright (c) 2017 Shoshana Jakobovits +// Copyright (c) 2017-2022 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -155,6 +156,7 @@ namespace hpx { namespace resource { namespace detail { threads::mask_cref_type get_pu_mask( std::size_t global_thread_num) const; std::size_t get_thread_occupancy(std::size_t pu_num) const; + threads::mask_type get_used_pus_mask(std::size_t pu_num) const; void init(resource::partitioner_mode rpmode, hpx::util::section cfg, hpx::threads::policies::detail::affinity_data affinity_data); diff --git a/libs/core/resource_partitioner/src/detail_partitioner.cpp b/libs/core/resource_partitioner/src/detail_partitioner.cpp index a158c048b1d9..0cb712ba4c18 100644 --- a/libs/core/resource_partitioner/src/detail_partitioner.cpp +++ b/libs/core/resource_partitioner/src/detail_partitioner.cpp @@ -1,4 +1,5 @@ -// Copyright (c) 2017 Shoshana Jakobovits +// Copyright (c) 2017 Shoshana Jakobovits +// Copyright (c) 2017-2022 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -868,6 +869,11 @@ namespace hpx { namespace resource { namespace detail { return affinity_data_.get_thread_occupancy(topo_, pu_num); } + threads::mask_type partitioner::get_used_pus_mask(std::size_t pu_num) const + { + return affinity_data_.get_used_pus_mask(topo_, pu_num); + } + threads::mask_cref_type partitioner::get_pu_mask( std::size_t global_thread_num) const { diff --git a/libs/core/synchronization/include/hpx/synchronization/once.hpp b/libs/core/synchronization/include/hpx/synchronization/once.hpp index c813ff6f9e5b..29659398159b 100644 --- a/libs/core/synchronization/include/hpx/synchronization/once.hpp +++ b/libs/core/synchronization/include/hpx/synchronization/once.hpp @@ -79,7 +79,7 @@ namespace hpx { /// active call, with no additional synchronization. /// /// \param flag an object, for which exactly one function gets executed - /// \param f Callable object to invoke + /// \param f Callable object to invoke /// \param args... arguments to pass to the function /// /// \throws std::system_error if any condition prevents calls to \a call_once diff --git a/libs/core/threading_base/include/hpx/threading_base/thread_pool_base.hpp b/libs/core/threading_base/include/hpx/threading_base/thread_pool_base.hpp index d1cf2c8f58c8..d757500016ae 100644 --- a/libs/core/threading_base/include/hpx/threading_base/thread_pool_base.hpp +++ b/libs/core/threading_base/include/hpx/threading_base/thread_pool_base.hpp @@ -1,5 +1,5 @@ // Copyright (c) 2018 Mikael Simberg -// Copyright (c) 2007-2017 Hartmut Kaiser +// Copyright (c) 2007-2022 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -217,7 +217,12 @@ namespace hpx { namespace threads { return nullptr; } - mask_type get_used_processing_units() const; + mask_type get_used_processing_units(bool full_cores = false) const; + mask_type get_used_processing_units( + std::size_t num_cores, bool full_cores = false) const; + mask_type get_used_processing_unit( + std::size_t thread_num, bool full_cores = false) const; + hwloc_bitmap_ptr get_numa_domain_bitmap() const; // performance counters diff --git a/libs/core/threading_base/src/thread_pool_base.cpp b/libs/core/threading_base/src/thread_pool_base.cpp index 04a364abf38e..d9404fca0f1e 100644 --- a/libs/core/threading_base/src/thread_pool_base.cpp +++ b/libs/core/threading_base/src/thread_pool_base.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2007-2017 Hartmut Kaiser +// Copyright (c) 2007-2022 Hartmut Kaiser // // SPDX-License-Identifier: BSL-1.0 // Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -36,7 +36,8 @@ namespace hpx { namespace threads { } /////////////////////////////////////////////////////////////////////////// - mask_type thread_pool_base::get_used_processing_units() const + mask_type thread_pool_base::get_used_processing_units( + std::size_t num_cores, bool full_cores) const { auto const& topo = create_topology(); auto const sched = get_scheduler(); @@ -44,19 +45,46 @@ namespace hpx { namespace threads { mask_type used_processing_units = mask_type(); threads::resize(used_processing_units, hardware_concurrency()); - for (std::size_t thread_num = 0; thread_num < get_os_thread_count(); - ++thread_num) + std::size_t max_cores = get_os_thread_count(); + for (std::size_t thread_num = 0; + thread_num != max_cores && num_cores != 0; ++thread_num) { if (sched->get_state(thread_num).load() <= hpx::state::suspended) { - used_processing_units |= affinity_data_.get_pu_mask( - topo, thread_num + get_thread_offset()); + if (!full_cores) + { + used_processing_units |= affinity_data_.get_pu_mask( + topo, thread_num + get_thread_offset()); + } + else + { + used_processing_units |= topo.get_core_affinity_mask( + thread_num + get_thread_offset()); + } + --num_cores; } } return used_processing_units; } + mask_type thread_pool_base::get_used_processing_units(bool full_cores) const + { + return get_used_processing_units(get_os_thread_count(), full_cores); + } + + mask_type thread_pool_base::get_used_processing_unit( + std::size_t thread_num, bool full_cores) const + { + auto const& topo = create_topology(); + if (!full_cores) + { + return affinity_data_.get_pu_mask( + topo, thread_num + get_thread_offset()); + } + return topo.get_core_affinity_mask(thread_num + get_thread_offset()); + } + hwloc_bitmap_ptr thread_pool_base::get_numa_domain_bitmap() const { auto const& topo = create_topology(); diff --git a/libs/core/timed_execution/CMakeLists.txt b/libs/core/timed_execution/CMakeLists.txt index b6b761f34061..3e79351cb2f3 100644 --- a/libs/core/timed_execution/CMakeLists.txt +++ b/libs/core/timed_execution/CMakeLists.txt @@ -28,6 +28,7 @@ add_hpx_module( GLOBAL_HEADER_GEN ON HEADERS ${timed_execution_headers} COMPAT_HEADERS ${timed_execution_compat_headers} - MODULE_DEPENDENCIES hpx_execution hpx_executors hpx_threading hpx_timing + MODULE_DEPENDENCIES hpx_concepts hpx_execution hpx_executors hpx_threading + hpx_timing hpx_topology CMAKE_SUBDIRS examples tests ) diff --git a/libs/core/timed_execution/include/hpx/timed_execution/timed_executors.hpp b/libs/core/timed_execution/include/hpx/timed_execution/timed_executors.hpp index 76da1d5f6694..1dd67650c9c8 100644 --- a/libs/core/timed_execution/include/hpx/timed_execution/timed_executors.hpp +++ b/libs/core/timed_execution/include/hpx/timed_execution/timed_executors.hpp @@ -16,7 +16,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -543,9 +545,14 @@ namespace hpx { namespace parallel { namespace execution { } // support all properties exposed by the wrapped executor + // clang-format off template >> + HPX_CONCEPT_REQUIRES_( + hpx::execution::experimental::is_scheduling_property_v && + hpx::functional::is_tag_invocable_v< + Tag, BaseExecutor, Property> + )> + // clang-format on friend timed_executor tag_invoke( Tag tag, timed_executor const& exec, Property&& prop) { @@ -553,9 +560,13 @@ namespace hpx { namespace parallel { namespace execution { tag, exec.exec_, HPX_FORWARD(Property, prop))); } + // clang-format off template >> + HPX_CONCEPT_REQUIRES_( + hpx::execution::experimental::is_scheduling_property_v && + hpx::functional::is_tag_invocable_v + )> + // clang-format on friend decltype(auto) tag_invoke(Tag tag, timed_executor const& exec) { return hpx::functional::tag_invoke(tag, exec.exec_); diff --git a/libs/core/topology/CMakeLists.txt b/libs/core/topology/CMakeLists.txt index 804428dfc077..6e955f0b6d1b 100644 --- a/libs/core/topology/CMakeLists.txt +++ b/libs/core/topology/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2021 The STE||AR-Group +# Copyright (c) 2019-2022 The STE||AR-Group # # SPDX-License-Identifier: BSL-1.0 # Distributed under the Boost Software License, Version 1.0. (See accompanying @@ -29,7 +29,10 @@ if(HPX_TOPOLOGY_WITH_ADDITIONAL_HWLOC_TESTING) endif() # Default location is $HPX_ROOT/libs/topology/include -set(topology_headers hpx/topology/cpu_mask.hpp hpx/topology/topology.hpp) +set(topology_headers + hpx/topology/cpu_mask.hpp hpx/topology/scheduling_properties.hpp + hpx/topology/topology.hpp +) # Default location is $HPX_ROOT/libs/topology/include_compatibility # cmake-format: off @@ -53,6 +56,7 @@ add_hpx_module( COMPAT_HEADERS ${topology_compat_headers} MODULE_DEPENDENCIES hpx_assertion + hpx_async_base hpx_config hpx_concurrency hpx_errors diff --git a/libs/core/topology/include/hpx/topology/scheduling_properties.hpp b/libs/core/topology/include/hpx/topology/scheduling_properties.hpp new file mode 100644 index 000000000000..bc4f88d4d368 --- /dev/null +++ b/libs/core/topology/include/hpx/topology/scheduling_properties.hpp @@ -0,0 +1,57 @@ +// Copyright (c) 2022 Hartmut Kaiser +// +// SPDX-License-Identifier: BSL-1.0 +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#pragma once + +#include +#include +#include +#include + +#include + +namespace hpx::execution::experimental { + + /////////////////////////////////////////////////////////////////////////// + inline constexpr struct get_processing_units_mask_t final + : hpx::functional::detail::tag_fallback + { + private: + // simply return machine affinity mask if get_processing_units_mask is + // not supported + template + friend HPX_FORCEINLINE decltype(auto) tag_fallback_invoke( + get_processing_units_mask_t, Target&&) noexcept + { + return hpx::threads::create_topology().get_machine_affinity_mask(); + } + } get_processing_units_mask{}; + + template <> + struct is_scheduling_property : std::true_type + { + }; + + /////////////////////////////////////////////////////////////////////////// + inline constexpr struct get_cores_mask_t final + : hpx::functional::detail::tag_fallback + { + private: + // simply return machine affinity mask if get_cores_mask is not + // supported + template + friend HPX_FORCEINLINE decltype(auto) tag_fallback_invoke( + get_cores_mask_t, Target&&) noexcept + { + return hpx::threads::create_topology().get_machine_affinity_mask(); + } + } get_cores_mask{}; + + template <> + struct is_scheduling_property : std::true_type + { + }; +} // namespace hpx::execution::experimental diff --git a/libs/core/topology/src/topology.cpp b/libs/core/topology/src/topology.cpp index 07afe94a666c..b16e270f93e6 100644 --- a/libs/core/topology/src/topology.cpp +++ b/libs/core/topology/src/topology.cpp @@ -1502,22 +1502,39 @@ namespace hpx { namespace threads { hwloc_bitmap_t cpuset = mask_to_bitmap(mask, HWLOC_OBJ_PU); std::size_t cache_size = 0; +#if HWLOC_API_VERSION >= 0x00020000 + hwloc_obj_type_t type = HWLOC_OBJ_L1CACHE; + switch (level) + { + case 2: + type = HWLOC_OBJ_L2CACHE; + break; + + case 3: + type = HWLOC_OBJ_L3CACHE; + break; + + case 4: + type = HWLOC_OBJ_L4CACHE; + break; + + case 5: + type = HWLOC_OBJ_L5CACHE; + break; + + default: + break; + } +#endif + iterate(cpuset, [&](auto num_pu) { hwloc_obj_t pu_obj = hwloc_get_obj_by_type( topo, HWLOC_OBJ_PU, static_cast(num_pu)); + +#if HWLOC_API_VERSION >= 0x00020000 if (pu_obj == nullptr) return; - hwloc_obj_type_t type = HWLOC_OBJ_L1CACHE; - if (level == 2) - type = HWLOC_OBJ_L2CACHE; - else if (level == 3) - type = HWLOC_OBJ_L3CACHE; - else if (level == 4) - type = HWLOC_OBJ_L4CACHE; - else if (level == 5) - type = HWLOC_OBJ_L5CACHE; - hwloc_obj_t cache_obj = hwloc_get_ancestor_obj_by_type(topo, type, pu_obj); if (cache_obj == nullptr) @@ -1525,6 +1542,19 @@ namespace hpx { namespace threads { cache_size += std::size_t(cache_obj->attr->cache.size) / num_set_bits(cache_obj->cpuset); +#else + // traverse up until found the requested cache level + int levels = 0; + for (hwloc_obj_t obj = pu_obj; obj != nullptr && levels < level; + obj = obj->parent) + { + if (obj->type != HWLOC_OBJ_CACHE || ++levels != level) + continue; + + cache_size += std::size_t(obj->attr->cache.size) / + num_set_bits(obj->cpuset); + } +#endif }); hwloc_bitmap_free(cpuset);