From be543b1117b2e75bea0ba42e31746514d1d1659e Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 25 Apr 2024 08:16:12 -0700 Subject: [PATCH 01/93] enable k-1 core --- cpp/src/community/k_truss_impl.cuh | 32 +++++++++++++++++------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 3db9fd70de2..fe1122581ba 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -491,6 +491,7 @@ k_truss(raft::handle_t const& handle, std::optional> renumber_map{std::nullopt}; std::optional, weight_t>> edge_weight{std::nullopt}; + std::optional> wgts{std::nullopt}; if (graph_view.count_self_loops(handle) > edge_t{0}) { auto [srcs, dsts] = extract_transform_e(handle, @@ -524,30 +525,31 @@ k_truss(raft::handle_t const& handle, modified_graph_view = (*modified_graph).view(); } - // FIXME: Investigate k-1 core failure to yield correct results. // 3. Find (k-1)-core and exclude edges that do not belong to (k-1)-core - /* { auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; + auto vertex_partition_range_lasts = renumber_map ? 
std::make_optional>(cur_graph_view.vertex_partition_range_lasts()) : std::nullopt; + + rmm::device_uvector core_numbers(cur_graph_view.number_of_vertices(), + handle.get_stream()); + core_number( + handle, cur_graph_view, core_numbers.data(), k_core_degree_type_t::OUT, size_t{2}, size_t{2}); + + raft::device_span core_number_span{core_numbers.data(), core_numbers.size()}; - rmm::device_uvector d_core_numbers(cur_graph_view.local_vertex_partition_range_size(), - handle.get_stream()); - raft::device_span core_number_span{d_core_numbers.data(), d_core_numbers.size()}; rmm::device_uvector srcs{0, handle.get_stream()}; rmm::device_uvector dsts{0, handle.get_stream()}; - std::tie(srcs, dsts, std::ignore) = + std::tie(srcs, dsts, wgts) = k_core(handle, cur_graph_view, - std::optional>{std::nullopt}, + edge_weight_view, size_t{k - 1}, std::make_optional(k_core_degree_type_t::OUT), - // Seems like the below argument is required. passing a std::nullopt - // create a compiler error std::make_optional(core_number_span)); if constexpr (multi_gpu) { @@ -561,17 +563,17 @@ k_truss(raft::handle_t const& handle, std::optional> tmp_renumber_map{std::nullopt}; - std::tie(*modified_graph, std::ignore, std::ignore, std::ignore, tmp_renumber_map) = + std::tie(*modified_graph, edge_weight, std::ignore, std::ignore, tmp_renumber_map) = create_graph_from_edgelist( handle, std::nullopt, std::move(srcs), std::move(dsts), - std::nullopt, + std::move(wgts), std::nullopt, std::nullopt, cugraph::graph_properties_t{true, graph_view.is_multigraph()}, - true); + false); // FIXME: Renumbering should not be hardcoded. modified_graph_view = (*modified_graph).view(); @@ -584,7 +586,6 @@ k_truss(raft::handle_t const& handle, } renumber_map = std::move(tmp_renumber_map); } - */ // 4. Keep only the edges from a low-degree vertex to a high-degree vertex. 
@@ -606,7 +607,10 @@ k_truss(raft::handle_t const& handle, rmm::device_uvector srcs(0, handle.get_stream()); rmm::device_uvector dsts(0, handle.get_stream()); - std::optional> wgts{std::nullopt}; + + edge_weight_view = + edge_weight ? std::make_optional((*edge_weight).view()) + : std::optional>{std::nullopt}; if (edge_weight_view) { std::tie(srcs, dsts, wgts) = extract_transform_e( handle, From 327a07e7c9b43ba897306a9e1e7762bd0e45baec Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Fri, 26 Apr 2024 09:30:29 -0700 Subject: [PATCH 02/93] perform edge triangle count in chunk --- .../community/edge_triangle_count_impl.cuh | 123 +++++++++++------- 1 file changed, 75 insertions(+), 48 deletions(-) diff --git a/cpp/src/community/edge_triangle_count_impl.cuh b/cpp/src/community/edge_triangle_count_impl.cuh index 1370c1a17f2..e83a86f5de2 100644 --- a/cpp/src/community/edge_triangle_count_impl.cuh +++ b/cpp/src/community/edge_triangle_count_impl.cuh @@ -34,8 +34,9 @@ namespace detail { template struct update_edges_p_r_q_r_num_triangles { - size_t num_edges{}; // rename to num_edges + size_t num_edges{}; const edge_t edge_first_or_second{}; + size_t chunk_start{}; raft::device_span intersection_offsets{}; raft::device_span intersection_indices{}; raft::device_span num_triangles{}; @@ -49,26 +50,26 @@ struct update_edges_p_r_q_r_num_triangles { auto idx = thrust::distance(intersection_offsets.begin() + 1, itr); if (edge_first_or_second == 0) { auto p_r_pair = - thrust::make_tuple(thrust::get<0>(*(edge_first + idx)), intersection_indices[i]); + thrust::make_tuple(thrust::get<0>(*(edge_first + chunk_start + idx)), intersection_indices[i]); // Find its position in 'edges' auto itr_p_r_p_q = thrust::lower_bound(thrust::seq, edge_first, - edge_first + num_edges, // pass the number of vertex pairs + edge_first + num_edges, p_r_pair); assert(*itr_p_r_p_q == p_r_pair); idx = thrust::distance(edge_first, itr_p_r_p_q); } else { auto p_r_pair = - 
thrust::make_tuple(thrust::get<1>(*(edge_first + idx)), intersection_indices[i]); + thrust::make_tuple(thrust::get<1>(*(edge_first + chunk_start + idx)), intersection_indices[i]); // Find its position in 'edges' auto itr_p_r_p_q = thrust::lower_bound(thrust::seq, edge_first, - edge_first + num_edges, // pass the number of vertex pairs + edge_first + num_edges, p_r_pair); assert(*itr_p_r_p_q == p_r_pair); idx = thrust::distance(edge_first, itr_p_r_p_q); @@ -89,52 +90,78 @@ std::enable_if_t> edge_triangle_count_im thrust::sort(handle.get_thrust_policy(), edge_first, edge_first + edgelist_srcs.size()); - // FIXME: Perform 'nbr_intersection' in chunks to reduce peak memory. - auto [intersection_offsets, intersection_indices] = - detail::nbr_intersection(handle, - graph_view, - cugraph::edge_dummy_property_t{}.view(), - edge_first, - edge_first + edgelist_srcs.size(), - std::array{true, true}, - false /*FIXME: pass 'do_expensive_check' as argument*/); + auto approx_edges_to_intersect_per_iteration = + static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 20); + auto num_chunks = ((edgelist_srcs.size() % approx_edges_to_intersect_per_iteration) == 0) ? (edgelist_srcs.size() / approx_edges_to_intersect_per_iteration) : (edgelist_srcs.size() / approx_edges_to_intersect_per_iteration) + 1; + + size_t prev_chunk_size = 0; + auto num_edges = edgelist_srcs.size(); rmm::device_uvector num_triangles(edgelist_srcs.size(), handle.get_stream()); - // Update the number of triangles of each (p, q) edges by looking at their intersection - // size - thrust::adjacent_difference(handle.get_thrust_policy(), - intersection_offsets.begin() + 1, - intersection_offsets.end(), - num_triangles.begin()); - - // Given intersection offsets and indices that are used to update the number of - // triangles of (p, q) edges where `r`s are the intersection indices, update - // the number of triangles of the pairs (p, r) and (q, r). 
- - thrust::for_each( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(intersection_indices.size()), - update_edges_p_r_q_r_num_triangles{ - edgelist_srcs.size(), - 0, - raft::device_span(intersection_offsets.data(), intersection_offsets.size()), - raft::device_span(intersection_indices.data(), intersection_indices.size()), - raft::device_span(num_triangles.data(), num_triangles.size()), - edge_first}); - - thrust::for_each( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(intersection_indices.size()), - update_edges_p_r_q_r_num_triangles{ - edgelist_srcs.size(), - 1, - raft::device_span(intersection_offsets.data(), intersection_offsets.size()), - raft::device_span(intersection_indices.data(), intersection_indices.size()), - raft::device_span(num_triangles.data(), num_triangles.size()), - edge_first}); + // Need to ensure that the vector has its values initialized to 0 before incrementing + thrust::fill(handle.get_thrust_policy(), num_triangles.begin(), num_triangles.end(), 0); + + for (size_t i = 0; i < num_chunks; ++i) { + + auto chunk_size = std::min(approx_edges_to_intersect_per_iteration, num_edges); + num_edges -= chunk_size; + + // Perform 'nbr_intersection' in chunks to reduce peak memory. 
+ auto [intersection_offsets, intersection_indices] = + detail::nbr_intersection(handle, + graph_view, + cugraph::edge_dummy_property_t{}.view(), + edge_first + prev_chunk_size, + edge_first + prev_chunk_size + chunk_size, + std::array{true, true}, + false /*FIXME: pass 'do_expensive_check' as argument*/); + + // Update the number of triangles of each (p, q) edges by looking at their intersection + // size + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(chunk_size), + [chunk_start = prev_chunk_size, + num_triangles = + raft::device_span(num_triangles.data(), num_triangles.size()), + intersection_offsets = raft::device_span( + intersection_offsets.data(), intersection_offsets.size())] __device__(auto i) { + num_triangles[chunk_start + i] += (intersection_offsets[i + 1] - intersection_offsets[i]); + }); + + // Given intersection offsets and indices that are used to update the number of + // triangles of (p, q) edges where `r`s are the intersection indices, update + // the number of triangles of the pairs (p, r) and (q, r). 
+ thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(intersection_indices.size()), + update_edges_p_r_q_r_num_triangles{ + edgelist_srcs.size(), + 0, + prev_chunk_size, + raft::device_span(intersection_offsets.data(), intersection_offsets.size()), + raft::device_span(intersection_indices.data(), intersection_indices.size()), + raft::device_span(num_triangles.data(), num_triangles.size()), + edge_first}); + + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(intersection_indices.size()), + update_edges_p_r_q_r_num_triangles{ + edgelist_srcs.size(), + 1, + prev_chunk_size, + raft::device_span(intersection_offsets.data(), intersection_offsets.size()), + raft::device_span(intersection_indices.data(), intersection_indices.size()), + raft::device_span(num_triangles.data(), num_triangles.size()), + edge_first}); + + prev_chunk_size += chunk_size; + } return num_triangles; } From 1f00dd6b6d66eab37e99ee2c3317fdeef7a4b8f3 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sat, 4 May 2024 03:40:28 -0700 Subject: [PATCH 03/93] add edge triangle count tests --- .../community/edge_triangle_count_test.cpp | 255 ++++++++++++++++++ 1 file changed, 255 insertions(+) create mode 100644 cpp/tests/community/edge_triangle_count_test.cpp diff --git a/cpp/tests/community/edge_triangle_count_test.cpp b/cpp/tests/community/edge_triangle_count_test.cpp new file mode 100644 index 00000000000..85d9f912941 --- /dev/null +++ b/cpp/tests/community/edge_triangle_count_test.cpp @@ -0,0 +1,255 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governin_from_mtxg permissions and + * limitations under the License. + */ + +#include "utilities/base_fixture.hpp" +#include "utilities/check_utilities.hpp" +#include "utilities/conversion_utilities.hpp" +#include "utilities/test_graphs.hpp" +#include "utilities/thrust_wrapper.hpp" + +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +struct EdgeTriangleCount_Usecase { + bool test_weighted_{false}; + bool check_correctness_{true}; +}; + +template +class Tests_EdgeTriangleCount : public ::testing::TestWithParam> { + public: + Tests_EdgeTriangleCount() {} + + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + // FIXME: There is an utility equivalent functor not + // supporting host vectors. 
+ template + struct host_nearly_equal { + const type_t threshold_ratio; + const type_t threshold_magnitude; + + bool operator()(type_t lhs, type_t rhs) const + { + return std::abs(lhs - rhs) < + std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); + } + }; + + + template + std::vector + edge_triangle_count_reference(std::vector h_srcs, + std::vector h_dsts) + { + std::vector edge_triangle_counts(h_srcs.size()); + std::uninitialized_fill(edge_triangle_counts.begin(), edge_triangle_counts.end(), 0); + + for (int i = 0; i < h_srcs.size(); ++i) { // edge centric implementation + // for each edge, find the intersection + auto src = h_srcs[i]; + auto dst = h_dsts[i]; + auto it_src_start = std::lower_bound(h_srcs.begin(), h_srcs.end(), src); + auto src_start = std::distance(h_srcs.begin(), it_src_start); + + auto src_end = src_start + std::distance(it_src_start, std::upper_bound(it_src_start, h_srcs.end(), src)); + + auto it_dst_start = std::lower_bound(h_srcs.begin(), h_srcs.end(), dst); + auto dst_start = std::distance(h_srcs.begin(), it_dst_start); + auto dst_end = dst_start + std::distance(it_dst_start, std::upper_bound(it_dst_start, h_srcs.end(), dst)); + + std::set nbr_intersection; + std::set_intersection(h_dsts.begin() + src_start, + h_dsts.begin() + src_end, + h_dsts.begin() + dst_start, + h_dsts.begin() + dst_end, + std::inserter(nbr_intersection, nbr_intersection.end()) + ); + // Find the supporting edges + for(auto v: nbr_intersection){ + auto it_edge = std::lower_bound(h_dsts.begin() + src_start, h_dsts.begin() + src_end, v); + auto idx_edge = std::distance(h_dsts.begin(), it_edge); + edge_triangle_counts[idx_edge] += 1; + + it_edge = std::lower_bound(h_dsts.begin() + dst_start, h_dsts.begin() + dst_end, v); + idx_edge = std::distance(h_dsts.begin(), it_edge); + } + } + + std::transform(edge_triangle_counts.begin(), edge_triangle_counts.end(), edge_triangle_counts.begin(), [](auto count) { + return count * 3; + }); + return 
std::move(edge_triangle_counts); + } + + + template + void run_current_test(std::tuple const& param) + { + constexpr bool renumber = false; + auto [edge_triangle_count_usecase, input_usecase] = param; + raft::handle_t handle{}; + HighResTimer hr_timer{}; + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("SG Construct graph"); + } + + auto [graph, edge_weight, d_renumber_map_labels] = + cugraph::test::construct_graph( + handle, input_usecase, edge_triangle_count_usecase.test_weighted_, renumber, true, true); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + auto graph_view = graph.view(); + + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); + rmm::device_uvector edgelist_dsts(0, handle.get_stream()); + std::optional> opt_wgt_v{std::nullopt}; + + std::tie(edgelist_srcs, edgelist_dsts, std::ignore, std::ignore) = cugraph::decompress_to_edgelist( + handle, + graph_view, + edge_weight ? 
std::make_optional((*edge_weight).view()) : std::nullopt, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}); + + auto d_edge_triangle_counts = cugraph::edge_triangle_count( + handle, + graph_view, + raft::device_span(edgelist_srcs.data(), edgelist_srcs.size()), + raft::device_span(edgelist_dsts.data(), edgelist_dsts.size())); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("EdgeTriangleCount"); + } + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + if (edge_triangle_count_usecase.check_correctness_) { + std::optional> modified_graph{std::nullopt}; + std::vector h_srcs(edgelist_srcs.size()); + std::vector h_dsts(edgelist_dsts.size()); + std::tie(h_srcs, h_dsts, std::ignore) = cugraph::test::graph_to_host_coo( + handle, + graph_view, + edge_weight ? std::make_optional((*edge_weight).view()) : std::nullopt, + std::optional>(std::nullopt)); + + auto h_cugraph_edge_triangle_counts = cugraph::test::to_host(handle, d_edge_triangle_counts); + + auto h_reference_edge_triangle_counts = + edge_triangle_count_reference( + h_srcs, h_dsts); + + for (size_t i = 0; i < h_srcs.size(); ++i) { + ASSERT_EQ(h_cugraph_edge_triangle_counts[i], h_reference_edge_triangle_counts[i]) + << "Edge triangle count values do not match with the reference values."; + } + } + + + } +}; + +using Tests_EdgeTriangleCount_File = Tests_EdgeTriangleCount; +using Tests_EdgeTriangleCount_Rmat = Tests_EdgeTriangleCount; + +TEST_P(Tests_EdgeTriangleCount_File, CheckInt32Int32Float) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam())); +} +TEST_P(Tests_EdgeTriangleCount_Rmat, CheckInt32Int32Float) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); +} +TEST_P(Tests_EdgeTriangleCount_File, CheckInt64Int64Float) +{ + 
run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam())); +} +TEST_P(Tests_EdgeTriangleCount_Rmat, CheckInt64Int64Float) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); +} + + +INSTANTIATE_TEST_SUITE_P( + simple_test, + Tests_EdgeTriangleCount_File, + ::testing::Combine( + // enable correctness checks + ::testing::Values(EdgeTriangleCount_Usecase{false, true}, + EdgeTriangleCount_Usecase{true, true} + ), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); + +INSTANTIATE_TEST_SUITE_P(rmat_small_test, + Tests_EdgeTriangleCount_Rmat, + // enable correctness checks + ::testing::Combine(::testing::Values(EdgeTriangleCount_Usecase{false, true}, + EdgeTriangleCount_Usecase{true, true}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 10, 16, 0.57, 0.19, 0.19, 0, true, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_EdgeTriangleCount_Rmat, + // disable correctness checks for large graphs + // FIXME: High memory footprint. Perform nbr_intersection in chunks. 
+ ::testing::Combine( + ::testing::Values(EdgeTriangleCount_Usecase{false, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(20, 16, 0.57, 0.19, 0.19, 0, true, false)))); + +CUGRAPH_TEST_PROGRAM_MAIN() From da330b344f0264c3673d395cff15fe64db1a3332 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sat, 4 May 2024 03:44:41 -0700 Subject: [PATCH 04/93] move edge triangle count to the stable API --- cpp/include/cugraph/algorithms.hpp | 10 + .../community/edge_triangle_count_impl.cuh | 11 +- cpp/src/community/k_truss_impl.cuh | 199 +++++++++--------- cpp/tests/CMakeLists.txt | 5 +- 4 files changed, 121 insertions(+), 104 deletions(-) diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index 0caa151daac..3681c3417ab 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -2007,6 +2007,16 @@ void triangle_count(raft::handle_t const& handle, raft::device_span counts, bool do_expensive_check = false); +//template // FIXME: is store_transposed + +template +rmm::device_uvector edge_triangle_count( + raft::handle_t const& handle, + graph_view_t const& graph_view, + raft::device_span edgelist_srcs, + raft::device_span edgelist_dsts); + + /* * @brief Compute K-Truss. * diff --git a/cpp/src/community/edge_triangle_count_impl.cuh b/cpp/src/community/edge_triangle_count_impl.cuh index e83a86f5de2..1218682dbae 100644 --- a/cpp/src/community/edge_triangle_count_impl.cuh +++ b/cpp/src/community/edge_triangle_count_impl.cuh @@ -90,11 +90,10 @@ std::enable_if_t> edge_triangle_count_im thrust::sort(handle.get_thrust_policy(), edge_first, edge_first + edgelist_srcs.size()); - auto approx_edges_to_intersect_per_iteration = - static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 20); + size_t approx_edges_to_intersect_per_iteration = + static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); auto num_chunks = ((edgelist_srcs.size() % approx_edges_to_intersect_per_iteration) == 0) ? 
(edgelist_srcs.size() / approx_edges_to_intersect_per_iteration) : (edgelist_srcs.size() / approx_edges_to_intersect_per_iteration) + 1; - size_t prev_chunk_size = 0; auto num_edges = edgelist_srcs.size(); rmm::device_uvector num_triangles(edgelist_srcs.size(), handle.get_stream()); @@ -103,10 +102,8 @@ std::enable_if_t> edge_triangle_count_im thrust::fill(handle.get_thrust_policy(), num_triangles.begin(), num_triangles.end(), 0); for (size_t i = 0; i < num_chunks; ++i) { - auto chunk_size = std::min(approx_edges_to_intersect_per_iteration, num_edges); num_edges -= chunk_size; - // Perform 'nbr_intersection' in chunks to reduce peak memory. auto [intersection_offsets, intersection_indices] = detail::nbr_intersection(handle, @@ -168,10 +165,10 @@ std::enable_if_t> edge_triangle_count_im } // namespace detail -template +template rmm::device_uvector edge_triangle_count( raft::handle_t const& handle, - graph_view_t const& graph_view, + graph_view_t const& graph_view, raft::device_span edgelist_srcs, raft::device_span edgelist_dsts) { diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index fe1122581ba..adf3203855c 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -39,14 +39,6 @@ namespace cugraph { -// FIXME : This will be deleted once edge_triangle_count becomes public -template -rmm::device_uvector edge_triangle_count( - raft::handle_t const& handle, - graph_view_t const& graph_view, - raft::device_span edgelist_srcs, - raft::device_span edgelist_dsts); - template struct unroll_edge { size_t num_valid_edges{}; @@ -442,6 +434,7 @@ struct extract_low_to_high_degree_edges_t { template struct generate_p_r_or_q_r_from_p_q { + size_t chunk_start{}; raft::device_span intersection_offsets{}; raft::device_span intersection_indices{}; raft::device_span invalid_srcs{}; @@ -454,10 +447,10 @@ struct generate_p_r_or_q_r_from_p_q { auto idx = thrust::distance(intersection_offsets.begin() + 1, itr); if 
constexpr (generate_p_r) { - return thrust::make_tuple(invalid_srcs[idx], intersection_indices[i]); + return thrust::make_tuple(invalid_srcs[chunk_start + idx], intersection_indices[i]); } else { - return thrust::make_tuple(invalid_dsts[idx], intersection_indices[i]); + return thrust::make_tuple(invalid_dsts[chunk_start + idx], intersection_indices[i]); } } }; @@ -682,7 +675,7 @@ k_truss(raft::handle_t const& handle, std::optional>{std::nullopt}, std::optional>(std::nullopt)); - auto num_triangles = edge_triangle_count( + auto num_triangles = edge_triangle_count( handle, cur_graph_view, raft::device_span(edgelist_srcs.data(), edgelist_srcs.size()), @@ -730,92 +723,108 @@ k_truss(raft::handle_t const& handle, // nbr_intersection requires the edges to be sort by 'src' // sort the invalid edges by src for nbr intersection - thrust::sort_by_key(handle.get_thrust_policy(), - edge_first + num_valid_edges, - edge_first + edgelist_srcs.size(), - num_triangles.begin() + num_valid_edges); - - auto [intersection_offsets, intersection_indices] = - detail::nbr_intersection(handle, - cur_graph_view, - cugraph::edge_dummy_property_t{}.view(), - edge_first + num_valid_edges, - edge_first + edgelist_srcs.size(), - std::array{true, true}, - do_expensive_check); - - // Update the number of triangles of each (p, q) edges by looking at their intersection - // size. 
- thrust::for_each( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_invalid_edges), - [num_triangles = - raft::device_span(num_triangles.data() + num_valid_edges, num_invalid_edges), - intersection_offsets = raft::device_span( - intersection_offsets.data(), intersection_offsets.size())] __device__(auto i) { - num_triangles[i] -= intersection_offsets[i + 1] - intersection_offsets[i]; + size_t approx_edges_to_intersect_per_iteration = + static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); + + size_t prev_chunk_size = 0; + size_t chunk_num_invalid_edges = num_invalid_edges; + auto num_chunks = ((num_invalid_edges % approx_edges_to_intersect_per_iteration) == 0) ? (num_invalid_edges / approx_edges_to_intersect_per_iteration) : (num_invalid_edges / approx_edges_to_intersect_per_iteration) + 1; + + for (size_t i = 0; i < num_chunks; ++i) { + + auto chunk_size = std::min(approx_edges_to_intersect_per_iteration, chunk_num_invalid_edges); + thrust::sort_by_key(handle.get_thrust_policy(), + edge_first + num_valid_edges, + edge_first + edgelist_srcs.size(), + num_triangles.begin() + num_valid_edges); + + auto [intersection_offsets, intersection_indices] = + detail::nbr_intersection(handle, + cur_graph_view, + cugraph::edge_dummy_property_t{}.view(), + edge_first + num_valid_edges + prev_chunk_size, + edge_first + num_valid_edges + prev_chunk_size + chunk_size, + std::array{true, true}, + do_expensive_check); + + + // Update the number of triangles of each (p, q) edges by looking at their intersection + // size. 
+ thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(chunk_size), + [chunk_start = prev_chunk_size, + num_triangles = + raft::device_span(num_triangles.data() + num_valid_edges, num_invalid_edges), + intersection_offsets = raft::device_span( + intersection_offsets.data(), intersection_offsets.size())] __device__(auto i) { + num_triangles[chunk_start + i] -= (intersection_offsets[i + 1] - intersection_offsets[i]); }); - - // FIXME: Find a way to not have to maintain a dataframe_buffer - auto vertex_pair_buffer_p_r_edge_p_q = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); - - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), - get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q), - generate_p_r_or_q_r_from_p_q{ - raft::device_span(intersection_offsets.data(), intersection_offsets.size()), - raft::device_span(intersection_indices.data(), - intersection_indices.size()), - raft::device_span(edgelist_srcs.data() + num_valid_edges, num_invalid_edges), - raft::device_span(edgelist_dsts.data() + num_valid_edges, num_invalid_edges)}); - - auto vertex_pair_buffer_q_r_edge_p_q = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), - get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q), - generate_p_r_or_q_r_from_p_q{ - raft::device_span(intersection_offsets.data(), intersection_offsets.size()), - raft::device_span(intersection_indices.data(), - intersection_indices.size()), - raft::device_span(edgelist_srcs.data() + num_valid_edges, num_invalid_edges), - raft::device_span(edgelist_dsts.data() + num_valid_edges, num_invalid_edges)}); - - // Unrolling the edges require the edges to be sorted by destination - // re-sort the invalid edges by 'dst' - 
thrust::sort_by_key(handle.get_thrust_policy(), + + // FIXME: Find a way to not have to maintain a dataframe_buffer + auto vertex_pair_buffer_p_r_edge_p_q = + allocate_dataframe_buffer>(intersection_indices.size(), + handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), + get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q), + generate_p_r_or_q_r_from_p_q{ + prev_chunk_size, + raft::device_span(intersection_offsets.data(), intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + raft::device_span(edgelist_srcs.data() + num_valid_edges, num_invalid_edges), + raft::device_span(edgelist_dsts.data() + num_valid_edges, num_invalid_edges)}); + + auto vertex_pair_buffer_q_r_edge_p_q = + allocate_dataframe_buffer>(intersection_indices.size(), + handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), + get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q), + generate_p_r_or_q_r_from_p_q{ + prev_chunk_size, + raft::device_span(intersection_offsets.data(), intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + raft::device_span(edgelist_srcs.data() + num_valid_edges, num_invalid_edges), + raft::device_span(edgelist_dsts.data() + num_valid_edges, num_invalid_edges)}); + + // Unrolling the edges require the edges to be sorted by destination + // re-sort the invalid edges by 'dst' + thrust::sort_by_key(handle.get_thrust_policy(), + transposed_edge_first + num_valid_edges, + transposed_edge_first + edgelist_srcs.size(), + num_triangles.begin() + num_valid_edges); + + thrust::for_each(handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(intersection_indices.size()), + unroll_edge{ + num_valid_edges, + raft::device_span(num_triangles.data(), 
num_triangles.size()), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), + transposed_edge_first, transposed_edge_first + num_valid_edges, - transposed_edge_first + edgelist_srcs.size(), - num_triangles.begin() + num_valid_edges); - - thrust::for_each(handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(intersection_indices.size()), - unroll_edge{ - num_valid_edges, - raft::device_span(num_triangles.data(), num_triangles.size()), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), - transposed_edge_first, - transposed_edge_first + num_valid_edges, - transposed_edge_first + edgelist_srcs.size()}); - - thrust::for_each(handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(intersection_indices.size()), - unroll_edge{ - num_valid_edges, - raft::device_span(num_triangles.data(), num_triangles.size()), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), - transposed_edge_first, - transposed_edge_first + num_valid_edges, - transposed_edge_first + edgelist_srcs.size()}); - + transposed_edge_first + edgelist_srcs.size()}); + + thrust::for_each(handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(intersection_indices.size()), + unroll_edge{ + num_valid_edges, + raft::device_span(num_triangles.data(), num_triangles.size()), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), + transposed_edge_first, + transposed_edge_first + num_valid_edges, + transposed_edge_first + edgelist_srcs.size()}); + + prev_chunk_size += chunk_size; + chunk_num_invalid_edges -= chunk_size; + } // case 2: unroll (q, r) // For each (q, r) edges to unroll, find the incoming edges to 'r' let's say from 'p' and // create the pair (p, q) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 2dcda796f9c..d65eef3c5ae 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -483,8 +483,9 @@ 
ConfigureTest(K_CORE_TEST cores/k_core_test.cpp) ConfigureTest(K_TRUSS_TEST community/k_truss_test.cpp) ################################################################################################### -# - Triangle Count tests -------------------------------------------------------------------------- -ConfigureTest(TRIANGLE_COUNT_TEST community/triangle_count_test.cpp) +# - Edge Triangle Count tests -------------------------------------------------------------------------- +ConfigureTest(EDGE_TRIANGLE_COUNT_TEST community/edge_triangle_count_test.cpp) + ################################################################################################### # - K-hop Neighbors tests ------------------------------------------------------------------------- From 913283d45c8a697855b20a5c730149870ea24a6a Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sat, 4 May 2024 03:52:09 -0700 Subject: [PATCH 05/93] fix style --- cpp/include/cugraph/algorithms.hpp | 2 - .../community/edge_triangle_count_impl.cuh | 61 ++++----- cpp/src/community/k_truss_impl.cuh | 124 +++++++++--------- .../community/edge_triangle_count_test.cpp | 96 +++++++------- 4 files changed, 139 insertions(+), 144 deletions(-) diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index 3681c3417ab..3b5824bc5ff 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -2007,7 +2007,6 @@ void triangle_count(raft::handle_t const& handle, raft::device_span counts, bool do_expensive_check = false); -//template // FIXME: is store_transposed template rmm::device_uvector edge_triangle_count( @@ -2016,7 +2015,6 @@ rmm::device_uvector edge_triangle_count( raft::device_span edgelist_srcs, raft::device_span edgelist_dsts); - /* * @brief Compute K-Truss. 
* diff --git a/cpp/src/community/edge_triangle_count_impl.cuh b/cpp/src/community/edge_triangle_count_impl.cuh index 1218682dbae..06b0f61e1a0 100644 --- a/cpp/src/community/edge_triangle_count_impl.cuh +++ b/cpp/src/community/edge_triangle_count_impl.cuh @@ -49,28 +49,22 @@ struct update_edges_p_r_q_r_num_triangles { thrust::seq, intersection_offsets.begin() + 1, intersection_offsets.end(), i); auto idx = thrust::distance(intersection_offsets.begin() + 1, itr); if (edge_first_or_second == 0) { - auto p_r_pair = - thrust::make_tuple(thrust::get<0>(*(edge_first + chunk_start + idx)), intersection_indices[i]); + auto p_r_pair = thrust::make_tuple(thrust::get<0>(*(edge_first + chunk_start + idx)), + intersection_indices[i]); // Find its position in 'edges' auto itr_p_r_p_q = - thrust::lower_bound(thrust::seq, - edge_first, - edge_first + num_edges, - p_r_pair); + thrust::lower_bound(thrust::seq, edge_first, edge_first + num_edges, p_r_pair); assert(*itr_p_r_p_q == p_r_pair); idx = thrust::distance(edge_first, itr_p_r_p_q); } else { - auto p_r_pair = - thrust::make_tuple(thrust::get<1>(*(edge_first + chunk_start + idx)), intersection_indices[i]); + auto p_r_pair = thrust::make_tuple(thrust::get<1>(*(edge_first + chunk_start + idx)), + intersection_indices[i]); // Find its position in 'edges' auto itr_p_r_p_q = - thrust::lower_bound(thrust::seq, - edge_first, - edge_first + num_edges, - p_r_pair); + thrust::lower_bound(thrust::seq, edge_first, edge_first + num_edges, p_r_pair); assert(*itr_p_r_p_q == p_r_pair); idx = thrust::distance(edge_first, itr_p_r_p_q); } @@ -90,12 +84,14 @@ std::enable_if_t> edge_triangle_count_im thrust::sort(handle.get_thrust_policy(), edge_first, edge_first + edgelist_srcs.size()); - size_t approx_edges_to_intersect_per_iteration = - static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); + size_t approx_edges_to_intersect_per_iteration = + static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); - auto 
num_chunks = ((edgelist_srcs.size() % approx_edges_to_intersect_per_iteration) == 0) ? (edgelist_srcs.size() / approx_edges_to_intersect_per_iteration) : (edgelist_srcs.size() / approx_edges_to_intersect_per_iteration) + 1; + auto num_chunks = ((edgelist_srcs.size() % approx_edges_to_intersect_per_iteration) == 0) + ? (edgelist_srcs.size() / approx_edges_to_intersect_per_iteration) + : (edgelist_srcs.size() / approx_edges_to_intersect_per_iteration) + 1; size_t prev_chunk_size = 0; - auto num_edges = edgelist_srcs.size(); + auto num_edges = edgelist_srcs.size(); rmm::device_uvector num_triangles(edgelist_srcs.size(), handle.get_stream()); // Need to ensure that the vector has its values initialized to 0 before incrementing @@ -107,26 +103,25 @@ std::enable_if_t> edge_triangle_count_im // Perform 'nbr_intersection' in chunks to reduce peak memory. auto [intersection_offsets, intersection_indices] = detail::nbr_intersection(handle, - graph_view, - cugraph::edge_dummy_property_t{}.view(), - edge_first + prev_chunk_size, - edge_first + prev_chunk_size + chunk_size, - std::array{true, true}, - false /*FIXME: pass 'do_expensive_check' as argument*/); + graph_view, + cugraph::edge_dummy_property_t{}.view(), + edge_first + prev_chunk_size, + edge_first + prev_chunk_size + chunk_size, + std::array{true, true}, + false /*FIXME: pass 'do_expensive_check' as argument*/); // Update the number of triangles of each (p, q) edges by looking at their intersection // size thrust::for_each( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(chunk_size), - [chunk_start = prev_chunk_size, - num_triangles = - raft::device_span(num_triangles.data(), num_triangles.size()), - intersection_offsets = raft::device_span( - intersection_offsets.data(), intersection_offsets.size())] __device__(auto i) { - num_triangles[chunk_start + i] += (intersection_offsets[i + 1] - intersection_offsets[i]); - }); + handle.get_thrust_policy(), + 
thrust::make_counting_iterator(0), + thrust::make_counting_iterator(chunk_size), + [chunk_start = prev_chunk_size, + num_triangles = raft::device_span(num_triangles.data(), num_triangles.size()), + intersection_offsets = raft::device_span( + intersection_offsets.data(), intersection_offsets.size())] __device__(auto i) { + num_triangles[chunk_start + i] += (intersection_offsets[i + 1] - intersection_offsets[i]); + }); // Given intersection offsets and indices that are used to update the number of // triangles of (p, q) edges where `r`s are the intersection indices, update @@ -156,7 +151,7 @@ std::enable_if_t> edge_triangle_count_im raft::device_span(intersection_indices.data(), intersection_indices.size()), raft::device_span(num_triangles.data(), num_triangles.size()), edge_first}); - + prev_chunk_size += chunk_size; } diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index adf3203855c..86b3c27cc4d 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -526,7 +526,7 @@ k_truss(raft::handle_t const& handle, renumber_map ? 
std::make_optional>(cur_graph_view.vertex_partition_range_lasts()) : std::nullopt; - + rmm::device_uvector core_numbers(cur_graph_view.number_of_vertices(), handle.get_stream()); core_number( @@ -534,16 +534,14 @@ k_truss(raft::handle_t const& handle, raft::device_span core_number_span{core_numbers.data(), core_numbers.size()}; - rmm::device_uvector srcs{0, handle.get_stream()}; rmm::device_uvector dsts{0, handle.get_stream()}; - std::tie(srcs, dsts, wgts) = - k_core(handle, - cur_graph_view, - edge_weight_view, - size_t{k - 1}, - std::make_optional(k_core_degree_type_t::OUT), - std::make_optional(core_number_span)); + std::tie(srcs, dsts, wgts) = k_core(handle, + cur_graph_view, + edge_weight_view, + size_t{k - 1}, + std::make_optional(k_core_degree_type_t::OUT), + std::make_optional(core_number_span)); if constexpr (multi_gpu) { std::tie(srcs, dsts, std::ignore, std::ignore, std::ignore) = @@ -566,7 +564,7 @@ k_truss(raft::handle_t const& handle, std::nullopt, std::nullopt, cugraph::graph_properties_t{true, graph_view.is_multigraph()}, - false); // FIXME: Renumbering should not be hardcoded. + false); // FIXME: Renumbering should not be hardcoded. modified_graph_view = (*modified_graph).view(); @@ -725,28 +723,29 @@ k_truss(raft::handle_t const& handle, // sort the invalid edges by src for nbr intersection size_t approx_edges_to_intersect_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); - - size_t prev_chunk_size = 0; + + size_t prev_chunk_size = 0; size_t chunk_num_invalid_edges = num_invalid_edges; - auto num_chunks = ((num_invalid_edges % approx_edges_to_intersect_per_iteration) == 0) ? (num_invalid_edges / approx_edges_to_intersect_per_iteration) : (num_invalid_edges / approx_edges_to_intersect_per_iteration) + 1; + auto num_chunks = ((num_invalid_edges % approx_edges_to_intersect_per_iteration) == 0) + ? 
(num_invalid_edges / approx_edges_to_intersect_per_iteration) + : (num_invalid_edges / approx_edges_to_intersect_per_iteration) + 1; for (size_t i = 0; i < num_chunks; ++i) { - - auto chunk_size = std::min(approx_edges_to_intersect_per_iteration, chunk_num_invalid_edges); + auto chunk_size = + std::min(approx_edges_to_intersect_per_iteration, chunk_num_invalid_edges); thrust::sort_by_key(handle.get_thrust_policy(), edge_first + num_valid_edges, edge_first + edgelist_srcs.size(), - num_triangles.begin() + num_valid_edges); - + num_triangles.begin() + num_valid_edges); + auto [intersection_offsets, intersection_indices] = - detail::nbr_intersection(handle, - cur_graph_view, - cugraph::edge_dummy_property_t{}.view(), - edge_first + num_valid_edges + prev_chunk_size, - edge_first + num_valid_edges + prev_chunk_size + chunk_size, - std::array{true, true}, - do_expensive_check); - + detail::nbr_intersection(handle, + cur_graph_view, + cugraph::edge_dummy_property_t{}.view(), + edge_first + num_valid_edges + prev_chunk_size, + edge_first + num_valid_edges + prev_chunk_size + chunk_size, + std::array{true, true}, + do_expensive_check); // Update the number of triangles of each (p, q) edges by looking at their intersection // size. 
@@ -756,42 +755,47 @@ k_truss(raft::handle_t const& handle, thrust::make_counting_iterator(chunk_size), [chunk_start = prev_chunk_size, num_triangles = - raft::device_span(num_triangles.data() + num_valid_edges, num_invalid_edges), - intersection_offsets = raft::device_span( - intersection_offsets.data(), intersection_offsets.size())] __device__(auto i) { - num_triangles[chunk_start + i] -= (intersection_offsets[i + 1] - intersection_offsets[i]); - }); - + raft::device_span(num_triangles.data() + num_valid_edges, num_invalid_edges), + intersection_offsets = raft::device_span( + intersection_offsets.data(), intersection_offsets.size())] __device__(auto i) { + num_triangles[chunk_start + i] -= + (intersection_offsets[i + 1] - intersection_offsets[i]); + }); + // FIXME: Find a way to not have to maintain a dataframe_buffer auto vertex_pair_buffer_p_r_edge_p_q = allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); + handle.get_stream()); thrust::tabulate( handle.get_thrust_policy(), get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q), generate_p_r_or_q_r_from_p_q{ prev_chunk_size, - raft::device_span(intersection_offsets.data(), intersection_offsets.size()), + raft::device_span(intersection_offsets.data(), + intersection_offsets.size()), raft::device_span(intersection_indices.data(), intersection_indices.size()), raft::device_span(edgelist_srcs.data() + num_valid_edges, num_invalid_edges), - raft::device_span(edgelist_dsts.data() + num_valid_edges, num_invalid_edges)}); + raft::device_span(edgelist_dsts.data() + num_valid_edges, + num_invalid_edges)}); auto vertex_pair_buffer_q_r_edge_p_q = allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); + handle.get_stream()); thrust::tabulate( handle.get_thrust_policy(), get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q), 
generate_p_r_or_q_r_from_p_q{ prev_chunk_size, - raft::device_span(intersection_offsets.data(), intersection_offsets.size()), + raft::device_span(intersection_offsets.data(), + intersection_offsets.size()), raft::device_span(intersection_indices.data(), intersection_indices.size()), raft::device_span(edgelist_srcs.data() + num_valid_edges, num_invalid_edges), - raft::device_span(edgelist_dsts.data() + num_valid_edges, num_invalid_edges)}); + raft::device_span(edgelist_dsts.data() + num_valid_edges, + num_invalid_edges)}); // Unrolling the edges require the edges to be sorted by destination // re-sort the invalid edges by 'dst' @@ -799,32 +803,32 @@ k_truss(raft::handle_t const& handle, transposed_edge_first + num_valid_edges, transposed_edge_first + edgelist_srcs.size(), num_triangles.begin() + num_valid_edges); - + thrust::for_each(handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(intersection_indices.size()), - unroll_edge{ - num_valid_edges, - raft::device_span(num_triangles.data(), num_triangles.size()), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), - transposed_edge_first, - transposed_edge_first + num_valid_edges, - transposed_edge_first + edgelist_srcs.size()}); + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(intersection_indices.size()), + unroll_edge{ + num_valid_edges, + raft::device_span(num_triangles.data(), num_triangles.size()), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), + transposed_edge_first, + transposed_edge_first + num_valid_edges, + transposed_edge_first + edgelist_srcs.size()}); thrust::for_each(handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(intersection_indices.size()), - unroll_edge{ - num_valid_edges, - raft::device_span(num_triangles.data(), num_triangles.size()), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), - transposed_edge_first, - transposed_edge_first + 
num_valid_edges, - transposed_edge_first + edgelist_srcs.size()}); - - prev_chunk_size += chunk_size; - chunk_num_invalid_edges -= chunk_size; - } + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(intersection_indices.size()), + unroll_edge{ + num_valid_edges, + raft::device_span(num_triangles.data(), num_triangles.size()), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), + transposed_edge_first, + transposed_edge_first + num_valid_edges, + transposed_edge_first + edgelist_srcs.size()}); + + prev_chunk_size += chunk_size; + chunk_num_invalid_edges -= chunk_size; + } // case 2: unroll (q, r) // For each (q, r) edges to unroll, find the incoming edges to 'r' let's say from 'p' and // create the pair (p, q) diff --git a/cpp/tests/community/edge_triangle_count_test.cpp b/cpp/tests/community/edge_triangle_count_test.cpp index 85d9f912941..864d855753d 100644 --- a/cpp/tests/community/edge_triangle_count_test.cpp +++ b/cpp/tests/community/edge_triangle_count_test.cpp @@ -45,7 +45,8 @@ struct EdgeTriangleCount_Usecase { }; template -class Tests_EdgeTriangleCount : public ::testing::TestWithParam> { +class Tests_EdgeTriangleCount + : public ::testing::TestWithParam> { public: Tests_EdgeTriangleCount() {} @@ -69,57 +70,57 @@ class Tests_EdgeTriangleCount : public ::testing::TestWithParam - std::vector - edge_triangle_count_reference(std::vector h_srcs, - std::vector h_dsts) + std::vector edge_triangle_count_reference(std::vector h_srcs, + std::vector h_dsts) { std::vector edge_triangle_counts(h_srcs.size()); std::uninitialized_fill(edge_triangle_counts.begin(), edge_triangle_counts.end(), 0); - - for (int i = 0; i < h_srcs.size(); ++i) { // edge centric implementation + + for (int i = 0; i < h_srcs.size(); ++i) { // edge centric implementation // for each edge, find the intersection - auto src = h_srcs[i]; - auto dst = h_dsts[i]; + auto src = h_srcs[i]; + auto dst = h_dsts[i]; auto it_src_start = std::lower_bound(h_srcs.begin(), 
h_srcs.end(), src); - auto src_start = std::distance(h_srcs.begin(), it_src_start); - - auto src_end = src_start + std::distance(it_src_start, std::upper_bound(it_src_start, h_srcs.end(), src)); + auto src_start = std::distance(h_srcs.begin(), it_src_start); + + auto src_end = + src_start + std::distance(it_src_start, std::upper_bound(it_src_start, h_srcs.end(), src)); auto it_dst_start = std::lower_bound(h_srcs.begin(), h_srcs.end(), dst); - auto dst_start = std::distance(h_srcs.begin(), it_dst_start); - auto dst_end = dst_start + std::distance(it_dst_start, std::upper_bound(it_dst_start, h_srcs.end(), dst)); + auto dst_start = std::distance(h_srcs.begin(), it_dst_start); + auto dst_end = + dst_start + std::distance(it_dst_start, std::upper_bound(it_dst_start, h_srcs.end(), dst)); - std::set nbr_intersection; + std::set nbr_intersection; std::set_intersection(h_dsts.begin() + src_start, h_dsts.begin() + src_end, h_dsts.begin() + dst_start, h_dsts.begin() + dst_end, - std::inserter(nbr_intersection, nbr_intersection.end()) - ); + std::inserter(nbr_intersection, nbr_intersection.end())); // Find the supporting edges - for(auto v: nbr_intersection){ - auto it_edge = std::lower_bound(h_dsts.begin() + src_start, h_dsts.begin() + src_end, v); + for (auto v : nbr_intersection) { + auto it_edge = std::lower_bound(h_dsts.begin() + src_start, h_dsts.begin() + src_end, v); auto idx_edge = std::distance(h_dsts.begin(), it_edge); edge_triangle_counts[idx_edge] += 1; - it_edge = std::lower_bound(h_dsts.begin() + dst_start, h_dsts.begin() + dst_end, v); + it_edge = std::lower_bound(h_dsts.begin() + dst_start, h_dsts.begin() + dst_end, v); idx_edge = std::distance(h_dsts.begin(), it_edge); } } - std::transform(edge_triangle_counts.begin(), edge_triangle_counts.end(), edge_triangle_counts.begin(), [](auto count) { - return count * 3; - }); + std::transform(edge_triangle_counts.begin(), + edge_triangle_counts.end(), + edge_triangle_counts.begin(), + [](auto count) { return count * 
3; }); return std::move(edge_triangle_counts); } - template - void run_current_test(std::tuple const& param) + void run_current_test( + std::tuple const& param) { - constexpr bool renumber = false; + constexpr bool renumber = false; auto [edge_triangle_count_usecase, input_usecase] = param; raft::handle_t handle{}; HighResTimer hr_timer{}; @@ -143,15 +144,16 @@ class Tests_EdgeTriangleCount : public ::testing::TestWithParam edgelist_srcs(0, handle.get_stream()); rmm::device_uvector edgelist_dsts(0, handle.get_stream()); - std::optional> opt_wgt_v{std::nullopt}; + std::optional> opt_wgt_v{std::nullopt}; + + std::tie(edgelist_srcs, edgelist_dsts, std::ignore, std::ignore) = + cugraph::decompress_to_edgelist( + handle, + graph_view, + edge_weight ? std::make_optional((*edge_weight).view()) : std::nullopt, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}); - std::tie(edgelist_srcs, edgelist_dsts, std::ignore, std::ignore) = cugraph::decompress_to_edgelist( - handle, - graph_view, - edge_weight ? 
std::make_optional((*edge_weight).view()) : std::nullopt, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}); - auto d_edge_triangle_counts = cugraph::edge_triangle_count( handle, graph_view, @@ -182,16 +184,13 @@ class Tests_EdgeTriangleCount : public ::testing::TestWithParam( - h_srcs, h_dsts); - + edge_triangle_count_reference(h_srcs, h_dsts); + for (size_t i = 0; i < h_srcs.size(); ++i) { ASSERT_EQ(h_cugraph_edge_triangle_counts[i], h_reference_edge_triangle_counts[i]) << "Edge triangle count values do not match with the reference values."; } } - - } }; @@ -219,25 +218,24 @@ TEST_P(Tests_EdgeTriangleCount_Rmat, CheckInt64Int64Float) override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); } - INSTANTIATE_TEST_SUITE_P( simple_test, Tests_EdgeTriangleCount_File, ::testing::Combine( // enable correctness checks ::testing::Values(EdgeTriangleCount_Usecase{false, true}, - EdgeTriangleCount_Usecase{true, true} - ), + EdgeTriangleCount_Usecase{true, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); -INSTANTIATE_TEST_SUITE_P(rmat_small_test, - Tests_EdgeTriangleCount_Rmat, - // enable correctness checks - ::testing::Combine(::testing::Values(EdgeTriangleCount_Usecase{false, true}, - EdgeTriangleCount_Usecase{true, true}), - ::testing::Values(cugraph::test::Rmat_Usecase( - 10, 16, 0.57, 0.19, 0.19, 0, true, false)))); +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_EdgeTriangleCount_Rmat, + // enable correctness checks + ::testing::Combine( + ::testing::Values(EdgeTriangleCount_Usecase{false, true}, + EdgeTriangleCount_Usecase{true, true}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); INSTANTIATE_TEST_SUITE_P( rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with From 9d2d4f7feafa6de28cbf4987b4d4e4dc97bca26d Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sun, 5 May 
2024 01:16:27 -0700 Subject: [PATCH 06/93] update function definition --- cpp/include/cugraph/algorithms.hpp | 4 +--- .../community/edge_triangle_count_impl.cuh | 20 ++++++++++++------- cpp/src/community/edge_triangle_count_sg.cu | 12 +++-------- cpp/src/community/k_truss_impl.cuh | 4 +--- .../community/edge_triangle_count_test.cpp | 6 ++---- 5 files changed, 20 insertions(+), 26 deletions(-) diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index 3b5824bc5ff..497904d9025 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -2011,9 +2011,7 @@ void triangle_count(raft::handle_t const& handle, template rmm::device_uvector edge_triangle_count( raft::handle_t const& handle, - graph_view_t const& graph_view, - raft::device_span edgelist_srcs, - raft::device_span edgelist_dsts); + graph_view_t const& graph_view); /* * @brief Compute K-Truss. diff --git a/cpp/src/community/edge_triangle_count_impl.cuh b/cpp/src/community/edge_triangle_count_impl.cuh index 06b0f61e1a0..31aef6852d5 100644 --- a/cpp/src/community/edge_triangle_count_impl.cuh +++ b/cpp/src/community/edge_triangle_count_impl.cuh @@ -76,10 +76,18 @@ struct update_edges_p_r_q_r_num_triangles { template std::enable_if_t> edge_triangle_count_impl( raft::handle_t const& handle, - graph_view_t const& graph_view, - raft::device_span edgelist_srcs, - raft::device_span edgelist_dsts) + graph_view_t const& graph_view) { + using weight_t = float; + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); + rmm::device_uvector edgelist_dsts(0, handle.get_stream()); + std::tie(edgelist_srcs, edgelist_dsts, std::ignore, std::ignore) = decompress_to_edgelist( + handle, + graph_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::optional>(std::nullopt)); + auto edge_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()); thrust::sort(handle.get_thrust_policy(), edge_first, edge_first + 
edgelist_srcs.size()); @@ -163,11 +171,9 @@ std::enable_if_t> edge_triangle_count_im template rmm::device_uvector edge_triangle_count( raft::handle_t const& handle, - graph_view_t const& graph_view, - raft::device_span edgelist_srcs, - raft::device_span edgelist_dsts) + graph_view_t const& graph_view) { - return detail::edge_triangle_count_impl(handle, graph_view, edgelist_srcs, edgelist_dsts); + return detail::edge_triangle_count_impl(handle, graph_view); } } // namespace cugraph diff --git a/cpp/src/community/edge_triangle_count_sg.cu b/cpp/src/community/edge_triangle_count_sg.cu index c4b7e71b967..4257f61bb93 100644 --- a/cpp/src/community/edge_triangle_count_sg.cu +++ b/cpp/src/community/edge_triangle_count_sg.cu @@ -20,20 +20,14 @@ namespace cugraph { // SG instantiation template rmm::device_uvector edge_triangle_count( raft::handle_t const& handle, - cugraph::graph_view_t const& graph_view, - raft::device_span edgelist_srcs, - raft::device_span edgelist_dsts); + cugraph::graph_view_t const& graph_view); template rmm::device_uvector edge_triangle_count( raft::handle_t const& handle, - cugraph::graph_view_t const& graph_view, - raft::device_span edgelist_srcs, - raft::device_span edgelist_dsts); + cugraph::graph_view_t const& graph_view); template rmm::device_uvector edge_triangle_count( raft::handle_t const& handle, - cugraph::graph_view_t const& graph_view, - raft::device_span edgelist_srcs, - raft::device_span edgelist_dsts); + cugraph::graph_view_t const& graph_view); } // namespace cugraph diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 86b3c27cc4d..8fadb11a054 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -675,9 +675,7 @@ k_truss(raft::handle_t const& handle, auto num_triangles = edge_triangle_count( handle, - cur_graph_view, - raft::device_span(edgelist_srcs.data(), edgelist_srcs.size()), - raft::device_span(edgelist_dsts.data(), edgelist_dsts.size())); + 
cur_graph_view); auto transposed_edge_first = thrust::make_zip_iterator(edgelist_dsts.begin(), edgelist_srcs.begin()); diff --git a/cpp/tests/community/edge_triangle_count_test.cpp b/cpp/tests/community/edge_triangle_count_test.cpp index 864d855753d..83a5fca5159 100644 --- a/cpp/tests/community/edge_triangle_count_test.cpp +++ b/cpp/tests/community/edge_triangle_count_test.cpp @@ -152,13 +152,11 @@ class Tests_EdgeTriangleCount graph_view, edge_weight ? std::make_optional((*edge_weight).view()) : std::nullopt, std::optional>{std::nullopt}, - std::optional>{std::nullopt}); + std::optional>{std::nullopt}); // FIXME: No longer needed auto d_edge_triangle_counts = cugraph::edge_triangle_count( handle, - graph_view, - raft::device_span(edgelist_srcs.data(), edgelist_srcs.size()), - raft::device_span(edgelist_dsts.data(), edgelist_dsts.size())); + graph_view); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement From 2019d99c2e69f5d5395f06004de74ee43a049320 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sun, 5 May 2024 01:17:19 -0700 Subject: [PATCH 07/93] fix style --- cpp/include/cugraph/algorithms.hpp | 4 +--- cpp/src/community/edge_triangle_count_impl.cuh | 15 +++++++-------- cpp/src/community/k_truss_impl.cuh | 4 +--- cpp/tests/community/edge_triangle_count_test.cpp | 7 +++---- 4 files changed, 12 insertions(+), 18 deletions(-) diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index 497904d9025..4618f017ecc 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -2007,11 +2007,9 @@ void triangle_count(raft::handle_t const& handle, raft::device_span counts, bool do_expensive_check = false); - template rmm::device_uvector edge_triangle_count( - raft::handle_t const& handle, - graph_view_t const& graph_view); + raft::handle_t const& handle, graph_view_t const& graph_view); /* * @brief Compute K-Truss. 
diff --git a/cpp/src/community/edge_triangle_count_impl.cuh b/cpp/src/community/edge_triangle_count_impl.cuh index 31aef6852d5..6fdfb874061 100644 --- a/cpp/src/community/edge_triangle_count_impl.cuh +++ b/cpp/src/community/edge_triangle_count_impl.cuh @@ -82,12 +82,12 @@ std::enable_if_t> edge_triangle_count_im rmm::device_uvector edgelist_srcs(0, handle.get_stream()); rmm::device_uvector edgelist_dsts(0, handle.get_stream()); std::tie(edgelist_srcs, edgelist_dsts, std::ignore, std::ignore) = decompress_to_edgelist( - handle, - graph_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - std::optional>(std::nullopt)); - + handle, + graph_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::optional>(std::nullopt)); + auto edge_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()); thrust::sort(handle.get_thrust_policy(), edge_first, edge_first + edgelist_srcs.size()); @@ -170,8 +170,7 @@ std::enable_if_t> edge_triangle_count_im template rmm::device_uvector edge_triangle_count( - raft::handle_t const& handle, - graph_view_t const& graph_view) + raft::handle_t const& handle, graph_view_t const& graph_view) { return detail::edge_triangle_count_impl(handle, graph_view); } diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 8fadb11a054..87184e4da31 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -673,9 +673,7 @@ k_truss(raft::handle_t const& handle, std::optional>{std::nullopt}, std::optional>(std::nullopt)); - auto num_triangles = edge_triangle_count( - handle, - cur_graph_view); + auto num_triangles = edge_triangle_count(handle, cur_graph_view); auto transposed_edge_first = thrust::make_zip_iterator(edgelist_dsts.begin(), edgelist_srcs.begin()); diff --git a/cpp/tests/community/edge_triangle_count_test.cpp b/cpp/tests/community/edge_triangle_count_test.cpp index 83a5fca5159..6af49b7e350 100644 --- 
a/cpp/tests/community/edge_triangle_count_test.cpp +++ b/cpp/tests/community/edge_triangle_count_test.cpp @@ -152,11 +152,10 @@ class Tests_EdgeTriangleCount graph_view, edge_weight ? std::make_optional((*edge_weight).view()) : std::nullopt, std::optional>{std::nullopt}, - std::optional>{std::nullopt}); // FIXME: No longer needed + std::optional>{std::nullopt}); // FIXME: No longer needed - auto d_edge_triangle_counts = cugraph::edge_triangle_count( - handle, - graph_view); + auto d_edge_triangle_counts = + cugraph::edge_triangle_count(handle, graph_view); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement From 701e33d6c09c4f1fc4940cb4c523b4576b558734 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sun, 5 May 2024 06:37:52 -0700 Subject: [PATCH 08/93] update test --- cpp/tests/community/edge_triangle_count_test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/community/edge_triangle_count_test.cpp b/cpp/tests/community/edge_triangle_count_test.cpp index 6af49b7e350..a76000ab172 100644 --- a/cpp/tests/community/edge_triangle_count_test.cpp +++ b/cpp/tests/community/edge_triangle_count_test.cpp @@ -245,6 +245,6 @@ INSTANTIATE_TEST_SUITE_P( // FIXME: High memory footprint. Perform nbr_intersection in chunks. 
::testing::Combine( ::testing::Values(EdgeTriangleCount_Usecase{false, false}), - ::testing::Values(cugraph::test::Rmat_Usecase(20, 16, 0.57, 0.19, 0.19, 0, true, false)))); + ::testing::Values(cugraph::test::Rmat_Usecase(18, 16, 0.57, 0.19, 0.19, 0, true, false)))); CUGRAPH_TEST_PROGRAM_MAIN() From 0d742464fe938be211acc38f7cc77d500abe43f8 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 7 May 2024 07:10:22 -0700 Subject: [PATCH 09/93] return edge_property_t --- cpp/include/cugraph/algorithms.hpp | 9 ++++- .../community/edge_triangle_count_impl.cuh | 32 +++++++++++++++-- cpp/src/community/edge_triangle_count_sg.cu | 9 +++-- cpp/src/community/k_truss_impl.cuh | 35 ++++++++++--------- .../community/edge_triangle_count_test.cpp | 20 +++++------ 5 files changed, 71 insertions(+), 34 deletions(-) diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index 4618f017ecc..76c520c14c3 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -2006,9 +2006,16 @@ void triangle_count(raft::handle_t const& handle, std::optional> vertices, raft::device_span counts, bool do_expensive_check = false); +/* +template +edge_property_t, edge_t> edge_triangle_count( + raft::handle_t const& handle, graph_view_t const& graph_view); +*/ + template -rmm::device_uvector edge_triangle_count( +edge_property_t, edge_t> +edge_triangle_count( raft::handle_t const& handle, graph_view_t const& graph_view); /* diff --git a/cpp/src/community/edge_triangle_count_impl.cuh b/cpp/src/community/edge_triangle_count_impl.cuh index 6fdfb874061..28db2a4ded9 100644 --- a/cpp/src/community/edge_triangle_count_impl.cuh +++ b/cpp/src/community/edge_triangle_count_impl.cuh @@ -18,6 +18,8 @@ #include "detail/graph_partition_utils.cuh" #include "prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh" +#include "prims/transform_e.cuh" +#include "prims/edge_bucket.cuh" #include #include @@ -74,7 +76,7 @@ struct 
update_edges_p_r_q_r_num_triangles { }; template -std::enable_if_t> edge_triangle_count_impl( +std::enable_if_t, edge_t>> edge_triangle_count_impl( raft::handle_t const& handle, graph_view_t const& graph_view) { @@ -163,13 +165,37 @@ std::enable_if_t> edge_triangle_count_im prev_chunk_size += chunk_size; } - return num_triangles; + std::vector> buffer{}; + buffer.push_back(std::move(num_triangles)); + buffer.reserve(num_triangles.size()); + buffer.push_back(std::move(num_triangles)); + + auto buff_counts = + edge_property_t, edge_t>( + std::move(buffer)); + + cugraph::edge_property_t, edge_t> counts(handle, graph_view); + + cugraph::transform_e( + handle, + graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + buff_counts.view(), + [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { + return count; + }, + counts.mutable_view(), + false); + + return counts; } } // namespace detail template -rmm::device_uvector edge_triangle_count( +edge_property_t, edge_t> +edge_triangle_count( raft::handle_t const& handle, graph_view_t const& graph_view) { return detail::edge_triangle_count_impl(handle, graph_view); diff --git a/cpp/src/community/edge_triangle_count_sg.cu b/cpp/src/community/edge_triangle_count_sg.cu index 4257f61bb93..b5edbb677e6 100644 --- a/cpp/src/community/edge_triangle_count_sg.cu +++ b/cpp/src/community/edge_triangle_count_sg.cu @@ -18,15 +18,18 @@ namespace cugraph { // SG instantiation -template rmm::device_uvector edge_triangle_count( +template edge_property_t, int32_t> +edge_triangle_count( raft::handle_t const& handle, cugraph::graph_view_t const& graph_view); -template rmm::device_uvector edge_triangle_count( +template edge_property_t, int64_t> +edge_triangle_count( raft::handle_t const& handle, cugraph::graph_view_t const& graph_view); -template rmm::device_uvector edge_triangle_count( +template edge_property_t, int64_t> +edge_triangle_count( raft::handle_t const& 
handle, cugraph::graph_view_t const& graph_view); diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 87184e4da31..597c270ab2c 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -661,32 +661,33 @@ k_truss(raft::handle_t const& handle, auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; rmm::device_uvector edgelist_srcs(0, handle.get_stream()); rmm::device_uvector edgelist_dsts(0, handle.get_stream()); + std::optional> num_triangles{std::nullopt}; std::optional> edgelist_wgts{std::nullopt}; edge_weight_view = edge_weight ? std::make_optional((*edge_weight).view()) : std::optional>{std::nullopt}; - std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, std::ignore) = decompress_to_edgelist( + + auto prop_num_triangles = edge_triangle_count(handle, cur_graph_view); + + std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, num_triangles) = decompress_to_edgelist( handle, cur_graph_view, edge_weight_view, - std::optional>{std::nullopt}, + std::make_optional(prop_num_triangles.view()), std::optional>(std::nullopt)); - - auto num_triangles = edge_triangle_count(handle, cur_graph_view); - auto transposed_edge_first = thrust::make_zip_iterator(edgelist_dsts.begin(), edgelist_srcs.begin()); auto edge_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()); auto transposed_edge_triangle_count_pair_first = - thrust::make_zip_iterator(transposed_edge_first, num_triangles.begin()); + thrust::make_zip_iterator(transposed_edge_first, (*num_triangles).begin()); thrust::sort_by_key(handle.get_thrust_policy(), transposed_edge_first, transposed_edge_first + edgelist_srcs.size(), - num_triangles.begin()); + (*num_triangles).begin()); cugraph::edge_property_t edge_mask(handle, cur_graph_view); cugraph::fill_edge_property(handle, cur_graph_view, true, edge_mask); @@ -732,7 +733,7 @@ k_truss(raft::handle_t const& handle, 
thrust::sort_by_key(handle.get_thrust_policy(), edge_first + num_valid_edges, edge_first + edgelist_srcs.size(), - num_triangles.begin() + num_valid_edges); + (*num_triangles).begin() + num_valid_edges); auto [intersection_offsets, intersection_indices] = detail::nbr_intersection(handle, @@ -751,7 +752,7 @@ k_truss(raft::handle_t const& handle, thrust::make_counting_iterator(chunk_size), [chunk_start = prev_chunk_size, num_triangles = - raft::device_span(num_triangles.data() + num_valid_edges, num_invalid_edges), + raft::device_span((*num_triangles).data() + num_valid_edges, num_invalid_edges), intersection_offsets = raft::device_span( intersection_offsets.data(), intersection_offsets.size())] __device__(auto i) { num_triangles[chunk_start + i] -= @@ -798,14 +799,14 @@ k_truss(raft::handle_t const& handle, thrust::sort_by_key(handle.get_thrust_policy(), transposed_edge_first + num_valid_edges, transposed_edge_first + edgelist_srcs.size(), - num_triangles.begin() + num_valid_edges); + (*num_triangles).begin() + num_valid_edges); thrust::for_each(handle.get_thrust_policy(), thrust::make_counting_iterator(0), thrust::make_counting_iterator(intersection_indices.size()), unroll_edge{ num_valid_edges, - raft::device_span(num_triangles.data(), num_triangles.size()), + raft::device_span((*num_triangles).data(), (*num_triangles).size()), get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), transposed_edge_first, transposed_edge_first + num_valid_edges, @@ -816,7 +817,7 @@ k_truss(raft::handle_t const& handle, thrust::make_counting_iterator(intersection_indices.size()), unroll_edge{ num_valid_edges, - raft::device_span(num_triangles.data(), num_triangles.size()), + raft::device_span((*num_triangles).data(), (*num_triangles).size()), get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), transposed_edge_first, transposed_edge_first + num_valid_edges, @@ -835,7 +836,7 @@ k_truss(raft::handle_t const& handle, num_valid_edges, 
raft::device_span(edgelist_srcs.data(), edgelist_srcs.size()), raft::device_span(edgelist_dsts.data(), edgelist_dsts.size()), - raft::device_span(num_triangles.data(), num_triangles.size())); + raft::device_span((*num_triangles).data(), (*num_triangles).size())); // case 3: unroll (p, r) cugraph::unroll_p_r_or_q_r_edges( @@ -845,14 +846,14 @@ k_truss(raft::handle_t const& handle, num_valid_edges, raft::device_span(edgelist_srcs.data(), edgelist_srcs.size()), raft::device_span(edgelist_dsts.data(), edgelist_dsts.size()), - raft::device_span(num_triangles.data(), num_triangles.size())); + raft::device_span((*num_triangles).data(), (*num_triangles).size())); // Remove edges that have a triangle count of zero. Those should not be accounted // for during the unroling phase. auto edges_with_triangle_last = thrust::stable_partition(handle.get_thrust_policy(), transposed_edge_triangle_count_pair_first, - transposed_edge_triangle_count_pair_first + num_triangles.size(), + transposed_edge_triangle_count_pair_first + (*num_triangles).size(), [] __device__(auto e) { auto num_triangles = thrust::get<1>(e); return num_triangles > 0; @@ -904,7 +905,7 @@ k_truss(raft::handle_t const& handle, edgelist_srcs.resize(num_edges_with_triangles, handle.get_stream()); edgelist_dsts.resize(num_edges_with_triangles, handle.get_stream()); - num_triangles.resize(num_edges_with_triangles, handle.get_stream()); + (*num_triangles).resize(num_edges_with_triangles, handle.get_stream()); } std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, std::ignore) = decompress_to_edgelist( @@ -920,7 +921,7 @@ k_truss(raft::handle_t const& handle, std::move(edgelist_dsts), std::move(edgelist_wgts), false); - + return std::make_tuple( std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_wgts)); } diff --git a/cpp/tests/community/edge_triangle_count_test.cpp b/cpp/tests/community/edge_triangle_count_test.cpp index a76000ab172..4f83ce22e02 100644 --- 
a/cpp/tests/community/edge_triangle_count_test.cpp +++ b/cpp/tests/community/edge_triangle_count_test.cpp @@ -144,19 +144,19 @@ class Tests_EdgeTriangleCount rmm::device_uvector edgelist_srcs(0, handle.get_stream()); rmm::device_uvector edgelist_dsts(0, handle.get_stream()); - std::optional> opt_wgt_v{std::nullopt}; + std::optional> d_edge_triangle_counts{std::nullopt}; + + auto d_cugraph_results = + cugraph::edge_triangle_count(handle, graph_view); - std::tie(edgelist_srcs, edgelist_dsts, std::ignore, std::ignore) = + std::tie(edgelist_srcs, edgelist_dsts, std::ignore, d_edge_triangle_counts) = cugraph::decompress_to_edgelist( handle, graph_view, - edge_weight ? std::make_optional((*edge_weight).view()) : std::nullopt, - std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::make_optional(d_cugraph_results.view()), std::optional>{std::nullopt}); // FIXME: No longer needed - - auto d_edge_triangle_counts = - cugraph::edge_triangle_count(handle, graph_view); - + if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement hr_timer.start("EdgeTriangleCount"); @@ -178,7 +178,7 @@ class Tests_EdgeTriangleCount edge_weight ? std::make_optional((*edge_weight).view()) : std::nullopt, std::optional>(std::nullopt)); - auto h_cugraph_edge_triangle_counts = cugraph::test::to_host(handle, d_edge_triangle_counts); + auto h_cugraph_edge_triangle_counts = cugraph::test::to_host(handle, *d_edge_triangle_counts); auto h_reference_edge_triangle_counts = edge_triangle_count_reference(h_srcs, h_dsts); @@ -245,6 +245,6 @@ INSTANTIATE_TEST_SUITE_P( // FIXME: High memory footprint. Perform nbr_intersection in chunks. 
::testing::Combine( ::testing::Values(EdgeTriangleCount_Usecase{false, false}), - ::testing::Values(cugraph::test::Rmat_Usecase(18, 16, 0.57, 0.19, 0.19, 0, true, false)))); + ::testing::Values(cugraph::test::Rmat_Usecase(16, 16, 0.57, 0.19, 0.19, 0, true, false)))); CUGRAPH_TEST_PROGRAM_MAIN() From c103e52db125c02a15c2daf8c9ec6a0868953aa7 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 7 May 2024 07:11:20 -0700 Subject: [PATCH 10/93] fix style --- cpp/include/cugraph/algorithms.hpp | 4 +- .../community/edge_triangle_count_impl.cuh | 37 ++++----- cpp/src/community/edge_triangle_count_sg.cu | 9 +-- cpp/src/community/k_truss_impl.cuh | 80 ++++++++++--------- .../community/edge_triangle_count_test.cpp | 4 +- 5 files changed, 66 insertions(+), 68 deletions(-) diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index 76c520c14c3..8dfddb401e1 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -2012,10 +2012,8 @@ edge_property_t, edge_t> edge_t raft::handle_t const& handle, graph_view_t const& graph_view); */ - template -edge_property_t, edge_t> -edge_triangle_count( +edge_property_t, edge_t> edge_triangle_count( raft::handle_t const& handle, graph_view_t const& graph_view); /* diff --git a/cpp/src/community/edge_triangle_count_impl.cuh b/cpp/src/community/edge_triangle_count_impl.cuh index 28db2a4ded9..dbe2208d254 100644 --- a/cpp/src/community/edge_triangle_count_impl.cuh +++ b/cpp/src/community/edge_triangle_count_impl.cuh @@ -17,9 +17,9 @@ #pragma once #include "detail/graph_partition_utils.cuh" -#include "prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh" -#include "prims/transform_e.cuh" #include "prims/edge_bucket.cuh" +#include "prims/transform_e.cuh" +#include "prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh" #include #include @@ -76,7 +76,9 @@ struct update_edges_p_r_q_r_num_triangles { }; template -std::enable_if_t, edge_t>> 
edge_triangle_count_impl( +std::enable_if_t, edge_t>> +edge_triangle_count_impl( raft::handle_t const& handle, graph_view_t const& graph_view) { @@ -171,22 +173,22 @@ std::enable_if_t, edge_t>( - std::move(buffer)); + edge_property_t, edge_t>(std::move(buffer)); - cugraph::edge_property_t, edge_t> counts(handle, graph_view); + cugraph::edge_property_t, edge_t> counts( + handle, graph_view); cugraph::transform_e( - handle, - graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - buff_counts.view(), - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { - return count; - }, - counts.mutable_view(), - false); + handle, + graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + buff_counts.view(), + [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { + return count; + }, + counts.mutable_view(), + false); return counts; } @@ -194,8 +196,7 @@ std::enable_if_t -edge_property_t, edge_t> -edge_triangle_count( +edge_property_t, edge_t> edge_triangle_count( raft::handle_t const& handle, graph_view_t const& graph_view) { return detail::edge_triangle_count_impl(handle, graph_view); diff --git a/cpp/src/community/edge_triangle_count_sg.cu b/cpp/src/community/edge_triangle_count_sg.cu index b5edbb677e6..4ccb968458d 100644 --- a/cpp/src/community/edge_triangle_count_sg.cu +++ b/cpp/src/community/edge_triangle_count_sg.cu @@ -18,18 +18,15 @@ namespace cugraph { // SG instantiation -template edge_property_t, int32_t> -edge_triangle_count( +template edge_property_t, int32_t> edge_triangle_count( raft::handle_t const& handle, cugraph::graph_view_t const& graph_view); -template edge_property_t, int64_t> -edge_triangle_count( +template edge_property_t, int64_t> edge_triangle_count( raft::handle_t const& handle, cugraph::graph_view_t const& graph_view); -template edge_property_t, int64_t> -edge_triangle_count( 
+template edge_property_t, int64_t> edge_triangle_count( raft::handle_t const& handle, cugraph::graph_view_t const& graph_view); diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 597c270ab2c..be400c0afd7 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -670,12 +670,12 @@ k_truss(raft::handle_t const& handle, auto prop_num_triangles = edge_triangle_count(handle, cur_graph_view); - std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, num_triangles) = decompress_to_edgelist( - handle, - cur_graph_view, - edge_weight_view, - std::make_optional(prop_num_triangles.view()), - std::optional>(std::nullopt)); + std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, num_triangles) = + decompress_to_edgelist(handle, + cur_graph_view, + edge_weight_view, + std::make_optional(prop_num_triangles.view()), + std::optional>(std::nullopt)); auto transposed_edge_first = thrust::make_zip_iterator(edgelist_dsts.begin(), edgelist_srcs.begin()); @@ -750,9 +750,9 @@ k_truss(raft::handle_t const& handle, handle.get_thrust_policy(), thrust::make_counting_iterator(0), thrust::make_counting_iterator(chunk_size), - [chunk_start = prev_chunk_size, - num_triangles = - raft::device_span((*num_triangles).data() + num_valid_edges, num_invalid_edges), + [chunk_start = prev_chunk_size, + num_triangles = raft::device_span((*num_triangles).data() + num_valid_edges, + num_invalid_edges), intersection_offsets = raft::device_span( intersection_offsets.data(), intersection_offsets.size())] __device__(auto i) { num_triangles[chunk_start + i] -= @@ -801,27 +801,29 @@ k_truss(raft::handle_t const& handle, transposed_edge_first + edgelist_srcs.size(), (*num_triangles).begin() + num_valid_edges); - thrust::for_each(handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(intersection_indices.size()), - unroll_edge{ - num_valid_edges, - raft::device_span((*num_triangles).data(), 
(*num_triangles).size()), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), - transposed_edge_first, - transposed_edge_first + num_valid_edges, - transposed_edge_first + edgelist_srcs.size()}); - - thrust::for_each(handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(intersection_indices.size()), - unroll_edge{ - num_valid_edges, - raft::device_span((*num_triangles).data(), (*num_triangles).size()), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), - transposed_edge_first, - transposed_edge_first + num_valid_edges, - transposed_edge_first + edgelist_srcs.size()}); + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(intersection_indices.size()), + unroll_edge{ + num_valid_edges, + raft::device_span((*num_triangles).data(), (*num_triangles).size()), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), + transposed_edge_first, + transposed_edge_first + num_valid_edges, + transposed_edge_first + edgelist_srcs.size()}); + + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(intersection_indices.size()), + unroll_edge{ + num_valid_edges, + raft::device_span((*num_triangles).data(), (*num_triangles).size()), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), + transposed_edge_first, + transposed_edge_first + num_valid_edges, + transposed_edge_first + edgelist_srcs.size()}); prev_chunk_size += chunk_size; chunk_num_invalid_edges -= chunk_size; @@ -850,14 +852,14 @@ k_truss(raft::handle_t const& handle, // Remove edges that have a triangle count of zero. Those should not be accounted // for during the unroling phase. 
- auto edges_with_triangle_last = - thrust::stable_partition(handle.get_thrust_policy(), - transposed_edge_triangle_count_pair_first, - transposed_edge_triangle_count_pair_first + (*num_triangles).size(), - [] __device__(auto e) { - auto num_triangles = thrust::get<1>(e); - return num_triangles > 0; - }); + auto edges_with_triangle_last = thrust::stable_partition( + handle.get_thrust_policy(), + transposed_edge_triangle_count_pair_first, + transposed_edge_triangle_count_pair_first + (*num_triangles).size(), + [] __device__(auto e) { + auto num_triangles = thrust::get<1>(e); + return num_triangles > 0; + }); auto num_edges_with_triangles = static_cast( thrust::distance(transposed_edge_triangle_count_pair_first, edges_with_triangle_last)); @@ -921,7 +923,7 @@ k_truss(raft::handle_t const& handle, std::move(edgelist_dsts), std::move(edgelist_wgts), false); - + return std::make_tuple( std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_wgts)); } diff --git a/cpp/tests/community/edge_triangle_count_test.cpp b/cpp/tests/community/edge_triangle_count_test.cpp index 4f83ce22e02..5fe5474e83a 100644 --- a/cpp/tests/community/edge_triangle_count_test.cpp +++ b/cpp/tests/community/edge_triangle_count_test.cpp @@ -145,7 +145,7 @@ class Tests_EdgeTriangleCount rmm::device_uvector edgelist_srcs(0, handle.get_stream()); rmm::device_uvector edgelist_dsts(0, handle.get_stream()); std::optional> d_edge_triangle_counts{std::nullopt}; - + auto d_cugraph_results = cugraph::edge_triangle_count(handle, graph_view); @@ -156,7 +156,7 @@ class Tests_EdgeTriangleCount std::optional>{std::nullopt}, std::make_optional(d_cugraph_results.view()), std::optional>{std::nullopt}); // FIXME: No longer needed - + if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement hr_timer.start("EdgeTriangleCount"); From 50798d559839a6571564c457c55413273cac7712 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 8 May 2024 04:52:33 -0700 
Subject: [PATCH 11/93] add edge mask tests --- .../community/edge_triangle_count_impl.cuh | 1 - .../community/edge_triangle_count_test.cpp | 19 ++++++++++++++----- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/cpp/src/community/edge_triangle_count_impl.cuh b/cpp/src/community/edge_triangle_count_impl.cuh index dbe2208d254..b3fc4505daf 100644 --- a/cpp/src/community/edge_triangle_count_impl.cuh +++ b/cpp/src/community/edge_triangle_count_impl.cuh @@ -168,7 +168,6 @@ edge_triangle_count_impl( } std::vector> buffer{}; - buffer.push_back(std::move(num_triangles)); buffer.reserve(num_triangles.size()); buffer.push_back(std::move(num_triangles)); diff --git a/cpp/tests/community/edge_triangle_count_test.cpp b/cpp/tests/community/edge_triangle_count_test.cpp index 5fe5474e83a..294a89184a6 100644 --- a/cpp/tests/community/edge_triangle_count_test.cpp +++ b/cpp/tests/community/edge_triangle_count_test.cpp @@ -19,6 +19,7 @@ #include "utilities/conversion_utilities.hpp" #include "utilities/test_graphs.hpp" #include "utilities/thrust_wrapper.hpp" +#include "utilities/property_generator_utilities.hpp" #include #include @@ -41,6 +42,7 @@ struct EdgeTriangleCount_Usecase { bool test_weighted_{false}; + bool edge_masking_{false}; bool check_correctness_{true}; }; @@ -142,6 +144,13 @@ class Tests_EdgeTriangleCount auto graph_view = graph.view(); + std::optional> edge_mask{std::nullopt}; + if (edge_triangle_count_usecase.edge_masking_) { + edge_mask = + cugraph::test::generate::edge_property(handle, graph_view, 2); + graph_view.attach_edge_mask((*edge_mask).view()); + } + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); rmm::device_uvector edgelist_dsts(0, handle.get_stream()); std::optional> d_edge_triangle_counts{std::nullopt}; @@ -220,8 +229,8 @@ INSTANTIATE_TEST_SUITE_P( Tests_EdgeTriangleCount_File, ::testing::Combine( // enable correctness checks - ::testing::Values(EdgeTriangleCount_Usecase{false, true}, - EdgeTriangleCount_Usecase{true, true}), + 
::testing::Values(EdgeTriangleCount_Usecase{false, false, true}, + EdgeTriangleCount_Usecase{true, false, true}), // FIXME: Still debugging edge_mask ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); @@ -230,8 +239,8 @@ INSTANTIATE_TEST_SUITE_P( Tests_EdgeTriangleCount_Rmat, // enable correctness checks ::testing::Combine( - ::testing::Values(EdgeTriangleCount_Usecase{false, true}, - EdgeTriangleCount_Usecase{true, true}), + ::testing::Values(EdgeTriangleCount_Usecase{false, false, true}, + EdgeTriangleCount_Usecase{true, false, true}), ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); INSTANTIATE_TEST_SUITE_P( @@ -244,7 +253,7 @@ INSTANTIATE_TEST_SUITE_P( // disable correctness checks for large graphs // FIXME: High memory footprint. Perform nbr_intersection in chunks. ::testing::Combine( - ::testing::Values(EdgeTriangleCount_Usecase{false, false}), + ::testing::Values(EdgeTriangleCount_Usecase{false, false, false}), ::testing::Values(cugraph::test::Rmat_Usecase(16, 16, 0.57, 0.19, 0.19, 0, true, false)))); CUGRAPH_TEST_PROGRAM_MAIN() From 86fd201044f85c15e8010ac0fab3cf5f66f08ded Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 8 May 2024 05:03:46 -0700 Subject: [PATCH 12/93] fix style --- cpp/tests/community/edge_triangle_count_test.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/tests/community/edge_triangle_count_test.cpp b/cpp/tests/community/edge_triangle_count_test.cpp index 294a89184a6..4e926fee6c1 100644 --- a/cpp/tests/community/edge_triangle_count_test.cpp +++ b/cpp/tests/community/edge_triangle_count_test.cpp @@ -17,9 +17,9 @@ #include "utilities/base_fixture.hpp" #include "utilities/check_utilities.hpp" #include "utilities/conversion_utilities.hpp" +#include "utilities/property_generator_utilities.hpp" #include "utilities/test_graphs.hpp" #include "utilities/thrust_wrapper.hpp" -#include 
"utilities/property_generator_utilities.hpp" #include #include @@ -230,7 +230,8 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Combine( // enable correctness checks ::testing::Values(EdgeTriangleCount_Usecase{false, false, true}, - EdgeTriangleCount_Usecase{true, false, true}), // FIXME: Still debugging edge_mask + EdgeTriangleCount_Usecase{ + true, false, true}), // FIXME: Still debugging edge_mask ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); From aad7590d065c783070750921087b2cd8333aa1c2 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sat, 11 May 2024 08:11:15 -0700 Subject: [PATCH 13/93] add mg implementation of edge triangle count --- .../community/edge_triangle_count_impl.cuh | 331 ++++++++++++++++-- 1 file changed, 300 insertions(+), 31 deletions(-) diff --git a/cpp/src/community/edge_triangle_count_impl.cuh b/cpp/src/community/edge_triangle_count_impl.cuh index b3fc4505daf..d2246ae89ac 100644 --- a/cpp/src/community/edge_triangle_count_impl.cuh +++ b/cpp/src/community/edge_triangle_count_impl.cuh @@ -21,7 +21,14 @@ #include "prims/transform_e.cuh" #include "prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh" +// FIXME:::: Remove ************************************************************ +#include +#include +#include +// FIXME:::: Remove ************************************************************ + #include +#include #include #include @@ -75,9 +82,50 @@ struct update_edges_p_r_q_r_num_triangles { } }; +template +struct extract_p_r_q_r { + size_t chunk_start{}; + size_t p_r_or_q_r{}; + raft::device_span intersection_offsets{}; + raft::device_span intersection_indices{}; + EdgeIterator edge_first; + + __device__ thrust::tuple operator()(edge_t i) const + { + auto itr = thrust::upper_bound(thrust::seq, intersection_offsets.begin()+1, intersection_offsets.end(), i); + auto idx = thrust::distance(intersection_offsets.begin()+1, itr); + + if (p_r_or_q_r == 0) { + 
return thrust::make_tuple(thrust::get<0>(*(edge_first + chunk_start + idx)), intersection_indices[i]); + } else { + return thrust::make_tuple(thrust::get<1>(*(edge_first + chunk_start + idx)), intersection_indices[i]); + } + + } +}; + + +template +struct extract_q_r { + size_t chunk_start{}; + raft::device_span intersection_offsets{}; + raft::device_span intersection_indices{}; + EdgeIterator edge_first; + + + __device__ thrust::tuple operator()(edge_t i) const + { + auto itr = thrust::upper_bound(thrust::seq, intersection_offsets.begin()+1, intersection_offsets.end(), i); + auto idx = thrust::distance(intersection_offsets.begin()+1, itr); + auto pair = thrust::make_tuple(thrust::get<1>(*(edge_first + chunk_start + idx)), intersection_indices[i]); + + return pair; + } +}; + + template -std::enable_if_t, edge_t>> +edge_property_t, edge_t> edge_triangle_count_impl( raft::handle_t const& handle, graph_view_t const& graph_view) @@ -98,19 +146,37 @@ edge_triangle_count_impl( size_t approx_edges_to_intersect_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); + + approx_edges_to_intersect_per_iteration = 4; auto num_chunks = ((edgelist_srcs.size() % approx_edges_to_intersect_per_iteration) == 0) ? 
(edgelist_srcs.size() / approx_edges_to_intersect_per_iteration) : (edgelist_srcs.size() / approx_edges_to_intersect_per_iteration) + 1; + + // Note: host_scalar_all_reduce to get the max reduction + // Note: edge src dst and delta -> shuffle those -> and update -> check this : shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning + // Note: shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning in shuffle_wrapper size_t prev_chunk_size = 0; auto num_edges = edgelist_srcs.size(); rmm::device_uvector num_triangles(edgelist_srcs.size(), handle.get_stream()); + //auto my_rank = handle.get_comms().get_rank(); + if constexpr (multi_gpu) { + num_chunks = host_scalar_allreduce( + handle.get_comms(), num_chunks, raft::comms::op_t::MAX, handle.get_stream()); + } + + printf("\n initial edgelists, num_chunk = %d\n", num_chunks); + + raft::print_device_vector("edgelist_srcs", edgelist_srcs.data(), edgelist_srcs.size(), std::cout); + raft::print_device_vector("edgelist_dsts", edgelist_dsts.data(), edgelist_dsts.size(), std::cout); + // Need to ensure that the vector has its values initialized to 0 before incrementing thrust::fill(handle.get_thrust_policy(), num_triangles.begin(), num_triangles.end(), 0); for (size_t i = 0; i < num_chunks; ++i) { auto chunk_size = std::min(approx_edges_to_intersect_per_iteration, num_edges); + printf("\niteration = %d, chunk_size = %d\n", i, chunk_size); num_edges -= chunk_size; // Perform 'nbr_intersection' in chunks to reduce peak memory. 
auto [intersection_offsets, intersection_indices] = @@ -121,9 +187,18 @@ edge_triangle_count_impl( edge_first + prev_chunk_size + chunk_size, std::array{true, true}, false /*FIXME: pass 'do_expensive_check' as argument*/); + + printf("\nchunk processed\n"); + raft::print_device_vector("edgelist_srcs", edgelist_srcs.data() + prev_chunk_size, chunk_size, std::cout); + raft::print_device_vector("edgelist_dsts", edgelist_dsts.data() + prev_chunk_size, chunk_size, std::cout); + raft::print_device_vector("offsets", intersection_offsets.data(), intersection_offsets.size(), std::cout); + raft::print_device_vector("indices", intersection_indices.data(), intersection_indices.size(), std::cout); + printf("\n"); // Update the number of triangles of each (p, q) edges by looking at their intersection // size + + thrust::for_each( handle.get_thrust_policy(), thrust::make_counting_iterator(0), @@ -135,59 +210,253 @@ edge_triangle_count_impl( num_triangles[chunk_start + i] += (intersection_offsets[i + 1] - intersection_offsets[i]); }); - // Given intersection offsets and indices that are used to update the number of - // triangles of (p, q) edges where `r`s are the intersection indices, update - // the number of triangles of the pairs (p, r) and (q, r). 
- thrust::for_each( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(intersection_indices.size()), - update_edges_p_r_q_r_num_triangles{ - edgelist_srcs.size(), - 0, - prev_chunk_size, - raft::device_span(intersection_offsets.data(), intersection_offsets.size()), - raft::device_span(intersection_indices.data(), intersection_indices.size()), - raft::device_span(num_triangles.data(), num_triangles.size()), - edge_first}); + if constexpr (multi_gpu) { + // stores all the pairs (p, r) and (q, r) + auto vertex_pair_buffer_tmp = allocate_dataframe_buffer>( + intersection_indices.size() * 2, handle.get_stream()); // *2 for both (p, r) and (q, r) + // So that you shuffle only once - thrust::for_each( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(intersection_indices.size()), - update_edges_p_r_q_r_num_triangles{ - edgelist_srcs.size(), - 1, - prev_chunk_size, - raft::device_span(intersection_offsets.data(), intersection_offsets.size()), - raft::device_span(intersection_indices.data(), intersection_indices.size()), - raft::device_span(num_triangles.data(), num_triangles.size()), - edge_first}); + // tabulate with the size of intersection_indices, and call binary search on intersection_offsets + // to get (p, r). 
+ thrust::tabulate(handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_tmp), + get_dataframe_buffer_begin(vertex_pair_buffer_tmp) + intersection_indices.size(), + extract_p_r_q_r{ + prev_chunk_size, + 0, + raft::device_span( + intersection_offsets.data(), intersection_offsets.size()), + raft::device_span( + intersection_indices.data(), intersection_indices.size()), + edge_first + }); + // FIXME: Consolidate both functions + thrust::tabulate(handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_tmp) + intersection_indices.size(), + get_dataframe_buffer_begin(vertex_pair_buffer_tmp) + (2 * intersection_indices.size()), + extract_p_r_q_r{ + prev_chunk_size, + 1, + raft::device_span( + intersection_offsets.data(), intersection_offsets.size()), + raft::device_span( + intersection_indices.data(), intersection_indices.size()), + edge_first + }); + + thrust::sort(handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_tmp), + get_dataframe_buffer_end(vertex_pair_buffer_tmp)); + + printf("\np, r and q, r\n"); + raft::print_device_vector("edgelist_srcs", std::get<0>(vertex_pair_buffer_tmp).data(), std::get<0>(vertex_pair_buffer_tmp).size(), std::cout); + raft::print_device_vector("edgelist_dsts", std::get<1>(vertex_pair_buffer_tmp).data(), std::get<1>(vertex_pair_buffer_tmp).size(), std::cout); + + rmm::device_uvector increase_count_tmp(2 * intersection_indices.size(), handle.get_stream()); + thrust::fill(handle.get_thrust_policy(), increase_count_tmp.begin(), increase_count_tmp.end(), size_t{1}); + + auto count_p_r_q_r = thrust::unique_count(handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_tmp), + get_dataframe_buffer_end(vertex_pair_buffer_tmp)); + + rmm::device_uvector increase_count(count_p_r_q_r, handle.get_stream()); + + + auto vertex_pair_buffer = allocate_dataframe_buffer>( + count_p_r_q_r, handle.get_stream()); + thrust::reduce_by_key(handle.get_thrust_policy(), + 
get_dataframe_buffer_begin(vertex_pair_buffer_tmp), + get_dataframe_buffer_end(vertex_pair_buffer_tmp), + increase_count_tmp.begin(), + get_dataframe_buffer_begin(vertex_pair_buffer), + increase_count.begin(), + thrust::equal_to>{}); + + rmm::device_uvector pair_srcs(0, handle.get_stream()); + rmm::device_uvector pair_dsts(0, handle.get_stream()); + std::optional> pair_count{std::nullopt}; + + std::optional> opt_increase_count = + std::make_optional(rmm::device_uvector(increase_count.size(), handle.get_stream())); + raft::copy((*opt_increase_count).begin(), + increase_count.begin(), + increase_count.size(), + handle.get_stream()); + + // There are still multiple copies here but is it worth sorting and reducing again? + std::tie(pair_srcs, pair_dsts, std::ignore, pair_count, std::ignore) = shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<0>(vertex_pair_buffer)), + std::move(std::get<1>(vertex_pair_buffer)), + std::nullopt, + std::move(opt_increase_count), + std::nullopt, + graph_view.vertex_partition_range_lasts()); + + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(pair_srcs.size()), + [num_edges = edgelist_srcs.size(), + num_triangles = num_triangles.data(), + pair_srcs = pair_srcs.data(), + pair_dsts = pair_dsts.data(), + pair_count = (*pair_count).data(), + edge_first] + __device__(auto idx) { + auto src = pair_srcs[idx]; + auto dst = pair_dsts[idx]; + auto p_r_q_r_pair = thrust::make_tuple(src, dst); + + // Find its position in 'edges' + auto itr_p_r_q_r = + thrust::lower_bound(thrust::seq, edge_first, edge_first + num_edges, p_r_q_r_pair); + + assert(*itr_p_r_q_r == p_r_q_r_pair); + auto idx_p_r_q_r = thrust::distance(edge_first, itr_p_r_q_r); + + cuda::atomic_ref atomic_counter(num_triangles[idx_p_r_q_r]); + auto r = atomic_counter.fetch_add(pair_count[idx], cuda::std::memory_order_relaxed); + + } + ); + + } else { + + // Given 
intersection offsets and indices that are used to update the number of + // triangles of (p, q) edges where `r`s are the intersection indices, update + // the number of triangles of the pairs (p, r) and (q, r). + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(intersection_indices.size()), + update_edges_p_r_q_r_num_triangles{ + edgelist_srcs.size(), + 0, + prev_chunk_size, + raft::device_span(intersection_offsets.data(), intersection_offsets.size()), + raft::device_span(intersection_indices.data(), intersection_indices.size()), + raft::device_span(num_triangles.data(), num_triangles.size()), + edge_first}); + + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(intersection_indices.size()), + update_edges_p_r_q_r_num_triangles{ + edgelist_srcs.size(), + 1, + prev_chunk_size, + raft::device_span(intersection_offsets.data(), intersection_offsets.size()), + raft::device_span(intersection_indices.data(), intersection_indices.size()), + raft::device_span(num_triangles.data(), num_triangles.size()), + edge_first}); + } + printf("\ndone with the iteration\n"); + printf("\nafter updating p, r and q, r edges\n"); + raft::print_device_vector("num_triangles", num_triangles.data(), num_triangles.size(), std::cout); prev_chunk_size += chunk_size; } + /* + printf("\nfrom edge triangle count and size = %d\n", num_triangles.size()); + raft::print_device_vector("edgelist_srcs", edgelist_srcs.data(), edgelist_srcs.size(), std::cout); + raft::print_device_vector("edgelist_dsts", edgelist_dsts.data(), edgelist_dsts.size(), std::cout); + raft::print_device_vector("triangle_count", num_triangles.data(), num_triangles.size(), std::cout); + printf("\n"); + */ + /* std::vector> buffer{}; - buffer.reserve(num_triangles.size()); buffer.push_back(std::move(num_triangles)); - auto buff_counts = edge_property_t, edge_t>(std::move(buffer)); + */ + 
//std::vector> buffer{}; + //buffer.push_back(std::move(num_triangles)); + //buffer.reserve(num_triangles.size()); + //buffer.push_back(std::move(num_triangles)); + //printf("\nother count\n"); + //raft::print_device_vector("triangle_count", buffer[0].data(), buffer[0].size(), std::cout); + printf("\n"); + + + //auto buff_counts = + // edge_property_t, edge_t>(std::move(buffer)); + + //#if 0 cugraph::edge_property_t, edge_t> counts( handle, graph_view); + /* + auto counts_ = detail::edge_partition_edge_property_device_view_t( + (buff_counts.view()), 0); + */ + + //edge_t* + //auto y = x.value_first(); + //raft::print_device_vector("prop_triangle_ct", counts_.value_first(), num_triangles.size(), std::cout); + /* cugraph::transform_e( handle, graph_view, + // buccket. cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), buff_counts.view(), [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { + //printf("\nedge %d, %d, count = %d\n", src, dst, count); return count; }, counts.mutable_view(), false); + */ + + + cugraph::edge_bucket_t valid_edges(handle); + valid_edges.insert(edgelist_srcs.begin(), + edgelist_srcs.end(), + edgelist_dsts.begin()); + + auto cur_graph_view = graph_view; + /* + auto unmasked_cur_graph_view = cur_graph_view; + if (unmasked_cur_graph_view.has_edge_mask()) { unmasked_cur_graph_view.clear_edge_mask(); } + */ + + auto edge_last = edge_first + edgelist_srcs.size(); + printf("\nthe number of edges = %d\n", edgelist_srcs.size()); + cugraph::transform_e( + handle, + graph_view, + valid_edges, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + [edge_first, + edge_last, + num_edges = edgelist_srcs.size(), + num_triangles = num_triangles.data()] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { + //printf("\nedge %d, %d\n", src, dst); + auto pair = 
thrust::make_tuple(src, dst); + // Find its position in 'edges' + + auto itr_pair = + thrust::lower_bound(thrust::seq, edge_first, edge_last, pair); + //auto itr_pair = thrust::lower_bound(thrust::seq, edge_first, edge_last, pair); + //assert(*itr_p_r_q_r == p_r_q_r_pair); + //if (itr_pair != edge_last && *itr_pair == pair) { + auto idx_pair = thrust::distance(edge_first, itr_pair); + printf("\nin - edge %d, %d, count = %d\n", src, dst, num_triangles[idx_pair]); + return num_triangles[idx_pair]; + //} + }, + counts.mutable_view(), + false); + + + //#endif return counts; } From 28149f7cf1abc69511c49ab2ca9b9d817ad24d55 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sat, 11 May 2024 08:14:08 -0700 Subject: [PATCH 14/93] add reference for mg edge triangle count --- cpp/CMakeLists.txt | 1 + cpp/src/community/edge_triangle_count_mg.cu | 33 +++++++++++++++++++++ cpp/tests/CMakeLists.txt | 4 +++ 3 files changed, 38 insertions(+) create mode 100644 cpp/src/community/edge_triangle_count_mg.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index eb6f348b380..0a2aabcb2ca 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -175,6 +175,7 @@ set(CUGRAPH_SOURCES src/community/detail/refine_sg.cu src/community/detail/refine_mg.cu src/community/edge_triangle_count_sg.cu + src/community/edge_triangle_count_mg.cu src/community/detail/maximal_independent_moves_sg.cu src/community/detail/maximal_independent_moves_mg.cu src/detail/utility_wrappers.cu diff --git a/cpp/src/community/edge_triangle_count_mg.cu b/cpp/src/community/edge_triangle_count_mg.cu new file mode 100644 index 00000000000..254a0807e56 --- /dev/null +++ b/cpp/src/community/edge_triangle_count_mg.cu @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "community/edge_triangle_count_impl.cuh" + +namespace cugraph { + +// MG instantiation +template edge_property_t, int32_t> edge_triangle_count( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view); + +template edge_property_t, int64_t> edge_triangle_count( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view); + +template edge_property_t, int64_t> edge_triangle_count( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view); + +} // namespace cugraph diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index d65eef3c5ae..5e5c7856da2 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -582,6 +582,10 @@ if(BUILD_CUGRAPH_MG_TESTS) ############################################################################################### # - MG LOUVAIN tests -------------------------------------------------------------------------- ConfigureTestMG(MG_EGONET_TEST community/mg_egonet_test.cu) + + ############################################################################################### + # - MG EDGE TRIANGLE COUNT tests -------------------------------------------------------------------------- + #ConfigureTest(MG_EDGE_TC community/mg_tc_test.cpp) ############################################################################################### # - MG WEAKLY CONNECTED COMPONENTS tests ------------------------------------------------------ From 0e6938202fcb7eaa1642cfb758d5f7820ad81353 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sun, 12 May 2024 05:36:33
-0700 Subject: [PATCH 15/93] add mg edge triangle count tests --- cpp/tests/CMakeLists.txt | 2 +- .../community/mg_edge_triangle_count_test.cpp | 232 ++++++++++++++++++ 2 files changed, 233 insertions(+), 1 deletion(-) create mode 100644 cpp/tests/community/mg_edge_triangle_count_test.cpp diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 5e5c7856da2..02c6b4fa938 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -585,7 +585,7 @@ if(BUILD_CUGRAPH_MG_TESTS) ############################################################################################### # - MG EDGE TRIANGLE COUNT tests -------------------------------------------------------------------------- - #ConfigureTest(MG_EDGE_TC community/mg_tc_test.cpp) + ConfigureTestMG(MG_EDGE_TRIANGLE_COUNT_TEST community/mg_edge_triangle_count_test.cpp) ############################################################################################### # - MG WEAKLY CONNECTED COMPONENTS tests ------------------------------------------------------ diff --git a/cpp/tests/community/mg_edge_triangle_count_test.cpp b/cpp/tests/community/mg_edge_triangle_count_test.cpp new file mode 100644 index 00000000000..3ac60a56a2a --- /dev/null +++ b/cpp/tests/community/mg_edge_triangle_count_test.cpp @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include "utilities/base_fixture.hpp" +#include "utilities/conversion_utilities.hpp" +#include "utilities/device_comm_wrapper.hpp" +#include "utilities/mg_utilities.hpp" +#include "utilities/property_generator_utilities.hpp" +#include "utilities/test_graphs.hpp" +#include "utilities/thrust_wrapper.hpp" + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include + +#include + +struct EdgeTriangleCount_Usecase { + bool test_weighted_{false}; + bool edge_masking_{false}; + bool check_correctness_{true}; +}; + +template +class Tests_MGEdgeTriangleCount + : public ::testing::TestWithParam> { + public: + Tests_MGEdgeTriangleCount() {} + + static void SetUpTestCase() { handle_ = cugraph::test::initialize_mg_handle(); } + + static void TearDownTestCase() { handle_.reset(); } + + virtual void SetUp() {} + virtual void TearDown() {} + + // Compare the results of running EdgeTriangleCount on multiple GPUs to that of a single-GPU run + template + void run_current_test(EdgeTriangleCount_Usecase const& edge_triangle_count_usecase, + input_usecase_t const& input_usecase) + { + using weight_t = float; + + HighResTimer hr_timer{}; + + // 1. 
create MG graph + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.start("MG Construct graph"); + } + + cugraph::graph_t mg_graph(*handle_); + std::optional> mg_renumber_map{std::nullopt}; + std::tie(mg_graph, std::ignore, mg_renumber_map) = + cugraph::test::construct_graph( + *handle_, input_usecase, false, true, false, true); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + auto mg_graph_view = mg_graph.view(); + + std::optional> edge_mask{std::nullopt}; + if (edge_triangle_count_usecase.edge_masking_) { + edge_mask = cugraph::test::generate::edge_property( + *handle_, mg_graph_view, 2); + mg_graph_view.attach_edge_mask((*edge_mask).view()); + } + + + // 2. run MG EdgeTriangleCount + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.start("MG EdgeTriangleCount"); + } + + auto d_mg_cugraph_results = + cugraph::edge_triangle_count(*handle_, mg_graph_view); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + // 3. Compare SG & MG results + if (edge_triangle_count_usecase.check_correctness_) { + // 3-1. 
Convert to SG graph + cugraph::graph_t sg_graph(*handle_); + std::optional< + cugraph::edge_property_t, edge_t>> + d_sg_cugraph_results{std::nullopt}; + std::tie(sg_graph, std::ignore, d_sg_cugraph_results, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::make_optional(d_mg_cugraph_results.view()), + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); + + if (handle_->get_comms().get_rank() == int{0}) { + // 3-2. Convert the MG triangle counts stored as 'edge_property_t' to device vector + auto [edgelist_srcs, edgelist_dsts, d_edgelist_weights, d_edge_triangle_counts] = + cugraph::decompress_to_edgelist( + *handle_, + sg_graph.view(), + std::optional>{std::nullopt}, + std::make_optional((*d_sg_cugraph_results).view()), + std::optional>{std::nullopt}); // FIXME: No longer needed + + // 3-3. Run SG EdgeTriangleCount + auto ref_d_sg_cugraph_results = cugraph::edge_triangle_count(*handle_, sg_graph.view()); + auto [ref_edgelist_srcs, ref_edgelist_dsts, ref_d_edgelist_weights, ref_d_edge_triangle_counts] = + cugraph::decompress_to_edgelist( + *handle_, + sg_graph.view(), + std::optional>{std::nullopt}, + std::make_optional(ref_d_sg_cugraph_results.view()), + std::optional>{std::nullopt}); // FIXME: No longer needed + + // 3-4. 
Compare + auto h_mg_edge_triangle_counts = + cugraph::test::to_host(*handle_, *d_edge_triangle_counts); + auto h_sg_edge_triangle_counts = + cugraph::test::to_host(*handle_, *ref_d_edge_triangle_counts); + + ASSERT_TRUE(std::equal(h_mg_edge_triangle_counts.begin(), + h_mg_edge_triangle_counts.end(), + h_sg_edge_triangle_counts.begin())); + } + } + } + + private: + static std::unique_ptr handle_; +}; + +template +std::unique_ptr Tests_MGEdgeTriangleCount::handle_ = nullptr; + +using Tests_MGEdgeTriangleCount_File = Tests_MGEdgeTriangleCount; +using Tests_MGEdgeTriangleCount_Rmat = Tests_MGEdgeTriangleCount; + +TEST_P(Tests_MGEdgeTriangleCount_File, CheckInt32Int32) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_MGEdgeTriangleCount_Rmat, CheckInt32Int32) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGEdgeTriangleCount_Rmat, CheckInt32Int64) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGEdgeTriangleCount_Rmat, CheckInt64Int64) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +INSTANTIATE_TEST_SUITE_P( + file_tests, + Tests_MGEdgeTriangleCount_File, + ::testing::Combine( + // enable correctness checks + ::testing::Values(EdgeTriangleCount_Usecase{false, false, true}, + EdgeTriangleCount_Usecase{false, true, true}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); + +INSTANTIATE_TEST_SUITE_P(rmat_small_tests, + Tests_MGEdgeTriangleCount_Rmat, + ::testing::Combine(::testing::Values(EdgeTriangleCount_Usecase{false, false, true}, + EdgeTriangleCount_Usecase{false, true, true}), + 
::testing::Values(cugraph::test::Rmat_Usecase( + 10, 16, 0.57, 0.19, 0.19, 0, true, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_MGEdgeTriangleCount_Rmat, + ::testing::Combine( + ::testing::Values(EdgeTriangleCount_Usecase{false, false, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, true, false)))); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() From f893d24cbc8a23fc899c0ae188cc7887969aecc0 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sun, 12 May 2024 05:38:54 -0700 Subject: [PATCH 16/93] remove debug print and unused import --- .../community/edge_triangle_count_impl.cuh | 105 +----------------- 1 file changed, 4 insertions(+), 101 deletions(-) diff --git a/cpp/src/community/edge_triangle_count_impl.cuh b/cpp/src/community/edge_triangle_count_impl.cuh index d2246ae89ac..84f700d2ac7 100644 --- a/cpp/src/community/edge_triangle_count_impl.cuh +++ b/cpp/src/community/edge_triangle_count_impl.cuh @@ -21,12 +21,6 @@ #include "prims/transform_e.cuh" #include "prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh" -// FIXME:::: Remove ************************************************************ -#include -#include -#include -// FIXME:::: Remove ************************************************************ - #include #include #include @@ -146,13 +140,11 @@ edge_triangle_count_impl( size_t approx_edges_to_intersect_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); - - approx_edges_to_intersect_per_iteration = 4; auto num_chunks = ((edgelist_srcs.size() % approx_edges_to_intersect_per_iteration) == 0) ? 
(edgelist_srcs.size() / approx_edges_to_intersect_per_iteration) : (edgelist_srcs.size() / approx_edges_to_intersect_per_iteration) + 1; - + // Note: host_scalar_all_reduce to get the max reduction // Note: edge src dst and delta -> shuffle those -> and update -> check this : shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning // Note: shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning in shuffle_wrapper @@ -166,17 +158,11 @@ edge_triangle_count_impl( handle.get_comms(), num_chunks, raft::comms::op_t::MAX, handle.get_stream()); } - printf("\n initial edgelists, num_chunk = %d\n", num_chunks); - - raft::print_device_vector("edgelist_srcs", edgelist_srcs.data(), edgelist_srcs.size(), std::cout); - raft::print_device_vector("edgelist_dsts", edgelist_dsts.data(), edgelist_dsts.size(), std::cout); - // Need to ensure that the vector has its values initialized to 0 before incrementing thrust::fill(handle.get_thrust_policy(), num_triangles.begin(), num_triangles.end(), 0); for (size_t i = 0; i < num_chunks; ++i) { auto chunk_size = std::min(approx_edges_to_intersect_per_iteration, num_edges); - printf("\niteration = %d, chunk_size = %d\n", i, chunk_size); num_edges -= chunk_size; // Perform 'nbr_intersection' in chunks to reduce peak memory. 
auto [intersection_offsets, intersection_indices] = @@ -187,18 +173,9 @@ edge_triangle_count_impl( edge_first + prev_chunk_size + chunk_size, std::array{true, true}, false /*FIXME: pass 'do_expensive_check' as argument*/); - - printf("\nchunk processed\n"); - raft::print_device_vector("edgelist_srcs", edgelist_srcs.data() + prev_chunk_size, chunk_size, std::cout); - raft::print_device_vector("edgelist_dsts", edgelist_dsts.data() + prev_chunk_size, chunk_size, std::cout); - raft::print_device_vector("offsets", intersection_offsets.data(), intersection_offsets.size(), std::cout); - raft::print_device_vector("indices", intersection_indices.data(), intersection_indices.size(), std::cout); - printf("\n"); // Update the number of triangles of each (p, q) edges by looking at their intersection - // size - - + // size thrust::for_each( handle.get_thrust_policy(), thrust::make_counting_iterator(0), @@ -248,10 +225,6 @@ edge_triangle_count_impl( get_dataframe_buffer_begin(vertex_pair_buffer_tmp), get_dataframe_buffer_end(vertex_pair_buffer_tmp)); - printf("\np, r and q, r\n"); - raft::print_device_vector("edgelist_srcs", std::get<0>(vertex_pair_buffer_tmp).data(), std::get<0>(vertex_pair_buffer_tmp).size(), std::cout); - raft::print_device_vector("edgelist_dsts", std::get<1>(vertex_pair_buffer_tmp).data(), std::get<1>(vertex_pair_buffer_tmp).size(), std::cout); - rmm::device_uvector increase_count_tmp(2 * intersection_indices.size(), handle.get_stream()); thrust::fill(handle.get_thrust_policy(), increase_count_tmp.begin(), increase_count_tmp.end(), size_t{1}); @@ -260,7 +233,6 @@ edge_triangle_count_impl( get_dataframe_buffer_end(vertex_pair_buffer_tmp)); rmm::device_uvector increase_count(count_p_r_q_r, handle.get_stream()); - auto vertex_pair_buffer = allocate_dataframe_buffer>( count_p_r_q_r, handle.get_stream()); @@ -353,80 +325,20 @@ edge_triangle_count_impl( raft::device_span(num_triangles.data(), num_triangles.size()), edge_first}); } - printf("\ndone with the 
iteration\n"); - printf("\nafter updating p, r and q, r edges\n"); - raft::print_device_vector("num_triangles", num_triangles.data(), num_triangles.size(), std::cout); prev_chunk_size += chunk_size; } - /* - printf("\nfrom edge triangle count and size = %d\n", num_triangles.size()); - raft::print_device_vector("edgelist_srcs", edgelist_srcs.data(), edgelist_srcs.size(), std::cout); - raft::print_device_vector("edgelist_dsts", edgelist_dsts.data(), edgelist_dsts.size(), std::cout); - raft::print_device_vector("triangle_count", num_triangles.data(), num_triangles.size(), std::cout); - printf("\n"); - */ - /* - std::vector> buffer{}; - buffer.push_back(std::move(num_triangles)); - auto buff_counts = - edge_property_t, edge_t>(std::move(buffer)); - */ - - //std::vector> buffer{}; - //buffer.push_back(std::move(num_triangles)); - //buffer.reserve(num_triangles.size()); - //buffer.push_back(std::move(num_triangles)); - //printf("\nother count\n"); - //raft::print_device_vector("triangle_count", buffer[0].data(), buffer[0].size(), std::cout); - printf("\n"); - - - //auto buff_counts = - // edge_property_t, edge_t>(std::move(buffer)); - - //#if 0 cugraph::edge_property_t, edge_t> counts( handle, graph_view); - /* - auto counts_ = detail::edge_partition_edge_property_device_view_t( - (buff_counts.view()), 0); - */ - - //edge_t* - //auto y = x.value_first(); - //raft::print_device_vector("prop_triangle_ct", counts_.value_first(), num_triangles.size(), std::cout); - /* - cugraph::transform_e( - handle, - graph_view, - // buccket. 
- cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - buff_counts.view(), - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { - //printf("\nedge %d, %d, count = %d\n", src, dst, count); - return count; - }, - counts.mutable_view(), - false); - */ - - cugraph::edge_bucket_t valid_edges(handle); valid_edges.insert(edgelist_srcs.begin(), edgelist_srcs.end(), edgelist_dsts.begin()); auto cur_graph_view = graph_view; - /* - auto unmasked_cur_graph_view = cur_graph_view; - if (unmasked_cur_graph_view.has_edge_mask()) { unmasked_cur_graph_view.clear_edge_mask(); } - */ - auto edge_last = edge_first + edgelist_srcs.size(); - printf("\nthe number of edges = %d\n", edgelist_srcs.size()); + auto edge_last = edge_first + edgelist_srcs.size(); // FIXME: Remove this unnecessary variable cugraph::transform_e( handle, graph_view, @@ -438,25 +350,16 @@ edge_triangle_count_impl( edge_last, num_edges = edgelist_srcs.size(), num_triangles = num_triangles.data()] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { - //printf("\nedge %d, %d\n", src, dst); auto pair = thrust::make_tuple(src, dst); - // Find its position in 'edges' + // Find its position in 'edges' auto itr_pair = thrust::lower_bound(thrust::seq, edge_first, edge_last, pair); - //auto itr_pair = thrust::lower_bound(thrust::seq, edge_first, edge_last, pair); - //assert(*itr_p_r_q_r == p_r_q_r_pair); - //if (itr_pair != edge_last && *itr_pair == pair) { auto idx_pair = thrust::distance(edge_first, itr_pair); - printf("\nin - edge %d, %d, count = %d\n", src, dst, num_triangles[idx_pair]); return num_triangles[idx_pair]; - //} }, counts.mutable_view(), false); - - - //#endif return counts; } From 30f891a48cb4024e9f2b9e1be6ca7f6c020c6360 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sun, 12 May 2024 05:48:27 -0700 Subject: [PATCH 17/93] add edge mask test --- cpp/tests/community/edge_triangle_count_test.cpp | 2 
+- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/community/edge_triangle_count_test.cpp b/cpp/tests/community/edge_triangle_count_test.cpp index 4e926fee6c1..cf1dbec263b 100644 --- a/cpp/tests/community/edge_triangle_count_test.cpp +++ b/cpp/tests/community/edge_triangle_count_test.cpp @@ -241,7 +241,7 @@ INSTANTIATE_TEST_SUITE_P( // enable correctness checks ::testing::Combine( ::testing::Values(EdgeTriangleCount_Usecase{false, false, true}, - EdgeTriangleCount_Usecase{true, false, true}), + EdgeTriangleCount_Usecase{true, true, true}), ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); INSTANTIATE_TEST_SUITE_P( From be7ed1ae588f2d3cf14ddc7e1d4b6a3d89936b9f Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sun, 12 May 2024 05:51:35 -0700 Subject: [PATCH 18/93] update 'mg_graph_to_sg_graph' to support 'edge_ids' --- .../mg_betweenness_centrality_test.cpp | 3 ++- .../mg_edge_betweenness_centrality_test.cpp | 3 ++- .../mg_eigenvector_centrality_test.cpp | 3 ++- .../centrality/mg_katz_centrality_test.cpp | 3 ++- cpp/tests/community/mg_ecg_test.cpp | 3 ++- cpp/tests/community/mg_egonet_test.cu | 3 ++- cpp/tests/community/mg_leiden_test.cpp | 3 ++- cpp/tests/community/mg_louvain_test.cpp | 3 ++- .../community/mg_triangle_count_test.cpp | 3 ++- .../mg_weakly_connected_components_test.cpp | 3 ++- cpp/tests/cores/mg_core_number_test.cpp | 3 ++- cpp/tests/cores/mg_k_core_test.cpp | 3 ++- cpp/tests/link_analysis/mg_hits_test.cpp | 3 ++- cpp/tests/link_analysis/mg_pagerank_test.cpp | 3 ++- cpp/tests/mtmg/threaded_test_louvain.cu | 4 +++- cpp/tests/prims/mg_count_if_e.cu | 3 ++- cpp/tests/prims/mg_count_if_v.cu | 3 ++- cpp/tests/prims/mg_extract_transform_e.cu | 3 ++- ...extract_transform_v_frontier_outgoing_e.cu | 3 ++- ...r_v_pair_transform_dst_nbr_intersection.cu | 3 ++- ...transform_dst_nbr_weighted_intersection.cu | 3 ++- ...er_v_random_select_transform_outgoing_e.cu | 3 ++- 
...rm_reduce_dst_key_aggregated_outgoing_e.cu | 3 ++- ..._v_transform_reduce_incoming_outgoing_e.cu | 3 ++- cpp/tests/prims/mg_reduce_v.cu | 3 ++- ...st_nbr_intersection_of_e_endpoints_by_v.cu | 3 ++- cpp/tests/prims/mg_transform_reduce_e.cu | 3 ++- .../mg_transform_reduce_e_by_src_dst_key.cu | 3 ++- cpp/tests/prims/mg_transform_reduce_v.cu | 3 ++- ...orm_reduce_v_frontier_outgoing_e_by_dst.cu | 3 ++- cpp/tests/structure/mg_coarsen_graph_test.cpp | 6 +++-- ..._count_self_loops_and_multi_edges_test.cpp | 3 ++- ...has_edge_and_compute_multiplicity_test.cpp | 3 ++- .../structure/mg_induced_subgraph_test.cu | 3 ++- cpp/tests/structure/mg_symmetrize_test.cpp | 3 ++- .../structure/mg_transpose_storage_test.cpp | 3 ++- cpp/tests/structure/mg_transpose_test.cpp | 3 ++- cpp/tests/traversal/mg_bfs_test.cpp | 3 ++- .../traversal/mg_extract_bfs_paths_test.cu | 3 ++- cpp/tests/traversal/mg_k_hop_nbrs_test.cpp | 3 ++- cpp/tests/traversal/mg_sssp_test.cpp | 3 ++- cpp/tests/utilities/conversion_utilities.hpp | 4 ++++ .../utilities/conversion_utilities_impl.cuh | 22 +++++++++++++---- .../utilities/conversion_utilities_mg.cu | 24 +++++++++++++++++++ 44 files changed, 130 insertions(+), 47 deletions(-) diff --git a/cpp/tests/centrality/mg_betweenness_centrality_test.cpp b/cpp/tests/centrality/mg_betweenness_centrality_test.cpp index 7924d449897..9bf4a88f8b1 100644 --- a/cpp/tests/centrality/mg_betweenness_centrality_test.cpp +++ b/cpp/tests/centrality/mg_betweenness_centrality_test.cpp @@ -152,10 +152,11 @@ class Tests_MGBetweennessCentrality std::optional< cugraph::edge_property_t, weight_t>> sg_edge_weights{std::nullopt}; - std::tie(sg_graph, sg_edge_weights, std::ignore) = + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph(*handle_, mg_graph_view, mg_edge_weight_view, + std::optional>{std::nullopt}, std::make_optional>( (*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git 
a/cpp/tests/centrality/mg_edge_betweenness_centrality_test.cpp b/cpp/tests/centrality/mg_edge_betweenness_centrality_test.cpp index c3417e96c03..1719842c7b6 100644 --- a/cpp/tests/centrality/mg_edge_betweenness_centrality_test.cpp +++ b/cpp/tests/centrality/mg_edge_betweenness_centrality_test.cpp @@ -142,10 +142,11 @@ class Tests_MGEdgeBetweennessCentrality std::optional< cugraph::edge_property_t, weight_t>> sg_edge_weights{std::nullopt}; - std::tie(sg_graph, sg_edge_weights, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, mg_edge_weight_view, + std::optional>{std::nullopt}, std::optional>{std::nullopt}, false); diff --git a/cpp/tests/centrality/mg_eigenvector_centrality_test.cpp b/cpp/tests/centrality/mg_eigenvector_centrality_test.cpp index ed24bee0923..0cea7e73ba6 100644 --- a/cpp/tests/centrality/mg_eigenvector_centrality_test.cpp +++ b/cpp/tests/centrality/mg_eigenvector_centrality_test.cpp @@ -144,10 +144,11 @@ class Tests_MGEigenvectorCentrality std::optional< cugraph::edge_property_t, weight_t>> sg_edge_weights{std::nullopt}; - std::tie(sg_graph, sg_edge_weights, std::ignore) = + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph(*handle_, mg_graph_view, mg_edge_weight_view, + std::optional>{std::nullopt}, std::make_optional>( (*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/centrality/mg_katz_centrality_test.cpp b/cpp/tests/centrality/mg_katz_centrality_test.cpp index abe02b2287b..acfacefb3b4 100644 --- a/cpp/tests/centrality/mg_katz_centrality_test.cpp +++ b/cpp/tests/centrality/mg_katz_centrality_test.cpp @@ -151,10 +151,11 @@ class Tests_MGKatzCentrality std::optional< cugraph::edge_property_t, weight_t>> sg_edge_weights{std::nullopt}; - std::tie(sg_graph, sg_edge_weights, std::ignore) = + std::tie(sg_graph, sg_edge_weights, std::ignore, 
std::ignore) = cugraph::test::mg_graph_to_sg_graph(*handle_, mg_graph_view, mg_edge_weight_view, + std::optional>{std::nullopt}, std::make_optional>( (*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/community/mg_ecg_test.cpp b/cpp/tests/community/mg_ecg_test.cpp index a5e02c4f532..422bb055b92 100644 --- a/cpp/tests/community/mg_ecg_test.cpp +++ b/cpp/tests/community/mg_ecg_test.cpp @@ -127,10 +127,11 @@ class Tests_MGEcg : public ::testing::TestWithParam, weight_t>> sg_edge_weights{std::nullopt}; - std::tie(sg_graph, sg_edge_weights, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, mg_edge_weight_view, + std::optional>{std::nullopt}, std::optional>{std::nullopt}, false); // crate a SG graph with MG graph vertex IDs diff --git a/cpp/tests/community/mg_egonet_test.cu b/cpp/tests/community/mg_egonet_test.cu index 66ab1f47312..3aaf749f01a 100644 --- a/cpp/tests/community/mg_egonet_test.cu +++ b/cpp/tests/community/mg_egonet_test.cu @@ -199,10 +199,11 @@ class Tests_MGEgonet triplet_first + d_mg_aggregate_edgelist_src.size()); } - auto [sg_graph, sg_edge_weights, sg_number_map] = + auto [sg_graph, sg_edge_weights, sg_edge_ids, sg_number_map] = cugraph::test::mg_graph_to_sg_graph(*handle_, mg_graph_view, mg_edge_weight_view, + std::optional>{std::nullopt}, std::make_optional>( (*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/community/mg_leiden_test.cpp b/cpp/tests/community/mg_leiden_test.cpp index f1a2fc83192..73854d7e5e8 100644 --- a/cpp/tests/community/mg_leiden_test.cpp +++ b/cpp/tests/community/mg_leiden_test.cpp @@ -87,10 +87,11 @@ class Tests_MGLeiden std::optional< cugraph::edge_property_t, weight_t>> sg_edge_weights{std::nullopt}; - std::tie(sg_graph, sg_edge_weights, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, sg_edge_weights, 
std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, mg_edge_weight_view, + std::optional>{std::nullopt}, std::optional>{std::nullopt}, false); // crate an SG graph with MG graph vertex IDs diff --git a/cpp/tests/community/mg_louvain_test.cpp b/cpp/tests/community/mg_louvain_test.cpp index 733ee9368ac..d45d71cd053 100644 --- a/cpp/tests/community/mg_louvain_test.cpp +++ b/cpp/tests/community/mg_louvain_test.cpp @@ -85,10 +85,11 @@ class Tests_MGLouvain std::optional< cugraph::edge_property_t, weight_t>> sg_edge_weights{std::nullopt}; - std::tie(sg_graph, sg_edge_weights, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, mg_edge_weight_view, + std::optional>{std::nullopt}, std::optional>{std::nullopt}, false); // crate an SG graph with MG graph vertex IDs diff --git a/cpp/tests/community/mg_triangle_count_test.cpp b/cpp/tests/community/mg_triangle_count_test.cpp index ca3e0b2ac8f..06134e87ddd 100644 --- a/cpp/tests/community/mg_triangle_count_test.cpp +++ b/cpp/tests/community/mg_triangle_count_test.cpp @@ -178,10 +178,11 @@ class Tests_MGTriangleCount d_mg_triangle_counts.size())); cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/components/mg_weakly_connected_components_test.cpp b/cpp/tests/components/mg_weakly_connected_components_test.cpp index c510e3139fb..5919ac15925 100644 --- a/cpp/tests/components/mg_weakly_connected_components_test.cpp +++ b/cpp/tests/components/mg_weakly_connected_components_test.cpp @@ -125,10 +125,11 @@ class 
Tests_MGWeaklyConnectedComponents raft::device_span(d_mg_components.data(), d_mg_components.size())); cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/cores/mg_core_number_test.cpp b/cpp/tests/cores/mg_core_number_test.cpp index ac99d7d4a93..2fb221e2d04 100644 --- a/cpp/tests/cores/mg_core_number_test.cpp +++ b/cpp/tests/cores/mg_core_number_test.cpp @@ -143,10 +143,11 @@ class Tests_MGCoreNumber raft::device_span(d_mg_core_numbers.data(), d_mg_core_numbers.size())); cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/cores/mg_k_core_test.cpp b/cpp/tests/cores/mg_k_core_test.cpp index 100c7fa3bcf..a6dcb68b726 100644 --- a/cpp/tests/cores/mg_k_core_test.cpp +++ b/cpp/tests/cores/mg_k_core_test.cpp @@ -160,10 +160,11 @@ class Tests_MGKCore : public ::testing::TestWithParam>{std::nullopt}, raft::device_span(d_mg_core_numbers.data(), d_mg_core_numbers.size())); - auto [sg_graph, sg_edge_weights, sg_number_map] = + auto [sg_graph, sg_edge_weights, sg_edge_ids, sg_number_map] = cugraph::test::mg_graph_to_sg_graph(*handle_, mg_graph_view, mg_edge_weight_view, + std::optional>{std::nullopt}, std::make_optional>( (*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/link_analysis/mg_hits_test.cpp 
b/cpp/tests/link_analysis/mg_hits_test.cpp index 101a4fe1557..d3350f2e8ec 100644 --- a/cpp/tests/link_analysis/mg_hits_test.cpp +++ b/cpp/tests/link_analysis/mg_hits_test.cpp @@ -186,10 +186,11 @@ class Tests_MGHits : public ::testing::TestWithParam, weight_t>> sg_edge_weights{std::nullopt}; - std::tie(sg_graph, sg_edge_weights, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/link_analysis/mg_pagerank_test.cpp b/cpp/tests/link_analysis/mg_pagerank_test.cpp index 6be451ac5fd..cd71e08a691 100644 --- a/cpp/tests/link_analysis/mg_pagerank_test.cpp +++ b/cpp/tests/link_analysis/mg_pagerank_test.cpp @@ -202,10 +202,11 @@ class Tests_MGPageRank std::optional< cugraph::edge_property_t, weight_t>> sg_edge_weights{std::nullopt}; - std::tie(sg_graph, sg_edge_weights, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, mg_edge_weight_view, + std::optional>{std::nullopt}, std::make_optional>((*d_mg_renumber_map).data(), (*d_mg_renumber_map).size()), false); diff --git a/cpp/tests/mtmg/threaded_test_louvain.cu b/cpp/tests/mtmg/threaded_test_louvain.cu index ff9641d59f8..315962371ac 100644 --- a/cpp/tests/mtmg/threaded_test_louvain.cu +++ b/cpp/tests/mtmg/threaded_test_louvain.cu @@ -376,12 +376,13 @@ class Tests_Multithreaded auto thread_handle = instance_manager->get_handle(); if (thread_handle.get_rank() == 0) { - std::tie(sg_graph, sg_edge_weights, std::ignore) = + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( thread_handle.raft_handle(), graph_view.get(thread_handle), edge_weights ? 
std::make_optional(edge_weights->get(thread_handle).view()) : std::nullopt, + std::optional>{std::nullopt}, std::optional>{std::nullopt}, false); // create an SG graph with MG graph vertex IDs } else { @@ -390,6 +391,7 @@ class Tests_Multithreaded graph_view.get(thread_handle), edge_weights ? std::make_optional(edge_weights->get(thread_handle).view()) : std::nullopt, + std::optional>{std::nullopt}, std::optional>{std::nullopt}, false); // create an SG graph with MG graph vertex IDs } diff --git a/cpp/tests/prims/mg_count_if_e.cu b/cpp/tests/prims/mg_count_if_e.cu index 8ad1a20e585..3224550cc77 100644 --- a/cpp/tests/prims/mg_count_if_e.cu +++ b/cpp/tests/prims/mg_count_if_e.cu @@ -149,10 +149,11 @@ class Tests_MGCountIfE if (prims_usecase.check_correctness) { cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/prims/mg_count_if_v.cu b/cpp/tests/prims/mg_count_if_v.cu index eb0e8cf9835..9c6fc2ff36c 100644 --- a/cpp/tests/prims/mg_count_if_v.cu +++ b/cpp/tests/prims/mg_count_if_v.cu @@ -123,10 +123,11 @@ class Tests_MGCountIfV if (prims_usecase.check_correctness) { cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/prims/mg_extract_transform_e.cu b/cpp/tests/prims/mg_extract_transform_e.cu index 48b893f6fea..d6ab69680f0 100644 --- 
a/cpp/tests/prims/mg_extract_transform_e.cu +++ b/cpp/tests/prims/mg_extract_transform_e.cu @@ -253,10 +253,11 @@ class Tests_MGExtractTransformE } cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::make_optional>((*d_mg_renumber_map_labels).data(), (*d_mg_renumber_map_labels).size()), false); diff --git a/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu b/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu index 3611a250afd..f6db4978841 100644 --- a/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu +++ b/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu @@ -283,10 +283,11 @@ class Tests_MGExtractTransformVFrontierOutgoingE } cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::make_optional>((*d_mg_renumber_map_labels).data(), (*d_mg_renumber_map_labels).size()), false); diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu index 762da62eeb8..59b37ded432 100644 --- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu @@ -226,10 +226,11 @@ class Tests_MGPerVPairTransformDstNbrIntersection *handle_, std::get<1>(mg_result_buffer).data(), std::get<1>(mg_result_buffer).size()); cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, std::ignore, 
std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu index de78b42603d..1ff4145fc4f 100644 --- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu @@ -258,12 +258,13 @@ class Tests_MGPerVPairTransformDstNbrIntersection weight_t>> sg_edge_weight{std::nullopt}; - std::tie(sg_graph, sg_edge_weight, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, sg_edge_weight, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, mg_edge_weight ? std::make_optional(mg_edge_weight_view) : std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu b/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu index 2ea8635fe36..84b3ba64b80 100644 --- a/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu +++ b/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu @@ -282,10 +282,11 @@ class Tests_MGPerVRandomSelectTransformOutgoingE } cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu 
b/cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu index efcfee9fc66..738c15afbc3 100644 --- a/cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu +++ b/cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu @@ -297,10 +297,11 @@ class Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE std::optional< cugraph::edge_property_t, weight_t>> sg_edge_weights{std::nullopt}; - std::tie(sg_graph, sg_edge_weights, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu b/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu index e3eb56d5a6e..cd329aa1e5f 100644 --- a/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu +++ b/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu @@ -271,10 +271,11 @@ class Tests_MGPerVTransformReduceIncomingOutgoingE if (prims_usecase.check_correctness) { cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/prims/mg_reduce_v.cu b/cpp/tests/prims/mg_reduce_v.cu index 1449e8f9910..7b80aac6d9f 100644 --- a/cpp/tests/prims/mg_reduce_v.cu +++ b/cpp/tests/prims/mg_reduce_v.cu @@ -163,10 +163,11 @@ class Tests_MGReduceV if (prims_usecase.check_correctness) { cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore) = 
cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/prims/mg_transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cu b/cpp/tests/prims/mg_transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cu index 71cdf27fda1..f4f401a4fc2 100644 --- a/cpp/tests/prims/mg_transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cu +++ b/cpp/tests/prims/mg_transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cu @@ -174,10 +174,11 @@ class Tests_MGTransformReduceDstNbrIntersectionOfEEndpointsByV raft::device_span(mg_result_buffer.data(), mg_result_buffer.size())); cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/prims/mg_transform_reduce_e.cu b/cpp/tests/prims/mg_transform_reduce_e.cu index a086571d6e0..fd4110d6f91 100644 --- a/cpp/tests/prims/mg_transform_reduce_e.cu +++ b/cpp/tests/prims/mg_transform_reduce_e.cu @@ -159,10 +159,11 @@ class Tests_MGTransformReduceE if (prims_usecase.check_correctness) { cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git 
a/cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu b/cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu index a66c70ff586..dd831d81433 100644 --- a/cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu +++ b/cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu @@ -237,10 +237,11 @@ class Tests_MGTransformReduceEBySrcDstKey cugraph::get_dataframe_buffer_begin(mg_aggregate_by_dst_values)); cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/prims/mg_transform_reduce_v.cu b/cpp/tests/prims/mg_transform_reduce_v.cu index c26085a55c4..39885c644e8 100644 --- a/cpp/tests/prims/mg_transform_reduce_v.cu +++ b/cpp/tests/prims/mg_transform_reduce_v.cu @@ -169,10 +169,11 @@ class Tests_MGTransformReduceV if (prims_usecase.check_correctness) { cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu b/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu index 07a0f7e7aab..ca09cdac696 100644 --- a/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu +++ b/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu @@ -292,10 +292,11 @@ class Tests_MGTransformReduceVFrontierOutgoingEByDst } cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, 
std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/structure/mg_coarsen_graph_test.cpp b/cpp/tests/structure/mg_coarsen_graph_test.cpp index 1da30869545..077c2560cb3 100644 --- a/cpp/tests/structure/mg_coarsen_graph_test.cpp +++ b/cpp/tests/structure/mg_coarsen_graph_test.cpp @@ -330,10 +330,11 @@ class Tests_MGCoarsenGraph cugraph::edge_property_t, weight_t>> sg_edge_weights{std::nullopt}; - std::tie(sg_graph, sg_edge_weights, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, mg_edge_weight_view, + std::optional>{std::nullopt}, std::optional>{std::nullopt}, false); @@ -342,11 +343,12 @@ class Tests_MGCoarsenGraph cugraph::edge_property_t, weight_t>> sg_coarse_edge_weights{std::nullopt}; - std::tie(sg_coarse_graph, sg_coarse_edge_weights, std::ignore) = + std::tie(sg_coarse_graph, sg_coarse_edge_weights, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_coarse_graph_view, mg_coarse_edge_weight_view, + std::optional>{std::nullopt}, std::optional>{std::nullopt}, false); diff --git a/cpp/tests/structure/mg_count_self_loops_and_multi_edges_test.cpp b/cpp/tests/structure/mg_count_self_loops_and_multi_edges_test.cpp index 45fac884f49..7efafa30963 100644 --- a/cpp/tests/structure/mg_count_self_loops_and_multi_edges_test.cpp +++ b/cpp/tests/structure/mg_count_self_loops_and_multi_edges_test.cpp @@ -126,10 +126,11 @@ class Tests_MGCountSelfLoopsAndMultiEdges // 3-1. 
aggregate MG results cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/structure/mg_has_edge_and_compute_multiplicity_test.cpp b/cpp/tests/structure/mg_has_edge_and_compute_multiplicity_test.cpp index 0ee72726294..0c7065961e6 100644 --- a/cpp/tests/structure/mg_has_edge_and_compute_multiplicity_test.cpp +++ b/cpp/tests/structure/mg_has_edge_and_compute_multiplicity_test.cpp @@ -204,10 +204,11 @@ class Tests_MGHasEdgeAndComputeMultiplicity d_mg_edge_multiplicities.size())); cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/structure/mg_induced_subgraph_test.cu b/cpp/tests/structure/mg_induced_subgraph_test.cu index 3b32c15bf9f..32f9430713f 100644 --- a/cpp/tests/structure/mg_induced_subgraph_test.cu +++ b/cpp/tests/structure/mg_induced_subgraph_test.cu @@ -214,10 +214,11 @@ class Tests_MGInducedSubgraph true, handle_->get_stream()); - auto [sg_graph, sg_edge_weights, sg_number_map] = cugraph::test::mg_graph_to_sg_graph( + auto [sg_graph, sg_edge_weights, sg_edge_ids, sg_number_map] = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, mg_edge_weight_view, + std::optional>{std::nullopt}, std::optional>{std::nullopt}, false); diff --git a/cpp/tests/structure/mg_symmetrize_test.cpp b/cpp/tests/structure/mg_symmetrize_test.cpp index 
7eb387f3915..8949ba07bec 100644 --- a/cpp/tests/structure/mg_symmetrize_test.cpp +++ b/cpp/tests/structure/mg_symmetrize_test.cpp @@ -88,10 +88,11 @@ class Tests_MGSymmetrize weight_t>> sg_edge_weights{std::nullopt}; if (symmetrize_usecase.check_correctness) { - std::tie(sg_graph, sg_edge_weights, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph.view(), mg_edge_weights ? std::make_optional((*mg_edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/structure/mg_transpose_storage_test.cpp b/cpp/tests/structure/mg_transpose_storage_test.cpp index 4cbbe500dd8..febf446779c 100644 --- a/cpp/tests/structure/mg_transpose_storage_test.cpp +++ b/cpp/tests/structure/mg_transpose_storage_test.cpp @@ -87,10 +87,11 @@ class Tests_MGTransposeStorage weight_t>> sg_edge_weights{std::nullopt}; if (transpose_storage_usecase.check_correctness) { - std::tie(sg_graph, sg_edge_weights, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph.view(), mg_edge_weights ? 
std::make_optional((*mg_edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/structure/mg_transpose_test.cpp b/cpp/tests/structure/mg_transpose_test.cpp index 80cdcae070a..2ee343f26a4 100644 --- a/cpp/tests/structure/mg_transpose_test.cpp +++ b/cpp/tests/structure/mg_transpose_test.cpp @@ -87,10 +87,11 @@ class Tests_MGTranspose weight_t>> sg_edge_weights{std::nullopt}; if (transpose_usecase.check_correctness) { - std::tie(sg_graph, sg_edge_weights, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph.view(), mg_edge_weights ? std::make_optional((*mg_edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/traversal/mg_bfs_test.cpp b/cpp/tests/traversal/mg_bfs_test.cpp index 431ed75c82d..6632e1506e7 100644 --- a/cpp/tests/traversal/mg_bfs_test.cpp +++ b/cpp/tests/traversal/mg_bfs_test.cpp @@ -183,10 +183,11 @@ class Tests_MGBFS : public ::testing::TestWithParam sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/traversal/mg_extract_bfs_paths_test.cu b/cpp/tests/traversal/mg_extract_bfs_paths_test.cu index 8484066c6a0..13e7e885c8f 100644 --- a/cpp/tests/traversal/mg_extract_bfs_paths_test.cu +++ b/cpp/tests/traversal/mg_extract_bfs_paths_test.cu @@ -237,10 +237,11 @@ class Tests_MGExtractBFSPaths cugraph::test::device_gatherv(*handle_, d_mg_paths.data(), 
d_mg_paths.size()); cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/traversal/mg_k_hop_nbrs_test.cpp b/cpp/tests/traversal/mg_k_hop_nbrs_test.cpp index 07ea107a2ed..f51fee68078 100644 --- a/cpp/tests/traversal/mg_k_hop_nbrs_test.cpp +++ b/cpp/tests/traversal/mg_k_hop_nbrs_test.cpp @@ -178,10 +178,11 @@ class Tests_MGKHopNbrs *handle_, raft::device_span(d_mg_nbrs.data(), d_mg_nbrs.size())); cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, std::optional>{std::nullopt}, + std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/traversal/mg_sssp_test.cpp b/cpp/tests/traversal/mg_sssp_test.cpp index 188d0eca115..84fe8daf346 100644 --- a/cpp/tests/traversal/mg_sssp_test.cpp +++ b/cpp/tests/traversal/mg_sssp_test.cpp @@ -176,10 +176,11 @@ class Tests_MGSSSP : public ::testing::TestWithParam, weight_t>> sg_edge_weights{std::nullopt}; - std::tie(sg_graph, sg_edge_weights, std::ignore) = + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph(*handle_, mg_graph_view, mg_edge_weight_view, + std::optional>{std::nullopt}, std::make_optional>( (*mg_renumber_map).data(), (*mg_renumber_map).size()), false); diff --git a/cpp/tests/utilities/conversion_utilities.hpp b/cpp/tests/utilities/conversion_utilities.hpp index 9b55f45d5bd..1d0ff4c76a7 100644 --- a/cpp/tests/utilities/conversion_utilities.hpp +++ 
b/cpp/tests/utilities/conversion_utilities.hpp @@ -220,11 +220,15 @@ std::tuple, std::optional, weight_t>>, + std::optional, + edge_t>>, std::optional>> mg_graph_to_sg_graph( raft::handle_t const& handle, cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool renumber); diff --git a/cpp/tests/utilities/conversion_utilities_impl.cuh b/cpp/tests/utilities/conversion_utilities_impl.cuh index fb2af023c03..1eb1a80e2cc 100644 --- a/cpp/tests/utilities/conversion_utilities_impl.cuh +++ b/cpp/tests/utilities/conversion_utilities_impl.cuh @@ -280,23 +280,26 @@ template , std::optional, weight_t>>, + std::optional, edge_t>>, std::optional>> mg_graph_to_sg_graph( raft::handle_t const& handle, cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool renumber) { rmm::device_uvector d_src(0, handle.get_stream()); rmm::device_uvector d_dst(0, handle.get_stream()); std::optional> d_wgt{std::nullopt}; + std::optional> d_edge_id{std::nullopt}; - std::tie(d_src, d_dst, d_wgt, std::ignore) = cugraph::decompress_to_edgelist( + std::tie(d_src, d_dst, d_wgt, d_edge_id) = cugraph::decompress_to_edgelist( handle, graph_view, edge_weight_view, - std::optional>{std::nullopt}, + edge_id_view, renumber_map); d_src = cugraph::test::device_gatherv( @@ -306,6 +309,9 @@ mg_graph_to_sg_graph( if (d_wgt) *d_wgt = cugraph::test::device_gatherv( handle, raft::device_span{d_wgt->data(), d_wgt->size()}); + if (d_edge_id) + *d_edge_id = cugraph::test::device_gatherv( + handle, raft::device_span{d_edge_id->data(), d_edge_id->size()}); rmm::device_uvector vertices(0, handle.get_stream()); if (renumber_map) { vertices = cugraph::test::device_gatherv(handle, *renumber_map); } @@ -313,6 +319,8 @@ mg_graph_to_sg_graph( graph_t sg_graph(handle); std::optional, weight_t>> sg_edge_weights{std::nullopt}; + std::optional, edge_t>> + 
sg_edge_ids{std::nullopt}; std::optional> sg_number_map; if (handle.get_comms().get_rank() == 0) { if (!renumber_map) { @@ -321,7 +329,7 @@ mg_graph_to_sg_graph( handle.get_stream(), vertices.data(), vertices.size(), vertex_t{0}); } - std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore, sg_number_map) = + std::tie(sg_graph, sg_edge_weights, sg_edge_ids, std::ignore, sg_number_map) = cugraph::create_graph_from_edgelist diff --git a/cpp/tests/utilities/conversion_utilities_mg.cu b/cpp/tests/utilities/conversion_utilities_mg.cu index d657f868497..cb4703ec89b 100644 --- a/cpp/tests/utilities/conversion_utilities_mg.cu +++ b/cpp/tests/utilities/conversion_utilities_mg.cu @@ -381,132 +381,156 @@ graph_to_host_csc( template std::tuple< cugraph::graph_t, std::optional, float>>, + std::optional, int32_t>>, std::optional>> mg_graph_to_sg_graph( raft::handle_t const& handle, cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool renumber); template std::tuple< cugraph::graph_t, std::optional, float>>, + std::optional, int64_t>>, std::optional>> mg_graph_to_sg_graph( raft::handle_t const& handle, cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool renumber); template std::tuple< cugraph::graph_t, std::optional, float>>, + std::optional, int64_t>>, std::optional>> mg_graph_to_sg_graph( raft::handle_t const& handle, cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool renumber); template std::tuple< cugraph::graph_t, std::optional, double>>, + std::optional, int32_t>>, std::optional>> mg_graph_to_sg_graph( raft::handle_t const& handle, cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool renumber); template std::tuple< 
cugraph::graph_t, std::optional, double>>, + std::optional, int64_t>>, std::optional>> mg_graph_to_sg_graph( raft::handle_t const& handle, cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool renumber); template std::tuple< cugraph::graph_t, std::optional, double>>, + std::optional, int64_t>>, std::optional>> mg_graph_to_sg_graph( raft::handle_t const& handle, cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool renumber); template std::tuple< cugraph::graph_t, std::optional, float>>, + std::optional, int32_t>>, std::optional>> mg_graph_to_sg_graph( raft::handle_t const& handle, cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool renumber); template std::tuple< cugraph::graph_t, std::optional, float>>, + std::optional, int64_t>>, std::optional>> mg_graph_to_sg_graph( raft::handle_t const& handle, cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool renumber); template std::tuple< cugraph::graph_t, std::optional, float>>, + std::optional, int64_t>>, std::optional>> mg_graph_to_sg_graph( raft::handle_t const& handle, cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool renumber); template std::tuple< cugraph::graph_t, std::optional, double>>, + std::optional, int32_t>>, std::optional>> mg_graph_to_sg_graph( raft::handle_t const& handle, cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool renumber); template std::tuple< cugraph::graph_t, std::optional, double>>, + std::optional, int64_t>>, std::optional>> mg_graph_to_sg_graph( raft::handle_t const& handle, 
cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool renumber); template std::tuple< cugraph::graph_t, std::optional, double>>, + std::optional, int64_t>>, std::optional>> mg_graph_to_sg_graph( raft::handle_t const& handle, cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view, + std::optional> edge_id_view, std::optional> renumber_map, bool renumber); From e705eca69dc7beaa4e1479328198bf2678236efe Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sun, 12 May 2024 05:59:51 -0700 Subject: [PATCH 19/93] add fixme --- cpp/src/community/edge_triangle_count_impl.cuh | 4 +--- cpp/tests/community/mg_edge_triangle_count_test.cpp | 8 ++++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/cpp/src/community/edge_triangle_count_impl.cuh b/cpp/src/community/edge_triangle_count_impl.cuh index 84f700d2ac7..3b4870a5c79 100644 --- a/cpp/src/community/edge_triangle_count_impl.cuh +++ b/cpp/src/community/edge_triangle_count_impl.cuh @@ -145,9 +145,6 @@ edge_triangle_count_impl( ? (edgelist_srcs.size() / approx_edges_to_intersect_per_iteration) : (edgelist_srcs.size() / approx_edges_to_intersect_per_iteration) + 1; - // Note: host_scalar_all_reduce to get the max reduction - // Note: edge src dst and delta -> shuffle those -> and update -> check this : shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning - // Note: shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning in shuffle_wrapper size_t prev_chunk_size = 0; auto num_edges = edgelist_srcs.size(); rmm::device_uvector num_triangles(edgelist_srcs.size(), handle.get_stream()); @@ -262,6 +259,7 @@ edge_triangle_count_impl( std::move(std::get<0>(vertex_pair_buffer)), std::move(std::get<1>(vertex_pair_buffer)), std::nullopt, + // FIXME: Update 'shuffle_int_...' 
to support int32_t and int64_t values std::move(opt_increase_count), std::nullopt, graph_view.vertex_partition_range_lasts()); diff --git a/cpp/tests/community/mg_edge_triangle_count_test.cpp b/cpp/tests/community/mg_edge_triangle_count_test.cpp index 3ac60a56a2a..a8bef9e17aa 100644 --- a/cpp/tests/community/mg_edge_triangle_count_test.cpp +++ b/cpp/tests/community/mg_edge_triangle_count_test.cpp @@ -99,6 +99,7 @@ class Tests_MGEdgeTriangleCount // 2. run MG EdgeTriangleCount + if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement handle_->get_comms().barrier(); @@ -116,8 +117,10 @@ class Tests_MGEdgeTriangleCount } // 3. Compare SG & MG results + if (edge_triangle_count_usecase.check_correctness_) { // 3-1. Convert to SG graph + cugraph::graph_t sg_graph(*handle_); std::optional< cugraph::edge_property_t, edge_t>> @@ -126,6 +129,7 @@ class Tests_MGEdgeTriangleCount *handle_, mg_graph_view, std::optional>{std::nullopt}, + // FIXME: Update 'create_graph_from_edgelist' to support int32_t and int64_t values std::make_optional(d_mg_cugraph_results.view()), std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), @@ -133,15 +137,18 @@ class Tests_MGEdgeTriangleCount if (handle_->get_comms().get_rank() == int{0}) { // 3-2. Convert the MG triangle counts stored as 'edge_property_t' to device vector + auto [edgelist_srcs, edgelist_dsts, d_edgelist_weights, d_edge_triangle_counts] = cugraph::decompress_to_edgelist( *handle_, sg_graph.view(), std::optional>{std::nullopt}, + // FIXME: Update 'decompress_edgelist' to support int32_t and int64_t values std::make_optional((*d_sg_cugraph_results).view()), std::optional>{std::nullopt}); // FIXME: No longer needed // 3-3. 
Run SG EdgeTriangleCount + auto ref_d_sg_cugraph_results = cugraph::edge_triangle_count(*handle_, sg_graph.view()); auto [ref_edgelist_srcs, ref_edgelist_dsts, ref_d_edgelist_weights, ref_d_edge_triangle_counts] = cugraph::decompress_to_edgelist( @@ -152,6 +159,7 @@ class Tests_MGEdgeTriangleCount std::optional>{std::nullopt}); // FIXME: No longer needed // 3-4. Compare + auto h_mg_edge_triangle_counts = cugraph::test::to_host(*handle_, *d_edge_triangle_counts); auto h_sg_edge_triangle_counts = From 2453a6f698c42bc5e99aeba9747aa72134cd3b85 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sun, 12 May 2024 06:29:02 -0700 Subject: [PATCH 20/93] add doxygen documentation --- cpp/include/cugraph/algorithms.hpp | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index 8dfddb401e1..242a1c5717c 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -2006,12 +2006,21 @@ void triangle_count(raft::handle_t const& handle, std::optional> vertices, raft::device_span counts, bool do_expensive_check = false); -/* -template -edge_property_t, edge_t> edge_triangle_count( - raft::handle_t const& handle, graph_view_t const& graph_view); -*/ +/** + * @brief Compute edge triangle counts. + * + * Compute edge triangle counts for the entire set of edges. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) or multi-GPU (true). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Graph view object. 
+ * + * @return edge_property_t containing the edge triangle count + */ template edge_property_t, edge_t> edge_triangle_count( raft::handle_t const& handle, graph_view_t const& graph_view); From e69f8627df6baed38f708eb063bd70f5c7534491 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sun, 12 May 2024 06:39:58 -0700 Subject: [PATCH 21/93] explicitly provide template parameter types --- cpp/src/community/edge_triangle_count_impl.cuh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/src/community/edge_triangle_count_impl.cuh b/cpp/src/community/edge_triangle_count_impl.cuh index 3b4870a5c79..71d052cc251 100644 --- a/cpp/src/community/edge_triangle_count_impl.cuh +++ b/cpp/src/community/edge_triangle_count_impl.cuh @@ -127,12 +127,12 @@ edge_triangle_count_impl( using weight_t = float; rmm::device_uvector edgelist_srcs(0, handle.get_stream()); rmm::device_uvector edgelist_dsts(0, handle.get_stream()); - std::tie(edgelist_srcs, edgelist_dsts, std::ignore, std::ignore) = decompress_to_edgelist( + std::tie(edgelist_srcs, edgelist_dsts, std::ignore, std::ignore) = decompress_to_edgelist( handle, graph_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - std::optional>(std::nullopt)); + std::nullopt, + std::nullopt, + std::nullopt); auto edge_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()); From 2bb9cba5b0c1e32e7d93e4ead6e3a4237f6ab72c Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sun, 12 May 2024 06:45:45 -0700 Subject: [PATCH 22/93] rename variable --- cpp/src/community/edge_triangle_count_impl.cuh | 10 +++++----- cpp/src/community/k_truss_impl.cuh | 10 +++++----- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cpp/src/community/edge_triangle_count_impl.cuh b/cpp/src/community/edge_triangle_count_impl.cuh index 71d052cc251..9e93bd63f53 100644 --- a/cpp/src/community/edge_triangle_count_impl.cuh +++ b/cpp/src/community/edge_triangle_count_impl.cuh @@ -138,12 +138,12 @@ 
edge_triangle_count_impl( thrust::sort(handle.get_thrust_policy(), edge_first, edge_first + edgelist_srcs.size()); - size_t approx_edges_to_intersect_per_iteration = + size_t edges_to_intersect_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); - auto num_chunks = ((edgelist_srcs.size() % approx_edges_to_intersect_per_iteration) == 0) - ? (edgelist_srcs.size() / approx_edges_to_intersect_per_iteration) - : (edgelist_srcs.size() / approx_edges_to_intersect_per_iteration) + 1; + auto num_chunks = ((edgelist_srcs.size() % edges_to_intersect_per_iteration) == 0) + ? (edgelist_srcs.size() / edges_to_intersect_per_iteration) + : (edgelist_srcs.size() / edges_to_intersect_per_iteration) + 1; size_t prev_chunk_size = 0; auto num_edges = edgelist_srcs.size(); @@ -159,7 +159,7 @@ edge_triangle_count_impl( thrust::fill(handle.get_thrust_policy(), num_triangles.begin(), num_triangles.end(), 0); for (size_t i = 0; i < num_chunks; ++i) { - auto chunk_size = std::min(approx_edges_to_intersect_per_iteration, num_edges); + auto chunk_size = std::min(edges_to_intersect_per_iteration, num_edges); num_edges -= chunk_size; // Perform 'nbr_intersection' in chunks to reduce peak memory. auto [intersection_offsets, intersection_indices] = diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index be400c0afd7..9b6730bef73 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -718,18 +718,18 @@ k_truss(raft::handle_t const& handle, // nbr_intersection requires the edges to be sort by 'src' // sort the invalid edges by src for nbr intersection - size_t approx_edges_to_intersect_per_iteration = + size_t edges_to_intersect_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); size_t prev_chunk_size = 0; size_t chunk_num_invalid_edges = num_invalid_edges; - auto num_chunks = ((num_invalid_edges % approx_edges_to_intersect_per_iteration) == 0) - ? 
(num_invalid_edges / approx_edges_to_intersect_per_iteration) - : (num_invalid_edges / approx_edges_to_intersect_per_iteration) + 1; + auto num_chunks = ((num_invalid_edges % edges_to_intersect_per_iteration) == 0) + ? (num_invalid_edges / edges_to_intersect_per_iteration) + : (num_invalid_edges / edges_to_intersect_per_iteration) + 1; for (size_t i = 0; i < num_chunks; ++i) { auto chunk_size = - std::min(approx_edges_to_intersect_per_iteration, chunk_num_invalid_edges); + std::min(edges_to_intersect_per_iteration, chunk_num_invalid_edges); thrust::sort_by_key(handle.get_thrust_policy(), edge_first + num_valid_edges, edge_first + edgelist_srcs.size(), From 6a02f039481d928c9f4baa1054500e82427f0b1c Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sun, 12 May 2024 06:52:07 -0700 Subject: [PATCH 23/93] remove unnecessary sort --- cpp/src/community/edge_triangle_count_impl.cuh | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/src/community/edge_triangle_count_impl.cuh b/cpp/src/community/edge_triangle_count_impl.cuh index 9e93bd63f53..aad3879cb2a 100644 --- a/cpp/src/community/edge_triangle_count_impl.cuh +++ b/cpp/src/community/edge_triangle_count_impl.cuh @@ -136,8 +136,6 @@ edge_triangle_count_impl( auto edge_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()); - thrust::sort(handle.get_thrust_policy(), edge_first, edge_first + edgelist_srcs.size()); - size_t edges_to_intersect_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); From 17017ec70709e855f29f08878c1e569e37f897b5 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sun, 12 May 2024 07:27:55 -0700 Subject: [PATCH 24/93] round with the raft util function --- cpp/src/community/edge_triangle_count_impl.cuh | 7 +++---- cpp/src/community/k_truss_impl.cuh | 7 ++++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/src/community/edge_triangle_count_impl.cuh b/cpp/src/community/edge_triangle_count_impl.cuh index 
aad3879cb2a..1d03889305e 100644 --- a/cpp/src/community/edge_triangle_count_impl.cuh +++ b/cpp/src/community/edge_triangle_count_impl.cuh @@ -26,6 +26,8 @@ #include #include +#include + #include #include #include @@ -139,10 +141,7 @@ edge_triangle_count_impl( size_t edges_to_intersect_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); - auto num_chunks = ((edgelist_srcs.size() % edges_to_intersect_per_iteration) == 0) - ? (edgelist_srcs.size() / edges_to_intersect_per_iteration) - : (edgelist_srcs.size() / edges_to_intersect_per_iteration) + 1; - + auto num_chunks = raft::div_rounding_up_safe(edgelist_srcs.size(), edges_to_intersect_per_iteration); size_t prev_chunk_size = 0; auto num_edges = edgelist_srcs.size(); rmm::device_uvector num_triangles(edgelist_srcs.size(), handle.get_stream()); diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 9b6730bef73..76f363d8e84 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -27,6 +27,8 @@ #include #include +#include + #include #include #include @@ -723,9 +725,8 @@ k_truss(raft::handle_t const& handle, size_t prev_chunk_size = 0; size_t chunk_num_invalid_edges = num_invalid_edges; - auto num_chunks = ((num_invalid_edges % edges_to_intersect_per_iteration) == 0) - ? 
(num_invalid_edges / edges_to_intersect_per_iteration) - : (num_invalid_edges / edges_to_intersect_per_iteration) + 1; + + auto num_chunks = raft::div_rounding_up_safe(edgelist_srcs.size(), edges_to_intersect_per_iteration); for (size_t i = 0; i < num_chunks; ++i) { auto chunk_size = From 13501fe7dff70da02f3f4fdf5c3d5507ca69d70a Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sun, 12 May 2024 07:44:29 -0700 Subject: [PATCH 25/93] update fixme --- cpp/src/community/k_truss_impl.cuh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 76f363d8e84..a51aab86170 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -566,7 +566,7 @@ k_truss(raft::handle_t const& handle, std::nullopt, std::nullopt, cugraph::graph_properties_t{true, graph_view.is_multigraph()}, - false); // FIXME: Renumbering should not be hardcoded. + false); modified_graph_view = (*modified_graph).view(); @@ -676,6 +676,7 @@ k_truss(raft::handle_t const& handle, decompress_to_edgelist(handle, cur_graph_view, edge_weight_view, + // FIXME: Update 'decompress_edgelist' to support int32_t and int64_t values std::make_optional(prop_num_triangles.view()), std::optional>(std::nullopt)); auto transposed_edge_first = From e5a0f2dd028222527897da91de5165c8fdea9a7d Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sun, 12 May 2024 07:53:04 -0700 Subject: [PATCH 26/93] rename variable --- cpp/src/community/edge_triangle_count_impl.cuh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/community/edge_triangle_count_impl.cuh b/cpp/src/community/edge_triangle_count_impl.cuh index 1d03889305e..684df07d3fe 100644 --- a/cpp/src/community/edge_triangle_count_impl.cuh +++ b/cpp/src/community/edge_triangle_count_impl.cuh @@ -143,7 +143,7 @@ edge_triangle_count_impl( auto num_chunks = raft::div_rounding_up_safe(edgelist_srcs.size(), edges_to_intersect_per_iteration); size_t 
prev_chunk_size = 0; - auto num_edges = edgelist_srcs.size(); + auto num_remaining_edges = edgelist_srcs.size(); rmm::device_uvector num_triangles(edgelist_srcs.size(), handle.get_stream()); //auto my_rank = handle.get_comms().get_rank(); @@ -156,8 +156,8 @@ edge_triangle_count_impl( thrust::fill(handle.get_thrust_policy(), num_triangles.begin(), num_triangles.end(), 0); for (size_t i = 0; i < num_chunks; ++i) { - auto chunk_size = std::min(edges_to_intersect_per_iteration, num_edges); - num_edges -= chunk_size; + auto chunk_size = std::min(edges_to_intersect_per_iteration, num_remaining_edges); + num_remaining_edges -= chunk_size; // Perform 'nbr_intersection' in chunks to reduce peak memory. auto [intersection_offsets, intersection_indices] = detail::nbr_intersection(handle, From ca30c84fcfab6a33e7fbf61ccf3b3af26670061d Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sun, 12 May 2024 07:56:40 -0700 Subject: [PATCH 27/93] fix style --- cpp/include/cugraph/algorithms.hpp | 2 +- .../community/edge_triangle_count_impl.cuh | 204 +++++++++--------- cpp/src/community/k_truss_impl.cuh | 22 +- cpp/tests/CMakeLists.txt | 2 +- .../mg_betweenness_centrality_test.cpp | 15 +- .../mg_edge_betweenness_centrality_test.cpp | 15 +- .../mg_eigenvector_centrality_test.cpp | 15 +- .../centrality/mg_katz_centrality_test.cpp | 15 +- cpp/tests/community/mg_ecg_test.cpp | 15 +- .../community/mg_edge_triangle_count_test.cpp | 70 +++--- cpp/tests/community/mg_egonet_test.cu | 15 +- cpp/tests/community/mg_leiden_test.cpp | 15 +- cpp/tests/community/mg_louvain_test.cpp | 15 +- .../community/mg_triangle_count_test.cpp | 17 +- .../mg_weakly_connected_components_test.cpp | 17 +- cpp/tests/cores/mg_core_number_test.cpp | 17 +- cpp/tests/cores/mg_k_core_test.cpp | 15 +- cpp/tests/link_analysis/mg_hits_test.cpp | 17 +- cpp/tests/link_analysis/mg_pagerank_test.cpp | 17 +- cpp/tests/prims/mg_count_if_e.cu | 17 +- cpp/tests/prims/mg_count_if_v.cu | 17 +- cpp/tests/prims/mg_extract_transform_e.cu | 17 +- 
...extract_transform_v_frontier_outgoing_e.cu | 17 +- ...r_v_pair_transform_dst_nbr_intersection.cu | 17 +- ...transform_dst_nbr_weighted_intersection.cu | 21 +- ...er_v_random_select_transform_outgoing_e.cu | 17 +- ...rm_reduce_dst_key_aggregated_outgoing_e.cu | 17 +- ..._v_transform_reduce_incoming_outgoing_e.cu | 17 +- cpp/tests/prims/mg_reduce_v.cu | 17 +- ...st_nbr_intersection_of_e_endpoints_by_v.cu | 17 +- cpp/tests/prims/mg_transform_reduce_e.cu | 17 +- .../mg_transform_reduce_e_by_src_dst_key.cu | 17 +- cpp/tests/prims/mg_transform_reduce_v.cu | 17 +- ...orm_reduce_v_frontier_outgoing_e_by_dst.cu | 17 +- cpp/tests/structure/mg_coarsen_graph_test.cpp | 15 +- ..._count_self_loops_and_multi_edges_test.cpp | 17 +- ...has_edge_and_compute_multiplicity_test.cpp | 17 +- .../structure/mg_induced_subgraph_test.cu | 15 +- cpp/tests/structure/mg_symmetrize_test.cpp | 17 +- .../structure/mg_transpose_storage_test.cpp | 17 +- cpp/tests/structure/mg_transpose_test.cpp | 17 +- cpp/tests/traversal/mg_bfs_test.cpp | 17 +- .../traversal/mg_extract_bfs_paths_test.cu | 17 +- cpp/tests/traversal/mg_k_hop_nbrs_test.cpp | 17 +- cpp/tests/traversal/mg_sssp_test.cpp | 15 +- cpp/tests/utilities/conversion_utilities.hpp | 17 +- .../utilities/conversion_utilities_impl.cuh | 11 +- 47 files changed, 519 insertions(+), 469 deletions(-) diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index 242a1c5717c..077ba90a57b 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -2018,7 +2018,7 @@ void triangle_count(raft::handle_t const& handle, * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Graph view object. 
- * + * * @return edge_property_t containing the edge triangle count */ template diff --git a/cpp/src/community/edge_triangle_count_impl.cuh b/cpp/src/community/edge_triangle_count_impl.cuh index 684df07d3fe..30eea336c10 100644 --- a/cpp/src/community/edge_triangle_count_impl.cuh +++ b/cpp/src/community/edge_triangle_count_impl.cuh @@ -21,8 +21,8 @@ #include "prims/transform_e.cuh" #include "prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh" -#include #include +#include #include #include @@ -88,19 +88,20 @@ struct extract_p_r_q_r { __device__ thrust::tuple operator()(edge_t i) const { - auto itr = thrust::upper_bound(thrust::seq, intersection_offsets.begin()+1, intersection_offsets.end(), i); - auto idx = thrust::distance(intersection_offsets.begin()+1, itr); + auto itr = thrust::upper_bound( + thrust::seq, intersection_offsets.begin() + 1, intersection_offsets.end(), i); + auto idx = thrust::distance(intersection_offsets.begin() + 1, itr); if (p_r_or_q_r == 0) { - return thrust::make_tuple(thrust::get<0>(*(edge_first + chunk_start + idx)), intersection_indices[i]); + return thrust::make_tuple(thrust::get<0>(*(edge_first + chunk_start + idx)), + intersection_indices[i]); } else { - return thrust::make_tuple(thrust::get<1>(*(edge_first + chunk_start + idx)), intersection_indices[i]); + return thrust::make_tuple(thrust::get<1>(*(edge_first + chunk_start + idx)), + intersection_indices[i]); } - } }; - template struct extract_q_r { size_t chunk_start{}; @@ -108,48 +109,45 @@ struct extract_q_r { raft::device_span intersection_indices{}; EdgeIterator edge_first; - __device__ thrust::tuple operator()(edge_t i) const { - auto itr = thrust::upper_bound(thrust::seq, intersection_offsets.begin()+1, intersection_offsets.end(), i); - auto idx = thrust::distance(intersection_offsets.begin()+1, itr); - auto pair = thrust::make_tuple(thrust::get<1>(*(edge_first + chunk_start + idx)), intersection_indices[i]); + auto itr = thrust::upper_bound( + thrust::seq, 
intersection_offsets.begin() + 1, intersection_offsets.end(), i); + auto idx = thrust::distance(intersection_offsets.begin() + 1, itr); + auto pair = thrust::make_tuple(thrust::get<1>(*(edge_first + chunk_start + idx)), + intersection_indices[i]); return pair; } }; - template -edge_property_t, edge_t> -edge_triangle_count_impl( +edge_property_t, edge_t> edge_triangle_count_impl( raft::handle_t const& handle, graph_view_t const& graph_view) { using weight_t = float; rmm::device_uvector edgelist_srcs(0, handle.get_stream()); rmm::device_uvector edgelist_dsts(0, handle.get_stream()); - std::tie(edgelist_srcs, edgelist_dsts, std::ignore, std::ignore) = decompress_to_edgelist( - handle, - graph_view, - std::nullopt, - std::nullopt, - std::nullopt); + std::tie(edgelist_srcs, edgelist_dsts, std::ignore, std::ignore) = + decompress_to_edgelist( + handle, graph_view, std::nullopt, std::nullopt, std::nullopt); auto edge_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()); size_t edges_to_intersect_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); - auto num_chunks = raft::div_rounding_up_safe(edgelist_srcs.size(), edges_to_intersect_per_iteration); - size_t prev_chunk_size = 0; - auto num_remaining_edges = edgelist_srcs.size(); + auto num_chunks = + raft::div_rounding_up_safe(edgelist_srcs.size(), edges_to_intersect_per_iteration); + size_t prev_chunk_size = 0; + auto num_remaining_edges = edgelist_srcs.size(); rmm::device_uvector num_triangles(edgelist_srcs.size(), handle.get_stream()); - //auto my_rank = handle.get_comms().get_rank(); + // auto my_rank = handle.get_comms().get_rank(); if constexpr (multi_gpu) { num_chunks = host_scalar_allreduce( - handle.get_comms(), num_chunks, raft::comms::op_t::MAX, handle.get_stream()); + handle.get_comms(), num_chunks, raft::comms::op_t::MAX, handle.get_stream()); } // Need to ensure that the vector has its values initialized to 0 before incrementing @@ -169,7 
+167,7 @@ edge_triangle_count_impl( false /*FIXME: pass 'do_expensive_check' as argument*/); // Update the number of triangles of each (p, q) edges by looking at their intersection - // size + // size thrust::for_each( handle.get_thrust_policy(), thrust::make_counting_iterator(0), @@ -184,52 +182,54 @@ edge_triangle_count_impl( if constexpr (multi_gpu) { // stores all the pairs (p, r) and (q, r) auto vertex_pair_buffer_tmp = allocate_dataframe_buffer>( - intersection_indices.size() * 2, handle.get_stream()); // *2 for both (p, r) and (q, r) + intersection_indices.size() * 2, handle.get_stream()); // *2 for both (p, r) and (q, r) // So that you shuffle only once - // tabulate with the size of intersection_indices, and call binary search on intersection_offsets - // to get (p, r). - thrust::tabulate(handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_tmp), - get_dataframe_buffer_begin(vertex_pair_buffer_tmp) + intersection_indices.size(), - extract_p_r_q_r{ - prev_chunk_size, - 0, - raft::device_span( - intersection_offsets.data(), intersection_offsets.size()), - raft::device_span( - intersection_indices.data(), intersection_indices.size()), - edge_first - }); + // tabulate with the size of intersection_indices, and call binary search on + // intersection_offsets to get (p, r). 
+ thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_tmp), + get_dataframe_buffer_begin(vertex_pair_buffer_tmp) + intersection_indices.size(), + extract_p_r_q_r{ + prev_chunk_size, + 0, + raft::device_span(intersection_offsets.data(), intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + edge_first}); // FIXME: Consolidate both functions - thrust::tabulate(handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_tmp) + intersection_indices.size(), - get_dataframe_buffer_begin(vertex_pair_buffer_tmp) + (2 * intersection_indices.size()), - extract_p_r_q_r{ - prev_chunk_size, - 1, - raft::device_span( - intersection_offsets.data(), intersection_offsets.size()), - raft::device_span( - intersection_indices.data(), intersection_indices.size()), - edge_first - }); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_tmp) + intersection_indices.size(), + get_dataframe_buffer_begin(vertex_pair_buffer_tmp) + (2 * intersection_indices.size()), + extract_p_r_q_r{ + prev_chunk_size, + 1, + raft::device_span(intersection_offsets.data(), intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + edge_first}); thrust::sort(handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_tmp), - get_dataframe_buffer_end(vertex_pair_buffer_tmp)); - - rmm::device_uvector increase_count_tmp(2 * intersection_indices.size(), handle.get_stream()); - thrust::fill(handle.get_thrust_policy(), increase_count_tmp.begin(), increase_count_tmp.end(), size_t{1}); + get_dataframe_buffer_begin(vertex_pair_buffer_tmp), + get_dataframe_buffer_end(vertex_pair_buffer_tmp)); + + rmm::device_uvector increase_count_tmp(2 * intersection_indices.size(), + handle.get_stream()); + thrust::fill(handle.get_thrust_policy(), + increase_count_tmp.begin(), + increase_count_tmp.end(), + 
size_t{1}); auto count_p_r_q_r = thrust::unique_count(handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_tmp), - get_dataframe_buffer_end(vertex_pair_buffer_tmp)); + get_dataframe_buffer_begin(vertex_pair_buffer_tmp), + get_dataframe_buffer_end(vertex_pair_buffer_tmp)); rmm::device_uvector increase_count(count_p_r_q_r, handle.get_stream()); auto vertex_pair_buffer = allocate_dataframe_buffer>( - count_p_r_q_r, handle.get_stream()); + count_p_r_q_r, handle.get_stream()); thrust::reduce_by_key(handle.get_thrust_policy(), get_dataframe_buffer_begin(vertex_pair_buffer_tmp), get_dataframe_buffer_end(vertex_pair_buffer_tmp), @@ -243,54 +243,55 @@ edge_triangle_count_impl( std::optional> pair_count{std::nullopt}; std::optional> opt_increase_count = - std::make_optional(rmm::device_uvector(increase_count.size(), handle.get_stream())); + std::make_optional(rmm::device_uvector(increase_count.size(), handle.get_stream())); raft::copy((*opt_increase_count).begin(), - increase_count.begin(), - increase_count.size(), - handle.get_stream()); + increase_count.begin(), + increase_count.size(), + handle.get_stream()); // There are still multiple copies here but is it worth sorting and reducing again? - std::tie(pair_srcs, pair_dsts, std::ignore, pair_count, std::ignore) = shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<0>(vertex_pair_buffer)), - std::move(std::get<1>(vertex_pair_buffer)), - std::nullopt, - // FIXME: Update 'shuffle_int_...' to support int32_t and int64_t values - std::move(opt_increase_count), - std::nullopt, - graph_view.vertex_partition_range_lasts()); - + std::tie(pair_srcs, pair_dsts, std::ignore, pair_count, std::ignore) = + shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<0>(vertex_pair_buffer)), + std::move(std::get<1>(vertex_pair_buffer)), + std::nullopt, + // FIXME: Update 'shuffle_int_...' 
to support int32_t and int64_t values + std::move(opt_increase_count), + std::nullopt, + graph_view.vertex_partition_range_lasts()); + thrust::for_each( handle.get_thrust_policy(), thrust::make_counting_iterator(0), thrust::make_counting_iterator(pair_srcs.size()), - [num_edges = edgelist_srcs.size(), + [num_edges = edgelist_srcs.size(), num_triangles = num_triangles.data(), - pair_srcs = pair_srcs.data(), - pair_dsts = pair_dsts.data(), - pair_count = (*pair_count).data(), - edge_first] - __device__(auto idx) { - auto src = pair_srcs[idx]; - auto dst = pair_dsts[idx]; + pair_srcs = pair_srcs.data(), + pair_dsts = pair_dsts.data(), + pair_count = (*pair_count).data(), + edge_first] __device__(auto idx) { + auto src = pair_srcs[idx]; + auto dst = pair_dsts[idx]; auto p_r_q_r_pair = thrust::make_tuple(src, dst); - + // Find its position in 'edges' auto itr_p_r_q_r = thrust::lower_bound(thrust::seq, edge_first, edge_first + num_edges, p_r_q_r_pair); - + assert(*itr_p_r_q_r == p_r_q_r_pair); auto idx_p_r_q_r = thrust::distance(edge_first, itr_p_r_q_r); - cuda::atomic_ref atomic_counter(num_triangles[idx_p_r_q_r]); + cuda::atomic_ref atomic_counter( + num_triangles[idx_p_r_q_r]); auto r = atomic_counter.fetch_add(pair_count[idx], cuda::std::memory_order_relaxed); - - } - ); + }); } else { - // Given intersection offsets and indices that are used to update the number of // triangles of (p, q) edges where `r`s are the intersection indices, update // the number of triangles of the pairs (p, r) and (q, r). 
@@ -303,7 +304,8 @@ edge_triangle_count_impl( 0, prev_chunk_size, raft::device_span(intersection_offsets.data(), intersection_offsets.size()), - raft::device_span(intersection_indices.data(), intersection_indices.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), raft::device_span(num_triangles.data(), num_triangles.size()), edge_first}); @@ -316,7 +318,8 @@ edge_triangle_count_impl( 1, prev_chunk_size, raft::device_span(intersection_offsets.data(), intersection_offsets.size()), - raft::device_span(intersection_indices.data(), intersection_indices.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), raft::device_span(num_triangles.data(), num_triangles.size()), edge_first}); } @@ -327,13 +330,11 @@ edge_triangle_count_impl( handle, graph_view); cugraph::edge_bucket_t valid_edges(handle); - valid_edges.insert(edgelist_srcs.begin(), - edgelist_srcs.end(), - edgelist_dsts.begin()); + valid_edges.insert(edgelist_srcs.begin(), edgelist_srcs.end(), edgelist_dsts.begin()); auto cur_graph_view = graph_view; - auto edge_last = edge_first + edgelist_srcs.size(); // FIXME: Remove this unnecessary variable + auto edge_last = edge_first + edgelist_srcs.size(); // FIXME: Remove this unnecessary variable cugraph::transform_e( handle, graph_view, @@ -343,13 +344,16 @@ edge_triangle_count_impl( cugraph::edge_dummy_property_t{}.view(), [edge_first, edge_last, - num_edges = edgelist_srcs.size(), - num_triangles = num_triangles.data()] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { + num_edges = edgelist_srcs.size(), + num_triangles = num_triangles.data()] __device__(auto src, + auto dst, + thrust::nullopt_t, + thrust::nullopt_t, + thrust::nullopt_t) { auto pair = thrust::make_tuple(src, dst); // Find its position in 'edges' - auto itr_pair = - thrust::lower_bound(thrust::seq, edge_first, edge_last, pair); + auto itr_pair = thrust::lower_bound(thrust::seq, edge_first, 
edge_last, pair); auto idx_pair = thrust::distance(edge_first, itr_pair); return num_triangles[idx_pair]; }, diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index a51aab86170..4e8483c0cab 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -672,13 +672,13 @@ k_truss(raft::handle_t const& handle, auto prop_num_triangles = edge_triangle_count(handle, cur_graph_view); - std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, num_triangles) = - decompress_to_edgelist(handle, - cur_graph_view, - edge_weight_view, - // FIXME: Update 'decompress_edgelist' to support int32_t and int64_t values - std::make_optional(prop_num_triangles.view()), - std::optional>(std::nullopt)); + std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, num_triangles) = decompress_to_edgelist( + handle, + cur_graph_view, + edge_weight_view, + // FIXME: Update 'decompress_edgelist' to support int32_t and int64_t values + std::make_optional(prop_num_triangles.view()), + std::optional>(std::nullopt)); auto transposed_edge_first = thrust::make_zip_iterator(edgelist_dsts.begin(), edgelist_srcs.begin()); @@ -726,12 +726,12 @@ k_truss(raft::handle_t const& handle, size_t prev_chunk_size = 0; size_t chunk_num_invalid_edges = num_invalid_edges; - - auto num_chunks = raft::div_rounding_up_safe(edgelist_srcs.size(), edges_to_intersect_per_iteration); + + auto num_chunks = + raft::div_rounding_up_safe(edgelist_srcs.size(), edges_to_intersect_per_iteration); for (size_t i = 0; i < num_chunks; ++i) { - auto chunk_size = - std::min(edges_to_intersect_per_iteration, chunk_num_invalid_edges); + auto chunk_size = std::min(edges_to_intersect_per_iteration, chunk_num_invalid_edges); thrust::sort_by_key(handle.get_thrust_policy(), edge_first + num_valid_edges, edge_first + edgelist_srcs.size(), diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 02c6b4fa938..6cb5f3e18d5 100644 --- a/cpp/tests/CMakeLists.txt +++ 
b/cpp/tests/CMakeLists.txt @@ -582,7 +582,7 @@ if(BUILD_CUGRAPH_MG_TESTS) ############################################################################################### # - MG LOUVAIN tests -------------------------------------------------------------------------- ConfigureTestMG(MG_EGONET_TEST community/mg_egonet_test.cu) - + ############################################################################################### # - MG EDGE TRIANGLE COUNT tests -------------------------------------------------------------------------- ConfigureTest(MG_EDGE_TRIANGLE_COUNT_TEST community/mg_edge_triangle_count_test.cpp) diff --git a/cpp/tests/centrality/mg_betweenness_centrality_test.cpp b/cpp/tests/centrality/mg_betweenness_centrality_test.cpp index 9bf4a88f8b1..798e767085e 100644 --- a/cpp/tests/centrality/mg_betweenness_centrality_test.cpp +++ b/cpp/tests/centrality/mg_betweenness_centrality_test.cpp @@ -153,13 +153,14 @@ class Tests_MGBetweennessCentrality cugraph::edge_property_t, weight_t>> sg_edge_weights{std::nullopt}; std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = - cugraph::test::mg_graph_to_sg_graph(*handle_, - mg_graph_view, - mg_edge_weight_view, - std::optional>{std::nullopt}, - std::make_optional>( - (*mg_renumber_map).data(), (*mg_renumber_map).size()), - false); + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + mg_edge_weight_view, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == 0) { auto sg_graph_view = sg_graph.view(); diff --git a/cpp/tests/centrality/mg_edge_betweenness_centrality_test.cpp b/cpp/tests/centrality/mg_edge_betweenness_centrality_test.cpp index 1719842c7b6..1703f198a4c 100644 --- a/cpp/tests/centrality/mg_edge_betweenness_centrality_test.cpp +++ b/cpp/tests/centrality/mg_edge_betweenness_centrality_test.cpp @@ -142,13 +142,14 @@ class Tests_MGEdgeBetweennessCentrality std::optional< 
cugraph::edge_property_t, weight_t>> sg_edge_weights{std::nullopt}; - std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - mg_edge_weight_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - false); + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + mg_edge_weight_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + false); if (handle_->get_comms().get_rank() == 0) { auto sg_edge_weights_view = diff --git a/cpp/tests/centrality/mg_eigenvector_centrality_test.cpp b/cpp/tests/centrality/mg_eigenvector_centrality_test.cpp index 0cea7e73ba6..76c52d52bfd 100644 --- a/cpp/tests/centrality/mg_eigenvector_centrality_test.cpp +++ b/cpp/tests/centrality/mg_eigenvector_centrality_test.cpp @@ -145,13 +145,14 @@ class Tests_MGEigenvectorCentrality cugraph::edge_property_t, weight_t>> sg_edge_weights{std::nullopt}; std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = - cugraph::test::mg_graph_to_sg_graph(*handle_, - mg_graph_view, - mg_edge_weight_view, - std::optional>{std::nullopt}, - std::make_optional>( - (*mg_renumber_map).data(), (*mg_renumber_map).size()), - false); + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + mg_edge_weight_view, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == int{0}) { // 3-2. 
run SG Eigenvector Centrality diff --git a/cpp/tests/centrality/mg_katz_centrality_test.cpp b/cpp/tests/centrality/mg_katz_centrality_test.cpp index acfacefb3b4..e38f87749b8 100644 --- a/cpp/tests/centrality/mg_katz_centrality_test.cpp +++ b/cpp/tests/centrality/mg_katz_centrality_test.cpp @@ -152,13 +152,14 @@ class Tests_MGKatzCentrality cugraph::edge_property_t, weight_t>> sg_edge_weights{std::nullopt}; std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = - cugraph::test::mg_graph_to_sg_graph(*handle_, - mg_graph_view, - mg_edge_weight_view, - std::optional>{std::nullopt}, - std::make_optional>( - (*mg_renumber_map).data(), (*mg_renumber_map).size()), - false); + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + mg_edge_weight_view, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == int{0}) { // 4-2. run SG Katz Centrality diff --git a/cpp/tests/community/mg_ecg_test.cpp b/cpp/tests/community/mg_ecg_test.cpp index 422bb055b92..c99f83fa2e8 100644 --- a/cpp/tests/community/mg_ecg_test.cpp +++ b/cpp/tests/community/mg_ecg_test.cpp @@ -127,13 +127,14 @@ class Tests_MGEcg : public ::testing::TestWithParam, weight_t>> sg_edge_weights{std::nullopt}; - std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - mg_edge_weight_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - false); // crate a SG graph with MG graph vertex IDs + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + mg_edge_weight_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + false); // crate a SG graph with MG graph vertex IDs auto const comm_rank = handle_->get_comms().get_rank(); if (comm_rank == 0) { diff --git a/cpp/tests/community/mg_edge_triangle_count_test.cpp 
b/cpp/tests/community/mg_edge_triangle_count_test.cpp index a8bef9e17aa..41a4beb7464 100644 --- a/cpp/tests/community/mg_edge_triangle_count_test.cpp +++ b/cpp/tests/community/mg_edge_triangle_count_test.cpp @@ -89,26 +89,25 @@ class Tests_MGEdgeTriangleCount } auto mg_graph_view = mg_graph.view(); - + std::optional> edge_mask{std::nullopt}; if (edge_triangle_count_usecase.edge_masking_) { edge_mask = cugraph::test::generate::edge_property( *handle_, mg_graph_view, 2); mg_graph_view.attach_edge_mask((*edge_mask).view()); } - // 2. run MG EdgeTriangleCount - + if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement handle_->get_comms().barrier(); hr_timer.start("MG EdgeTriangleCount"); } - + auto d_mg_cugraph_results = cugraph::edge_triangle_count(*handle_, mg_graph_view); - + if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement handle_->get_comms().barrier(); @@ -117,27 +116,28 @@ class Tests_MGEdgeTriangleCount } // 3. Compare SG & MG results - + if (edge_triangle_count_usecase.check_correctness_) { // 3-1. 
Convert to SG graph - + cugraph::graph_t sg_graph(*handle_); std::optional< cugraph::edge_property_t, edge_t>> d_sg_cugraph_results{std::nullopt}; - std::tie(sg_graph, std::ignore, d_sg_cugraph_results, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - std::optional>{std::nullopt}, - // FIXME: Update 'create_graph_from_edgelist' to support int32_t and int64_t values - std::make_optional(d_mg_cugraph_results.view()), - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), - false); + std::tie(sg_graph, std::ignore, d_sg_cugraph_results, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + // FIXME: Update 'create_graph_from_edgelist' to support int32_t and int64_t values + std::make_optional(d_mg_cugraph_results.view()), + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == int{0}) { // 3-2. Convert the MG triangle counts stored as 'edge_property_t' to device vector - + auto [edgelist_srcs, edgelist_dsts, d_edgelist_weights, d_edge_triangle_counts] = cugraph::decompress_to_edgelist( *handle_, @@ -145,23 +145,28 @@ class Tests_MGEdgeTriangleCount std::optional>{std::nullopt}, // FIXME: Update 'decompress_edgelist' to support int32_t and int64_t values std::make_optional((*d_sg_cugraph_results).view()), - std::optional>{std::nullopt}); // FIXME: No longer needed + std::optional>{ + std::nullopt}); // FIXME: No longer needed // 3-3. 
Run SG EdgeTriangleCount - - auto ref_d_sg_cugraph_results = cugraph::edge_triangle_count(*handle_, sg_graph.view()); - auto [ref_edgelist_srcs, ref_edgelist_dsts, ref_d_edgelist_weights, ref_d_edge_triangle_counts] = + + auto ref_d_sg_cugraph_results = + cugraph::edge_triangle_count(*handle_, sg_graph.view()); + auto [ref_edgelist_srcs, + ref_edgelist_dsts, + ref_d_edgelist_weights, + ref_d_edge_triangle_counts] = cugraph::decompress_to_edgelist( *handle_, sg_graph.view(), std::optional>{std::nullopt}, std::make_optional(ref_d_sg_cugraph_results.view()), - std::optional>{std::nullopt}); // FIXME: No longer needed + std::optional>{ + std::nullopt}); // FIXME: No longer needed // 3-4. Compare - - auto h_mg_edge_triangle_counts = - cugraph::test::to_host(*handle_, *d_edge_triangle_counts); + + auto h_mg_edge_triangle_counts = cugraph::test::to_host(*handle_, *d_edge_triangle_counts); auto h_sg_edge_triangle_counts = cugraph::test::to_host(*handle_, *ref_d_edge_triangle_counts); @@ -219,12 +224,13 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); -INSTANTIATE_TEST_SUITE_P(rmat_small_tests, - Tests_MGEdgeTriangleCount_Rmat, - ::testing::Combine(::testing::Values(EdgeTriangleCount_Usecase{false, false, true}, - EdgeTriangleCount_Usecase{false, true, true}), - ::testing::Values(cugraph::test::Rmat_Usecase( - 10, 16, 0.57, 0.19, 0.19, 0, true, false)))); +INSTANTIATE_TEST_SUITE_P( + rmat_small_tests, + Tests_MGEdgeTriangleCount_Rmat, + ::testing::Combine( + ::testing::Values(EdgeTriangleCount_Usecase{false, false, true}, + EdgeTriangleCount_Usecase{false, true, true}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); INSTANTIATE_TEST_SUITE_P( rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with @@ -236,5 +242,5 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Combine( 
::testing::Values(EdgeTriangleCount_Usecase{false, false, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, true, false)))); - + CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/community/mg_egonet_test.cu b/cpp/tests/community/mg_egonet_test.cu index 3aaf749f01a..ac363df3ec5 100644 --- a/cpp/tests/community/mg_egonet_test.cu +++ b/cpp/tests/community/mg_egonet_test.cu @@ -200,13 +200,14 @@ class Tests_MGEgonet } auto [sg_graph, sg_edge_weights, sg_edge_ids, sg_number_map] = - cugraph::test::mg_graph_to_sg_graph(*handle_, - mg_graph_view, - mg_edge_weight_view, - std::optional>{std::nullopt}, - std::make_optional>( - (*mg_renumber_map).data(), (*mg_renumber_map).size()), - false); + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + mg_edge_weight_view, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == 0) { auto d_mg_aggregate_edgelist_offsets = diff --git a/cpp/tests/community/mg_leiden_test.cpp b/cpp/tests/community/mg_leiden_test.cpp index 73854d7e5e8..65f4827ba06 100644 --- a/cpp/tests/community/mg_leiden_test.cpp +++ b/cpp/tests/community/mg_leiden_test.cpp @@ -87,13 +87,14 @@ class Tests_MGLeiden std::optional< cugraph::edge_property_t, weight_t>> sg_edge_weights{std::nullopt}; - std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - mg_edge_weight_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - false); // crate an SG graph with MG graph vertex IDs + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + mg_edge_weight_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + false); // crate an SG graph with MG graph vertex IDs // FIXME: We need to figure out how to test each iteration of // SG vs MG 
Leiden, possibly by passing results of refinement phase diff --git a/cpp/tests/community/mg_louvain_test.cpp b/cpp/tests/community/mg_louvain_test.cpp index d45d71cd053..106ad2562f7 100644 --- a/cpp/tests/community/mg_louvain_test.cpp +++ b/cpp/tests/community/mg_louvain_test.cpp @@ -85,13 +85,14 @@ class Tests_MGLouvain std::optional< cugraph::edge_property_t, weight_t>> sg_edge_weights{std::nullopt}; - std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - mg_edge_weight_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - false); // crate an SG graph with MG graph vertex IDs + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + mg_edge_weight_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + false); // crate an SG graph with MG graph vertex IDs weight_t sg_modularity{-1.0}; diff --git a/cpp/tests/community/mg_triangle_count_test.cpp b/cpp/tests/community/mg_triangle_count_test.cpp index 06134e87ddd..932ff5050f1 100644 --- a/cpp/tests/community/mg_triangle_count_test.cpp +++ b/cpp/tests/community/mg_triangle_count_test.cpp @@ -178,14 +178,15 @@ class Tests_MGTriangleCount d_mg_triangle_counts.size())); cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), - false); + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == int{0}) { // 4-2. 
run SG TriangleCount diff --git a/cpp/tests/components/mg_weakly_connected_components_test.cpp b/cpp/tests/components/mg_weakly_connected_components_test.cpp index 5919ac15925..368fea68877 100644 --- a/cpp/tests/components/mg_weakly_connected_components_test.cpp +++ b/cpp/tests/components/mg_weakly_connected_components_test.cpp @@ -125,14 +125,15 @@ class Tests_MGWeaklyConnectedComponents raft::device_span(d_mg_components.data(), d_mg_components.size())); cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), - false); + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == int{0}) { // 3-2. 
run SG weakly connected components diff --git a/cpp/tests/cores/mg_core_number_test.cpp b/cpp/tests/cores/mg_core_number_test.cpp index 2fb221e2d04..f8294d81fdf 100644 --- a/cpp/tests/cores/mg_core_number_test.cpp +++ b/cpp/tests/cores/mg_core_number_test.cpp @@ -143,14 +143,15 @@ class Tests_MGCoreNumber raft::device_span(d_mg_core_numbers.data(), d_mg_core_numbers.size())); cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), - false); + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == int{0}) { // 3-2. 
run SG CoreNumber diff --git a/cpp/tests/cores/mg_k_core_test.cpp b/cpp/tests/cores/mg_k_core_test.cpp index a6dcb68b726..28bc445bda8 100644 --- a/cpp/tests/cores/mg_k_core_test.cpp +++ b/cpp/tests/cores/mg_k_core_test.cpp @@ -161,13 +161,14 @@ class Tests_MGKCore : public ::testing::TestWithParam(d_mg_core_numbers.data(), d_mg_core_numbers.size())); auto [sg_graph, sg_edge_weights, sg_edge_ids, sg_number_map] = - cugraph::test::mg_graph_to_sg_graph(*handle_, - mg_graph_view, - mg_edge_weight_view, - std::optional>{std::nullopt}, - std::make_optional>( - (*mg_renumber_map).data(), (*mg_renumber_map).size()), - false); + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + mg_edge_weight_view, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == 0) { auto sg_graph_view = sg_graph.view(); diff --git a/cpp/tests/link_analysis/mg_hits_test.cpp b/cpp/tests/link_analysis/mg_hits_test.cpp index d3350f2e8ec..40a439ffc4c 100644 --- a/cpp/tests/link_analysis/mg_hits_test.cpp +++ b/cpp/tests/link_analysis/mg_hits_test.cpp @@ -186,14 +186,15 @@ class Tests_MGHits : public ::testing::TestWithParam, weight_t>> sg_edge_weights{std::nullopt}; - std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), - false); + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == int{0}) { // 3-3. 
run SG Hits diff --git a/cpp/tests/link_analysis/mg_pagerank_test.cpp b/cpp/tests/link_analysis/mg_pagerank_test.cpp index cd71e08a691..26136c8c9d2 100644 --- a/cpp/tests/link_analysis/mg_pagerank_test.cpp +++ b/cpp/tests/link_analysis/mg_pagerank_test.cpp @@ -202,14 +202,15 @@ class Tests_MGPageRank std::optional< cugraph::edge_property_t, weight_t>> sg_edge_weights{std::nullopt}; - std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - mg_edge_weight_view, - std::optional>{std::nullopt}, - std::make_optional>((*d_mg_renumber_map).data(), - (*d_mg_renumber_map).size()), - false); + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + mg_edge_weight_view, + std::optional>{std::nullopt}, + std::make_optional>((*d_mg_renumber_map).data(), + (*d_mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == int{0}) { // 4-2. 
run SG PageRank diff --git a/cpp/tests/prims/mg_count_if_e.cu b/cpp/tests/prims/mg_count_if_e.cu index 3224550cc77..137f7db8625 100644 --- a/cpp/tests/prims/mg_count_if_e.cu +++ b/cpp/tests/prims/mg_count_if_e.cu @@ -149,14 +149,15 @@ class Tests_MGCountIfE if (prims_usecase.check_correctness) { cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), - false); + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == 0) { auto sg_graph_view = sg_graph.view(); diff --git a/cpp/tests/prims/mg_count_if_v.cu b/cpp/tests/prims/mg_count_if_v.cu index 9c6fc2ff36c..e3f30e37729 100644 --- a/cpp/tests/prims/mg_count_if_v.cu +++ b/cpp/tests/prims/mg_count_if_v.cu @@ -123,14 +123,15 @@ class Tests_MGCountIfV if (prims_usecase.check_correctness) { cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), - false); + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == 0) { auto sg_graph_view = sg_graph.view(); diff --git a/cpp/tests/prims/mg_extract_transform_e.cu 
b/cpp/tests/prims/mg_extract_transform_e.cu index d6ab69680f0..20e87070fa5 100644 --- a/cpp/tests/prims/mg_extract_transform_e.cu +++ b/cpp/tests/prims/mg_extract_transform_e.cu @@ -253,14 +253,15 @@ class Tests_MGExtractTransformE } cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - std::make_optional>((*d_mg_renumber_map_labels).data(), - (*d_mg_renumber_map_labels).size()), - false); + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::make_optional>((*d_mg_renumber_map_labels).data(), + (*d_mg_renumber_map_labels).size()), + false); rmm::device_uvector sg_vertex_prop(0, handle_->get_stream()); std::tie(std::ignore, sg_vertex_prop) = cugraph::test::mg_vertex_property_values_to_sg_vertex_property_values( diff --git a/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu b/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu index f6db4978841..9e7611190ae 100644 --- a/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu +++ b/cpp/tests/prims/mg_extract_transform_v_frontier_outgoing_e.cu @@ -283,14 +283,15 @@ class Tests_MGExtractTransformVFrontierOutgoingE } cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - std::make_optional>((*d_mg_renumber_map_labels).data(), - (*d_mg_renumber_map_labels).size()), - false); + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + 
std::make_optional>((*d_mg_renumber_map_labels).data(), + (*d_mg_renumber_map_labels).size()), + false); rmm::device_uvector sg_vertex_prop(0, handle_->get_stream()); std::tie(std::ignore, sg_vertex_prop) = cugraph::test::mg_vertex_property_values_to_sg_vertex_property_values( diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu index 59b37ded432..75b711fbd9c 100644 --- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu @@ -226,14 +226,15 @@ class Tests_MGPerVPairTransformDstNbrIntersection *handle_, std::get<1>(mg_result_buffer).data(), std::get<1>(mg_result_buffer).size()); cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), - false); + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == 0) { auto sg_graph_view = sg_graph.view(); diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu index 1ff4145fc4f..48bbc6176d8 100644 --- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu @@ -258,16 +258,17 @@ class Tests_MGPerVPairTransformDstNbrIntersection weight_t>> sg_edge_weight{std::nullopt}; - std::tie(sg_graph, sg_edge_weight, std::ignore, std::ignore) = 
cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - mg_edge_weight - ? std::make_optional(mg_edge_weight_view) - : std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), - false); + std::tie(sg_graph, sg_edge_weight, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + mg_edge_weight + ? std::make_optional(mg_edge_weight_view) + : std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == 0) { auto sg_graph_view = sg_graph.view(); diff --git a/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu b/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu index 84b3ba64b80..5df8721a588 100644 --- a/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu +++ b/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu @@ -282,14 +282,15 @@ class Tests_MGPerVRandomSelectTransformOutgoingE } cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), - false); + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == 0) { std::optional> mg_aggregate_sample_offsets{std::nullopt}; diff --git a/cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu b/cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu index 
738c15afbc3..fd9192dcce5 100644 --- a/cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu +++ b/cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu @@ -297,14 +297,15 @@ class Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE std::optional< cugraph::edge_property_t, weight_t>> sg_edge_weights{std::nullopt}; - std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), - false); + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); for (size_t i = 0; i < reduction_types.size(); ++i) { auto mg_aggregate_results = diff --git a/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu b/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu index cd329aa1e5f..be29c793ad5 100644 --- a/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu +++ b/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu @@ -271,14 +271,15 @@ class Tests_MGPerVTransformReduceIncomingOutgoingE if (prims_usecase.check_correctness) { cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), - false); + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + 
std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); for (size_t i = 0; i < reduction_types.size(); ++i) { auto mg_aggregate_in_results = diff --git a/cpp/tests/prims/mg_reduce_v.cu b/cpp/tests/prims/mg_reduce_v.cu index 7b80aac6d9f..e91db5fa6ad 100644 --- a/cpp/tests/prims/mg_reduce_v.cu +++ b/cpp/tests/prims/mg_reduce_v.cu @@ -163,14 +163,15 @@ class Tests_MGReduceV if (prims_usecase.check_correctness) { cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), - false); + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == 0) { auto sg_graph_view = sg_graph.view(); diff --git a/cpp/tests/prims/mg_transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cu b/cpp/tests/prims/mg_transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cu index f4f401a4fc2..4fac6ef3be7 100644 --- a/cpp/tests/prims/mg_transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cu +++ b/cpp/tests/prims/mg_transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cu @@ -174,14 +174,15 @@ class Tests_MGTransformReduceDstNbrIntersectionOfEEndpointsByV raft::device_span(mg_result_buffer.data(), mg_result_buffer.size())); cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), - false); + 
std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == 0) { auto sg_graph_view = sg_graph.view(); diff --git a/cpp/tests/prims/mg_transform_reduce_e.cu b/cpp/tests/prims/mg_transform_reduce_e.cu index fd4110d6f91..4785a8bb01b 100644 --- a/cpp/tests/prims/mg_transform_reduce_e.cu +++ b/cpp/tests/prims/mg_transform_reduce_e.cu @@ -159,14 +159,15 @@ class Tests_MGTransformReduceE if (prims_usecase.check_correctness) { cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), - false); + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == 0) { auto sg_graph_view = sg_graph.view(); diff --git a/cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu b/cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu index dd831d81433..9950b5bdbf4 100644 --- a/cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu +++ b/cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu @@ -237,14 +237,15 @@ class Tests_MGTransformReduceEBySrcDstKey cugraph::get_dataframe_buffer_begin(mg_aggregate_by_dst_values)); cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - std::optional>{std::nullopt}, - 
std::optional>{std::nullopt}, - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), - false); + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == 0) { auto sg_graph_view = sg_graph.view(); diff --git a/cpp/tests/prims/mg_transform_reduce_v.cu b/cpp/tests/prims/mg_transform_reduce_v.cu index 39885c644e8..f6f07bc03ab 100644 --- a/cpp/tests/prims/mg_transform_reduce_v.cu +++ b/cpp/tests/prims/mg_transform_reduce_v.cu @@ -169,14 +169,15 @@ class Tests_MGTransformReduceV if (prims_usecase.check_correctness) { cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), - false); + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == 0) { auto sg_graph_view = sg_graph.view(); diff --git a/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu b/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu index ca09cdac696..335a7ec879c 100644 --- a/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu +++ b/cpp/tests/prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu @@ -292,14 +292,15 @@ class Tests_MGTransformReduceVFrontierOutgoingEByDst } cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore, std::ignore) 
= cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), - false); + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == int{0}) { if constexpr (std::is_same_v) { diff --git a/cpp/tests/structure/mg_coarsen_graph_test.cpp b/cpp/tests/structure/mg_coarsen_graph_test.cpp index 077c2560cb3..471773d71bd 100644 --- a/cpp/tests/structure/mg_coarsen_graph_test.cpp +++ b/cpp/tests/structure/mg_coarsen_graph_test.cpp @@ -330,13 +330,14 @@ class Tests_MGCoarsenGraph cugraph::edge_property_t, weight_t>> sg_edge_weights{std::nullopt}; - std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - mg_edge_weight_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - false); + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + mg_edge_weight_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + false); cugraph::graph_t sg_coarse_graph(*handle_); std::optional< diff --git a/cpp/tests/structure/mg_count_self_loops_and_multi_edges_test.cpp b/cpp/tests/structure/mg_count_self_loops_and_multi_edges_test.cpp index 7efafa30963..61f40049e31 100644 --- a/cpp/tests/structure/mg_count_self_loops_and_multi_edges_test.cpp +++ b/cpp/tests/structure/mg_count_self_loops_and_multi_edges_test.cpp @@ -126,14 +126,15 @@ class Tests_MGCountSelfLoopsAndMultiEdges // 3-1. 
aggregate MG results cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), - false); + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == 0) { auto sg_graph_view = sg_graph.view(); diff --git a/cpp/tests/structure/mg_has_edge_and_compute_multiplicity_test.cpp b/cpp/tests/structure/mg_has_edge_and_compute_multiplicity_test.cpp index 0c7065961e6..3d3d881fb23 100644 --- a/cpp/tests/structure/mg_has_edge_and_compute_multiplicity_test.cpp +++ b/cpp/tests/structure/mg_has_edge_and_compute_multiplicity_test.cpp @@ -204,14 +204,15 @@ class Tests_MGHasEdgeAndComputeMultiplicity d_mg_edge_multiplicities.size())); cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), - false); + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == 0) { auto sg_graph_view = sg_graph.view(); diff --git a/cpp/tests/structure/mg_induced_subgraph_test.cu b/cpp/tests/structure/mg_induced_subgraph_test.cu index 32f9430713f..2ed909b9955 100644 --- 
a/cpp/tests/structure/mg_induced_subgraph_test.cu +++ b/cpp/tests/structure/mg_induced_subgraph_test.cu @@ -214,13 +214,14 @@ class Tests_MGInducedSubgraph true, handle_->get_stream()); - auto [sg_graph, sg_edge_weights, sg_edge_ids, sg_number_map] = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - mg_edge_weight_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - false); + auto [sg_graph, sg_edge_weights, sg_edge_ids, sg_number_map] = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + mg_edge_weight_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + false); if (my_rank == 0) { auto d_sg_subgraph_offsets = cugraph::test::to_device(*handle_, h_sg_subgraph_offsets); diff --git a/cpp/tests/structure/mg_symmetrize_test.cpp b/cpp/tests/structure/mg_symmetrize_test.cpp index 8949ba07bec..09174dcbf72 100644 --- a/cpp/tests/structure/mg_symmetrize_test.cpp +++ b/cpp/tests/structure/mg_symmetrize_test.cpp @@ -88,14 +88,15 @@ class Tests_MGSymmetrize weight_t>> sg_edge_weights{std::nullopt}; if (symmetrize_usecase.check_correctness) { - std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph.view(), - mg_edge_weights ? std::make_optional((*mg_edge_weights).view()) : std::nullopt, - std::optional>{std::nullopt}, - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), - false); + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph.view(), + mg_edge_weights ? std::make_optional((*mg_edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); } // 3. 
run MG symmetrize diff --git a/cpp/tests/structure/mg_transpose_storage_test.cpp b/cpp/tests/structure/mg_transpose_storage_test.cpp index febf446779c..86bf18cd019 100644 --- a/cpp/tests/structure/mg_transpose_storage_test.cpp +++ b/cpp/tests/structure/mg_transpose_storage_test.cpp @@ -87,14 +87,15 @@ class Tests_MGTransposeStorage weight_t>> sg_edge_weights{std::nullopt}; if (transpose_storage_usecase.check_correctness) { - std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph.view(), - mg_edge_weights ? std::make_optional((*mg_edge_weights).view()) : std::nullopt, - std::optional>{std::nullopt}, - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), - false); + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph.view(), + mg_edge_weights ? std::make_optional((*mg_edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); } // 2. run MG transpose storage diff --git a/cpp/tests/structure/mg_transpose_test.cpp b/cpp/tests/structure/mg_transpose_test.cpp index 2ee343f26a4..5f27adb8caf 100644 --- a/cpp/tests/structure/mg_transpose_test.cpp +++ b/cpp/tests/structure/mg_transpose_test.cpp @@ -87,14 +87,15 @@ class Tests_MGTranspose weight_t>> sg_edge_weights{std::nullopt}; if (transpose_usecase.check_correctness) { - std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph.view(), - mg_edge_weights ? std::make_optional((*mg_edge_weights).view()) : std::nullopt, - std::optional>{std::nullopt}, - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), - false); + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph.view(), + mg_edge_weights ? 
std::make_optional((*mg_edge_weights).view()) : std::nullopt, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); } // 3. run MG transpose diff --git a/cpp/tests/traversal/mg_bfs_test.cpp b/cpp/tests/traversal/mg_bfs_test.cpp index 6632e1506e7..1b63ad3b085 100644 --- a/cpp/tests/traversal/mg_bfs_test.cpp +++ b/cpp/tests/traversal/mg_bfs_test.cpp @@ -183,14 +183,15 @@ class Tests_MGBFS : public ::testing::TestWithParam sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), - false); + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == int{0}) { // 3-3. 
run SG BFS diff --git a/cpp/tests/traversal/mg_extract_bfs_paths_test.cu b/cpp/tests/traversal/mg_extract_bfs_paths_test.cu index 13e7e885c8f..476a6ffab8f 100644 --- a/cpp/tests/traversal/mg_extract_bfs_paths_test.cu +++ b/cpp/tests/traversal/mg_extract_bfs_paths_test.cu @@ -237,14 +237,15 @@ class Tests_MGExtractBFSPaths cugraph::test::device_gatherv(*handle_, d_mg_paths.data(), d_mg_paths.size()); cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), - false); + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == int{0}) { // run SG extract_bfs_paths diff --git a/cpp/tests/traversal/mg_k_hop_nbrs_test.cpp b/cpp/tests/traversal/mg_k_hop_nbrs_test.cpp index f51fee68078..64674fb3799 100644 --- a/cpp/tests/traversal/mg_k_hop_nbrs_test.cpp +++ b/cpp/tests/traversal/mg_k_hop_nbrs_test.cpp @@ -178,14 +178,15 @@ class Tests_MGKHopNbrs *handle_, raft::device_span(d_mg_nbrs.data(), d_mg_nbrs.size())); cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( - *handle_, - mg_graph_view, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), - false); + std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + 
(*mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == int{0}) { // 3-3. run SG K-hop neighbors diff --git a/cpp/tests/traversal/mg_sssp_test.cpp b/cpp/tests/traversal/mg_sssp_test.cpp index 84fe8daf346..9ad16d1c947 100644 --- a/cpp/tests/traversal/mg_sssp_test.cpp +++ b/cpp/tests/traversal/mg_sssp_test.cpp @@ -177,13 +177,14 @@ class Tests_MGSSSP : public ::testing::TestWithParam, weight_t>> sg_edge_weights{std::nullopt}; std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore) = - cugraph::test::mg_graph_to_sg_graph(*handle_, - mg_graph_view, - mg_edge_weight_view, - std::optional>{std::nullopt}, - std::make_optional>( - (*mg_renumber_map).data(), (*mg_renumber_map).size()), - false); + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + mg_edge_weight_view, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); if (handle_->get_comms().get_rank() == int{0}) { // 3-3. run SG SSSP diff --git a/cpp/tests/utilities/conversion_utilities.hpp b/cpp/tests/utilities/conversion_utilities.hpp index 1d0ff4c76a7..24a8ecbe4fd 100644 --- a/cpp/tests/utilities/conversion_utilities.hpp +++ b/cpp/tests/utilities/conversion_utilities.hpp @@ -216,14 +216,15 @@ graph_to_host_csc( // Only the rank 0 GPU holds the valid data template -std::tuple, - std::optional, - weight_t>>, - std::optional, - edge_t>>, - std::optional>> +std::tuple< + cugraph::graph_t, + std::optional< + cugraph::edge_property_t, + weight_t>>, + std::optional< + cugraph::edge_property_t, + edge_t>>, + std::optional>> mg_graph_to_sg_graph( raft::handle_t const& handle, cugraph::graph_view_t const& graph_view, diff --git a/cpp/tests/utilities/conversion_utilities_impl.cuh b/cpp/tests/utilities/conversion_utilities_impl.cuh index 1eb1a80e2cc..2a023d57693 100644 --- a/cpp/tests/utilities/conversion_utilities_impl.cuh +++ b/cpp/tests/utilities/conversion_utilities_impl.cuh @@ -296,11 +296,7 @@ 
mg_graph_to_sg_graph( std::optional> d_edge_id{std::nullopt}; std::tie(d_src, d_dst, d_wgt, d_edge_id) = cugraph::decompress_to_edgelist( - handle, - graph_view, - edge_weight_view, - edge_id_view, - renumber_map); + handle, graph_view, edge_weight_view, edge_id_view, renumber_map); d_src = cugraph::test::device_gatherv( handle, raft::device_span{d_src.data(), d_src.size()}); @@ -361,7 +357,10 @@ mg_graph_to_sg_graph( } } - return std::make_tuple(std::move(sg_graph), std::move(sg_edge_weights), std::move(sg_edge_ids), std::move(sg_number_map)); + return std::make_tuple(std::move(sg_graph), + std::move(sg_edge_weights), + std::move(sg_edge_ids), + std::move(sg_number_map)); } template From b5d069ac8ce9c3a19b420f44bcf45c28ef84b0dd Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sun, 12 May 2024 08:35:02 -0700 Subject: [PATCH 28/93] fix typo --- cpp/tests/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 6cb5f3e18d5..b9985a48fab 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -482,6 +482,10 @@ ConfigureTest(K_CORE_TEST cores/k_core_test.cpp) # - K-truss tests -------------------------------------------------------------------------- ConfigureTest(K_TRUSS_TEST community/k_truss_test.cpp) +################################################################################################### +# - Triangle Count tests -------------------------------------------------------------------------- +ConfigureTest(TRIANGLE_COUNT_TEST community/triangle_count_test.cpp) + ################################################################################################### # - Edge Triangle Count tests -------------------------------------------------------------------------- ConfigureTest(EDGE_TRIANGLE_COUNT_TEST community/edge_triangle_count_test.cpp) From 8ca19cd31f43f74526610cc80016c20d53351059 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 14 May 2024 08:01:47 -0700 Subject: [PATCH 
29/93] fix improper cast --- cpp/src/community/k_truss_impl.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 4e8483c0cab..9c7646a1b3a 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -541,7 +541,7 @@ k_truss(raft::handle_t const& handle, std::tie(srcs, dsts, wgts) = k_core(handle, cur_graph_view, edge_weight_view, - size_t{k - 1}, + k - 1, std::make_optional(k_core_degree_type_t::OUT), std::make_optional(core_number_span)); From 0ca2953791fa769a88768fcd736d9cf1165ae629 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 14 May 2024 08:06:08 -0700 Subject: [PATCH 30/93] remove temporary variable --- cpp/src/community/edge_triangle_count_impl.cuh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/src/community/edge_triangle_count_impl.cuh b/cpp/src/community/edge_triangle_count_impl.cuh index 30eea336c10..6b10998a7e8 100644 --- a/cpp/src/community/edge_triangle_count_impl.cuh +++ b/cpp/src/community/edge_triangle_count_impl.cuh @@ -334,7 +334,6 @@ edge_property_t, edge_t> edge_t auto cur_graph_view = graph_view; - auto edge_last = edge_first + edgelist_srcs.size(); // FIXME: Remove this unnecessary variable cugraph::transform_e( handle, graph_view, @@ -343,7 +342,7 @@ edge_property_t, edge_t> edge_t cugraph::edge_dst_dummy_property_t{}.view(), cugraph::edge_dummy_property_t{}.view(), [edge_first, - edge_last, + edge_last = edge_first + edgelist_srcs.size(), num_edges = edgelist_srcs.size(), num_triangles = num_triangles.data()] __device__(auto src, auto dst, From 56118f337b7b643fbc1c5498b3a452358aef2dc8 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 14 May 2024 08:15:00 -0700 Subject: [PATCH 31/93] remove weights test --- cpp/tests/community/edge_triangle_count_test.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/cpp/tests/community/edge_triangle_count_test.cpp 
b/cpp/tests/community/edge_triangle_count_test.cpp index cf1dbec263b..4af197dd3e6 100644 --- a/cpp/tests/community/edge_triangle_count_test.cpp +++ b/cpp/tests/community/edge_triangle_count_test.cpp @@ -41,7 +41,6 @@ #include struct EdgeTriangleCount_Usecase { - bool test_weighted_{false}; bool edge_masking_{false}; bool check_correctness_{true}; }; @@ -134,7 +133,7 @@ class Tests_EdgeTriangleCount auto [graph, edge_weight, d_renumber_map_labels] = cugraph::test::construct_graph( - handle, input_usecase, edge_triangle_count_usecase.test_weighted_, renumber, true, true); + handle, input_usecase, false, renumber, true, true); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -229,9 +228,9 @@ INSTANTIATE_TEST_SUITE_P( Tests_EdgeTriangleCount_File, ::testing::Combine( // enable correctness checks - ::testing::Values(EdgeTriangleCount_Usecase{false, false, true}, + ::testing::Values(EdgeTriangleCount_Usecase{false, true}, EdgeTriangleCount_Usecase{ - true, false, true}), // FIXME: Still debugging edge_mask + true, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); @@ -240,8 +239,8 @@ INSTANTIATE_TEST_SUITE_P( Tests_EdgeTriangleCount_Rmat, // enable correctness checks ::testing::Combine( - ::testing::Values(EdgeTriangleCount_Usecase{false, false, true}, - EdgeTriangleCount_Usecase{true, true, true}), + ::testing::Values(EdgeTriangleCount_Usecase{false, true}, + EdgeTriangleCount_Usecase{true, true}), ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); INSTANTIATE_TEST_SUITE_P( @@ -254,7 +253,7 @@ INSTANTIATE_TEST_SUITE_P( // disable correctness checks for large graphs // FIXME: High memory footprint. Perform nbr_intersection in chunks. 
::testing::Combine( - ::testing::Values(EdgeTriangleCount_Usecase{false, false, false}), + ::testing::Values(EdgeTriangleCount_Usecase{false, false}), ::testing::Values(cugraph::test::Rmat_Usecase(16, 16, 0.57, 0.19, 0.19, 0, true, false)))); CUGRAPH_TEST_PROGRAM_MAIN() From 3463723772fe207ed19cee3385567dfc09b3bbfd Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 14 May 2024 08:22:30 -0700 Subject: [PATCH 32/93] add edge mask test for rmat benchmark --- cpp/tests/community/edge_triangle_count_test.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/tests/community/edge_triangle_count_test.cpp b/cpp/tests/community/edge_triangle_count_test.cpp index 4af197dd3e6..2ce287c83d9 100644 --- a/cpp/tests/community/edge_triangle_count_test.cpp +++ b/cpp/tests/community/edge_triangle_count_test.cpp @@ -253,7 +253,8 @@ INSTANTIATE_TEST_SUITE_P( // disable correctness checks for large graphs // FIXME: High memory footprint. Perform nbr_intersection in chunks. ::testing::Combine( - ::testing::Values(EdgeTriangleCount_Usecase{false, false}), + ::testing::Values(EdgeTriangleCount_Usecase{false, false}, + EdgeTriangleCount_Usecase{true, false}), ::testing::Values(cugraph::test::Rmat_Usecase(16, 16, 0.57, 0.19, 0.19, 0, true, false)))); CUGRAPH_TEST_PROGRAM_MAIN() From f047a41cbe81556ba300980e5794f9256aaf377a Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 14 May 2024 08:48:51 -0700 Subject: [PATCH 33/93] update mg tests --- cpp/tests/community/mg_edge_triangle_count_test.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/tests/community/mg_edge_triangle_count_test.cpp b/cpp/tests/community/mg_edge_triangle_count_test.cpp index 41a4beb7464..55eee401f6b 100644 --- a/cpp/tests/community/mg_edge_triangle_count_test.cpp +++ b/cpp/tests/community/mg_edge_triangle_count_test.cpp @@ -40,7 +40,6 @@ #include struct EdgeTriangleCount_Usecase { - bool test_weighted_{false}; bool edge_masking_{false}; bool 
check_correctness_{true}; }; @@ -219,8 +218,8 @@ INSTANTIATE_TEST_SUITE_P( Tests_MGEdgeTriangleCount_File, ::testing::Combine( // enable correctness checks - ::testing::Values(EdgeTriangleCount_Usecase{false, false, true}, - EdgeTriangleCount_Usecase{false, true, true}), + ::testing::Values(EdgeTriangleCount_Usecase{false, true}, + EdgeTriangleCount_Usecase{true, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); @@ -228,8 +227,8 @@ INSTANTIATE_TEST_SUITE_P( rmat_small_tests, Tests_MGEdgeTriangleCount_Rmat, ::testing::Combine( - ::testing::Values(EdgeTriangleCount_Usecase{false, false, true}, - EdgeTriangleCount_Usecase{false, true, true}), + ::testing::Values(EdgeTriangleCount_Usecase{false, true}, + EdgeTriangleCount_Usecase{true, true}), ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); INSTANTIATE_TEST_SUITE_P( @@ -240,7 +239,8 @@ INSTANTIATE_TEST_SUITE_P( factor (to avoid running same benchmarks more than once) */ Tests_MGEdgeTriangleCount_Rmat, ::testing::Combine( - ::testing::Values(EdgeTriangleCount_Usecase{false, false, false}), + ::testing::Values(EdgeTriangleCount_Usecase{false, false}, + EdgeTriangleCount_Usecase{true, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, true, false)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() From 52bc2733132d4b371231a76716e5e8d05c928c6e Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 14 May 2024 08:51:50 -0700 Subject: [PATCH 34/93] fix style --- cpp/tests/community/edge_triangle_count_test.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/tests/community/edge_triangle_count_test.cpp b/cpp/tests/community/edge_triangle_count_test.cpp index 2ce287c83d9..132eeeb9c54 100644 --- a/cpp/tests/community/edge_triangle_count_test.cpp +++ b/cpp/tests/community/edge_triangle_count_test.cpp @@ -229,8 +229,7 @@ INSTANTIATE_TEST_SUITE_P( 
::testing::Combine( // enable correctness checks ::testing::Values(EdgeTriangleCount_Usecase{false, true}, - EdgeTriangleCount_Usecase{ - true, true}), + EdgeTriangleCount_Usecase{true, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); From 2030ed879c30f5fdc320ddf45f7f758febd3eeb5 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 14 May 2024 09:44:36 -0700 Subject: [PATCH 35/93] fix style --- cpp/src/community/k_truss_impl.cuh | 17 +++++++++-------- .../community/edge_triangle_count_test.cpp | 2 +- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index be9ac3d72bc..f830e6a7700 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -672,14 +672,15 @@ k_truss(raft::handle_t const& handle, auto prop_num_triangles = edge_triangle_count(handle, cur_graph_view); - std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, num_triangles, std::ignore) = decompress_to_edgelist( - handle, - cur_graph_view, - edge_weight_view, - // FIXME: Update 'decompress_edgelist' to support int32_t and int64_t values - std::make_optional(prop_num_triangles.view()), - std::optional>{std::nullopt}, - std::optional>(std::nullopt)); + std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, num_triangles, std::ignore) = + decompress_to_edgelist( + handle, + cur_graph_view, + edge_weight_view, + // FIXME: Update 'decompress_edgelist' to support int32_t and int64_t values + std::make_optional(prop_num_triangles.view()), + std::optional>{std::nullopt}, + std::optional>(std::nullopt)); auto transposed_edge_first = thrust::make_zip_iterator(edgelist_dsts.begin(), edgelist_srcs.begin()); diff --git a/cpp/tests/community/edge_triangle_count_test.cpp b/cpp/tests/community/edge_triangle_count_test.cpp index 9afd03e0fe1..8cefc2c31f4 100644 --- a/cpp/tests/community/edge_triangle_count_test.cpp 
+++ b/cpp/tests/community/edge_triangle_count_test.cpp @@ -163,7 +163,7 @@ class Tests_EdgeTriangleCount graph_view, std::optional>{std::nullopt}, std::make_optional(d_cugraph_results.view()), - std::optional> {std::nullopt}, + std::optional>{std::nullopt}, std::optional>{std::nullopt}); // FIXME: No longer needed if (cugraph::test::g_perf) { From 1c4ee110cd00f3d276b9fbf15777ce019fd8c66b Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 14 May 2024 15:30:54 -0700 Subject: [PATCH 36/93] update tests --- cpp/tests/community/mg_edge_triangle_count_test.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/tests/community/mg_edge_triangle_count_test.cpp b/cpp/tests/community/mg_edge_triangle_count_test.cpp index 55eee401f6b..483ca51490d 100644 --- a/cpp/tests/community/mg_edge_triangle_count_test.cpp +++ b/cpp/tests/community/mg_edge_triangle_count_test.cpp @@ -137,13 +137,14 @@ class Tests_MGEdgeTriangleCount if (handle_->get_comms().get_rank() == int{0}) { // 3-2. Convert the MG triangle counts stored as 'edge_property_t' to device vector - auto [edgelist_srcs, edgelist_dsts, d_edgelist_weights, d_edge_triangle_counts] = + auto [edgelist_srcs, edgelist_dsts, d_edgelist_weights, d_edge_triangle_counts, d_edgelist_type] = cugraph::decompress_to_edgelist( *handle_, sg_graph.view(), std::optional>{std::nullopt}, // FIXME: Update 'decompress_edgelist' to support int32_t and int64_t values std::make_optional((*d_sg_cugraph_results).view()), + std::optional>{std::nullopt}, std::optional>{ std::nullopt}); // FIXME: No longer needed From 5b1c248bd31047bd6c9f6abf7224606607e7f41f Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 14 May 2024 15:32:51 -0700 Subject: [PATCH 37/93] fix style --- cpp/tests/community/mg_edge_triangle_count_test.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cpp/tests/community/mg_edge_triangle_count_test.cpp b/cpp/tests/community/mg_edge_triangle_count_test.cpp index 483ca51490d..888134fa972 100644 
--- a/cpp/tests/community/mg_edge_triangle_count_test.cpp +++ b/cpp/tests/community/mg_edge_triangle_count_test.cpp @@ -137,7 +137,11 @@ class Tests_MGEdgeTriangleCount if (handle_->get_comms().get_rank() == int{0}) { // 3-2. Convert the MG triangle counts stored as 'edge_property_t' to device vector - auto [edgelist_srcs, edgelist_dsts, d_edgelist_weights, d_edge_triangle_counts, d_edgelist_type] = + auto [edgelist_srcs, + edgelist_dsts, + d_edgelist_weights, + d_edge_triangle_counts, + d_edgelist_type] = cugraph::decompress_to_edgelist( *handle_, sg_graph.view(), From 0ab790678a5da2174fc19416d46781fa692687c0 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 15 May 2024 18:27:24 -0700 Subject: [PATCH 38/93] update fixme --- cpp/src/community/edge_triangle_count_impl.cuh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cpp/src/community/edge_triangle_count_impl.cuh b/cpp/src/community/edge_triangle_count_impl.cuh index 58acd990811..c4277e240be 100644 --- a/cpp/src/community/edge_triangle_count_impl.cuh +++ b/cpp/src/community/edge_triangle_count_impl.cuh @@ -182,8 +182,7 @@ edge_property_t, edge_t> edge_t if constexpr (multi_gpu) { // stores all the pairs (p, r) and (q, r) auto vertex_pair_buffer_tmp = allocate_dataframe_buffer>( - intersection_indices.size() * 2, handle.get_stream()); // *2 for both (p, r) and (q, r) - // So that you shuffle only once + intersection_indices.size() * 2, handle.get_stream()); // tabulate with the size of intersection_indices, and call binary search on // intersection_offsets to get (p, r). @@ -260,7 +259,7 @@ edge_property_t, edge_t> edge_t std::move(std::get<0>(vertex_pair_buffer)), std::move(std::get<1>(vertex_pair_buffer)), std::nullopt, - // FIXME: Update 'shuffle_int_...' 
to support int32_t and int64_t values + // FIXME: Add general purpose function for shuffling vertex pairs and arbitrary attributes std::move(opt_increase_count), std::nullopt, graph_view.vertex_partition_range_lasts()); From 29ed50cdb9de2cced4fa0cb40d017f813a5afa47 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Fri, 24 May 2024 11:44:23 -0700 Subject: [PATCH 39/93] update ktruss implementation --- cpp/CMakeLists.txt | 1 + cpp/src/community/k_truss_impl.cuh | 1729 +++++++++++------ cpp/src/community/k_truss_mg.cu | 78 + cpp/src/community/k_truss_sg.cu | 3 +- cpp/tests/CMakeLists.txt | 10 +- cpp/tests/community/k_truss_test.cpp | 37 +- cpp/tests/community/mg_k_truss_test.cpp | 249 +++ .../community/mg_triangle_count_test.cpp | 242 +-- 8 files changed, 1611 insertions(+), 738 deletions(-) create mode 100644 cpp/src/community/k_truss_mg.cu create mode 100644 cpp/tests/community/mg_k_truss_test.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 0a2aabcb2ca..1dbeb61bc7e 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -205,6 +205,7 @@ set(CUGRAPH_SOURCES src/community/egonet_sg.cu src/community/egonet_mg.cu src/community/k_truss_sg.cu + #src/community/k_truss_mg.cu src/sampling/random_walks.cu src/sampling/random_walks_sg.cu src/sampling/detail/prepare_next_frontier_sg.cu diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index f830e6a7700..b9aa857bb85 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -15,6 +15,7 @@ */ #pragma once +#include "prims/extract_transform_v_frontier_outgoing_e.cuh" #include "prims/edge_bucket.cuh" #include "prims/extract_transform_e.cuh" #include "prims/fill_edge_property.cuh" @@ -41,350 +42,46 @@ namespace cugraph { -template -struct unroll_edge { - size_t num_valid_edges{}; - raft::device_span num_triangles{}; - EdgeIterator edge_to_unroll_first{}; - EdgeIterator transposed_valid_edge_first{}; - EdgeIterator transposed_valid_edge_last{}; - 
EdgeIterator transposed_invalid_edge_last{}; +template +struct extract_weak_edges { + edge_t k{}; + __device__ thrust::optional> operator()( + vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) const + { + //printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); + return count < k - 2 + ? thrust::optional>{thrust::make_tuple(src, dst, count)} + : thrust::nullopt; + } +}; - __device__ thrust::tuple operator()(edge_t i) const +template +struct extract_edges { + __device__ thrust::optional> operator()( + + auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) const { - // edges are sorted with destination as key so reverse the edge when looking it - auto pair = thrust::make_tuple(thrust::get<1>(*(edge_to_unroll_first + i)), - thrust::get<0>(*(edge_to_unroll_first + i))); - // Find its position in either partition of the transposed edgelist - // An edge can be in found in either of the two partitions (valid or invalid) - auto itr = thrust::lower_bound( - thrust::seq, transposed_valid_edge_last, transposed_invalid_edge_last, pair); - size_t idx{}; - if (itr != transposed_invalid_edge_last && *itr == pair) { - idx = - static_cast(thrust::distance(transposed_valid_edge_last, itr) + num_valid_edges); - } else { - // The edge must be in the first boundary - itr = thrust::lower_bound( - thrust::seq, transposed_valid_edge_first, transposed_valid_edge_last, pair); - assert(*itr == pair); - idx = thrust::distance(transposed_valid_edge_first, itr); - } - cuda::atomic_ref atomic_counter(num_triangles[idx]); - auto r = atomic_counter.fetch_sub(edge_t{1}, cuda::std::memory_order_relaxed); + //printf("\nchecking the count - src = %d, dst = %d, count = %d\n", src, dst, count); + return thrust::make_tuple(src, dst, count); } }; -// FIXME: May re-locate this function as a general utility function for graph algorithm -// implementations. 
template -rmm::device_uvector compute_prefix_sum(raft::handle_t const& handle, - raft::device_span sorted_vertices, - raft::device_span query_vertices) -{ - rmm::device_uvector prefix_sum(query_vertices.size() + 1, handle.get_stream()); - - auto count_first = thrust::make_transform_iterator( - thrust::make_counting_iterator(size_t{0}), - cuda::proclaim_return_type( - [query_vertices, - num_edges = sorted_vertices.size(), - sorted_vertices = sorted_vertices.begin()] __device__(size_t idx) { - auto itr_lower = thrust::lower_bound( - thrust::seq, sorted_vertices, sorted_vertices + num_edges, query_vertices[idx]); - - auto itr_upper = thrust::upper_bound( - thrust::seq, itr_lower, sorted_vertices + num_edges, query_vertices[idx]); - vertex_t dist = thrust::distance(itr_lower, itr_upper); - - return dist; - })); - - thrust::exclusive_scan(handle.get_thrust_policy(), - count_first, - count_first + query_vertices.size() + 1, - prefix_sum.begin()); - - return prefix_sum; -} - -template -edge_t remove_overcompensating_edges(raft::handle_t const& handle, - size_t buffer_size, - EdgeIterator potential_closing_or_incoming_edges, - EdgeIterator incoming_or_potential_closing_edges, - raft::device_span invalid_edgelist_srcs, - raft::device_span invalid_edgelist_dsts) -{ - // To avoid over-compensating, check whether the 'potential_closing_edges' - // are within the invalid edges. 
If yes, the was already unrolled - auto edges_not_overcomp = thrust::remove_if( - handle.get_thrust_policy(), - thrust::make_zip_iterator(potential_closing_or_incoming_edges, - incoming_or_potential_closing_edges), - thrust::make_zip_iterator(potential_closing_or_incoming_edges + buffer_size, - incoming_or_potential_closing_edges + buffer_size), - [num_invalid_edges = invalid_edgelist_dsts.size(), - invalid_first = - thrust::make_zip_iterator(invalid_edgelist_dsts.begin(), invalid_edgelist_srcs.begin()), - invalid_last = thrust::make_zip_iterator(invalid_edgelist_dsts.end(), - invalid_edgelist_srcs.end())] __device__(auto e) { - auto potential_edge = thrust::get<0>(e); - auto transposed_potential_or_incoming_edge = - thrust::make_tuple(thrust::get<1>(potential_edge), thrust::get<0>(potential_edge)); - auto itr = thrust::lower_bound( - thrust::seq, invalid_first, invalid_last, transposed_potential_or_incoming_edge); - return (itr != invalid_last && *itr == transposed_potential_or_incoming_edge); - }); - - auto dist = thrust::distance(thrust::make_zip_iterator(potential_closing_or_incoming_edges, - incoming_or_potential_closing_edges), - edges_not_overcomp); - - return dist; -} - -template -void unroll_p_r_or_q_r_edges(raft::handle_t const& handle, - graph_view_t& graph_view, - size_t num_invalid_edges, - size_t num_valid_edges, - raft::device_span edgelist_srcs, - raft::device_span edgelist_dsts, - raft::device_span num_triangles) -{ - auto prefix_sum_valid = compute_prefix_sum( - handle, - raft::device_span(edgelist_dsts.data(), num_valid_edges), - raft::device_span(edgelist_dsts.data() + num_valid_edges, num_invalid_edges)); - - auto prefix_sum_invalid = compute_prefix_sum( - handle, - raft::device_span(edgelist_dsts.data() + num_valid_edges, num_invalid_edges), - raft::device_span(edgelist_dsts.data() + num_valid_edges, num_invalid_edges)); - - auto potential_closing_edges = allocate_dataframe_buffer>( - prefix_sum_valid.back_element(handle.get_stream()) + - 
prefix_sum_invalid.back_element(handle.get_stream()), - handle.get_stream()); - - auto incoming_edges_to_r = allocate_dataframe_buffer>( - prefix_sum_valid.back_element(handle.get_stream()) + - prefix_sum_invalid.back_element(handle.get_stream()), - handle.get_stream()); - - thrust::for_each( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_invalid_edges), - [num_valid_edges, - num_invalid_edges, - invalid_dst_first = edgelist_dsts.begin() + num_valid_edges, - invalid_src_first = edgelist_srcs.begin() + num_valid_edges, - valid_src_first = edgelist_srcs.begin(), - valid_dst_first = edgelist_dsts.begin(), - prefix_sum_valid = prefix_sum_valid.data(), - prefix_sum_invalid = prefix_sum_invalid.data(), - potential_closing_edges = get_dataframe_buffer_begin(potential_closing_edges), - incoming_edges_to_r = get_dataframe_buffer_begin(incoming_edges_to_r)] __device__(auto idx) { - auto src = invalid_src_first[idx]; - auto dst = invalid_dst_first[idx]; - auto dst_array_end_valid = valid_dst_first + num_valid_edges; - - auto itr_lower_valid = - thrust::lower_bound(thrust::seq, valid_dst_first, dst_array_end_valid, dst); - auto idx_lower_valid = thrust::distance( - valid_dst_first, - itr_lower_valid); // Need a binary search to find the begining of the range - - auto invalid_end_dst = invalid_dst_first + num_invalid_edges; - - auto itr_lower_invalid = - thrust::lower_bound(thrust::seq, invalid_dst_first, invalid_end_dst, dst); - auto idx_lower_invalid = thrust::distance( - invalid_dst_first, - itr_lower_invalid); // Need a binary search to find the begining of the range - - auto incoming_edges_to_r_first_valid = thrust::make_zip_iterator( - valid_src_first + idx_lower_valid, thrust::make_constant_iterator(dst)); - thrust::copy( - thrust::seq, - incoming_edges_to_r_first_valid, - incoming_edges_to_r_first_valid + (prefix_sum_valid[idx + 1] - prefix_sum_valid[idx]), - incoming_edges_to_r + prefix_sum_valid[idx] + 
prefix_sum_invalid[idx]); - - auto incoming_edges_to_r_first_invalid = thrust::make_zip_iterator( - invalid_src_first + idx_lower_invalid, thrust::make_constant_iterator(dst)); - thrust::copy( - thrust::seq, - incoming_edges_to_r_first_invalid, - incoming_edges_to_r_first_invalid + (prefix_sum_invalid[idx + 1] - prefix_sum_invalid[idx]), - - incoming_edges_to_r + prefix_sum_invalid[idx] + prefix_sum_valid[idx + 1]); - - if constexpr (is_q_r_edge) { - auto potential_closing_edges_first_valid = thrust::make_zip_iterator( - valid_src_first + idx_lower_valid, thrust::make_constant_iterator(src)); - thrust::copy( - thrust::seq, - potential_closing_edges_first_valid, - potential_closing_edges_first_valid + (prefix_sum_valid[idx + 1] - prefix_sum_valid[idx]), - potential_closing_edges + prefix_sum_valid[idx] + prefix_sum_invalid[idx]); - - auto potential_closing_edges_first_invalid = thrust::make_zip_iterator( - invalid_src_first + idx_lower_invalid, thrust::make_constant_iterator(src)); - thrust::copy(thrust::seq, - potential_closing_edges_first_invalid, - potential_closing_edges_first_invalid + - (prefix_sum_invalid[idx + 1] - prefix_sum_invalid[idx]), - potential_closing_edges + prefix_sum_invalid[idx] + prefix_sum_valid[idx + 1]); - - } else { - auto potential_closing_edges_first_valid = thrust::make_zip_iterator( - thrust::make_constant_iterator(src), valid_src_first + idx_lower_valid); - thrust::copy( - thrust::seq, - potential_closing_edges_first_valid, - potential_closing_edges_first_valid + (prefix_sum_valid[idx + 1] - prefix_sum_valid[idx]), - potential_closing_edges + prefix_sum_valid[idx] + prefix_sum_invalid[idx]); - - auto potential_closing_edges_first_invalid = thrust::make_zip_iterator( - thrust::make_constant_iterator(src), invalid_src_first + idx_lower_invalid); - thrust::copy( - thrust::seq, - potential_closing_edges_first_invalid, - potential_closing_edges_first_invalid + - (prefix_sum_invalid[idx + 1] - prefix_sum_invalid[idx]), - 
potential_closing_edges + prefix_sum_invalid[idx] + (prefix_sum_valid[idx + 1])); - } - }); +struct extract_edges_to_q_r { - auto edges_exist = graph_view.has_edge( - handle, - raft::device_span(std::get<0>(potential_closing_edges).data(), - std::get<0>(potential_closing_edges).size()), - raft::device_span(std::get<1>(potential_closing_edges).data(), - std::get<1>(potential_closing_edges).size())); - - auto edge_to_existance = thrust::make_zip_iterator( - thrust::make_zip_iterator(get_dataframe_buffer_begin(potential_closing_edges), - get_dataframe_buffer_begin(incoming_edges_to_r)), - edges_exist.begin()); - - auto has_edge_last = thrust::remove_if(handle.get_thrust_policy(), - edge_to_existance, - edge_to_existance + edges_exist.size(), - [] __device__(auto e) { - auto edge_exists = thrust::get<1>(e); - return edge_exists == 0; - }); - - auto num_edge_exists = thrust::distance(edge_to_existance, has_edge_last); - - // After pushing the non-existant edges to the second partition, - // remove them by resizing both vertex pair buffer - resize_dataframe_buffer(potential_closing_edges, num_edge_exists, handle.get_stream()); - resize_dataframe_buffer(incoming_edges_to_r, num_edge_exists, handle.get_stream()); - - auto num_edges_not_overcomp = - remove_overcompensating_edges( - handle, - num_edge_exists, - get_dataframe_buffer_begin(potential_closing_edges), - get_dataframe_buffer_begin(incoming_edges_to_r), - raft::device_span(edgelist_srcs.data() + num_valid_edges, num_invalid_edges), - raft::device_span(edgelist_dsts.data() + num_valid_edges, num_invalid_edges)); - - // After pushing the non-existant edges to the second partition, - // remove them by resizing both vertex pair buffer - resize_dataframe_buffer(potential_closing_edges, num_edges_not_overcomp, handle.get_stream()); - resize_dataframe_buffer(incoming_edges_to_r, num_edges_not_overcomp, handle.get_stream()); - - // Extra check for 'incoming_edges_to_r' - if constexpr (!is_q_r_edge) { - // Exchange the 
arguments (incoming_edges_to_r, num_edges_not_overcomp) order - // To also check if the 'incoming_edges_to_r' belong the the invalid_edgelist - num_edges_not_overcomp = - remove_overcompensating_edges( - handle, - num_edges_not_overcomp, - get_dataframe_buffer_begin(incoming_edges_to_r), - get_dataframe_buffer_begin(potential_closing_edges), - raft::device_span(edgelist_srcs.data() + num_valid_edges, - num_invalid_edges), - raft::device_span(edgelist_dsts.data() + num_valid_edges, - num_invalid_edges)); - - resize_dataframe_buffer(potential_closing_edges, num_edges_not_overcomp, handle.get_stream()); - resize_dataframe_buffer(incoming_edges_to_r, num_edges_not_overcomp, handle.get_stream()); + raft::device_span vertex_q_r{}; + __device__ thrust::optional> operator()( + + auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const + { + //printf("\nchecking the count - src = %d, dst = %d, count = %d\n", src, dst, count); + auto itr = thrust::find( + thrust::seq, vertex_q_r.begin(), vertex_q_r.end(), src); + return (itr != vertex_q_r.end() && *itr == src) + ? 
thrust::optional>{thrust::make_tuple(src, dst)} + : thrust::nullopt; } - - thrust::for_each( - handle.get_thrust_policy(), - thrust::make_zip_iterator(get_dataframe_buffer_begin(potential_closing_edges), - get_dataframe_buffer_begin(incoming_edges_to_r)), - thrust::make_zip_iterator( - get_dataframe_buffer_begin(potential_closing_edges) + num_edges_not_overcomp, - get_dataframe_buffer_begin(incoming_edges_to_r) + num_edges_not_overcomp), - [num_triangles = num_triangles.begin(), - num_valid_edges, - invalid_first = thrust::make_zip_iterator(edgelist_dsts.begin() + num_valid_edges, - edgelist_srcs.begin() + num_valid_edges), - invalid_last = thrust::make_zip_iterator( - edgelist_dsts.end(), edgelist_srcs.end())] __device__(auto potential_or_incoming_e) { - auto potential_e = thrust::get<0>(potential_or_incoming_e); - auto incoming_e_to_r = thrust::get<1>(potential_or_incoming_e); - // thrust::tuple> transposed_invalid_edge_; - auto transposed_invalid_edge = - thrust::make_tuple(thrust::get<1>(incoming_e_to_r), thrust::get<1>(potential_e)); - - if constexpr (!is_q_r_edge) { - transposed_invalid_edge = - thrust::make_tuple(thrust::get<1>(incoming_e_to_r), thrust::get<0>(potential_e)); - } - auto itr = - thrust::lower_bound(thrust::seq, invalid_first, invalid_last, transposed_invalid_edge); - if (itr != invalid_last) { assert(*itr == transposed_invalid_edge); } - auto dist = thrust::distance(invalid_first, itr) + num_valid_edges; - - cuda::atomic_ref atomic_counter(num_triangles[dist]); - auto r = atomic_counter.fetch_sub(edge_t{1}, cuda::std::memory_order_relaxed); - }); - - thrust::for_each( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_edges_not_overcomp), - unroll_edge{ - num_valid_edges, - raft::device_span(num_triangles.data(), num_triangles.size()), - get_dataframe_buffer_begin(potential_closing_edges), - thrust::make_zip_iterator(edgelist_dsts.begin(), edgelist_srcs.begin()), - 
thrust::make_zip_iterator(edgelist_dsts.begin() + num_valid_edges, - edgelist_srcs.begin() + num_valid_edges), - thrust::make_zip_iterator(edgelist_dsts.end(), edgelist_srcs.end())}); - - thrust::for_each( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_edges_not_overcomp), - unroll_edge{ - num_valid_edges, - raft::device_span(num_triangles.data(), num_triangles.size()), - get_dataframe_buffer_begin(incoming_edges_to_r), - thrust::make_zip_iterator(edgelist_dsts.begin(), edgelist_srcs.begin()), - thrust::make_zip_iterator(edgelist_dsts.begin() + num_valid_edges, - edgelist_srcs.begin() + num_valid_edges), - thrust::make_zip_iterator(edgelist_dsts.end(), edgelist_srcs.end())}); -} +}; namespace { @@ -456,8 +153,442 @@ struct generate_p_r_or_q_r_from_p_q { } } }; + +template +struct extract_q_idx { + using return_type = thrust::optional>; + + return_type __device__ operator()(thrust::tuple tagged_src, + vertex_t dst, + thrust::nullopt_t, + thrust::nullopt_t, + thrust::nullopt_t) const + { + printf("\n dst = %d, tag = %d\n", dst, thrust::get<1>(tagged_src)); + return thrust::make_optional(thrust::make_tuple(dst, thrust::get<1>(tagged_src))); + } +}; + +template +struct extract_q_idx_closing { + using return_type = thrust::optional>; + raft::device_span weak_edgelist_dsts; + + return_type __device__ operator()(thrust::tuple tagged_src, + vertex_t dst, + thrust::nullopt_t, + thrust::nullopt_t, + thrust::nullopt_t) const + { + //printf("\n dst = %d, tag = %d\n", dst, thrust::get<1>(tagged_src)); + edge_t idx = thrust::get<1>(tagged_src); + if (dst == weak_edgelist_dsts[idx]){ + //printf("\nsrc = %d --- dst = %d, tag = %d\n", thrust::get<0>(tagged_src), dst, thrust::get<1>(tagged_src)); + } + return dst == weak_edgelist_dsts[idx] + ? 
thrust::make_optional(thrust::make_tuple(thrust::get<0>(tagged_src), idx)) + : thrust::nullopt; + } +}; + +template +struct generate_p_q { + size_t chunk_start{}; + raft::device_span intersection_offsets{}; + raft::device_span intersection_indices{}; + raft::device_span invalid_srcs{}; + raft::device_span invalid_dsts{}; + + __device__ thrust::tuple operator()(edge_t i) const + { + auto itr = thrust::upper_bound( + thrust::seq, intersection_offsets.begin() + 1, intersection_offsets.end(), i); + auto idx = thrust::distance(intersection_offsets.begin() + 1, itr); + + return thrust::make_tuple(invalid_srcs[chunk_start + idx], invalid_dsts[chunk_start + idx]); + } +}; + +template +struct generate_p_r { + EdgeIterator invalid_edge{}; + raft::device_span invalid_edge_idx{}; + + __device__ thrust::tuple operator()(edge_t i) const + { + return *(invalid_edge + invalid_edge_idx[i]); + } +}; + +template +struct generate_p_q_q_r { + EdgeIterator invalid_edge{}; + raft::device_span q_closing{}; + raft::device_span invalid_edge_idx{}; + + __device__ thrust::tuple operator()(edge_t i) const + { + + if constexpr (generate_p_q) { + return thrust::make_tuple(thrust::get<0>(*(invalid_edge + invalid_edge_idx[i])), q_closing[i]); + } else { + return thrust::make_tuple(q_closing[i], thrust::get<1>(*(invalid_edge + invalid_edge_idx[i]))); + } + } +}; + +// FIXME: remove 'EdgeIterator' template +template +void unroll_p_q_p_r_edges(raft::handle_t const& handle, + graph_view_t & cur_graph_view, + //thrust::optional(q_r_graph) + edge_property_t, edge_t> & e_property_triangle_count, + raft::device_span vertex_pair_buffer_first, + raft::device_span vertex_pair_buffer_last + //EdgeIterator vertex_pair_buffer, + //vertex_t buffer_size + ) { + + + cugraph::edge_bucket_t edges_to_decrement_count(handle); + edges_to_decrement_count.insert(vertex_pair_buffer_first.begin(), + vertex_pair_buffer_first.end(), + vertex_pair_buffer_last.begin()); + + + printf("\nupdating count\n"); + auto 
vertex_pair_buffer_begin = thrust::make_zip_iterator(vertex_pair_buffer_first.begin(), vertex_pair_buffer_last.begin()); + cugraph::transform_e( + handle, + cur_graph_view, + edges_to_decrement_count, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + e_property_triangle_count.view(), + [ + vertex_pair_buffer_begin = vertex_pair_buffer_begin, + vertex_pair_buffer_end = vertex_pair_buffer_begin + vertex_pair_buffer_first.size() + ] + __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { + //printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); + auto e = thrust::make_tuple(src, dst); + auto itr = thrust::lower_bound( + thrust::seq, vertex_pair_buffer_begin, vertex_pair_buffer_end, e); + + if ((itr != vertex_pair_buffer_end) && (*itr == e)) { + //printf("\nupdating the count - src = %d, dst = %d, count = %d\n", src, dst, count); + return count - 1; + } + + return count; + + }, + e_property_triangle_count.mutable_view(), + false); + + +}; + + + + +template +vertex_t find_unroll_p_q_q_r_edges(raft::handle_t const& handle, + graph_view_t & cur_graph_view, + //thrust::optional(q_r_graph) + edge_property_t, edge_t> & e_property_triangle_count, + raft::device_span weak_edgelist_srcs, + raft::device_span weak_edgelist_dsts, + bool do_expensive_check) { + + size_t prev_chunk_size = 0; + size_t chunk_num_invalid_edges = weak_edgelist_srcs.size(); + size_t edges_to_intersect_per_iteration = + static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); + auto num_chunks = + raft::div_rounding_up_safe(weak_edgelist_srcs.size(), edges_to_intersect_per_iteration); + + auto invalid_edge_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + + + for (size_t i = 0; i < num_chunks; ++i) { + auto chunk_size = std::min(edges_to_intersect_per_iteration, chunk_num_invalid_edges); + auto [intersection_offsets, intersection_indices] = + 
detail::nbr_intersection(handle, + cur_graph_view, + cugraph::edge_dummy_property_t{}.view(), + invalid_edge_first + prev_chunk_size, + invalid_edge_first + prev_chunk_size + chunk_size, + std::array{true, true}, + do_expensive_check); + + raft::print_device_vector("intersection_offsets", intersection_offsets.data(), intersection_offsets.size(), std::cout); + raft::print_device_vector("intersection_indices", intersection_indices.data(), intersection_indices.size(), std::cout); + + // Generate (p, q) edges + // FIXME: Should this array be reduced? an edge can have an intersection size > 1 + auto vertex_pair_buffer_p_q = + allocate_dataframe_buffer>(intersection_indices.size(), + handle.get_stream()); + + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q), + get_dataframe_buffer_end(vertex_pair_buffer_p_q), + generate_p_q{ + prev_chunk_size, + raft::device_span(intersection_offsets.data(), + intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + weak_edgelist_srcs, + weak_edgelist_dsts + }); + + raft::print_device_vector("vertex_pair_buffer_p_q", std::get<0>(vertex_pair_buffer_p_q).data(), std::get<0>(vertex_pair_buffer_p_q).size(), std::cout); + raft::print_device_vector("vertex_pair_buffer_p_q", std::get<1>(vertex_pair_buffer_p_q).data(), std::get<1>(vertex_pair_buffer_p_q).size(), std::cout); + + // unroll (p, q) edges + // FIXME: remove 'EdgeIterator' template + unroll_p_q_p_r_edges( + handle, + cur_graph_view, + e_property_triangle_count, + raft::device_span(std::get<0>(vertex_pair_buffer_p_q).data(), std::get<0>(vertex_pair_buffer_p_q).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_p_q).data(), std::get<1>(vertex_pair_buffer_p_q).size()) + ); + + + /* + // Unroll (p, q) edges + cugraph::edge_bucket_t invalid_edges_bucket(handle); + invalid_edges_bucket.insert(weak_edgelist_srcs.begin(), + weak_edgelist_srcs.end(), + 
weak_edgelist_dsts.begin()); + + printf("\nupdating count\n"); + cugraph::transform_e( + handle, + cur_graph_view, + invalid_edges_bucket, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + e_property_triangle_count.view(), + [ + vertex_pair_buffer_p_q_begin = get_dataframe_buffer_begin(vertex_pair_buffer_p_q), + vertex_pair_buffer_p_q_end = get_dataframe_buffer_end(vertex_pair_buffer_p_q) + ] + __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { + //printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); + auto e = thrust::make_tuple(src, dst); + auto itr = thrust::lower_bound( + thrust::seq, vertex_pair_buffer_p_q_begin, vertex_pair_buffer_p_q_end, e); + + if ((itr != vertex_pair_buffer_p_q_end) && (*itr == e)) { + //printf("\nupdating the count - src = %d, dst = %d, count = %d\n", src, dst, count); + return count - 1; + } + + return count; + + }, + e_property_triangle_count.mutable_view(), + false); + + + + //#if 0 + auto [srcs, dsts, count] = extract_transform_e(handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), + e_property_triangle_count.view(), + extract_edges{}); + + printf("\nafter unrolling (p, q) edges\n"); + raft::print_device_vector("srcs", srcs.data(), srcs.size(), std::cout); + raft::print_device_vector("dsts", dsts.data(), dsts.size(), std::cout); + raft::print_device_vector("count", count.data(), count.size(), std::cout); + //#endif + */ + + auto vertex_pair_buffer_p_r_edge_p_q = + allocate_dataframe_buffer>(intersection_indices.size(), + handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), + get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q), + generate_p_r_or_q_r_from_p_q{ + prev_chunk_size, + 
raft::device_span(intersection_offsets.data(), + intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + weak_edgelist_srcs, + weak_edgelist_dsts}); + + unroll_p_q_p_r_edges( + handle, + cur_graph_view, + e_property_triangle_count, + raft::device_span(std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_p_r_edge_p_q).size()) + ); + + /* + //invalid_edges + cugraph::transform_e( + handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + e_property_triangle_count.view(), + [ + vertex_pair_buffer_p_r_edge_p_q_begin = get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), + vertex_pair_buffer_p_r_edge_p_q_end = get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q) + ] + __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { + //printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); + auto e = thrust::make_tuple(src, dst); + auto itr = thrust::lower_bound( + thrust::seq, vertex_pair_buffer_p_r_edge_p_q_begin, vertex_pair_buffer_p_r_edge_p_q_end, e); + + if ((itr != vertex_pair_buffer_p_r_edge_p_q_end) && (*itr == e)) { + //printf("\nupdating the count - src = %d, dst = %d, count = %d\n", src, dst, count); + return count - 1; + } + + return count; + + }, + e_property_triangle_count.mutable_view(), + false); + printf("\nafter unrolling (p, q) edge (p, r) edges\n"); + */ + + + + auto vertex_pair_buffer_q_r_edge_p_q = + allocate_dataframe_buffer>(intersection_indices.size(), + handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), + get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q), + generate_p_r_or_q_r_from_p_q{ + prev_chunk_size, + 
raft::device_span(intersection_offsets.data(), + intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + weak_edgelist_srcs, + weak_edgelist_dsts}); + + unroll_p_q_p_r_edges( + handle, + cur_graph_view, + e_property_triangle_count, + raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_q_r_edge_p_q).size()) + ); + + /* + // invalid_edges + cugraph::transform_e( + handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + e_property_triangle_count.view(), + [ + vertex_pair_buffer_q_r_edge_p_q_begin = get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), + vertex_pair_buffer_q_r_edge_p_q_end = get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q) + ] + __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { + //printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); + auto e = thrust::make_tuple(src, dst); + auto itr = thrust::lower_bound( + thrust::seq, vertex_pair_buffer_q_r_edge_p_q_begin, vertex_pair_buffer_q_r_edge_p_q_end, e); + + if ((itr != vertex_pair_buffer_q_r_edge_p_q_end) && (*itr == e)) { + //printf("\nupdating the count - src = %d, dst = %d, count = %d\n", src, dst, count); + return count - 1; + } + + return count; + + }, + e_property_triangle_count.mutable_view(), + false); + + + + + #if 0 + auto [srcs_, dsts_, count_] = extract_transform_e(handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + e_property_triangle_count.view(), + extract_edges{}); + + printf("\nafter unrolling (p, q) edges from (p, r)\n"); + raft::print_device_vector("srcs", srcs_.data(), srcs_.size(), std::cout); + raft::print_device_vector("dsts", dsts_.data(), dsts_.size(), 
std::cout); + raft::print_device_vector("count", count_.data(), count_.size(), std::cout); + #endif + */ + + if constexpr (is_p_q_edge) { + // FIXME: This might not work when chunking because the invalid (p. q) edges should be + // temporarily masked at the end when completly unrolling (p, q) edges. Failing to do + // this might cause some invalid edges (p, q) to not have their count decremented + //cugraph::edge_property_t, bool> tmp_edge_mask(handle, cur_graph_view); + //cugraph::fill_edge_property(handle, cur_graph_view, true, tmp_edge_mask); + /* + cur_graph_view.attach_edge_mask(tmp_edge_mask.view()); + cugraph::edge_bucket_t edges_to_tmp_mask(handle); + edges_to_tmp_mask.clear(); // Continuously mask (p, q) edges as they are processed in chunks + edges_to_tmp_mask.insert(std::get<0>(vertex_pair_buffer_p_q).begin(), + std::get<0>(vertex_pair_buffer_p_q).end(), + std::get<1>(vertex_pair_buffer_p_q).begin()); + + cugraph::transform_e( + handle, + cur_graph_view, + edges_to_tmp_mask, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto wgt) { + return false; + }, + tmp_edge_mask.mutable_view(), + false); + + cur_graph_view.attach_edge_mask(tmp_edge_mask.view()); + */ + } + + + + + prev_chunk_size += chunk_size; + chunk_num_invalid_edges -= chunk_size; + } + + return 0; +} } // namespace + + + + + + template std::tuple, rmm::device_uvector, @@ -521,6 +652,7 @@ k_truss(raft::handle_t const& handle, } // 3. Find (k-1)-core and exclude edges that do not belong to (k-1)-core + #if 0 { auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; @@ -579,6 +711,7 @@ k_truss(raft::handle_t const& handle, } renumber_map = std::move(tmp_renumber_map); } + #endif // 4. Keep only the edges from a low-degree vertex to a high-degree vertex. 
@@ -661,277 +794,713 @@ k_truss(raft::handle_t const& handle, { auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; - rmm::device_uvector edgelist_srcs(0, handle.get_stream()); - rmm::device_uvector edgelist_dsts(0, handle.get_stream()); - std::optional> num_triangles{std::nullopt}; - std::optional> edgelist_wgts{std::nullopt}; + /* + Design + 1) create a new graph with with the edge property from which we will iterate + a) Directly update the property of the edges + a) How do you traverse the graph? + */ + + auto e_property_triangle_count = edge_triangle_count(handle, cur_graph_view); + + cugraph::edge_property_t tmp_edge_mask(handle, cur_graph_view); + cugraph::fill_edge_property(handle, cur_graph_view, true, tmp_edge_mask); + cur_graph_view.attach_edge_mask(tmp_edge_mask.view()); + + // extract the edges that have counts less than k - 2. THose edges will be unrolled + std::cout<< "before calling extract transform_e" << std::endl; + auto [weak_edgelist_srcs, weak_edgelist_dsts, triangle_count] = extract_transform_e(handle, + cur_graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + e_property_triangle_count.view(), + extract_weak_edges{k}); + + auto invalid_edge_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + + raft::print_device_vector("srcs", weak_edgelist_srcs.data(), weak_edgelist_srcs.size(), std::cout); + raft::print_device_vector("dsts", weak_edgelist_dsts.data(), weak_edgelist_dsts.size(), std::cout); + raft::print_device_vector("n_tr", triangle_count.data(), triangle_count.size(), std::cout); + + // Call nbr_intersection unroll (p, q) edges + size_t edges_to_intersect_per_iteration = + static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); - edge_weight_view = - edge_weight ? 
std::make_optional((*edge_weight).view()) - : std::optional>{std::nullopt}; + size_t prev_chunk_size = 0; + size_t chunk_num_invalid_edges = weak_edgelist_srcs.size(); - auto prop_num_triangles = edge_triangle_count(handle, cur_graph_view); + auto num_chunks = + raft::div_rounding_up_safe(weak_edgelist_srcs.size(), edges_to_intersect_per_iteration); - std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, num_triangles, std::ignore) = - decompress_to_edgelist( + edge_property_t modified_triangle_count(handle, cur_graph_view); + + // find intersection edges + /* + find_unroll_p_q_q_r_edges( + handle, + cur_graph_view, + e_property_triangle_count, + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), + do_expensive_check + //invalid_edge_first, + //weak_edgelist_srcs.size() + ); + */ + + + + /* + for (size_t i = 0; i < num_chunks; ++i) { + auto chunk_size = std::min(edges_to_intersect_per_iteration, chunk_num_invalid_edges); + auto [intersection_offsets, intersection_indices] = + detail::nbr_intersection(handle, + cur_graph_view, + cugraph::edge_dummy_property_t{}.view(), + invalid_edge_first + prev_chunk_size, + invalid_edge_first + prev_chunk_size + chunk_size, + std::array{true, true}, + do_expensive_check); + + raft::print_device_vector("intersection_offsets", intersection_offsets.data(), intersection_offsets.size(), std::cout); + raft::print_device_vector("intersection_indices", intersection_indices.data(), intersection_indices.size(), std::cout); + + // Generate (p, q) edges + // FIXME: Should this array be reduced? 
an edge can have an intersection size > 1 + auto vertex_pair_buffer_p_q = + allocate_dataframe_buffer>(intersection_indices.size(), + handle.get_stream()); + + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q), + get_dataframe_buffer_end(vertex_pair_buffer_p_q), + generate_p_q{ + prev_chunk_size, + raft::device_span(intersection_offsets.data(), + intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())}); + + raft::print_device_vector("vertex_pair_buffer_p_q", std::get<0>(vertex_pair_buffer_p_q).data(), std::get<0>(vertex_pair_buffer_p_q).size(), std::cout); + raft::print_device_vector("vertex_pair_buffer_p_q", std::get<1>(vertex_pair_buffer_p_q).data(), std::get<1>(vertex_pair_buffer_p_q).size(), std::cout); + + // Unroll (p, q) edges + cugraph::edge_bucket_t invalid_edges_bucket(handle); + invalid_edges_bucket.insert(weak_edgelist_srcs.begin(), + weak_edgelist_srcs.end(), + weak_edgelist_dsts.begin()); + + printf("\nupdating count\n"); + cugraph::transform_e( handle, cur_graph_view, - edge_weight_view, - // FIXME: Update 'decompress_edgelist' to support int32_t and int64_t values - std::make_optional(prop_num_triangles.view()), - std::optional>{std::nullopt}, - std::optional>(std::nullopt)); - auto transposed_edge_first = - thrust::make_zip_iterator(edgelist_dsts.begin(), edgelist_srcs.begin()); - - auto edge_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()); - - auto transposed_edge_triangle_count_pair_first = - thrust::make_zip_iterator(transposed_edge_first, (*num_triangles).begin()); - - thrust::sort_by_key(handle.get_thrust_policy(), - transposed_edge_first, - transposed_edge_first + edgelist_srcs.size(), - (*num_triangles).begin()); - - cugraph::edge_property_t edge_mask(handle, 
cur_graph_view); - cugraph::fill_edge_property(handle, cur_graph_view, true, edge_mask); - cur_graph_view.attach_edge_mask(edge_mask.view()); - - while (true) { - // 'invalid_transposed_edge_triangle_count_first' marks the beginning of the edges to be - // removed 'invalid_transposed_edge_triangle_count_first' + edgelist_srcs.size() marks the end - // of the edges to be removed 'edge_triangle_count_pair_first' marks the begining of the valid - // edges. - auto invalid_transposed_edge_triangle_count_first = - thrust::stable_partition(handle.get_thrust_policy(), - transposed_edge_triangle_count_pair_first, - transposed_edge_triangle_count_pair_first + edgelist_srcs.size(), - [k] __device__(auto e) { - auto num_triangles = thrust::get<1>(e); - return num_triangles >= k - 2; - }); - auto num_invalid_edges = static_cast( - thrust::distance(invalid_transposed_edge_triangle_count_first, - transposed_edge_triangle_count_pair_first + edgelist_srcs.size())); - - if (num_invalid_edges == 0) { break; } - - auto num_valid_edges = edgelist_srcs.size() - num_invalid_edges; - - // case 1. For the (p, q), find intersection 'r'. 
- - // nbr_intersection requires the edges to be sort by 'src' - // sort the invalid edges by src for nbr intersection - size_t edges_to_intersect_per_iteration = - static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); - - size_t prev_chunk_size = 0; - size_t chunk_num_invalid_edges = num_invalid_edges; - - auto num_chunks = - raft::div_rounding_up_safe(edgelist_srcs.size(), edges_to_intersect_per_iteration); - - for (size_t i = 0; i < num_chunks; ++i) { - auto chunk_size = std::min(edges_to_intersect_per_iteration, chunk_num_invalid_edges); - thrust::sort_by_key(handle.get_thrust_policy(), - edge_first + num_valid_edges, - edge_first + edgelist_srcs.size(), - (*num_triangles).begin() + num_valid_edges); - - auto [intersection_offsets, intersection_indices] = - detail::nbr_intersection(handle, - cur_graph_view, - cugraph::edge_dummy_property_t{}.view(), - edge_first + num_valid_edges + prev_chunk_size, - edge_first + num_valid_edges + prev_chunk_size + chunk_size, - std::array{true, true}, - do_expensive_check); - - // Update the number of triangles of each (p, q) edges by looking at their intersection - // size. 
- thrust::for_each( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(chunk_size), - [chunk_start = prev_chunk_size, - num_triangles = raft::device_span((*num_triangles).data() + num_valid_edges, - num_invalid_edges), - intersection_offsets = raft::device_span( - intersection_offsets.data(), intersection_offsets.size())] __device__(auto i) { - num_triangles[chunk_start + i] -= - (intersection_offsets[i + 1] - intersection_offsets[i]); - }); - - // FIXME: Find a way to not have to maintain a dataframe_buffer - auto vertex_pair_buffer_p_r_edge_p_q = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), - get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q), - generate_p_r_or_q_r_from_p_q{ - prev_chunk_size, - raft::device_span(intersection_offsets.data(), - intersection_offsets.size()), - raft::device_span(intersection_indices.data(), - intersection_indices.size()), - raft::device_span(edgelist_srcs.data() + num_valid_edges, num_invalid_edges), - raft::device_span(edgelist_dsts.data() + num_valid_edges, - num_invalid_edges)}); - - auto vertex_pair_buffer_q_r_edge_p_q = + invalid_edges_bucket, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + e_property_triangle_count.view(), + [ + vertex_pair_buffer_p_q_begin = get_dataframe_buffer_begin(vertex_pair_buffer_p_q), + vertex_pair_buffer_p_q_end = get_dataframe_buffer_end(vertex_pair_buffer_p_q) + ] + __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { + //printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); + auto e = thrust::make_tuple(src, dst); + auto itr = thrust::lower_bound( + thrust::seq, vertex_pair_buffer_p_q_begin, vertex_pair_buffer_p_q_end, e); + + if ((itr != vertex_pair_buffer_p_q_end) && (*itr == e)) { + //printf("\nupdating 
the count - src = %d, dst = %d, count = %d\n", src, dst, count); + return count - 1; + } + + return count; + + }, + e_property_triangle_count.mutable_view(), + false); + + + #if 0 + auto [srcs, dsts, count] = extract_transform_e(handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), + e_property_triangle_count.view(), + extract_edges{}); + + printf("\nafter unrolling (p, q) edges\n"); + raft::print_device_vector("srcs", srcs.data(), srcs.size(), std::cout); + raft::print_device_vector("dsts", dsts.data(), dsts.size(), std::cout); + raft::print_device_vector("count", count.data(), count.size(), std::cout); + #endif + + auto vertex_pair_buffer_p_r_edge_p_q = allocate_dataframe_buffer>(intersection_indices.size(), handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), - get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q), - generate_p_r_or_q_r_from_p_q{ - prev_chunk_size, - raft::device_span(intersection_offsets.data(), - intersection_offsets.size()), - raft::device_span(intersection_indices.data(), - intersection_indices.size()), - raft::device_span(edgelist_srcs.data() + num_valid_edges, num_invalid_edges), - raft::device_span(edgelist_dsts.data() + num_valid_edges, - num_invalid_edges)}); - - // Unrolling the edges require the edges to be sorted by destination - // re-sort the invalid edges by 'dst' - thrust::sort_by_key(handle.get_thrust_policy(), - transposed_edge_first + num_valid_edges, - transposed_edge_first + edgelist_srcs.size(), - (*num_triangles).begin() + num_valid_edges); - - thrust::for_each( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(intersection_indices.size()), - unroll_edge{ - num_valid_edges, - raft::device_span((*num_triangles).data(), (*num_triangles).size()), 
- get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), - transposed_edge_first, - transposed_edge_first + num_valid_edges, - transposed_edge_first + edgelist_srcs.size()}); - - thrust::for_each( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(intersection_indices.size()), - unroll_edge{ - num_valid_edges, - raft::device_span((*num_triangles).data(), (*num_triangles).size()), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), - transposed_edge_first, - transposed_edge_first + num_valid_edges, - transposed_edge_first + edgelist_srcs.size()}); - - prev_chunk_size += chunk_size; - chunk_num_invalid_edges -= chunk_size; - } - // case 2: unroll (q, r) - // For each (q, r) edges to unroll, find the incoming edges to 'r' let's say from 'p' and - // create the pair (p, q) - cugraph::unroll_p_r_or_q_r_edges( + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), + get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q), + generate_p_r_or_q_r_from_p_q{ + prev_chunk_size, + raft::device_span(intersection_offsets.data(), + intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.data())}); + + //invalid_edges + cugraph::transform_e( handle, cur_graph_view, - num_invalid_edges, - num_valid_edges, - raft::device_span(edgelist_srcs.data(), edgelist_srcs.size()), - raft::device_span(edgelist_dsts.data(), edgelist_dsts.size()), - raft::device_span((*num_triangles).data(), (*num_triangles).size())); - - // case 3: unroll (p, r) - cugraph::unroll_p_r_or_q_r_edges( + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + e_property_triangle_count.view(), + [ + vertex_pair_buffer_p_r_edge_p_q_begin = 
get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), + vertex_pair_buffer_p_r_edge_p_q_end = get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q) + ] + __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { + //printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); + auto e = thrust::make_tuple(src, dst); + auto itr = thrust::lower_bound( + thrust::seq, vertex_pair_buffer_p_r_edge_p_q_begin, vertex_pair_buffer_p_r_edge_p_q_end, e); + + if ((itr != vertex_pair_buffer_p_r_edge_p_q_end) && (*itr == e)) { + //printf("\nupdating the count - src = %d, dst = %d, count = %d\n", src, dst, count); + return count - 1; + } + + return count; + + }, + e_property_triangle_count.mutable_view(), + false); + + + + auto vertex_pair_buffer_q_r_edge_p_q = + allocate_dataframe_buffer>(intersection_indices.size(), + handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), + get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q), + generate_p_r_or_q_r_from_p_q{ + prev_chunk_size, + raft::device_span(intersection_offsets.data(), + intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.data())}); + + // invalid_edges + cugraph::transform_e( handle, cur_graph_view, - num_invalid_edges, - num_valid_edges, - raft::device_span(edgelist_srcs.data(), edgelist_srcs.size()), - raft::device_span(edgelist_dsts.data(), edgelist_dsts.size()), - raft::device_span((*num_triangles).data(), (*num_triangles).size())); - - // Remove edges that have a triangle count of zero. Those should not be accounted - // for during the unroling phase. 
- auto edges_with_triangle_last = thrust::stable_partition( - handle.get_thrust_policy(), - transposed_edge_triangle_count_pair_first, - transposed_edge_triangle_count_pair_first + (*num_triangles).size(), - [] __device__(auto e) { - auto num_triangles = thrust::get<1>(e); - return num_triangles > 0; - }); - - auto num_edges_with_triangles = static_cast( - thrust::distance(transposed_edge_triangle_count_pair_first, edges_with_triangle_last)); - - thrust::sort(handle.get_thrust_policy(), - thrust::make_zip_iterator(edgelist_srcs.begin() + num_edges_with_triangles, - edgelist_dsts.begin() + num_edges_with_triangles), - thrust::make_zip_iterator(edgelist_srcs.end(), edgelist_dsts.end())); - - cugraph::edge_bucket_t edges_with_no_triangle(handle); - edges_with_no_triangle.insert(edgelist_srcs.begin() + num_edges_with_triangles, - edgelist_srcs.end(), - edgelist_dsts.begin() + num_edges_with_triangles); + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + e_property_triangle_count.view(), + [ + vertex_pair_buffer_q_r_edge_p_q_begin = get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), + vertex_pair_buffer_q_r_edge_p_q_end = get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q) + ] + __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { + //printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); + auto e = thrust::make_tuple(src, dst); + auto itr = thrust::lower_bound( + thrust::seq, vertex_pair_buffer_q_r_edge_p_q_begin, vertex_pair_buffer_q_r_edge_p_q_end, e); + + if ((itr != vertex_pair_buffer_q_r_edge_p_q_end) && (*itr == e)) { + //printf("\nupdating the count - src = %d, dst = %d, count = %d\n", src, dst, count); + return count - 1; + } + + return count; + + }, + e_property_triangle_count.mutable_view(), + false); + - cur_graph_view.clear_edge_mask(); - if (edge_weight_view) { - cugraph::transform_e( - handle, - cur_graph_view, - edges_with_no_triangle, - 
cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - *edge_weight_view, - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto wgt) { - return false; - }, - edge_mask.mutable_view(), - false); - } else { - cugraph::transform_e( + + + + + auto [srcs_, dsts_, count_] = extract_transform_e(handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + e_property_triangle_count.view(), + extract_edges{}); + + printf("\nafter unrolling (p, q) edges from (p, r)\n"); + raft::print_device_vector("srcs", srcs_.data(), srcs_.size(), std::cout); + raft::print_device_vector("dsts", dsts_.data(), dsts_.size(), std::cout); + raft::print_device_vector("count", count_.data(), count_.size(), std::cout); + + + + prev_chunk_size += chunk_size; + chunk_num_invalid_edges -= chunk_size; + } + */ + + + // case 2: unroll (q, r) + // temporarily mask (p, q) edges + /* + cugraph::edge_bucket_t edges_to_tmp_mask(handle); + edges_to_tmp_mask.insert(weak_edgelist_srcs.begin(), + weak_edgelist_srcs.end(), + weak_edgelist_dsts.begin()); + + cugraph::transform_e( handle, cur_graph_view, - edges_with_no_triangle, + edges_to_tmp_mask, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), cugraph::edge_dummy_property_t{}.view(), - [] __device__( - auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { + [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto wgt) { return false; }, - edge_mask.mutable_view(), + tmp_edge_mask.mutable_view(), false); - } - cur_graph_view.attach_edge_mask(edge_mask.view()); + + cur_graph_view.attach_edge_mask(tmp_edge_mask.view()); + */ + + + // FIXME: memory footprint overhead + rmm::device_uvector vertex_q_r(weak_edgelist_srcs.size() * 2, handle.get_stream()); + + // Iterate over unique vertices that appear as either q or r + 
thrust::merge(handle.get_thrust_policy(), + weak_edgelist_srcs.begin(), + weak_edgelist_srcs.end(), + weak_edgelist_dsts.begin(), + weak_edgelist_dsts.end(), + vertex_q_r.begin()); + thrust::sort(handle.get_thrust_policy(), vertex_q_r.begin(), vertex_q_r.end()); + auto invalid_unique_v_end = thrust::unique( + handle.get_thrust_policy(), + vertex_q_r.begin(), + vertex_q_r.end()); + + vertex_q_r.resize(thrust::distance(vertex_q_r.begin(), invalid_unique_v_end), handle.get_stream()); + + auto invalid_edgelist = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + // raft::device_span(vertex_q_r.data(), vertex_q_r.size()) + auto [srcs_to_q_r, dsts_to_q_r] = extract_transform_e(handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), + cugraph::edge_dummy_property_t{}.view(), + extract_edges_to_q_r{raft::device_span(vertex_q_r.data(), vertex_q_r.size())}); + + printf("\nunrolling q, r edges\n"); + raft::print_device_vector("srcs", srcs_to_q_r.data(), srcs_to_q_r.size(), std::cout); + raft::print_device_vector("dsts", dsts_to_q_r.data(), dsts_to_q_r.size(), std::cout); + + std::optional> graph_q_r{std::nullopt}; + std::optional> renumber_map_q_r{std::nullopt}; + std::tie(*graph_q_r, std::ignore, std::ignore, std::ignore, renumber_map_q_r) = + create_graph_from_edgelist( + handle, + std::nullopt, + std::move(dsts_to_q_r), + std::move(srcs_to_q_r), + std::nullopt, + std::nullopt, + std::nullopt, + cugraph::graph_properties_t{true, graph_view.is_multigraph()}, + false); + + auto [srcs__, dsts__] = extract_transform_e(handle, + (*graph_q_r).view(), + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), + cugraph::edge_dummy_property_t{}.view(), + 
extract_edges_to_q_r{raft::device_span(vertex_q_r.data(), vertex_q_r.size())}); + + printf("\nq, r edge graph\n"); + raft::print_device_vector("srcs__", srcs__.data(), srcs__.size(), std::cout); + raft::print_device_vector("dsts__", dsts__.data(), dsts__.size(), std::cout); + + + // ********************************************************************************************************* + printf("\nbefore crash\n"); + raft::print_device_vector("srcs", weak_edgelist_srcs.data(), weak_edgelist_srcs.size(), std::cout); + raft::print_device_vector("dsts", weak_edgelist_dsts.data(), weak_edgelist_srcs.size(), std::cout); + + prev_chunk_size = 0; + chunk_num_invalid_edges = weak_edgelist_srcs.size(); + + num_chunks = + raft::div_rounding_up_safe(weak_edgelist_srcs.size(), edges_to_intersect_per_iteration); + + for (size_t i = 0; i < num_chunks; ++i) { + + auto chunk_size = std::min(edges_to_intersect_per_iteration, chunk_num_invalid_edges); + auto [intersection_offsets, intersection_indices] = + detail::nbr_intersection(handle, + (*graph_q_r).view(), + //cur_graph_view, + cugraph::edge_dummy_property_t{}.view(), + invalid_edge_first + prev_chunk_size, + invalid_edge_first + prev_chunk_size + chunk_size, + //thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()), + //thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end()), + std::array{true, true}, + true); + + // clear mask. + cur_graph_view.clear_edge_mask(); + + printf("\n**********intersection when unrolling q, r edges\n"); + raft::print_device_vector("intersection_offsets", intersection_offsets.data(), intersection_offsets.size(), std::cout); + raft::print_device_vector("intersection_indices", intersection_indices.data(), intersection_indices.size(), std::cout); + + // Generate (p, q) edges + // FIXME: Should this array be reduced? 
an edge can have an intersection size > 1 + auto vertex_pair_buffer_p_q = + allocate_dataframe_buffer>(intersection_indices.size(), + handle.get_stream()); + + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q), + get_dataframe_buffer_end(vertex_pair_buffer_p_q), + generate_p_q{ + prev_chunk_size, + raft::device_span(intersection_offsets.data(), + intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())}); + + raft::print_device_vector("vertex_pair_buffer_p_q", std::get<0>(vertex_pair_buffer_p_q).data(), std::get<0>(vertex_pair_buffer_p_q).size(), std::cout); + raft::print_device_vector("vertex_pair_buffer_p_q", std::get<1>(vertex_pair_buffer_p_q).data(), std::get<1>(vertex_pair_buffer_p_q).size(), std::cout); + + // Unroll (p, q) edges + cugraph::edge_bucket_t invalid_edges_bucket(handle); + invalid_edges_bucket.insert(weak_edgelist_srcs.begin(), + weak_edgelist_srcs.end(), + weak_edgelist_dsts.begin()); + + printf("\nupdating count\n"); + cugraph::transform_e( + handle, + cur_graph_view, + invalid_edges_bucket, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + e_property_triangle_count.view(), + [ + vertex_pair_buffer_p_q_begin = get_dataframe_buffer_begin(vertex_pair_buffer_p_q), + vertex_pair_buffer_p_q_end = get_dataframe_buffer_end(vertex_pair_buffer_p_q) + ] + __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { + //printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); + auto e = thrust::make_tuple(src, dst); + auto itr = thrust::lower_bound( + thrust::seq, vertex_pair_buffer_p_q_begin, vertex_pair_buffer_p_q_end, e); + + if ((itr != vertex_pair_buffer_p_q_end) && (*itr == e)) { + //printf("\nupdating the count - src = %d, 
dst = %d, count = %d\n", src, dst, count); + return count - 1; + } + + return count; + + }, + e_property_triangle_count.mutable_view(), + false); + + + //#if 0 + auto [srcs, dsts, count] = extract_transform_e(handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), + e_property_triangle_count.view(), + extract_edges{}); + + //printf("\nafter unrolling (p, q) edges\n"); + //raft::print_device_vector("srcs", srcs.data(), srcs.size(), std::cout); + //raft::print_device_vector("dsts", dsts.data(), dsts.size(), std::cout); + //raft::print_device_vector("count", count.data(), count.size(), std::cout); + //#endif + + auto vertex_pair_buffer_p_r_edge_p_q = + allocate_dataframe_buffer>(intersection_indices.size(), + handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), + get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q), + generate_p_r_or_q_r_from_p_q{ + prev_chunk_size, + raft::device_span(intersection_offsets.data(), + intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.data())}); + + //invalid_edges + cugraph::transform_e( + handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + e_property_triangle_count.view(), + [ + vertex_pair_buffer_p_r_edge_p_q_begin = get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), + vertex_pair_buffer_p_r_edge_p_q_end = get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q) + ] + __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { + //printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); + auto e = 
thrust::make_tuple(src, dst); + auto itr = thrust::lower_bound( + thrust::seq, vertex_pair_buffer_p_r_edge_p_q_begin, vertex_pair_buffer_p_r_edge_p_q_end, e); + + if ((itr != vertex_pair_buffer_p_r_edge_p_q_end) && (*itr == e)) { + //printf("\nupdating the count - src = %d, dst = %d, count = %d\n", src, dst, count); + return count - 1; + } + + return count; + + }, + e_property_triangle_count.mutable_view(), + false); + + - edgelist_srcs.resize(num_edges_with_triangles, handle.get_stream()); - edgelist_dsts.resize(num_edges_with_triangles, handle.get_stream()); - (*num_triangles).resize(num_edges_with_triangles, handle.get_stream()); + auto vertex_pair_buffer_q_r_edge_p_q = + allocate_dataframe_buffer>(intersection_indices.size(), + handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), + get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q), + generate_p_r_or_q_r_from_p_q{ + prev_chunk_size, + raft::device_span(intersection_offsets.data(), + intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.data())}); + + // invalid_edges + cugraph::transform_e( + handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + e_property_triangle_count.view(), + [ + vertex_pair_buffer_q_r_edge_p_q_begin = get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), + vertex_pair_buffer_q_r_edge_p_q_end = get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q) + ] + __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { + //printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); + auto e = thrust::make_tuple(src, dst); + auto itr = thrust::lower_bound( + thrust::seq, vertex_pair_buffer_q_r_edge_p_q_begin, 
vertex_pair_buffer_q_r_edge_p_q_end, e); + + if ((itr != vertex_pair_buffer_q_r_edge_p_q_end) && (*itr == e)) { + //printf("\nupdating the count - src = %d, dst = %d, count = %d\n", src, dst, count); + return count - 1; + } + + return count; + + }, + e_property_triangle_count.mutable_view(), + false); + + prev_chunk_size += chunk_size; + chunk_num_invalid_edges -= chunk_size; + + + + printf("\nafter unrolling (p, q) edges\n"); + raft::print_device_vector("srcs", srcs.data(), srcs.size(), std::cout); + raft::print_device_vector("dsts", dsts.data(), dsts.size(), std::cout); + raft::print_device_vector("count", count.data(), count.size(), std::cout); } + // **************************************************************************** + + + // Unrolling p, r edges + // create pair invalid_src, invalid_edge_idx + // create a dataframe buffer of size invalid_edge_size + // FIXME: No need to create a dataframe buffer. We can just zip weak_edgelist_srcs + // with a vector counting from 0 .. + auto vertex_pair_buffer_p_tag = + allocate_dataframe_buffer>(weak_edgelist_srcs.size(), + handle.get_stream()); + + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_tag), + get_dataframe_buffer_end(vertex_pair_buffer_p_tag), + [ + p = weak_edgelist_srcs.begin() + ] __device__(auto idx) { + return thrust::make_tuple(p[idx], idx); + }); + + raft::print_device_vector("vertex_pair_buffer_p_tag", std::get<0>(vertex_pair_buffer_p_tag).data(), std::get<0>(vertex_pair_buffer_p_tag).size(), std::cout); + raft::print_device_vector("vertex_pair_buffer_p_tag", std::get<1>(vertex_pair_buffer_p_tag).data(), std::get<1>(vertex_pair_buffer_p_tag).size(), std::cout); + + + vertex_frontier_t vertex_frontier(handle, 1); + vertex_frontier.bucket(0).insert( + thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).begin(), std::get<1>(vertex_pair_buffer_p_tag).begin()), + thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).end(), 
std::get<1>(vertex_pair_buffer_p_tag).end())); + + printf("\nsize after inserting - part 1 = %d\n", vertex_frontier.bucket(0).size()); + + /* + std::tie(edge_majors, edge_minors, *edge_weights, subgraph_edge_graph_ids) = + cugraph::extract_transform_v_frontier_outgoing_e( + handle, + cur_graph_view, + vertex_frontier.bucket(0), + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + [] __device__( + thrust::tuple tagged_src, + vertex_t dst, + thrust::nullopt_t, + thrust::nullopt_t, + thrust::nullopt_t) { + printf("\n dst = %d, tag = %d\n", dst, thrust::get<1>(tagged_src)); - std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, std::ignore, std::ignore) = - decompress_to_edgelist( + }, + do_expensive_check); + */ + + auto [q, idx] = + cugraph::extract_transform_v_frontier_outgoing_e( handle, cur_graph_view, - edge_weight_view ? std::make_optional(*edge_weight_view) : std::nullopt, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - std::optional>(std::nullopt)); - - std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts) = - symmetrize_edgelist(handle, - std::move(edgelist_srcs), - std::move(edgelist_dsts), - std::move(edgelist_wgts), - false); + vertex_frontier.bucket(0), + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + extract_q_idx{}, + do_expensive_check); + + raft::print_device_vector("q", q.data(), q.size(), std::cout); + raft::print_device_vector("i", idx.data(), idx.size(), std::cout); + vertex_frontier.bucket(0).clear(); + printf("\nsize after clearning = %d\n", vertex_frontier.bucket(0).size()); + + vertex_frontier.bucket(0).insert( + thrust::make_zip_iterator(q.begin(), idx.begin()), + thrust::make_zip_iterator(q.end(), idx.end())); + + printf("\nsize after inserting - part 2 = %d\n", vertex_frontier.bucket(0).size()); + // FIXME: Need to mask (p, q) and (q, r) edges 
before unrolling (p, r) edges to avoid overcompensating + auto [q_closing, idx_closing] = + cugraph::extract_transform_v_frontier_outgoing_e( + handle, + cur_graph_view, + vertex_frontier.bucket(0), + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + extract_q_idx_closing{raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())}, + do_expensive_check); + + raft::print_device_vector("q_closing", q_closing.data(), q_closing.size(), std::cout); + raft::print_device_vector("i_closing", idx_closing.data(), idx_closing.size(), std::cout); + + + // extract pair (p, r) + auto vertex_pair_buffer_p_r = + allocate_dataframe_buffer>(q_closing.size(), + handle.get_stream()); + // construct pair (p, q) + // construct pair (q, r) + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r), + get_dataframe_buffer_end(vertex_pair_buffer_p_r), + generate_p_r{ + invalid_edgelist, + raft::device_span(idx_closing.data(), + idx_closing.size()) + }); + + // construct pair (p, q) + auto vertex_pair_buffer_p_q_for_p_r = + allocate_dataframe_buffer>(q_closing.size(), + handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), + get_dataframe_buffer_end(vertex_pair_buffer_p_q_for_p_r), + generate_p_q_q_r{ + invalid_edgelist, + raft::device_span(q_closing.data(), + q_closing.size()), + raft::device_span(idx_closing.data(), + idx_closing.size()) + }); + // construct pair (q, r) + auto vertex_pair_buffer_q_r_for_p_r = + allocate_dataframe_buffer>(q_closing.size(), + handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), + get_dataframe_buffer_end(vertex_pair_buffer_q_r_for_p_r), + generate_p_q_q_r{ + invalid_edgelist, + raft::device_span(q_closing.data(), + q_closing.size()), + 
raft::device_span(idx_closing.data(), + idx_closing.size()) + }); + + raft::print_device_vector("vertex_pair_buffer_p_r", std::get<0>(vertex_pair_buffer_p_r).data(), std::get<0>(vertex_pair_buffer_p_r).size(), std::cout); + raft::print_device_vector("vertex_pair_buffer_p_r", std::get<1>(vertex_pair_buffer_p_r).data(), std::get<1>(vertex_pair_buffer_p_r).size(), std::cout); + + raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r", std::get<0>(vertex_pair_buffer_p_q_for_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_for_p_r).size(), std::cout); + raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r", std::get<1>(vertex_pair_buffer_p_q_for_p_r).data(), std::get<1>(vertex_pair_buffer_p_q_for_p_r).size(), std::cout); + + raft::print_device_vector("vertex_pair_buffer_q_r_for_p_r", std::get<0>(vertex_pair_buffer_q_r_for_p_r).data(), std::get<0>(vertex_pair_buffer_q_r_for_p_r).size(), std::cout); + raft::print_device_vector("vertex_pair_buffer_q_r_for_p_r", std::get<1>(vertex_pair_buffer_q_r_for_p_r).data(), std::get<1>(vertex_pair_buffer_q_r_for_p_r).size(), std::cout); + + //rmm::device_uvector weak_edgelist_srcs(0, handle.get_stream()); + //rmm::device_uvector weak_edgelist_dsts(0, handle.get_stream()); + std::optional> edgelist_wgts{std::nullopt}; return std::make_tuple( - std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_wgts)); + std::move(weak_edgelist_srcs), std::move(weak_edgelist_dsts), std::move(edgelist_wgts)); + + + + } + } } // namespace cugraph diff --git a/cpp/src/community/k_truss_mg.cu b/cpp/src/community/k_truss_mg.cu new file mode 100644 index 00000000000..04845d5b73d --- /dev/null +++ b/cpp/src/community/k_truss_mg.cu @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "community/k_truss_impl.cuh" + +namespace cugraph { + +// SG instantiation + +template std::tuple, + rmm::device_uvector, + std::optional>> +k_truss(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int32_t k, + bool do_expensive_check); +/* +template std::tuple, + rmm::device_uvector, + std::optional>> +k_truss(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int32_t k, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>> +k_truss(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int64_t k, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>> +k_truss(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int64_t k, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>> +k_truss(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int64_t k, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>> +k_truss(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int64_t k, + bool do_expensive_check); +*/ + +} // namespace cugraph diff --git a/cpp/src/community/k_truss_sg.cu b/cpp/src/community/k_truss_sg.cu index dfea62182f5..2899f9fd722 100644 --- a/cpp/src/community/k_truss_sg.cu +++ 
b/cpp/src/community/k_truss_sg.cu @@ -29,6 +29,7 @@ k_truss(raft::handle_t const& handle, int32_t k, bool do_expensive_check); +/* template std::tuple, rmm::device_uvector, std::optional>> @@ -73,5 +74,5 @@ k_truss(raft::handle_t const& handle, std::optional> edge_weight_view, int64_t k, bool do_expensive_check); - +*/ } // namespace cugraph diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index b9985a48fab..a64bda8a135 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -589,7 +589,11 @@ if(BUILD_CUGRAPH_MG_TESTS) ############################################################################################### # - MG EDGE TRIANGLE COUNT tests -------------------------------------------------------------------------- - ConfigureTest(MG_EDGE_TRIANGLE_COUNT_TEST community/mg_edge_triangle_count_test.cpp) + #ConfigureTest(MG_EDGE_TRIANGLE_COUNT_TEST community/mg_edge_triangle_count_test.cpp) + + ############################################################################################### + # - MG K-TRUSS tests -------------------------------------------------------------------------- + #ConfigureTest(MG_K_TRUSS_TEST community/mg_k_truss_test.cpp) ############################################################################################### # - MG WEAKLY CONNECTED COMPONENTS tests ------------------------------------------------------ @@ -614,7 +618,7 @@ if(BUILD_CUGRAPH_MG_TESTS) ############################################################################################### # - MG TRIANGLE COUNT tests ------------------------------------------------------------------- - ConfigureTestMG(MG_TRIANGLE_COUNT_TEST community/mg_triangle_count_test.cpp) + #ConfigureTestMG(MG_TRIANGLE_COUNT_TEST community/mg_triangle_count_test.cpp) ############################################################################################### # - MG coarsening tests ----------------------------------------------------------------------- @@ -788,7 +792,7 @@ 
ConfigureCTest(CAPI_INDUCED_SUBGRAPH_TEST c_api/induced_subgraph_test.c) ConfigureCTest(CAPI_DEGREES c_api/degrees_test.c) ConfigureCTest(CAPI_EGONET_TEST c_api/egonet_test.c) ConfigureCTest(CAPI_TWO_HOP_NEIGHBORS_TEST c_api/two_hop_neighbors_test.c) -ConfigureCTest(CAPI_K_TRUSS_TEST c_api/k_truss_test.c) +#ConfigureCTest(CAPI_K_TRUSS_TEST c_api/k_truss_test.c) if (BUILD_CUGRAPH_MTMG_TESTS) ################################################################################################### diff --git a/cpp/tests/community/k_truss_test.cpp b/cpp/tests/community/k_truss_test.cpp index c8010422e42..ac8ab6de5d3 100644 --- a/cpp/tests/community/k_truss_test.cpp +++ b/cpp/tests/community/k_truss_test.cpp @@ -261,14 +261,22 @@ class Tests_KTruss : public ::testing::TestWithParam; -using Tests_KTruss_Rmat = Tests_KTruss; +//using Tests_KTruss_Rmat = Tests_KTruss; TEST_P(Tests_KTruss_File, CheckInt32Int32Float) { run_current_test( override_File_Usecase_with_cmd_line_arguments(GetParam())); } +/* +TEST_P(Tests_KTruss_Rmat, CheckInt32Int32Float) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); +} +*/ +/* TEST_P(Tests_KTruss_File, CheckInt64Int64Float) { run_current_test( @@ -286,29 +294,19 @@ TEST_P(Tests_KTruss_Rmat, CheckInt64Int64Float) run_current_test( override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); } +*/ INSTANTIATE_TEST_SUITE_P( simple_test, Tests_KTruss_File, ::testing::Combine( // enable correctness checks - ::testing::Values(KTruss_Usecase{5, true, false}, - KTruss_Usecase{4, true, false}, - KTruss_Usecase{9, true, true}, - KTruss_Usecase{7, true, true}), - ::testing::Values(cugraph::test::File_Usecase("test/datasets/netscience.mtx"), - cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); - -INSTANTIATE_TEST_SUITE_P(rmat_small_test, - Tests_KTruss_Rmat, - // enable correctness checks - ::testing::Combine(::testing::Values(KTruss_Usecase{5, false, true}, - KTruss_Usecase{4, false, true}, - KTruss_Usecase{9, 
true, true}, - KTruss_Usecase{7, true, true}), - ::testing::Values(cugraph::test::Rmat_Usecase( - 10, 16, 0.57, 0.19, 0.19, 0, true, false)))); + ::testing::Values(KTruss_Usecase{4, false, true}), + ::testing::Values(cugraph::test::File_Usecase("/raid/jnke/optimize_ktruss/datasets/test_datasets.mtx")))); + + +#if 0 INSTANTIATE_TEST_SUITE_P( rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with --gtest_filter to select only the rmat_benchmark_test with a specific @@ -319,7 +317,8 @@ INSTANTIATE_TEST_SUITE_P( // disable correctness checks for large graphs // FIXME: High memory footprint. Perform nbr_intersection in chunks. ::testing::Combine( - ::testing::Values(KTruss_Usecase{12, false, false}), - ::testing::Values(cugraph::test::Rmat_Usecase(14, 16, 0.57, 0.19, 0.19, 0, true, false)))); + ::testing::Values(KTruss_Usecase{4, false, true}, KTruss_Usecase{5, false, true}), + ::testing::Values(cugraph::test::Rmat_Usecase(12, 6, 0.57, 0.19, 0.19, 0, true, false)))); +#endif CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/community/mg_k_truss_test.cpp b/cpp/tests/community/mg_k_truss_test.cpp new file mode 100644 index 00000000000..8cf12e0fc3e --- /dev/null +++ b/cpp/tests/community/mg_k_truss_test.cpp @@ -0,0 +1,249 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "utilities/base_fixture.hpp" +#include "utilities/conversion_utilities.hpp" +#include "utilities/device_comm_wrapper.hpp" +#include "utilities/mg_utilities.hpp" +#include "utilities/property_generator_utilities.hpp" +#include "utilities/test_graphs.hpp" +#include "utilities/thrust_wrapper.hpp" + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include + +#include + +struct EdgeTriangleCount_Usecase { + bool edge_masking_{false}; + bool check_correctness_{true}; +}; + +template +class Tests_MGEdgeTriangleCount + : public ::testing::TestWithParam> { + public: + Tests_MGEdgeTriangleCount() {} + + static void SetUpTestCase() { handle_ = cugraph::test::initialize_mg_handle(); } + + static void TearDownTestCase() { handle_.reset(); } + + virtual void SetUp() {} + virtual void TearDown() {} + + // Compare the results of running EdgeTriangleCount on multiple GPUs to that of a single-GPU run + template + void run_current_test(EdgeTriangleCount_Usecase const& edge_triangle_count_usecase, + input_usecase_t const& input_usecase) + { + using weight_t = float; + + HighResTimer hr_timer{}; + + // 1. 
create MG graph + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.start("MG Construct graph"); + } + + cugraph::graph_t mg_graph(*handle_); + std::optional> mg_renumber_map{std::nullopt}; + std::tie(mg_graph, std::ignore, mg_renumber_map) = + cugraph::test::construct_graph( + *handle_, input_usecase, false, true, false, true); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + auto mg_graph_view = mg_graph.view(); + + std::optional> edge_mask{std::nullopt}; + if (edge_triangle_count_usecase.edge_masking_) { + edge_mask = cugraph::test::generate::edge_property( + *handle_, mg_graph_view, 2); + mg_graph_view.attach_edge_mask((*edge_mask).view()); + } + + // 2. run MG EdgeTriangleCount + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.start("MG EdgeTriangleCount"); + } + + auto d_mg_cugraph_results = + cugraph::edge_triangle_count(*handle_, mg_graph_view); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + // 3. Compare SG & MG results + + if (edge_triangle_count_usecase.check_correctness_) { + // 3-1. 
Convert to SG graph + + cugraph::graph_t sg_graph(*handle_); + std::optional< + cugraph::edge_property_t, edge_t>> + d_sg_cugraph_results{std::nullopt}; + std::tie(sg_graph, std::ignore, d_sg_cugraph_results, std::ignore) = + cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + // FIXME: Update 'create_graph_from_edgelist' to support int32_t and int64_t values + std::make_optional(d_mg_cugraph_results.view()), + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); + + if (handle_->get_comms().get_rank() == int{0}) { + // 3-2. Convert the MG triangle counts stored as 'edge_property_t' to device vector + + auto [edgelist_srcs, edgelist_dsts, d_edgelist_weights, d_edge_triangle_counts, d_edgelist_types] = + cugraph::decompress_to_edgelist( + *handle_, + sg_graph.view(), + std::optional>{std::nullopt}, + // FIXME: Update 'decompress_edgelist' to support int32_t and int64_t values + std::make_optional((*d_sg_cugraph_results).view()), + std::optional>{std::nullopt}, + std::optional>{ + std::nullopt}); // FIXME: No longer needed + + // 3-3. Run SG EdgeTriangleCount + + auto ref_d_sg_cugraph_results = + cugraph::edge_triangle_count(*handle_, sg_graph.view()); + auto [ref_edgelist_srcs, + ref_edgelist_dsts, + ref_d_edgelist_weights, + ref_d_edge_triangle_counts, + ref_d_edgelist_types] = + cugraph::decompress_to_edgelist( + *handle_, + sg_graph.view(), + std::optional>{std::nullopt}, + std::make_optional(ref_d_sg_cugraph_results.view()), + std::optional>{std::nullopt}, + std::optional>{ + std::nullopt}); // FIXME: No longer needed + + // 3-4. 
Compare + + auto h_mg_edge_triangle_counts = cugraph::test::to_host(*handle_, *d_edge_triangle_counts); + auto h_sg_edge_triangle_counts = + cugraph::test::to_host(*handle_, *ref_d_edge_triangle_counts); + + ASSERT_TRUE(std::equal(h_mg_edge_triangle_counts.begin(), + h_mg_edge_triangle_counts.end(), + h_sg_edge_triangle_counts.begin())); + } + } + } + + private: + static std::unique_ptr handle_; +}; + +template +std::unique_ptr Tests_MGEdgeTriangleCount::handle_ = nullptr; + +using Tests_MGEdgeTriangleCount_File = Tests_MGEdgeTriangleCount; +using Tests_MGEdgeTriangleCount_Rmat = Tests_MGEdgeTriangleCount; + +TEST_P(Tests_MGEdgeTriangleCount_File, CheckInt32Int32) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_MGEdgeTriangleCount_Rmat, CheckInt32Int32) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGEdgeTriangleCount_Rmat, CheckInt32Int64) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGEdgeTriangleCount_Rmat, CheckInt64Int64) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +INSTANTIATE_TEST_SUITE_P( + file_tests, + Tests_MGEdgeTriangleCount_File, + ::testing::Combine( + // enable correctness checks + ::testing::Values(EdgeTriangleCount_Usecase{false, true}, + EdgeTriangleCount_Usecase{true, true}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_tests, + Tests_MGEdgeTriangleCount_Rmat, + ::testing::Combine( + ::testing::Values(EdgeTriangleCount_Usecase{false, true}, + EdgeTriangleCount_Usecase{true, true}), + 
::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_MGEdgeTriangleCount_Rmat, + ::testing::Combine( + ::testing::Values(EdgeTriangleCount_Usecase{false, false}, + EdgeTriangleCount_Usecase{true, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, true, false)))); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/community/mg_triangle_count_test.cpp b/cpp/tests/community/mg_triangle_count_test.cpp index 932ff5050f1..2b09b17ac10 100644 --- a/cpp/tests/community/mg_triangle_count_test.cpp +++ b/cpp/tests/community/mg_triangle_count_test.cpp @@ -39,18 +39,16 @@ #include -struct TriangleCount_Usecase { - double vertex_subset_ratio{0.0}; - - bool edge_masking{false}; - bool check_correctness{true}; +struct EdgeTriangleCount_Usecase { + bool edge_masking_{false}; + bool check_correctness_{true}; }; template -class Tests_MGTriangleCount - : public ::testing::TestWithParam> { +class Tests_MGEdgeTriangleCount + : public ::testing::TestWithParam> { public: - Tests_MGTriangleCount() {} + Tests_MGEdgeTriangleCount() {} static void SetUpTestCase() { handle_ = cugraph::test::initialize_mg_handle(); } @@ -59,9 +57,9 @@ class Tests_MGTriangleCount virtual void SetUp() {} virtual void TearDown() {} - // Compare the results of running TriangleCount on multiple GPUs to that of a single-GPU run + // Compare the results of running EdgeTriangleCount on multiple GPUs to that of a single-GPU run template - void run_current_test(TriangleCount_Usecase const& triangle_count_usecase, + void 
run_current_test(EdgeTriangleCount_Usecase const& edge_triangle_count_usecase, input_usecase_t const& input_usecase) { using weight_t = float; @@ -92,63 +90,34 @@ class Tests_MGTriangleCount auto mg_graph_view = mg_graph.view(); std::optional> edge_mask{std::nullopt}; - if (triangle_count_usecase.edge_masking) { + if (edge_triangle_count_usecase.edge_masking_) { edge_mask = cugraph::test::generate::edge_property( *handle_, mg_graph_view, 2); mg_graph_view.attach_edge_mask((*edge_mask).view()); } - // 2. generate a vertex subset to compute triangle counts - - std::optional> h_mg_vertices{std::nullopt}; - if (triangle_count_usecase.vertex_subset_ratio < 1.0) { - std::default_random_engine generator{ - static_cast(handle_->get_comms().get_rank()) /* seed */}; - std::uniform_real_distribution distribution{0.0, 1.0}; - h_mg_vertices = std::vector(mg_graph_view.local_vertex_partition_range_size()); - std::iota((*h_mg_vertices).begin(), - (*h_mg_vertices).end(), - mg_graph_view.local_vertex_partition_range_first()); - (*h_mg_vertices) - .erase(std::remove_if((*h_mg_vertices).begin(), - (*h_mg_vertices).end(), - [&generator, &distribution, triangle_count_usecase](auto v) { - return distribution(generator) >= - triangle_count_usecase.vertex_subset_ratio; - }), - (*h_mg_vertices).end()); - } - - auto d_mg_vertices = h_mg_vertices ? std::make_optional>( - (*h_mg_vertices).size(), handle_->get_stream()) - : std::nullopt; - if (d_mg_vertices) { - raft::update_device((*d_mg_vertices).data(), - (*h_mg_vertices).data(), - (*h_mg_vertices).size(), - handle_->get_stream()); - } - - // 3. run MG TriangleCount - - rmm::device_uvector d_mg_triangle_counts( - d_mg_vertices ? (*d_mg_vertices).size() : mg_graph_view.local_vertex_partition_range_size(), - handle_->get_stream()); + // 2. 
run MG EdgeTriangleCount if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement handle_->get_comms().barrier(); - hr_timer.start("MG TriangleCount"); + hr_timer.start("MG EdgeTriangleCount"); } - cugraph::triangle_count( - *handle_, - mg_graph_view, - d_mg_vertices ? std::make_optional>( - (*d_mg_vertices).begin(), (*d_mg_vertices).end()) - : std::nullopt, - raft::device_span(d_mg_triangle_counts.begin(), d_mg_triangle_counts.end()), - false); + /* + auto d_mg_cugraph_results = + cugraph::edge_triangle_count(*handle_, mg_graph_view); + */ + + auto [d_cugraph_srcs, d_cugraph_dsts, d_cugraph_wgts] = + cugraph::k_truss( + *handle_, + mg_graph_view, + //edge_weight ? std::make_optional((*edge_weight).view()) : std::nullopt, + std::nullopt, // FIXME: test weights + //k_truss_usecase.k_, + 4, + false); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -157,70 +126,73 @@ class Tests_MGTriangleCount hr_timer.display_and_clear(std::cout); } - // 4. copmare SG & MG results + // 3. Compare SG & MG results - if (triangle_count_usecase.check_correctness) { - // 4-1. aggregate MG results - - std::optional> d_mg_aggregate_vertices{std::nullopt}; - rmm::device_uvector d_mg_aggregate_triangle_counts(0, handle_->get_stream()); - std::tie(d_mg_aggregate_vertices, d_mg_aggregate_triangle_counts) = - cugraph::test::mg_vertex_property_values_to_sg_vertex_property_values( - *handle_, - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), - mg_graph_view.local_vertex_partition_range(), - std::optional>{std::nullopt}, - d_mg_vertices ? std::make_optional>( - (*d_mg_vertices).data(), (*d_mg_vertices).size()) - : std::nullopt, - raft::device_span(d_mg_triangle_counts.data(), - d_mg_triangle_counts.size())); + #if 0 + if (edge_triangle_count_usecase.check_correctness_) { + // 3-1. 
Convert to SG graph cugraph::graph_t sg_graph(*handle_); - std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = + std::optional< + cugraph::edge_property_t, edge_t>> + d_sg_cugraph_results{std::nullopt}; + std::tie(sg_graph, std::ignore, d_sg_cugraph_results, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, std::optional>{std::nullopt}, - std::optional>{std::nullopt}, + // FIXME: Update 'create_graph_from_edgelist' to support int32_t and int64_t values + std::make_optional(d_mg_cugraph_results.view()), std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); if (handle_->get_comms().get_rank() == int{0}) { - // 4-2. run SG TriangleCount - - auto sg_graph_view = sg_graph.view(); - - ASSERT_EQ(mg_graph_view.number_of_vertices(), sg_graph_view.number_of_vertices()); - - rmm::device_uvector d_sg_triangle_counts(d_mg_aggregate_vertices - ? (*d_mg_aggregate_vertices).size() - : sg_graph_view.number_of_vertices(), - handle_->get_stream()); - - cugraph::triangle_count( - *handle_, - sg_graph_view, - d_mg_aggregate_vertices - ? std::make_optional>( - (*d_mg_aggregate_vertices).begin(), (*d_mg_aggregate_vertices).end()) - : std::nullopt, - raft::device_span(d_sg_triangle_counts.begin(), d_sg_triangle_counts.end()), - false); - - // 4-3. compare - - auto h_mg_aggregate_triangle_counts = - cugraph::test::to_host(*handle_, d_mg_aggregate_triangle_counts); - auto h_sg_triangle_counts = cugraph::test::to_host(*handle_, d_sg_triangle_counts); - - ASSERT_TRUE(std::equal(h_mg_aggregate_triangle_counts.begin(), - h_mg_aggregate_triangle_counts.end(), - h_sg_triangle_counts.begin())); + // 3-2. 
Convert the MG triangle counts stored as 'edge_property_t' to device vector + + auto [edgelist_srcs, + edgelist_dsts, + d_edgelist_weights, + d_edge_triangle_counts, + d_edgelist_type] = + cugraph::decompress_to_edgelist( + *handle_, + sg_graph.view(), + std::optional>{std::nullopt}, + // FIXME: Update 'decompress_edgelist' to support int32_t and int64_t values + std::make_optional((*d_sg_cugraph_results).view()), + std::optional>{std::nullopt}, + std::optional>{ + std::nullopt}); // FIXME: No longer needed + + // 3-3. Run SG EdgeTriangleCount + + auto ref_d_sg_cugraph_results = + cugraph::edge_triangle_count(*handle_, sg_graph.view()); + auto [ref_edgelist_srcs, + ref_edgelist_dsts, + ref_d_edgelist_weights, + ref_d_edge_triangle_counts] = + cugraph::decompress_to_edgelist( + *handle_, + sg_graph.view(), + std::optional>{std::nullopt}, + std::make_optional(ref_d_sg_cugraph_results.view()), + std::optional>{ + std::nullopt}); // FIXME: No longer needed + + // 3-4. Compare + + auto h_mg_edge_triangle_counts = cugraph::test::to_host(*handle_, *d_edge_triangle_counts); + auto h_sg_edge_triangle_counts = + cugraph::test::to_host(*handle_, *ref_d_edge_triangle_counts); + + ASSERT_TRUE(std::equal(h_mg_edge_triangle_counts.begin(), + h_mg_edge_triangle_counts.end(), + h_sg_edge_triangle_counts.begin())); } } + #endif } private: @@ -228,58 +200,59 @@ class Tests_MGTriangleCount }; template -std::unique_ptr Tests_MGTriangleCount::handle_ = nullptr; +std::unique_ptr Tests_MGEdgeTriangleCount::handle_ = nullptr; -using Tests_MGTriangleCount_File = Tests_MGTriangleCount; -using Tests_MGTriangleCount_Rmat = Tests_MGTriangleCount; +using Tests_MGEdgeTriangleCount_File = Tests_MGEdgeTriangleCount; +//using Tests_MGEdgeTriangleCount_Rmat = Tests_MGEdgeTriangleCount; -TEST_P(Tests_MGTriangleCount_File, CheckInt32Int32) +TEST_P(Tests_MGEdgeTriangleCount_File, CheckInt32Int32) { auto param = GetParam(); run_current_test(std::get<0>(param), std::get<1>(param)); } - 
-TEST_P(Tests_MGTriangleCount_Rmat, CheckInt32Int32) +#if 0 +TEST_P(Tests_MGEdgeTriangleCount_Rmat, CheckInt32Int32) { auto param = GetParam(); run_current_test( std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } -TEST_P(Tests_MGTriangleCount_Rmat, CheckInt32Int64) +TEST_P(Tests_MGEdgeTriangleCount_Rmat, CheckInt32Int64) { auto param = GetParam(); run_current_test( std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } -TEST_P(Tests_MGTriangleCount_Rmat, CheckInt64Int64) +TEST_P(Tests_MGEdgeTriangleCount_Rmat, CheckInt64Int64) { auto param = GetParam(); run_current_test( std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } +#endif INSTANTIATE_TEST_SUITE_P( file_tests, - Tests_MGTriangleCount_File, + Tests_MGEdgeTriangleCount_File, ::testing::Combine( // enable correctness checks - ::testing::Values(TriangleCount_Usecase{0.1, false}, - TriangleCount_Usecase{0.1, true}, - TriangleCount_Usecase{1.0, false}, - TriangleCount_Usecase{1.0, true}), - ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), - cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); - -INSTANTIATE_TEST_SUITE_P(rmat_small_tests, - Tests_MGTriangleCount_Rmat, - ::testing::Combine(::testing::Values(TriangleCount_Usecase{0.1, false}, - TriangleCount_Usecase{0.1, true}, - TriangleCount_Usecase{1.0, false}, - TriangleCount_Usecase{1.0, true}), - ::testing::Values(cugraph::test::Rmat_Usecase( - 10, 16, 0.57, 0.19, 0.19, 0, true, false)))); + ::testing::Values(EdgeTriangleCount_Usecase{false, false} + //EdgeTriangleCount_Usecase{true, true} + ), + ::testing::Values(cugraph::test::File_Usecase("/raid/jnke/optimize_ktruss/datasets/test_datasets.mtx") + //cugraph::test::File_Usecase("test/datasets/dolphins.mtx") + ))); + +#if 0 +INSTANTIATE_TEST_SUITE_P( + rmat_small_tests, + Tests_MGEdgeTriangleCount_Rmat, + ::testing::Combine( + 
::testing::Values(EdgeTriangleCount_Usecase{false, true}, + EdgeTriangleCount_Usecase{true, true}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); INSTANTIATE_TEST_SUITE_P( rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with @@ -287,12 +260,11 @@ INSTANTIATE_TEST_SUITE_P( vertex & edge type combination) by command line arguments and do not include more than one Rmat_Usecase that differ only in scale or edge factor (to avoid running same benchmarks more than once) */ - Tests_MGTriangleCount_Rmat, + Tests_MGEdgeTriangleCount_Rmat, ::testing::Combine( - ::testing::Values(TriangleCount_Usecase{0.1, false, false}, - TriangleCount_Usecase{0.1, true, false}, - TriangleCount_Usecase{1.0, false, false}, - TriangleCount_Usecase{1.0, true, false}), + ::testing::Values(EdgeTriangleCount_Usecase{false, false}, + EdgeTriangleCount_Usecase{true, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, true, false)))); +#endif CUGRAPH_MG_TEST_PROGRAM_MAIN() From 1f3f0dcd20b340fb7fb3f48c88dd651b80ded86a Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 29 May 2024 07:38:31 -0700 Subject: [PATCH 40/93] add mg ktruss --- cpp/CMakeLists.txt | 2 +- cpp/src/community/k_truss_impl.cuh | 1664 +++++++++++------------ cpp/tests/CMakeLists.txt | 4 +- cpp/tests/community/mg_k_truss_test.cpp | 43 +- 4 files changed, 806 insertions(+), 907 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 0189ae007bf..6cdb893bfeb 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -210,7 +210,7 @@ set(CUGRAPH_SOURCES src/community/egonet_sg.cu src/community/egonet_mg.cu src/community/k_truss_sg.cu - #src/community/k_truss_mg.cu + src/community/k_truss_mg.cu src/sampling/random_walks.cu src/sampling/random_walks_sg.cu src/sampling/detail/prepare_next_frontier_sg.cu diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 
b9aa857bb85..838a3078836 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -27,6 +27,7 @@ #include #include #include +#include #include @@ -42,13 +43,51 @@ namespace cugraph { +template +edge_t remove_overcompensating_edges(raft::handle_t const& handle, + size_t buffer_size, + EdgeIterator potential_closing_or_incoming_edges, + EdgeIterator incoming_or_potential_closing_edges, + raft::device_span invalid_edgelist_srcs, + raft::device_span invalid_edgelist_dsts) +{ + // To avoid over-compensating, check whether the 'potential_closing_edges' + // are within the invalid edges. If yes, the was already unrolled + auto edges_not_overcomp = thrust::remove_if( + handle.get_thrust_policy(), + thrust::make_zip_iterator(potential_closing_or_incoming_edges, + incoming_or_potential_closing_edges), + thrust::make_zip_iterator(potential_closing_or_incoming_edges + buffer_size, + incoming_or_potential_closing_edges + buffer_size), + [num_invalid_edges = invalid_edgelist_dsts.size(), + invalid_first = + thrust::make_zip_iterator(invalid_edgelist_srcs.begin(), invalid_edgelist_dsts.begin()), + invalid_last = thrust::make_zip_iterator(invalid_edgelist_srcs.end(), + invalid_edgelist_dsts.end())] __device__(auto e) { + auto potential_edge = thrust::get<0>(e); + auto potential_or_incoming_edge = thrust::make_tuple(thrust::get<0>(potential_edge), thrust::get<1>(potential_edge)); + if constexpr (is_p_q_edge) { + potential_or_incoming_edge = thrust::make_tuple(thrust::get<1>(potential_edge), thrust::get<0>(potential_edge)); + }; + + auto itr = thrust::lower_bound( + thrust::seq, invalid_first, invalid_last, potential_or_incoming_edge); + return (itr != invalid_last && *itr == potential_or_incoming_edge); + }); + + auto dist = thrust::distance(thrust::make_zip_iterator(potential_closing_or_incoming_edges, + incoming_or_potential_closing_edges), + edges_not_overcomp); + + return dist; +} + template struct extract_weak_edges { edge_t k{}; __device__ 
thrust::optional> operator()( vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) const { - //printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); return count < k - 2 ? thrust::optional>{thrust::make_tuple(src, dst, count)} : thrust::nullopt; @@ -61,11 +100,11 @@ struct extract_edges { auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) const { - //printf("\nchecking the count - src = %d, dst = %d, count = %d\n", src, dst, count); return thrust::make_tuple(src, dst, count); } }; + template struct extract_edges_to_q_r { @@ -74,12 +113,22 @@ struct extract_edges_to_q_r { auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const { - //printf("\nchecking the count - src = %d, dst = %d, count = %d\n", src, dst, count); - auto itr = thrust::find( + // FIXME: Replace by lowerbound after validation + auto itr_src = thrust::find( thrust::seq, vertex_q_r.begin(), vertex_q_r.end(), src); - return (itr != vertex_q_r.end() && *itr == src) - ? 
thrust::optional>{thrust::make_tuple(src, dst)} - : thrust::nullopt; + + // FIXME: Replace by lowerbound after validation + auto itr_dst = thrust::find( + thrust::seq, vertex_q_r.begin(), vertex_q_r.end(), dst); + + + if (itr_src != vertex_q_r.end() && *itr_src == src) { + return thrust::optional>{thrust::make_tuple(src, dst)}; + } else if (itr_dst != vertex_q_r.end() && *itr_dst == dst) { + return thrust::optional>{thrust::make_tuple(src, dst)}; + } else { + return thrust::nullopt; + } } }; @@ -164,7 +213,6 @@ struct extract_q_idx { thrust::nullopt_t, thrust::nullopt_t) const { - printf("\n dst = %d, tag = %d\n", dst, thrust::get<1>(tagged_src)); return thrust::make_optional(thrust::make_tuple(dst, thrust::get<1>(tagged_src))); } }; @@ -180,10 +228,8 @@ struct extract_q_idx_closing { thrust::nullopt_t, thrust::nullopt_t) const { - //printf("\n dst = %d, tag = %d\n", dst, thrust::get<1>(tagged_src)); edge_t idx = thrust::get<1>(tagged_src); if (dst == weak_edgelist_dsts[idx]){ - //printf("\nsrc = %d --- dst = %d, tag = %d\n", thrust::get<0>(tagged_src), dst, thrust::get<1>(tagged_src)); } return dst == weak_edgelist_dsts[idx] ? 
thrust::make_optional(thrust::make_tuple(thrust::get<0>(tagged_src), idx)) @@ -237,27 +283,56 @@ struct generate_p_q_q_r { } }; -// FIXME: remove 'EdgeIterator' template template -void unroll_p_q_p_r_edges(raft::handle_t const& handle, +void update_count(raft::handle_t const& handle, graph_view_t & cur_graph_view, - //thrust::optional(q_r_graph) edge_property_t, edge_t> & e_property_triangle_count, - raft::device_span vertex_pair_buffer_first, - raft::device_span vertex_pair_buffer_last - //EdgeIterator vertex_pair_buffer, - //vertex_t buffer_size + edge_property_t, bool> const & tmp_edge_mask, + raft::device_span vertex_pair_buffer_src, + raft::device_span vertex_pair_buffer_dst ) { + // FIXME: Only for debugging so remove after + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); - cugraph::edge_bucket_t edges_to_decrement_count(handle); - edges_to_decrement_count.insert(vertex_pair_buffer_first.begin(), - vertex_pair_buffer_first.end(), - vertex_pair_buffer_last.begin()); + auto vertex_pair_buffer_begin = thrust::make_zip_iterator(vertex_pair_buffer_src.begin(), vertex_pair_buffer_dst.begin()); + + thrust::sort(handle.get_thrust_policy(), + vertex_pair_buffer_begin, + vertex_pair_buffer_begin + vertex_pair_buffer_src.size()); + + auto unique_pair_count = thrust::unique_count(handle.get_thrust_policy(), + vertex_pair_buffer_begin, + vertex_pair_buffer_begin + vertex_pair_buffer_src.size()); + + rmm::device_uvector decrease_count(unique_pair_count, handle.get_stream()); + + rmm::device_uvector decrease_count_tmp(vertex_pair_buffer_src.size(), + handle.get_stream()); + + thrust::fill(handle.get_thrust_policy(), + decrease_count_tmp.begin(), + decrease_count_tmp.end(), + size_t{1}); + + auto vertex_pair_buffer_unique = allocate_dataframe_buffer>( + unique_pair_count, handle.get_stream()); + + thrust::reduce_by_key(handle.get_thrust_policy(), + vertex_pair_buffer_begin, + vertex_pair_buffer_begin + vertex_pair_buffer_src.size(), + 
decrease_count_tmp.begin(), + get_dataframe_buffer_begin(vertex_pair_buffer_unique), + decrease_count.begin(), + thrust::equal_to>{}); + + cugraph::edge_bucket_t edges_to_decrement_count(handle); + edges_to_decrement_count.insert(std::get<0>(vertex_pair_buffer_unique).begin(), + std::get<0>(vertex_pair_buffer_unique).end(), + std::get<1>(vertex_pair_buffer_unique).begin()); - printf("\nupdating count\n"); - auto vertex_pair_buffer_begin = thrust::make_zip_iterator(vertex_pair_buffer_first.begin(), vertex_pair_buffer_last.begin()); cugraph::transform_e( handle, cur_graph_view, @@ -266,64 +341,76 @@ void unroll_p_q_p_r_edges(raft::handle_t const& handle, cugraph::edge_dst_dummy_property_t{}.view(), e_property_triangle_count.view(), [ - vertex_pair_buffer_begin = vertex_pair_buffer_begin, - vertex_pair_buffer_end = vertex_pair_buffer_begin + vertex_pair_buffer_first.size() + vertex_pair_buffer_begin = get_dataframe_buffer_begin(vertex_pair_buffer_unique), + vertex_pair_buffer_end = get_dataframe_buffer_end(vertex_pair_buffer_unique), + decrease_count = decrease_count.data() ] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { - //printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); + auto e = thrust::make_tuple(src, dst); - auto itr = thrust::lower_bound( + auto itr_pair = thrust::lower_bound( thrust::seq, vertex_pair_buffer_begin, vertex_pair_buffer_end, e); - if ((itr != vertex_pair_buffer_end) && (*itr == e)) { - //printf("\nupdating the count - src = %d, dst = %d, count = %d\n", src, dst, count); - return count - 1; + if ((itr_pair != vertex_pair_buffer_end) && (*itr_pair == e)) { + auto idx_pair = thrust::distance(vertex_pair_buffer_begin, itr_pair); + return count - decrease_count[idx_pair]; } return count; }, e_property_triangle_count.mutable_view(), - false); - - + true); }; - - - -template -vertex_t find_unroll_p_q_q_r_edges(raft::handle_t const& handle, +template +void find_unroll_p_q_q_r_edges(raft::handle_t 
const& handle, graph_view_t & cur_graph_view, - //thrust::optional(q_r_graph) + optional_graph_view_t const & graph_q_r, edge_property_t, edge_t> & e_property_triangle_count, - raft::device_span weak_edgelist_srcs, - raft::device_span weak_edgelist_dsts, - bool do_expensive_check) { - + edge_property_t, bool> & tmp_edge_mask, + raft::device_span weak_edgelist_srcs, + raft::device_span weak_edgelist_dsts, + std::optional> renumber_map, + bool do_expensive_check + ) { + size_t prev_chunk_size = 0; size_t chunk_num_invalid_edges = weak_edgelist_srcs.size(); size_t edges_to_intersect_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); + auto num_chunks = raft::div_rounding_up_safe(weak_edgelist_srcs.size(), edges_to_intersect_per_iteration); auto invalid_edge_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); - for (size_t i = 0; i < num_chunks; ++i) { auto chunk_size = std::min(edges_to_intersect_per_iteration, chunk_num_invalid_edges); - auto [intersection_offsets, intersection_indices] = - detail::nbr_intersection(handle, - cur_graph_view, - cugraph::edge_dummy_property_t{}.view(), - invalid_edge_first + prev_chunk_size, - invalid_edge_first + prev_chunk_size + chunk_size, - std::array{true, true}, - do_expensive_check); - - raft::print_device_vector("intersection_offsets", intersection_offsets.data(), intersection_offsets.size(), std::cout); - raft::print_device_vector("intersection_indices", intersection_indices.data(), intersection_indices.size(), std::cout); + rmm::device_uvector intersection_offsets(0, handle.get_stream()); + rmm::device_uvector intersection_indices(0, handle.get_stream()); + + if constexpr (is_p_q_edge) { + std::tie(intersection_offsets, intersection_indices) = + detail::nbr_intersection(handle, + cur_graph_view, + cugraph::edge_dummy_property_t{}.view(), + invalid_edge_first + prev_chunk_size, + invalid_edge_first + prev_chunk_size + chunk_size, + 
std::array{true, true}, + //do_expensive_check : FIXME + true); + } else { + std::tie(intersection_offsets, intersection_indices) = + detail::nbr_intersection(handle, + (*graph_q_r).view(), + cugraph::edge_dummy_property_t{}.view(), + invalid_edge_first + prev_chunk_size, + invalid_edge_first + prev_chunk_size + chunk_size, + std::array{true, true}, + //do_expensive_check : FIXME + true); + } // Generate (p, q) edges // FIXME: Should this array be reduced? an edge can have an intersection size > 1 @@ -345,74 +432,6 @@ vertex_t find_unroll_p_q_q_r_edges(raft::handle_t const& handle, weak_edgelist_dsts }); - raft::print_device_vector("vertex_pair_buffer_p_q", std::get<0>(vertex_pair_buffer_p_q).data(), std::get<0>(vertex_pair_buffer_p_q).size(), std::cout); - raft::print_device_vector("vertex_pair_buffer_p_q", std::get<1>(vertex_pair_buffer_p_q).data(), std::get<1>(vertex_pair_buffer_p_q).size(), std::cout); - - // unroll (p, q) edges - // FIXME: remove 'EdgeIterator' template - unroll_p_q_p_r_edges( - handle, - cur_graph_view, - e_property_triangle_count, - raft::device_span(std::get<0>(vertex_pair_buffer_p_q).data(), std::get<0>(vertex_pair_buffer_p_q).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_p_q).data(), std::get<1>(vertex_pair_buffer_p_q).size()) - ); - - - /* - // Unroll (p, q) edges - cugraph::edge_bucket_t invalid_edges_bucket(handle); - invalid_edges_bucket.insert(weak_edgelist_srcs.begin(), - weak_edgelist_srcs.end(), - weak_edgelist_dsts.begin()); - - printf("\nupdating count\n"); - cugraph::transform_e( - handle, - cur_graph_view, - invalid_edges_bucket, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - e_property_triangle_count.view(), - [ - vertex_pair_buffer_p_q_begin = get_dataframe_buffer_begin(vertex_pair_buffer_p_q), - vertex_pair_buffer_p_q_end = get_dataframe_buffer_end(vertex_pair_buffer_p_q) - ] - __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { 
- //printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); - auto e = thrust::make_tuple(src, dst); - auto itr = thrust::lower_bound( - thrust::seq, vertex_pair_buffer_p_q_begin, vertex_pair_buffer_p_q_end, e); - - if ((itr != vertex_pair_buffer_p_q_end) && (*itr == e)) { - //printf("\nupdating the count - src = %d, dst = %d, count = %d\n", src, dst, count); - return count - 1; - } - - return count; - - }, - e_property_triangle_count.mutable_view(), - false); - - - - //#if 0 - auto [srcs, dsts, count] = extract_transform_e(handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), - e_property_triangle_count.view(), - extract_edges{}); - - printf("\nafter unrolling (p, q) edges\n"); - raft::print_device_vector("srcs", srcs.data(), srcs.size(), std::cout); - raft::print_device_vector("dsts", dsts.data(), dsts.size(), std::cout); - raft::print_device_vector("count", count.data(), count.size(), std::cout); - //#endif - */ - auto vertex_pair_buffer_p_r_edge_p_q = allocate_dataframe_buffer>(intersection_indices.size(), handle.get_stream()); @@ -428,47 +447,6 @@ vertex_t find_unroll_p_q_q_r_edges(raft::handle_t const& handle, intersection_indices.size()), weak_edgelist_srcs, weak_edgelist_dsts}); - - unroll_p_q_p_r_edges( - handle, - cur_graph_view, - e_property_triangle_count, - raft::device_span(std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_p_r_edge_p_q).size()) - ); - - /* - //invalid_edges - cugraph::transform_e( - handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - e_property_triangle_count.view(), - [ - vertex_pair_buffer_p_r_edge_p_q_begin = 
get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), - vertex_pair_buffer_p_r_edge_p_q_end = get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q) - ] - __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { - //printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); - auto e = thrust::make_tuple(src, dst); - auto itr = thrust::lower_bound( - thrust::seq, vertex_pair_buffer_p_r_edge_p_q_begin, vertex_pair_buffer_p_r_edge_p_q_end, e); - - if ((itr != vertex_pair_buffer_p_r_edge_p_q_end) && (*itr == e)) { - //printf("\nupdating the count - src = %d, dst = %d, count = %d\n", src, dst, count); - return count - 1; - } - - return count; - - }, - e_property_triangle_count.mutable_view(), - false); - printf("\nafter unrolling (p, q) edge (p, r) edges\n"); - */ - - auto vertex_pair_buffer_q_r_edge_p_q = allocate_dataframe_buffer>(intersection_indices.size(), @@ -485,110 +463,279 @@ vertex_t find_unroll_p_q_q_r_edges(raft::handle_t const& handle, intersection_indices.size()), weak_edgelist_srcs, weak_edgelist_dsts}); - - unroll_p_q_p_r_edges( - handle, - cur_graph_view, - e_property_triangle_count, - raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_q_r_edge_p_q).size()) - ); - - /* - // invalid_edges - cugraph::transform_e( - handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - e_property_triangle_count.view(), - [ - vertex_pair_buffer_q_r_edge_p_q_begin = get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), - vertex_pair_buffer_q_r_edge_p_q_end = get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q) - ] - __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { - //printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); - auto e = 
thrust::make_tuple(src, dst); - auto itr = thrust::lower_bound( - thrust::seq, vertex_pair_buffer_q_r_edge_p_q_begin, vertex_pair_buffer_q_r_edge_p_q_end, e); + + if constexpr (!is_p_q_edge) { + if constexpr (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); // FIXME: for debugging + // Get global weak_edgelist + auto global_weak_edgelist_srcs = cugraph::detail::device_allgatherv( + handle, + comm, + raft::device_span(weak_edgelist_srcs)); - if ((itr != vertex_pair_buffer_q_r_edge_p_q_end) && (*itr == e)) { - //printf("\nupdating the count - src = %d, dst = %d, count = %d\n", src, dst, count); - return count - 1; - } + auto global_weak_edgelist_dsts = cugraph::detail::device_allgatherv( + handle, + comm, + raft::device_span(weak_edgelist_dsts)); - return count; - - }, - e_property_triangle_count.mutable_view(), - false); + weak_edgelist_srcs = raft::device_span(global_weak_edgelist_srcs.data(), global_weak_edgelist_srcs.size()); + weak_edgelist_dsts = raft::device_span(global_weak_edgelist_dsts.data(), global_weak_edgelist_dsts.size()); + + // Sort the weak edges if they are not already + auto invalid_edgelist = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + thrust::sort(handle.get_thrust_policy(), + invalid_edge_first, + invalid_edge_first + weak_edgelist_srcs.size()); + + } + } + if constexpr (is_p_q_edge) { + auto num_edges_not_overcomp = + remove_overcompensating_edges( + handle, + intersection_indices.size(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()) + ); + + resize_dataframe_buffer(vertex_pair_buffer_p_r_edge_p_q, num_edges_not_overcomp, handle.get_stream()); + resize_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_q, num_edges_not_overcomp, 
handle.get_stream()); + + // resize initial (q, r) edges + resize_dataframe_buffer(vertex_pair_buffer_p_q, num_edges_not_overcomp, handle.get_stream()); + // Reconstruct (q, r) edges that didn't already have their count updated + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q), + get_dataframe_buffer_end(vertex_pair_buffer_p_q), + [ + vertex_pair_buffer_p_r_edge_p_q = get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), + vertex_pair_buffer_q_r_edge_p_q = get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q) + ] __device__(auto i) { + return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_r_edge_p_q[i]), thrust::get<0>(vertex_pair_buffer_q_r_edge_p_q[i])); + }); + } + // Shuffle edges + if constexpr (multi_gpu) { + if constexpr (is_q_r_edge) { - #if 0 - auto [srcs_, dsts_, count_] = extract_transform_e(handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - e_property_triangle_count.view(), - extract_edges{}); + auto vertex_partition_range_lasts = std::make_optional>((*graph_q_r).view().vertex_partition_range_lasts()); + + unrenumber_int_vertices(handle, + std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), + std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size(), + (*renumber_map).data(), + *vertex_partition_range_lasts, + true); + + unrenumber_int_vertices(handle, + std::get<1>(vertex_pair_buffer_p_r_edge_p_q).data(), + std::get<1>(vertex_pair_buffer_p_r_edge_p_q).size(), + (*renumber_map).data(), + *vertex_partition_range_lasts, + true); + + unrenumber_int_vertices(handle, + std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), + std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size(), + (*renumber_map).data(), + *vertex_partition_range_lasts, + true); + + unrenumber_int_vertices(handle, + std::get<1>(vertex_pair_buffer_q_r_edge_p_q).data(), + std::get<1>(vertex_pair_buffer_q_r_edge_p_q).size(), + (*renumber_map).data(), + 
*vertex_partition_range_lasts, + true); + + unrenumber_int_vertices(handle, + std::get<0>(vertex_pair_buffer_p_q).data(), + std::get<0>(vertex_pair_buffer_p_q).size(), + (*renumber_map).data(), + *vertex_partition_range_lasts, + true); + + unrenumber_int_vertices(handle, + std::get<1>(vertex_pair_buffer_p_q).data(), + std::get<1>(vertex_pair_buffer_p_q).size(), + (*renumber_map).data(), + *vertex_partition_range_lasts, + true); + + } + + rmm::device_uvector pair_p_q_srcs(0, handle.get_stream()); + rmm::device_uvector pair_p_q_dsts(0, handle.get_stream()); + rmm::device_uvector pair_p_r_srcs(0, handle.get_stream()); + rmm::device_uvector pair_p_r_dsts(0, handle.get_stream()); + rmm::device_uvector pair_q_r_srcs(0, handle.get_stream()); + rmm::device_uvector pair_q_r_dsts(0, handle.get_stream()); + + std::tie(pair_p_q_srcs, pair_p_q_dsts, std::ignore, std::ignore, std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<0>(vertex_pair_buffer_p_q)), + std::move(std::get<1>(vertex_pair_buffer_p_q)), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); - printf("\nafter unrolling (p, q) edges from (p, r)\n"); - raft::print_device_vector("srcs", srcs_.data(), srcs_.size(), std::cout); - raft::print_device_vector("dsts", dsts_.data(), dsts_.size(), std::cout); - raft::print_device_vector("count", count_.data(), count_.size(), std::cout); - #endif - */ - if constexpr (is_p_q_edge) { - // FIXME: This might not work when chunking because the invalid (p. q) edges should be - // temporarily masked at the end when completly unrolling (p, q) edges. 
Failing to do - // this might cause some invalid edges (p, q) to not have their count decremented - //cugraph::edge_property_t, bool> tmp_edge_mask(handle, cur_graph_view); - //cugraph::fill_edge_property(handle, cur_graph_view, true, tmp_edge_mask); - /* - cur_graph_view.attach_edge_mask(tmp_edge_mask.view()); - cugraph::edge_bucket_t edges_to_tmp_mask(handle); - edges_to_tmp_mask.clear(); // Continuously mask (p, q) edges as they are processed in chunks - edges_to_tmp_mask.insert(std::get<0>(vertex_pair_buffer_p_q).begin(), - std::get<0>(vertex_pair_buffer_p_q).end(), - std::get<1>(vertex_pair_buffer_p_q).begin()); - cugraph::transform_e( + std::tie(pair_p_r_srcs, pair_p_r_dsts, std::ignore, std::ignore, std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<0>(vertex_pair_buffer_p_r_edge_p_q)), + std::move(std::get<1>(vertex_pair_buffer_p_r_edge_p_q)), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + + std::tie(pair_q_r_srcs, pair_q_r_dsts, std::ignore, std::ignore, std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<0>(vertex_pair_buffer_q_r_edge_p_q)), + std::move(std::get<1>(vertex_pair_buffer_q_r_edge_p_q)), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + } else { + + std::tie(pair_p_r_srcs, pair_p_r_dsts, std::ignore, std::ignore, std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( handle, - cur_graph_view, - edges_to_tmp_mask, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto wgt) { - return false; - }, - tmp_edge_mask.mutable_view(), - false); - - cur_graph_view.attach_edge_mask(tmp_edge_mask.view()); 
- */ + std::move(std::get<1>(vertex_pair_buffer_p_r_edge_p_q)), + std::move(std::get<0>(vertex_pair_buffer_p_r_edge_p_q)), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + + std::tie(pair_q_r_srcs, pair_q_r_dsts, std::ignore, std::ignore, std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<1>(vertex_pair_buffer_q_r_edge_p_q)), + std::move(std::get<0>(vertex_pair_buffer_q_r_edge_p_q)), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + } - + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + tmp_edge_mask, + raft::device_span(pair_p_q_srcs.data(), pair_p_q_srcs.size()), + raft::device_span(pair_p_q_dsts.data(), pair_p_q_dsts.size())); - + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + tmp_edge_mask, + raft::device_span(pair_p_r_srcs.data(), pair_p_r_srcs.size()), + raft::device_span(pair_p_r_dsts.data(), pair_p_r_dsts.size()) + ); + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + tmp_edge_mask, + raft::device_span(pair_q_r_srcs.data(), pair_q_r_srcs.size()), + raft::device_span(pair_q_r_dsts.data(), pair_q_r_dsts.size()) + ); + + } else { + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + tmp_edge_mask, + raft::device_span(std::get<0>(vertex_pair_buffer_p_q).data(), std::get<0>(vertex_pair_buffer_p_q).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_p_q).data(), std::get<1>(vertex_pair_buffer_p_q).size()) + ); + + if constexpr (is_p_q_edge) { + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + tmp_edge_mask, + raft::device_span(std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_p_r_edge_p_q).size()) + ); + } 
else { + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + tmp_edge_mask, + raft::device_span(std::get<1>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_p_r_edge_p_q).size()), + raft::device_span(std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size()) + ); + } + + if constexpr (is_p_q_edge) { + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + tmp_edge_mask, + raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_q_r_edge_p_q).size()) + ); + } else { + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + tmp_edge_mask, + raft::device_span(std::get<1>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_q_r_edge_p_q).size()), + raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size()) + ); + } + + } + prev_chunk_size += chunk_size; chunk_num_invalid_edges -= chunk_size; } - return 0; } } // namespace - - - - - - template std::tuple, rmm::device_uvector, @@ -652,7 +799,6 @@ k_truss(raft::handle_t const& handle, } // 3. Find (k-1)-core and exclude edges that do not belong to (k-1)-core - #if 0 { auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; @@ -698,7 +844,7 @@ k_truss(raft::handle_t const& handle, std::nullopt, std::nullopt, cugraph::graph_properties_t{true, graph_view.is_multigraph()}, - false); + true); modified_graph_view = (*modified_graph).view(); @@ -711,7 +857,6 @@ k_truss(raft::handle_t const& handle, } renumber_map = std::move(tmp_renumber_map); } - #endif // 4. Keep only the edges from a low-degree vertex to a high-degree vertex. 
@@ -776,7 +921,7 @@ k_truss(raft::handle_t const& handle, std::nullopt, std::nullopt, cugraph::graph_properties_t{false /* now asymmetric */, cur_graph_view.is_multigraph()}, - false); + true); modified_graph_view = (*modified_graph).view(); if (renumber_map) { // collapse renumber_map @@ -794,712 +939,443 @@ k_truss(raft::handle_t const& handle, { auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; - /* - Design - 1) create a new graph with with the edge property from which we will iterate - a) Directly update the property of the edges - a) How do you traverse the graph? - */ - auto e_property_triangle_count = edge_triangle_count(handle, cur_graph_view); + auto e_property_triangle_count = edge_triangle_count(handle, cur_graph_view); + + auto [or_srcs, or_dsts, or_count] = extract_transform_e(handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + e_property_triangle_count.view(), + extract_edges{}); - cugraph::edge_property_t tmp_edge_mask(handle, cur_graph_view); + cugraph::edge_property_t, bool> tmp_edge_mask(handle, cur_graph_view); cugraph::fill_edge_property(handle, cur_graph_view, true, tmp_edge_mask); - cur_graph_view.attach_edge_mask(tmp_edge_mask.view()); - - // extract the edges that have counts less than k - 2. 
THose edges will be unrolled - std::cout<< "before calling extract transform_e" << std::endl; - auto [weak_edgelist_srcs, weak_edgelist_dsts, triangle_count] = extract_transform_e(handle, - cur_graph_view, - edge_src_dummy_property_t{}.view(), - edge_dst_dummy_property_t{}.view(), - e_property_triangle_count.view(), - extract_weak_edges{k}); - - auto invalid_edge_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); - - raft::print_device_vector("srcs", weak_edgelist_srcs.data(), weak_edgelist_srcs.size(), std::cout); - raft::print_device_vector("dsts", weak_edgelist_dsts.data(), weak_edgelist_dsts.size(), std::cout); - raft::print_device_vector("n_tr", triangle_count.data(), triangle_count.size(), std::cout); - // Call nbr_intersection unroll (p, q) edges - size_t edges_to_intersect_per_iteration = - static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); + cugraph::edge_property_t edge_mask(handle, cur_graph_view); + cugraph::fill_edge_property(handle, cur_graph_view, true, edge_mask); - size_t prev_chunk_size = 0; - size_t chunk_num_invalid_edges = weak_edgelist_srcs.size(); + auto iteration = -1; + while (true) { + // FIXME: Keep it at 1 iteration for debugging + iteration += 1; + if (iteration == 1) { + break; + } + // extract the edges that have counts less than k - 2. Those edges will be unrolled + // FIXME: extracting 'triangle_count' is not required here. + auto [weak_edgelist_srcs, weak_edgelist_dsts, triangle_count] = extract_transform_e(handle, + cur_graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + e_property_triangle_count.view(), + extract_weak_edges{k}); + - auto num_chunks = - raft::div_rounding_up_safe(weak_edgelist_srcs.size(), edges_to_intersect_per_iteration); + // FIXME: Add a flag checking wether the other ranks have completed their task or + // not before exiting. 
+ if (weak_edgelist_srcs.size() == 0) { break; } + auto invalid_edge_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); - edge_property_t modified_triangle_count(handle, cur_graph_view); - - // find intersection edges - /* - find_unroll_p_q_q_r_edges( - handle, - cur_graph_view, - e_property_triangle_count, - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), - do_expensive_check - //invalid_edge_first, - //weak_edgelist_srcs.size() - ); - */ + thrust::sort(handle.get_thrust_policy(), + invalid_edge_first, + invalid_edge_first + weak_edgelist_srcs.size()); + - - - /* - for (size_t i = 0; i < num_chunks; ++i) { - auto chunk_size = std::min(edges_to_intersect_per_iteration, chunk_num_invalid_edges); - auto [intersection_offsets, intersection_indices] = - detail::nbr_intersection(handle, - cur_graph_view, - cugraph::edge_dummy_property_t{}.view(), - invalid_edge_first + prev_chunk_size, - invalid_edge_first + prev_chunk_size + chunk_size, - std::array{true, true}, - do_expensive_check); - - raft::print_device_vector("intersection_offsets", intersection_offsets.data(), intersection_offsets.size(), std::cout); - raft::print_device_vector("intersection_indices", intersection_indices.data(), intersection_indices.size(), std::cout); - - // Generate (p, q) edges - // FIXME: Should this array be reduced? 
an edge can have an intersection size > 1 - auto vertex_pair_buffer_p_q = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); + edge_property_t modified_triangle_count(handle, cur_graph_view); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q), - get_dataframe_buffer_end(vertex_pair_buffer_p_q), - generate_p_q{ - prev_chunk_size, - raft::device_span(intersection_offsets.data(), - intersection_offsets.size()), - raft::device_span(intersection_indices.data(), - intersection_indices.size()), - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())}); - - raft::print_device_vector("vertex_pair_buffer_p_q", std::get<0>(vertex_pair_buffer_p_q).data(), std::get<0>(vertex_pair_buffer_p_q).size(), std::cout); - raft::print_device_vector("vertex_pair_buffer_p_q", std::get<1>(vertex_pair_buffer_p_q).data(), std::get<1>(vertex_pair_buffer_p_q).size(), std::cout); - - // Unroll (p, q) edges - cugraph::edge_bucket_t invalid_edges_bucket(handle); - invalid_edges_bucket.insert(weak_edgelist_srcs.begin(), - weak_edgelist_srcs.end(), - weak_edgelist_dsts.begin()); - - printf("\nupdating count\n"); - cugraph::transform_e( + std::optional> dummy_graph{std::nullopt}; + find_unroll_p_q_q_r_edges( handle, cur_graph_view, - invalid_edges_bucket, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - e_property_triangle_count.view(), - [ - vertex_pair_buffer_p_q_begin = get_dataframe_buffer_begin(vertex_pair_buffer_p_q), - vertex_pair_buffer_p_q_end = get_dataframe_buffer_end(vertex_pair_buffer_p_q) - ] - __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { - //printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); - auto e = thrust::make_tuple(src, dst); - auto itr = thrust::lower_bound( - thrust::seq, vertex_pair_buffer_p_q_begin, 
vertex_pair_buffer_p_q_end, e); - - if ((itr != vertex_pair_buffer_p_q_end) && (*itr == e)) { - //printf("\nupdating the count - src = %d, dst = %d, count = %d\n", src, dst, count); - return count - 1; - } - - return count; - - }, - e_property_triangle_count.mutable_view(), - false); - + dummy_graph, + e_property_triangle_count, + tmp_edge_mask, + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), + std::nullopt, + do_expensive_check + ); - #if 0 + auto [srcs, dsts, count] = extract_transform_e(handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), - e_property_triangle_count.view(), - extract_edges{}); - - printf("\nafter unrolling (p, q) edges\n"); - raft::print_device_vector("srcs", srcs.data(), srcs.size(), std::cout); - raft::print_device_vector("dsts", dsts.data(), dsts.size(), std::cout); - raft::print_device_vector("count", count.data(), count.size(), std::cout); - #endif + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + e_property_triangle_count.view(), + extract_edges{}); - auto vertex_pair_buffer_p_r_edge_p_q = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), - get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q), - generate_p_r_or_q_r_from_p_q{ - prev_chunk_size, - raft::device_span(intersection_offsets.data(), - intersection_offsets.size()), - raft::device_span(intersection_indices.data(), - intersection_indices.size()), - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.data())}); + // FIXME: memory footprint 
overhead + rmm::device_uvector vertex_q_r(weak_edgelist_srcs.size() * 2, handle.get_stream()); + + // Iterate over unique vertices that appear as either q or r + // FIXME: Reduce 'weak_edgelist_srcs' and 'weak_edgelist_srcs' before calling 'set_union' + thrust::set_union(handle.get_thrust_policy(), + weak_edgelist_srcs.begin(), + weak_edgelist_srcs.end(), + weak_edgelist_dsts.begin(), + weak_edgelist_dsts.end(), + vertex_q_r.begin()); + + thrust::sort(handle.get_thrust_policy(), vertex_q_r.begin(), vertex_q_r.end()); + + auto invalid_unique_v_end = thrust::unique( + handle.get_thrust_policy(), + vertex_q_r.begin(), + vertex_q_r.end()); - //invalid_edges - cugraph::transform_e( - handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - e_property_triangle_count.view(), - [ - vertex_pair_buffer_p_r_edge_p_q_begin = get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), - vertex_pair_buffer_p_r_edge_p_q_end = get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q) - ] - __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { - //printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); - auto e = thrust::make_tuple(src, dst); - auto itr = thrust::lower_bound( - thrust::seq, vertex_pair_buffer_p_r_edge_p_q_begin, vertex_pair_buffer_p_r_edge_p_q_end, e); - - if ((itr != vertex_pair_buffer_p_r_edge_p_q_end) && (*itr == e)) { - //printf("\nupdating the count - src = %d, dst = %d, count = %d\n", src, dst, count); - return count - 1; - } - - return count; - - }, - e_property_triangle_count.mutable_view(), - false); - + vertex_q_r.resize(thrust::distance(vertex_q_r.begin(), invalid_unique_v_end), handle.get_stream()); - auto vertex_pair_buffer_q_r_edge_p_q = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), - 
get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q), - generate_p_r_or_q_r_from_p_q{ - prev_chunk_size, - raft::device_span(intersection_offsets.data(), - intersection_offsets.size()), - raft::device_span(intersection_indices.data(), - intersection_indices.size()), - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.data())}); + auto invalid_edgelist = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + + auto [srcs_to_q_r, dsts_to_q_r] = extract_transform_e(handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + extract_edges_to_q_r{raft::device_span(vertex_q_r.data(), vertex_q_r.size())}); + + if constexpr (multi_gpu) { + std::tie(dsts_to_q_r, srcs_to_q_r, std::ignore, std::ignore, std::ignore) = + detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, std::move(dsts_to_q_r), std::move(srcs_to_q_r), std::nullopt, std::nullopt, std::nullopt); - // invalid_edges - cugraph::transform_e( - handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - e_property_triangle_count.view(), - [ - vertex_pair_buffer_q_r_edge_p_q_begin = get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), - vertex_pair_buffer_q_r_edge_p_q_end = get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q) - ] - __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { - //printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); - auto e = thrust::make_tuple(src, dst); - auto itr = thrust::lower_bound( - thrust::seq, vertex_pair_buffer_q_r_edge_p_q_begin, vertex_pair_buffer_q_r_edge_p_q_end, e); - - if ((itr != vertex_pair_buffer_q_r_edge_p_q_end) && (*itr == e)) { - //printf("\nupdating the count - src = %d, dst = %d, count = 
%d\n", src, dst, count); - return count - 1; - } - - return count; - - }, - e_property_triangle_count.mutable_view(), - false); - + } + std::optional> graph_q_r{std::nullopt}; + std::optional> renumber_map_q_r{std::nullopt}; + std::tie(*graph_q_r, std::ignore, std::ignore, std::ignore, renumber_map_q_r) = + create_graph_from_edgelist( + handle, + std::nullopt, + std::move(dsts_to_q_r), + std::move(srcs_to_q_r), + std::nullopt, + std::nullopt, + std::nullopt, + cugraph::graph_properties_t{true, graph_view.is_multigraph()}, + true); + + if constexpr (multi_gpu) { + rmm::device_uvector shuffled_weak_edgelist_srcs{0, handle.get_stream()}; + rmm::device_uvector shuffled_weak_edgelist_dsts{0, handle.get_stream()}; + + std::tie(weak_edgelist_srcs, weak_edgelist_dsts, std::ignore, std::ignore, std::ignore) = + detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, std::move(weak_edgelist_srcs), std::move(weak_edgelist_dsts), std::nullopt, std::nullopt, std::nullopt); + + renumber_ext_vertices(handle, + weak_edgelist_srcs.data(), + weak_edgelist_srcs.size(), + (*renumber_map_q_r).data(), + (*graph_q_r).view().local_vertex_partition_range_first(), + (*graph_q_r).view().local_vertex_partition_range_last(), + true); + + renumber_ext_vertices(handle, + weak_edgelist_dsts.data(), + weak_edgelist_dsts.size(), + (*renumber_map_q_r).data(), + (*graph_q_r).view().local_vertex_partition_range_first(), + (*graph_q_r).view().local_vertex_partition_range_last(), + true); + } + invalid_edge_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + thrust::sort(handle.get_thrust_policy(), + invalid_edge_first, + invalid_edge_first + weak_edgelist_srcs.size()); - - auto [srcs_, dsts_, count_] = extract_transform_e(handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - e_property_triangle_count.view(), - extract_edges{}); - - printf("\nafter unrolling (p, 
q) edges from (p, r)\n"); - raft::print_device_vector("srcs", srcs_.data(), srcs_.size(), std::cout); - raft::print_device_vector("dsts", dsts_.data(), dsts_.size(), std::cout); - raft::print_device_vector("count", count_.data(), count_.size(), std::cout); + find_unroll_p_q_q_r_edges( + handle, + cur_graph_view, + graph_q_r, + e_property_triangle_count, + tmp_edge_mask, + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), + std::move(renumber_map_q_r), + do_expensive_check + ); - - prev_chunk_size += chunk_size; - chunk_num_invalid_edges -= chunk_size; - } - */ - - - // case 2: unroll (q, r) - // temporarily mask (p, q) edges - /* - cugraph::edge_bucket_t edges_to_tmp_mask(handle); - edges_to_tmp_mask.insert(weak_edgelist_srcs.begin(), - weak_edgelist_srcs.end(), - weak_edgelist_dsts.begin()); + // FIXME: unrenumber weak edgelist before proceeding + #if 0 + auto [srcs__, dsts__] = extract_transform_e(handle, + (*graph_q_r).view(), + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), + cugraph::edge_dummy_property_t{}.view(), + extract_edges_to_q_r{raft::device_span(vertex_q_r.data(), vertex_q_r.size())}); + + // Unrolling p, r edges + // create pair invalid_src, invalid_edge_idx + // create a dataframe buffer of size invalid_edge_size + // FIXME: No need to create a dataframe buffer. We can just zip weak_edgelist_srcs + // with a vector counting from 0 .. 
+ auto vertex_pair_buffer_p_tag = + allocate_dataframe_buffer>(weak_edgelist_srcs.size(), + handle.get_stream()); + + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_tag), + get_dataframe_buffer_end(vertex_pair_buffer_p_tag), + [ + p = weak_edgelist_srcs.begin() + ] __device__(auto idx) { + return thrust::make_tuple(p[idx], idx); + }); + + vertex_frontier_t vertex_frontier(handle, 1); + vertex_frontier.bucket(0).insert( + thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).begin(), std::get<1>(vertex_pair_buffer_p_tag).begin()), + thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).end(), std::get<1>(vertex_pair_buffer_p_tag).end())); - cugraph::transform_e( + auto [q, idx] = + cugraph::extract_transform_v_frontier_outgoing_e( handle, cur_graph_view, - edges_to_tmp_mask, + vertex_frontier.bucket(0), cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto wgt) { - return false; - }, - tmp_edge_mask.mutable_view(), - false); - - cur_graph_view.attach_edge_mask(tmp_edge_mask.view()); - */ - - - // FIXME: memory footprint overhead - rmm::device_uvector vertex_q_r(weak_edgelist_srcs.size() * 2, handle.get_stream()); + extract_q_idx{}, + do_expensive_check); - // Iterate over unique vertices that appear as either q or r - thrust::merge(handle.get_thrust_policy(), - weak_edgelist_srcs.begin(), - weak_edgelist_srcs.end(), - weak_edgelist_dsts.begin(), - weak_edgelist_dsts.end(), - vertex_q_r.begin()); - thrust::sort(handle.get_thrust_policy(), vertex_q_r.begin(), vertex_q_r.end()); - auto invalid_unique_v_end = thrust::unique( - handle.get_thrust_policy(), - vertex_q_r.begin(), - vertex_q_r.end()); - - vertex_q_r.resize(thrust::distance(vertex_q_r.begin(), invalid_unique_v_end), handle.get_stream()); + vertex_frontier.bucket(0).clear(); - auto 
invalid_edgelist = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); - // raft::device_span(vertex_q_r.data(), vertex_q_r.size()) - auto [srcs_to_q_r, dsts_to_q_r] = extract_transform_e(handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), - cugraph::edge_dummy_property_t{}.view(), - extract_edges_to_q_r{raft::device_span(vertex_q_r.data(), vertex_q_r.size())}); - - printf("\nunrolling q, r edges\n"); - raft::print_device_vector("srcs", srcs_to_q_r.data(), srcs_to_q_r.size(), std::cout); - raft::print_device_vector("dsts", dsts_to_q_r.data(), dsts_to_q_r.size(), std::cout); - - std::optional> graph_q_r{std::nullopt}; - std::optional> renumber_map_q_r{std::nullopt}; - std::tie(*graph_q_r, std::ignore, std::ignore, std::ignore, renumber_map_q_r) = - create_graph_from_edgelist( - handle, - std::nullopt, - std::move(dsts_to_q_r), - std::move(srcs_to_q_r), - std::nullopt, - std::nullopt, - std::nullopt, - cugraph::graph_properties_t{true, graph_view.is_multigraph()}, - false); - - auto [srcs__, dsts__] = extract_transform_e(handle, - (*graph_q_r).view(), - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), - cugraph::edge_dummy_property_t{}.view(), - extract_edges_to_q_r{raft::device_span(vertex_q_r.data(), vertex_q_r.size())}); - - printf("\nq, r edge graph\n"); - raft::print_device_vector("srcs__", srcs__.data(), srcs__.size(), std::cout); - raft::print_device_vector("dsts__", dsts__.data(), dsts__.size(), std::cout); - + vertex_frontier.bucket(0).insert( + thrust::make_zip_iterator(q.begin(), idx.begin()), + thrust::make_zip_iterator(q.end(), idx.end())); - // ********************************************************************************************************* - 
printf("\nbefore crash\n"); - raft::print_device_vector("srcs", weak_edgelist_srcs.data(), weak_edgelist_srcs.size(), std::cout); - raft::print_device_vector("dsts", weak_edgelist_dsts.data(), weak_edgelist_srcs.size(), std::cout); - - prev_chunk_size = 0; - chunk_num_invalid_edges = weak_edgelist_srcs.size(); - - num_chunks = - raft::div_rounding_up_safe(weak_edgelist_srcs.size(), edges_to_intersect_per_iteration); - - for (size_t i = 0; i < num_chunks; ++i) { - - auto chunk_size = std::min(edges_to_intersect_per_iteration, chunk_num_invalid_edges); - auto [intersection_offsets, intersection_indices] = - detail::nbr_intersection(handle, - (*graph_q_r).view(), - //cur_graph_view, - cugraph::edge_dummy_property_t{}.view(), - invalid_edge_first + prev_chunk_size, - invalid_edge_first + prev_chunk_size + chunk_size, - //thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()), - //thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end()), - std::array{true, true}, - true); - - // clear mask. - cur_graph_view.clear_edge_mask(); - - printf("\n**********intersection when unrolling q, r edges\n"); - raft::print_device_vector("intersection_offsets", intersection_offsets.data(), intersection_offsets.size(), std::cout); - raft::print_device_vector("intersection_indices", intersection_indices.data(), intersection_indices.size(), std::cout); + // FIXME: Need to mask (p, q) and (q, r) edges before unrolling (p, r) edges to avoid overcompensating + auto [q_closing, idx_closing] = + cugraph::extract_transform_v_frontier_outgoing_e( + handle, + cur_graph_view, + vertex_frontier.bucket(0), + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + extract_q_idx_closing{raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())}, + do_expensive_check); - // Generate (p, q) edges - // FIXME: Should this array be reduced? 
an edge can have an intersection size > 1 - auto vertex_pair_buffer_p_q = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); - + // extract pair (p, r) + auto vertex_pair_buffer_p_r = + allocate_dataframe_buffer>(q_closing.size(), + handle.get_stream()); + // construct pair (p, q) + // construct pair (q, r) thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q), - get_dataframe_buffer_end(vertex_pair_buffer_p_q), - generate_p_q{ - prev_chunk_size, - raft::device_span(intersection_offsets.data(), - intersection_offsets.size()), - raft::device_span(intersection_indices.data(), - intersection_indices.size()), - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())}); + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r), + get_dataframe_buffer_end(vertex_pair_buffer_p_r), + generate_p_r{ + invalid_edgelist, + raft::device_span(idx_closing.data(), + idx_closing.size()) + }); - raft::print_device_vector("vertex_pair_buffer_p_q", std::get<0>(vertex_pair_buffer_p_q).data(), std::get<0>(vertex_pair_buffer_p_q).size(), std::cout); - raft::print_device_vector("vertex_pair_buffer_p_q", std::get<1>(vertex_pair_buffer_p_q).data(), std::get<1>(vertex_pair_buffer_p_q).size(), std::cout); - - // Unroll (p, q) edges - cugraph::edge_bucket_t invalid_edges_bucket(handle); - invalid_edges_bucket.insert(weak_edgelist_srcs.begin(), - weak_edgelist_srcs.end(), - weak_edgelist_dsts.begin()); - - printf("\nupdating count\n"); - cugraph::transform_e( - handle, - cur_graph_view, - invalid_edges_bucket, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - e_property_triangle_count.view(), - [ - vertex_pair_buffer_p_q_begin = get_dataframe_buffer_begin(vertex_pair_buffer_p_q), - vertex_pair_buffer_p_q_end = get_dataframe_buffer_end(vertex_pair_buffer_p_q) 
- ] - __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { - //printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); - auto e = thrust::make_tuple(src, dst); - auto itr = thrust::lower_bound( - thrust::seq, vertex_pair_buffer_p_q_begin, vertex_pair_buffer_p_q_end, e); - - if ((itr != vertex_pair_buffer_p_q_end) && (*itr == e)) { - //printf("\nupdating the count - src = %d, dst = %d, count = %d\n", src, dst, count); - return count - 1; - } - - return count; - - }, - e_property_triangle_count.mutable_view(), - false); - - - //#if 0 - auto [srcs, dsts, count] = extract_transform_e(handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), - e_property_triangle_count.view(), - extract_edges{}); - - //printf("\nafter unrolling (p, q) edges\n"); - //raft::print_device_vector("srcs", srcs.data(), srcs.size(), std::cout); - //raft::print_device_vector("dsts", dsts.data(), dsts.size(), std::cout); - //raft::print_device_vector("count", count.data(), count.size(), std::cout); - //#endif - - auto vertex_pair_buffer_p_r_edge_p_q = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); + // construct pair (p, q) + auto vertex_pair_buffer_p_q_for_p_r = + allocate_dataframe_buffer>(q_closing.size(), + handle.get_stream()); thrust::tabulate( handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), - get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q), - generate_p_r_or_q_r_from_p_q{ - prev_chunk_size, - raft::device_span(intersection_offsets.data(), - intersection_offsets.size()), - raft::device_span(intersection_indices.data(), - intersection_indices.size()), - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.data())}); - - //invalid_edges - 
cugraph::transform_e( - handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - e_property_triangle_count.view(), - [ - vertex_pair_buffer_p_r_edge_p_q_begin = get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), - vertex_pair_buffer_p_r_edge_p_q_end = get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q) - ] - __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { - //printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); - auto e = thrust::make_tuple(src, dst); - auto itr = thrust::lower_bound( - thrust::seq, vertex_pair_buffer_p_r_edge_p_q_begin, vertex_pair_buffer_p_r_edge_p_q_end, e); - - if ((itr != vertex_pair_buffer_p_r_edge_p_q_end) && (*itr == e)) { - //printf("\nupdating the count - src = %d, dst = %d, count = %d\n", src, dst, count); - return count - 1; - } - - return count; - - }, - e_property_triangle_count.mutable_view(), - false); - - + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), + get_dataframe_buffer_end(vertex_pair_buffer_p_q_for_p_r), + generate_p_q_q_r{ + invalid_edgelist, + raft::device_span(q_closing.data(), + q_closing.size()), + raft::device_span(idx_closing.data(), + idx_closing.size()) + }); - auto vertex_pair_buffer_q_r_edge_p_q = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); + // construct pair (q, r) + auto vertex_pair_buffer_q_r_for_p_r = + allocate_dataframe_buffer>(q_closing.size(), + handle.get_stream()); thrust::tabulate( handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), - get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q), - generate_p_r_or_q_r_from_p_q{ - prev_chunk_size, - raft::device_span(intersection_offsets.data(), - intersection_offsets.size()), - raft::device_span(intersection_indices.data(), - intersection_indices.size()), - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - 
raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.data())}); + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), + get_dataframe_buffer_end(vertex_pair_buffer_q_r_for_p_r), + generate_p_q_q_r{ + invalid_edgelist, + raft::device_span(q_closing.data(), + q_closing.size()), + raft::device_span(idx_closing.data(), + idx_closing.size()) + }); - // invalid_edges - cugraph::transform_e( - handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - e_property_triangle_count.view(), - [ - vertex_pair_buffer_q_r_edge_p_q_begin = get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), - vertex_pair_buffer_q_r_edge_p_q_end = get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q) - ] - __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { - //printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); - auto e = thrust::make_tuple(src, dst); - auto itr = thrust::lower_bound( - thrust::seq, vertex_pair_buffer_q_r_edge_p_q_begin, vertex_pair_buffer_q_r_edge_p_q_end, e); - - if ((itr != vertex_pair_buffer_q_r_edge_p_q_end) && (*itr == e)) { - //printf("\nupdating the count - src = %d, dst = %d, count = %d\n", src, dst, count); - return count - 1; - } - - return count; - - }, - e_property_triangle_count.mutable_view(), - false); - - prev_chunk_size += chunk_size; - chunk_num_invalid_edges -= chunk_size; + auto num_edges_not_overcomp_p_q = + remove_overcompensating_edges( + handle, + q_closing.size(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); + resize_dataframe_buffer(vertex_pair_buffer_p_q_for_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); + resize_dataframe_buffer(vertex_pair_buffer_q_r_for_p_r, 
num_edges_not_overcomp_p_q, handle.get_stream()); - printf("\nafter unrolling (p, q) edges\n"); - raft::print_device_vector("srcs", srcs.data(), srcs.size(), std::cout); - raft::print_device_vector("dsts", dsts.data(), dsts.size(), std::cout); - raft::print_device_vector("count", count.data(), count.size(), std::cout); - } - // **************************************************************************** + auto num_edges_not_overcomp_q_r = + remove_overcompensating_edges( + handle, + num_edges_not_overcomp_p_q, + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); + + resize_dataframe_buffer(vertex_pair_buffer_p_q_for_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + resize_dataframe_buffer(vertex_pair_buffer_q_r_for_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + // Reconstruct (p, r) edges that didn't already have their count updated - // Unrolling p, r edges - // create pair invalid_src, invalid_edge_idx - // create a dataframe buffer of size invalid_edge_size - // FIXME: No need to create a dataframe buffer. We can just zip weak_edgelist_srcs - // with a vector counting from 0 .. 
- auto vertex_pair_buffer_p_tag = - allocate_dataframe_buffer>(weak_edgelist_srcs.size(), - handle.get_stream()); - - thrust::tabulate( + resize_dataframe_buffer(vertex_pair_buffer_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + thrust::tabulate( handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_tag), - get_dataframe_buffer_end(vertex_pair_buffer_p_tag), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r), + get_dataframe_buffer_end(vertex_pair_buffer_p_r), [ - p = weak_edgelist_srcs.begin() - ] __device__(auto idx) { - return thrust::make_tuple(p[idx], idx); - }); - - raft::print_device_vector("vertex_pair_buffer_p_tag", std::get<0>(vertex_pair_buffer_p_tag).data(), std::get<0>(vertex_pair_buffer_p_tag).size(), std::cout); - raft::print_device_vector("vertex_pair_buffer_p_tag", std::get<1>(vertex_pair_buffer_p_tag).data(), std::get<1>(vertex_pair_buffer_p_tag).size(), std::cout); - - - vertex_frontier_t vertex_frontier(handle, 1); - vertex_frontier.bucket(0).insert( - thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).begin(), std::get<1>(vertex_pair_buffer_p_tag).begin()), - thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).end(), std::get<1>(vertex_pair_buffer_p_tag).end())); - - printf("\nsize after inserting - part 1 = %d\n", vertex_frontier.bucket(0).size()); + vertex_pair_buffer_p_q_for_p_r = get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), + vertex_pair_buffer_q_r_for_p_r = get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r) + ] __device__(auto i) { + return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_for_p_r[i]), thrust::get<1>(vertex_pair_buffer_q_r_for_p_r[i])); + }); - /* - std::tie(edge_majors, edge_minors, *edge_weights, subgraph_edge_graph_ids) = - cugraph::extract_transform_v_frontier_outgoing_e( + update_count( handle, cur_graph_view, - vertex_frontier.bucket(0), - cugraph::edge_src_dummy_property_t{}.view(), - 
cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - [] __device__( - thrust::tuple tagged_src, - vertex_t dst, - thrust::nullopt_t, - thrust::nullopt_t, - thrust::nullopt_t) { - printf("\n dst = %d, tag = %d\n", dst, thrust::get<1>(tagged_src)); - - }, - do_expensive_check); - */ - - auto [q, idx] = - cugraph::extract_transform_v_frontier_outgoing_e( + e_property_triangle_count, + tmp_edge_mask, + raft::device_span(std::get<0>(vertex_pair_buffer_p_r).data(), std::get<0>(vertex_pair_buffer_p_r).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_p_r).data(), std::get<1>(vertex_pair_buffer_p_r).size()) + ); + + + + update_count( handle, cur_graph_view, - vertex_frontier.bucket(0), - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - extract_q_idx{}, - do_expensive_check); - - raft::print_device_vector("q", q.data(), q.size(), std::cout); - raft::print_device_vector("i", idx.data(), idx.size(), std::cout); - vertex_frontier.bucket(0).clear(); - printf("\nsize after clearning = %d\n", vertex_frontier.bucket(0).size()); - - vertex_frontier.bucket(0).insert( - thrust::make_zip_iterator(q.begin(), idx.begin()), - thrust::make_zip_iterator(q.end(), idx.end())); - - printf("\nsize after inserting - part 2 = %d\n", vertex_frontier.bucket(0).size()); - // FIXME: Need to mask (p, q) and (q, r) edges before unrolling (p, r) edges to avoid overcompensating - auto [q_closing, idx_closing] = - cugraph::extract_transform_v_frontier_outgoing_e( + e_property_triangle_count, + tmp_edge_mask, + raft::device_span(std::get<0>(vertex_pair_buffer_p_q_for_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_for_p_r).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_p_q_for_p_r).data(), std::get<1>(vertex_pair_buffer_p_q_for_p_r).size()) + ); + + update_count( handle, cur_graph_view, - vertex_frontier.bucket(0), - 
cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - extract_q_idx_closing{raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())}, - do_expensive_check); - - raft::print_device_vector("q_closing", q_closing.data(), q_closing.size(), std::cout); - raft::print_device_vector("i_closing", idx_closing.data(), idx_closing.size(), std::cout); + e_property_triangle_count, + tmp_edge_mask, + raft::device_span(std::get<0>(vertex_pair_buffer_q_r_for_p_r).data(), std::get<0>(vertex_pair_buffer_q_r_for_p_r).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_q_r_for_p_r).data(), std::get<1>(vertex_pair_buffer_q_r_for_p_r).size()) + ); - - // extract pair (p, r) - auto vertex_pair_buffer_p_r = - allocate_dataframe_buffer>(q_closing.size(), - handle.get_stream()); - // construct pair (p, q) - // construct pair (q, r) - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r), - get_dataframe_buffer_end(vertex_pair_buffer_p_r), - generate_p_r{ - invalid_edgelist, - raft::device_span(idx_closing.data(), - idx_closing.size()) - }); - - // construct pair (p, q) - auto vertex_pair_buffer_p_q_for_p_r = - allocate_dataframe_buffer>(q_closing.size(), - handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), - get_dataframe_buffer_end(vertex_pair_buffer_p_q_for_p_r), - generate_p_q_q_r{ - invalid_edgelist, - raft::device_span(q_closing.data(), - q_closing.size()), - raft::device_span(idx_closing.data(), - idx_closing.size()) - }); + // Mask all the edges that have 0 count + cugraph::transform_e( + handle, + cur_graph_view, + // is it more efficient to extract edges with 0 count first? 
+ //edges_with_no_triangle, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + e_property_triangle_count.view(), + [] __device__( + auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { + return count != 0; + }, + edge_mask.mutable_view(), + false); - // construct pair (q, r) - auto vertex_pair_buffer_q_r_for_p_r = - allocate_dataframe_buffer>(q_closing.size(), - handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), - get_dataframe_buffer_end(vertex_pair_buffer_q_r_for_p_r), - generate_p_q_q_r{ - invalid_edgelist, - raft::device_span(q_closing.data(), - q_closing.size()), - raft::device_span(idx_closing.data(), - idx_closing.size()) - }); + cur_graph_view.attach_edge_mask(edge_mask.view()); + /* + if (edge_weight_view) { + auto [edgelist_srcs, edgelist_dsts, edgelist_count] = extract_transform_e(handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), + e_property_triangle_count.view(), + extract_edges{}); + + cugraph::edge_bucket_t edges_with_triangle(handle); + // FIXME: Does 'extract_transform_e' yield sorted edges? 
+ edges_with_triangle.insert(edgelist_srcs.begin(), + edgelist_srcs.end(), + edgelist_dsts.begin()); - raft::print_device_vector("vertex_pair_buffer_p_r", std::get<0>(vertex_pair_buffer_p_r).data(), std::get<0>(vertex_pair_buffer_p_r).size(), std::cout); - raft::print_device_vector("vertex_pair_buffer_p_r", std::get<1>(vertex_pair_buffer_p_r).data(), std::get<1>(vertex_pair_buffer_p_r).size(), std::cout); + + cugraph::transform_e( + handle, + cur_graph_view, + edges_with_triangle, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + *edge_weight_view, + [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto wgt) { + return true; + }, + edge_mask.mutable_view(), + true); // FIXME: remove expensive check - raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r", std::get<0>(vertex_pair_buffer_p_q_for_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_for_p_r).size(), std::cout); - raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r", std::get<1>(vertex_pair_buffer_p_q_for_p_r).data(), std::get<1>(vertex_pair_buffer_p_q_for_p_r).size(), std::cout); + cur_graph_view.attach_edge_mask(edge_mask.view()); + } + */ + - raft::print_device_vector("vertex_pair_buffer_q_r_for_p_r", std::get<0>(vertex_pair_buffer_q_r_for_p_r).data(), std::get<0>(vertex_pair_buffer_q_r_for_p_r).size(), std::cout); - raft::print_device_vector("vertex_pair_buffer_q_r_for_p_r", std::get<1>(vertex_pair_buffer_q_r_for_p_r).data(), std::get<1>(vertex_pair_buffer_q_r_for_p_r).size(), std::cout); + thrust::sort_by_key(handle.get_thrust_policy(), + check_edgelist, + check_edgelist + srcs_f.size(), + count_f.begin()); + #endif + } - //rmm::device_uvector weak_edgelist_srcs(0, handle.get_stream()); - //rmm::device_uvector weak_edgelist_dsts(0, handle.get_stream()); + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); + rmm::device_uvector edgelist_dsts(0, handle.get_stream()); std::optional> edgelist_wgts{std::nullopt}; + + 
std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, std::ignore, std::ignore) = + decompress_to_edgelist( + handle, + cur_graph_view, + edge_weight_view ? std::make_optional(*edge_weight_view) : std::nullopt, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::optional>(std::nullopt)); + + std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts) = + symmetrize_edgelist(handle, + std::move(edgelist_srcs), + std::move(edgelist_dsts), + std::move(edgelist_wgts), + false); + return std::make_tuple( - std::move(weak_edgelist_srcs), std::move(weak_edgelist_dsts), std::move(edgelist_wgts)); - - - - + std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_wgts)); + } } diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index dc4aafe35f8..519ba315c52 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -589,11 +589,11 @@ if(BUILD_CUGRAPH_MG_TESTS) ############################################################################################### # - MG EDGE TRIANGLE COUNT tests -------------------------------------------------------------------------- - #ConfigureTest(MG_EDGE_TRIANGLE_COUNT_TEST community/mg_edge_triangle_count_test.cpp) + ConfigureTestMG(MG_EDGE_TRIANGLE_COUNT_TEST community/mg_edge_triangle_count_test.cpp) ############################################################################################### # - MG K-TRUSS tests -------------------------------------------------------------------------- - #ConfigureTest(MG_K_TRUSS_TEST community/mg_k_truss_test.cpp) + ConfigureTestMG(MG_K_TRUSS_TEST community/mg_k_truss_test.cpp) ############################################################################################### # - MG WEAKLY CONNECTED COMPONENTS tests ------------------------------------------------------ diff --git a/cpp/tests/community/mg_k_truss_test.cpp b/cpp/tests/community/mg_k_truss_test.cpp index 8cf12e0fc3e..b03f7a7452e 100644 --- a/cpp/tests/community/mg_k_truss_test.cpp +++ 
b/cpp/tests/community/mg_k_truss_test.cpp @@ -104,8 +104,21 @@ class Tests_MGEdgeTriangleCount hr_timer.start("MG EdgeTriangleCount"); } + /* auto d_mg_cugraph_results = cugraph::edge_triangle_count(*handle_, mg_graph_view); + */ + + auto [d_cugraph_srcs, d_cugraph_dsts, d_cugraph_wgts] = + cugraph::k_truss( + *handle_, + mg_graph_view, + //edge_weight ? std::make_optional((*edge_weight).view()) : std::nullopt, + std::nullopt, // FIXME: test weights + //k_truss_usecase.k_, + 4, + false); + if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -116,6 +129,7 @@ class Tests_MGEdgeTriangleCount // 3. Compare SG & MG results + #if 0 if (edge_triangle_count_usecase.check_correctness_) { // 3-1. Convert to SG graph @@ -137,7 +151,11 @@ class Tests_MGEdgeTriangleCount if (handle_->get_comms().get_rank() == int{0}) { // 3-2. Convert the MG triangle counts stored as 'edge_property_t' to device vector - auto [edgelist_srcs, edgelist_dsts, d_edgelist_weights, d_edge_triangle_counts, d_edgelist_types] = + auto [edgelist_srcs, + edgelist_dsts, + d_edgelist_weights, + d_edge_triangle_counts, + d_edgelist_type] = cugraph::decompress_to_edgelist( *handle_, sg_graph.view(), @@ -155,14 +173,12 @@ class Tests_MGEdgeTriangleCount auto [ref_edgelist_srcs, ref_edgelist_dsts, ref_d_edgelist_weights, - ref_d_edge_triangle_counts, - ref_d_edgelist_types] = + ref_d_edge_triangle_counts] = cugraph::decompress_to_edgelist( *handle_, sg_graph.view(), std::optional>{std::nullopt}, std::make_optional(ref_d_sg_cugraph_results.view()), - std::optional>{std::nullopt}, std::optional>{ std::nullopt}); // FIXME: No longer needed @@ -177,6 +193,7 @@ class Tests_MGEdgeTriangleCount h_sg_edge_triangle_counts.begin())); } } + #endif } private: @@ -187,14 +204,14 @@ template std::unique_ptr Tests_MGEdgeTriangleCount::handle_ = nullptr; using Tests_MGEdgeTriangleCount_File = Tests_MGEdgeTriangleCount; -using Tests_MGEdgeTriangleCount_Rmat = 
Tests_MGEdgeTriangleCount; +//using Tests_MGEdgeTriangleCount_Rmat = Tests_MGEdgeTriangleCount; TEST_P(Tests_MGEdgeTriangleCount_File, CheckInt32Int32) { auto param = GetParam(); run_current_test(std::get<0>(param), std::get<1>(param)); } - +#if 0 TEST_P(Tests_MGEdgeTriangleCount_Rmat, CheckInt32Int32) { auto param = GetParam(); @@ -215,17 +232,22 @@ TEST_P(Tests_MGEdgeTriangleCount_Rmat, CheckInt64Int64) run_current_test( std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } +#endif INSTANTIATE_TEST_SUITE_P( file_tests, Tests_MGEdgeTriangleCount_File, ::testing::Combine( // enable correctness checks - ::testing::Values(EdgeTriangleCount_Usecase{false, true}, - EdgeTriangleCount_Usecase{true, true}), - ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), - cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); + ::testing::Values(EdgeTriangleCount_Usecase{false, false} + //EdgeTriangleCount_Usecase{true, true} + ), + ::testing::Values(cugraph::test::File_Usecase("/raid/jnke/optimize_ktruss/datasets/test_datasets.mtx") + //cugraph::test::File_Usecase("test/datasets/dolphins.mtx") + ))); + +#if 0 INSTANTIATE_TEST_SUITE_P( rmat_small_tests, Tests_MGEdgeTriangleCount_Rmat, @@ -245,5 +267,6 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values(EdgeTriangleCount_Usecase{false, false}, EdgeTriangleCount_Usecase{true, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, true, false)))); +#endif CUGRAPH_MG_TEST_PROGRAM_MAIN() From 1d52c845b443b855df8463ef415b5b8c7ce11677 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 29 May 2024 13:16:48 -0700 Subject: [PATCH 41/93] undo changes to triangle count tests --- .../community/mg_triangle_count_test.cpp | 236 +++++++++--------- 1 file changed, 119 insertions(+), 117 deletions(-) diff --git a/cpp/tests/community/mg_triangle_count_test.cpp b/cpp/tests/community/mg_triangle_count_test.cpp index 8011b6310ce..297087f875f 100644 --- 
a/cpp/tests/community/mg_triangle_count_test.cpp +++ b/cpp/tests/community/mg_triangle_count_test.cpp @@ -39,16 +39,18 @@ #include -struct EdgeTriangleCount_Usecase { - bool edge_masking_{false}; - bool check_correctness_{true}; +struct TriangleCount_Usecase { + double vertex_subset_ratio{0.0}; + + bool edge_masking{false}; + bool check_correctness{true}; }; template -class Tests_MGEdgeTriangleCount - : public ::testing::TestWithParam> { +class Tests_MGTriangleCount + : public ::testing::TestWithParam> { public: - Tests_MGEdgeTriangleCount() {} + Tests_MGTriangleCount() {} static void SetUpTestCase() { handle_ = cugraph::test::initialize_mg_handle(); } @@ -57,9 +59,9 @@ class Tests_MGEdgeTriangleCount virtual void SetUp() {} virtual void TearDown() {} - // Compare the results of running EdgeTriangleCount on multiple GPUs to that of a single-GPU run + // Compare the results of running TriangleCount on multiple GPUs to that of a single-GPU run template - void run_current_test(EdgeTriangleCount_Usecase const& edge_triangle_count_usecase, + void run_current_test(TriangleCount_Usecase const& triangle_count_usecase, input_usecase_t const& input_usecase) { using weight_t = float; @@ -90,34 +92,63 @@ class Tests_MGEdgeTriangleCount auto mg_graph_view = mg_graph.view(); std::optional> edge_mask{std::nullopt}; - if (edge_triangle_count_usecase.edge_masking_) { + if (triangle_count_usecase.edge_masking) { edge_mask = cugraph::test::generate::edge_property( *handle_, mg_graph_view, 2); mg_graph_view.attach_edge_mask((*edge_mask).view()); } - // 2. run MG EdgeTriangleCount + // 2. 
generate a vertex subset to compute triangle counts + + std::optional> h_mg_vertices{std::nullopt}; + if (triangle_count_usecase.vertex_subset_ratio < 1.0) { + std::default_random_engine generator{ + static_cast(handle_->get_comms().get_rank()) /* seed */}; + std::uniform_real_distribution distribution{0.0, 1.0}; + h_mg_vertices = std::vector(mg_graph_view.local_vertex_partition_range_size()); + std::iota((*h_mg_vertices).begin(), + (*h_mg_vertices).end(), + mg_graph_view.local_vertex_partition_range_first()); + (*h_mg_vertices) + .erase(std::remove_if((*h_mg_vertices).begin(), + (*h_mg_vertices).end(), + [&generator, &distribution, triangle_count_usecase](auto v) { + return distribution(generator) >= + triangle_count_usecase.vertex_subset_ratio; + }), + (*h_mg_vertices).end()); + } + + auto d_mg_vertices = h_mg_vertices ? std::make_optional>( + (*h_mg_vertices).size(), handle_->get_stream()) + : std::nullopt; + if (d_mg_vertices) { + raft::update_device((*d_mg_vertices).data(), + (*h_mg_vertices).data(), + (*h_mg_vertices).size(), + handle_->get_stream()); + } + + // 3. run MG TriangleCount + + rmm::device_uvector d_mg_triangle_counts( + d_mg_vertices ? (*d_mg_vertices).size() : mg_graph_view.local_vertex_partition_range_size(), + handle_->get_stream()); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement handle_->get_comms().barrier(); - hr_timer.start("MG EdgeTriangleCount"); + hr_timer.start("MG TriangleCount"); } - /* - auto d_mg_cugraph_results = - cugraph::edge_triangle_count(*handle_, mg_graph_view); - */ - - auto [d_cugraph_srcs, d_cugraph_dsts, d_cugraph_wgts] = - cugraph::k_truss( - *handle_, - mg_graph_view, - //edge_weight ? std::make_optional((*edge_weight).view()) : std::nullopt, - std::nullopt, // FIXME: test weights - //k_truss_usecase.k_, - 4, - false); + cugraph::triangle_count( + *handle_, + mg_graph_view, + d_mg_vertices ? 
std::make_optional>( + (*d_mg_vertices).begin(), (*d_mg_vertices).end()) + : std::nullopt, + raft::device_span(d_mg_triangle_counts.begin(), d_mg_triangle_counts.end()), + false); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -126,20 +157,27 @@ class Tests_MGEdgeTriangleCount hr_timer.display_and_clear(std::cout); } - // 3. Compare SG & MG results + // 4. copmare SG & MG results - #if 0 - if (edge_triangle_count_usecase.check_correctness_) { - // 3-1. Convert to SG graph + if (triangle_count_usecase.check_correctness) { + // 4-1. aggregate MG results + + std::optional> d_mg_aggregate_vertices{std::nullopt}; + rmm::device_uvector d_mg_aggregate_triangle_counts(0, handle_->get_stream()); + std::tie(d_mg_aggregate_vertices, d_mg_aggregate_triangle_counts) = + cugraph::test::mg_vertex_property_values_to_sg_vertex_property_values( + *handle_, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + mg_graph_view.local_vertex_partition_range(), + std::optional>{std::nullopt}, + d_mg_vertices ? 
std::make_optional>( + (*d_mg_vertices).data(), (*d_mg_vertices).size()) + : std::nullopt, + raft::device_span(d_mg_triangle_counts.data(), + d_mg_triangle_counts.size())); cugraph::graph_t sg_graph(*handle_); -<<<<<<< HEAD - std::optional< - cugraph::edge_property_t, edge_t>> - d_sg_cugraph_results{std::nullopt}; - std::tie(sg_graph, std::ignore, d_sg_cugraph_results, std::ignore) = - cugraph::test::mg_graph_to_sg_graph( -======= std::tie(sg_graph, std::ignore, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( *handle_, @@ -163,62 +201,26 @@ class Tests_MGEdgeTriangleCount handle_->get_stream()); cugraph::triangle_count( ->>>>>>> upstream/branch-24.06 *handle_, - mg_graph_view, - std::optional>{std::nullopt}, - // FIXME: Update 'create_graph_from_edgelist' to support int32_t and int64_t values - std::make_optional(d_mg_cugraph_results.view()), - std::make_optional>((*mg_renumber_map).data(), - (*mg_renumber_map).size()), + sg_graph_view, + d_mg_aggregate_vertices + ? std::make_optional>( + (*d_mg_aggregate_vertices).begin(), (*d_mg_aggregate_vertices).end()) + : std::nullopt, + raft::device_span(d_sg_triangle_counts.begin(), d_sg_triangle_counts.end()), false); - if (handle_->get_comms().get_rank() == int{0}) { - // 3-2. Convert the MG triangle counts stored as 'edge_property_t' to device vector - - auto [edgelist_srcs, - edgelist_dsts, - d_edgelist_weights, - d_edge_triangle_counts, - d_edgelist_type] = - cugraph::decompress_to_edgelist( - *handle_, - sg_graph.view(), - std::optional>{std::nullopt}, - // FIXME: Update 'decompress_edgelist' to support int32_t and int64_t values - std::make_optional((*d_sg_cugraph_results).view()), - std::optional>{std::nullopt}, - std::optional>{ - std::nullopt}); // FIXME: No longer needed - - // 3-3. 
Run SG EdgeTriangleCount - - auto ref_d_sg_cugraph_results = - cugraph::edge_triangle_count(*handle_, sg_graph.view()); - auto [ref_edgelist_srcs, - ref_edgelist_dsts, - ref_d_edgelist_weights, - ref_d_edge_triangle_counts] = - cugraph::decompress_to_edgelist( - *handle_, - sg_graph.view(), - std::optional>{std::nullopt}, - std::make_optional(ref_d_sg_cugraph_results.view()), - std::optional>{ - std::nullopt}); // FIXME: No longer needed - - // 3-4. Compare - - auto h_mg_edge_triangle_counts = cugraph::test::to_host(*handle_, *d_edge_triangle_counts); - auto h_sg_edge_triangle_counts = - cugraph::test::to_host(*handle_, *ref_d_edge_triangle_counts); - - ASSERT_TRUE(std::equal(h_mg_edge_triangle_counts.begin(), - h_mg_edge_triangle_counts.end(), - h_sg_edge_triangle_counts.begin())); + // 4-3. compare + + auto h_mg_aggregate_triangle_counts = + cugraph::test::to_host(*handle_, d_mg_aggregate_triangle_counts); + auto h_sg_triangle_counts = cugraph::test::to_host(*handle_, d_sg_triangle_counts); + + ASSERT_TRUE(std::equal(h_mg_aggregate_triangle_counts.begin(), + h_mg_aggregate_triangle_counts.end(), + h_sg_triangle_counts.begin())); } } - #endif } private: @@ -226,59 +228,58 @@ class Tests_MGEdgeTriangleCount }; template -std::unique_ptr Tests_MGEdgeTriangleCount::handle_ = nullptr; +std::unique_ptr Tests_MGTriangleCount::handle_ = nullptr; -using Tests_MGEdgeTriangleCount_File = Tests_MGEdgeTriangleCount; -//using Tests_MGEdgeTriangleCount_Rmat = Tests_MGEdgeTriangleCount; +using Tests_MGTriangleCount_File = Tests_MGTriangleCount; +using Tests_MGTriangleCount_Rmat = Tests_MGTriangleCount; -TEST_P(Tests_MGEdgeTriangleCount_File, CheckInt32Int32) +TEST_P(Tests_MGTriangleCount_File, CheckInt32Int32) { auto param = GetParam(); run_current_test(std::get<0>(param), std::get<1>(param)); } -#if 0 -TEST_P(Tests_MGEdgeTriangleCount_Rmat, CheckInt32Int32) + +TEST_P(Tests_MGTriangleCount_Rmat, CheckInt32Int32) { auto param = GetParam(); run_current_test( std::get<0>(param), 
override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } -TEST_P(Tests_MGEdgeTriangleCount_Rmat, CheckInt32Int64) +TEST_P(Tests_MGTriangleCount_Rmat, CheckInt32Int64) { auto param = GetParam(); run_current_test( std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } -TEST_P(Tests_MGEdgeTriangleCount_Rmat, CheckInt64Int64) +TEST_P(Tests_MGTriangleCount_Rmat, CheckInt64Int64) { auto param = GetParam(); run_current_test( std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } -#endif INSTANTIATE_TEST_SUITE_P( file_tests, - Tests_MGEdgeTriangleCount_File, + Tests_MGTriangleCount_File, ::testing::Combine( // enable correctness checks - ::testing::Values(EdgeTriangleCount_Usecase{false, false} - //EdgeTriangleCount_Usecase{true, true} - ), - ::testing::Values(cugraph::test::File_Usecase("/raid/jnke/optimize_ktruss/datasets/test_datasets.mtx") - //cugraph::test::File_Usecase("test/datasets/dolphins.mtx") - ))); - -#if 0 -INSTANTIATE_TEST_SUITE_P( - rmat_small_tests, - Tests_MGEdgeTriangleCount_Rmat, - ::testing::Combine( - ::testing::Values(EdgeTriangleCount_Usecase{false, true}, - EdgeTriangleCount_Usecase{true, true}), - ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); + ::testing::Values(TriangleCount_Usecase{0.1, false}, + TriangleCount_Usecase{0.1, true}, + TriangleCount_Usecase{1.0, false}, + TriangleCount_Usecase{1.0, true}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); + +INSTANTIATE_TEST_SUITE_P(rmat_small_tests, + Tests_MGTriangleCount_Rmat, + ::testing::Combine(::testing::Values(TriangleCount_Usecase{0.1, false}, + TriangleCount_Usecase{0.1, true}, + TriangleCount_Usecase{1.0, false}, + TriangleCount_Usecase{1.0, true}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 10, 16, 0.57, 0.19, 0.19, 0, true, false)))); INSTANTIATE_TEST_SUITE_P( 
rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with @@ -286,11 +287,12 @@ INSTANTIATE_TEST_SUITE_P( vertex & edge type combination) by command line arguments and do not include more than one Rmat_Usecase that differ only in scale or edge factor (to avoid running same benchmarks more than once) */ - Tests_MGEdgeTriangleCount_Rmat, + Tests_MGTriangleCount_Rmat, ::testing::Combine( - ::testing::Values(EdgeTriangleCount_Usecase{false, false}, - EdgeTriangleCount_Usecase{true, false}), + ::testing::Values(TriangleCount_Usecase{0.1, false, false}, + TriangleCount_Usecase{0.1, true, false}, + TriangleCount_Usecase{1.0, false, false}, + TriangleCount_Usecase{1.0, true, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, true, false)))); -#endif -CUGRAPH_MG_TEST_PROGRAM_MAIN() +CUGRAPH_MG_TEST_PROGRAM_MAIN() \ No newline at end of file From 9892314db36434d25e7659c8937860701bc026c9 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 29 May 2024 13:52:20 -0700 Subject: [PATCH 42/93] make a copy of the invalid edgelist --- cpp/src/community/k_truss_impl.cuh | 44 +++++++++++++++++++++--------- 1 file changed, 31 insertions(+), 13 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index b493a855cfa..c18b6c6d7ce 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -846,7 +846,7 @@ k_truss(raft::handle_t const& handle, std::nullopt, std::nullopt, cugraph::graph_properties_t{true, graph_view.is_multigraph()}, - false); + true); modified_graph_view = (*modified_graph).view(); @@ -1031,6 +1031,13 @@ k_truss(raft::handle_t const& handle, cugraph::edge_dummy_property_t{}.view(), extract_edges_to_q_r{raft::device_span(vertex_q_r.data(), vertex_q_r.size())}); + + rmm::device_uvector cp_weak_edgelist_srcs(0, handle.get_stream()); + rmm::device_uvector cp_weak_edgelist_dsts(0, handle.get_stream()); + + + + if constexpr (multi_gpu) { 
std::tie(dsts_to_q_r, srcs_to_q_r, std::ignore, std::ignore, std::ignore) = detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning shuffled_weak_edgelist_srcs{0, handle.get_stream()}; rmm::device_uvector shuffled_weak_edgelist_dsts{0, handle.get_stream()}; - std::tie(weak_edgelist_srcs, weak_edgelist_dsts, std::ignore, std::ignore, std::ignore) = + std::tie(cp_weak_edgelist_srcs, cp_weak_edgelist_dsts, std::ignore, std::ignore, std::ignore) = detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, std::move(weak_edgelist_srcs), std::move(weak_edgelist_dsts), std::nullopt, std::nullopt, std::nullopt); + handle, std::move(cp_weak_edgelist_srcs), std::move(cp_weak_edgelist_dsts), std::nullopt, std::nullopt, std::nullopt); renumber_ext_vertices(handle, - weak_edgelist_srcs.data(), - weak_edgelist_srcs.size(), + cp_weak_edgelist_srcs.data(), + cp_weak_edgelist_srcs.size(), (*renumber_map_q_r).data(), (*graph_q_r).view().local_vertex_partition_range_first(), (*graph_q_r).view().local_vertex_partition_range_last(), true); renumber_ext_vertices(handle, - weak_edgelist_dsts.data(), - weak_edgelist_dsts.size(), + cp_weak_edgelist_dsts.data(), + cp_weak_edgelist_dsts.size(), (*renumber_map_q_r).data(), (*graph_q_r).view().local_vertex_partition_range_first(), (*graph_q_r).view().local_vertex_partition_range_last(), @@ -1084,10 +1104,10 @@ k_truss(raft::handle_t const& handle, } - invalid_edge_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + invalid_edge_first = thrust::make_zip_iterator(cp_weak_edgelist_srcs.begin(), cp_weak_edgelist_dsts.begin()); thrust::sort(handle.get_thrust_policy(), invalid_edge_first, - invalid_edge_first + weak_edgelist_srcs.size()); + invalid_edge_first + cp_weak_edgelist_srcs.size()); find_unroll_p_q_q_r_edges( handle, @@ -1095,14 +1115,12 @@ k_truss(raft::handle_t const& handle, graph_q_r, e_property_triangle_count, tmp_edge_mask, - 
raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), + raft::device_span(cp_weak_edgelist_srcs.data(), cp_weak_edgelist_srcs.size()), + raft::device_span(cp_weak_edgelist_dsts.data(), cp_weak_edgelist_dsts.size()), std::move(renumber_map_q_r), do_expensive_check ); - - // FIXME: unrenumber weak edgelist before proceeding #if 0 auto [srcs__, dsts__] = extract_transform_e(handle, (*graph_q_r).view(), From 42a1b2a73966dcdbe9d14d3995d42e1f91b2afc7 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 29 May 2024 16:06:43 -0700 Subject: [PATCH 43/93] unroll remaining edges --- cpp/src/community/k_truss_impl.cuh | 324 ++++++++++++++++++++--------- 1 file changed, 231 insertions(+), 93 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index c18b6c6d7ce..985955eddd2 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -950,13 +950,9 @@ k_truss(raft::handle_t const& handle, cugraph::edge_property_t edge_mask(handle, cur_graph_view); cugraph::fill_edge_property(handle, cur_graph_view, true, edge_mask); - auto iteration = -1; while (true) { // FIXME: Keep it at 1 iteration for debugging - iteration += 1; - if (iteration == 1) { - break; - } + // extract the edges that have counts less than k - 2. Those edges will be unrolled // FIXME: extracting 'triangle_count' is not required here. 
auto [weak_edgelist_srcs, weak_edgelist_dsts, triangle_count] = extract_transform_e(handle, @@ -966,10 +962,25 @@ k_truss(raft::handle_t const& handle, e_property_triangle_count.view(), extract_weak_edges{k}); + if constexpr (multi_gpu) { + if (num_invalid_edges == 0) { + done = 0; + } + done = host_scalar_allreduce( + handle.get_comms(), done, raft::comms::op_t::MAX, handle.get_stream()); + if (done == 0) { + break; + } + + } else if (weak_edgelist_srcs.size() == 0) { + break; + } + + if (num_invalid_edges == 0 && done == 0) { + break; } // FIXME: Add a flag checking wether the other ranks have completed their task or // not before exiting. - if (weak_edgelist_srcs.size() == 0) { break; } auto invalid_edge_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); thrust::sort(handle.get_thrust_policy(), @@ -1121,14 +1132,7 @@ k_truss(raft::handle_t const& handle, do_expensive_check ); - #if 0 - auto [srcs__, dsts__] = extract_transform_e(handle, - (*graph_q_r).view(), - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), - cugraph::edge_dummy_property_t{}.view(), - extract_edges_to_q_r{raft::device_span(vertex_q_r.data(), vertex_q_r.size())}); + // Unrolling p, r edges // create pair invalid_src, invalid_edge_idx @@ -1231,81 +1235,222 @@ k_truss(raft::handle_t const& handle, idx_closing.size()) }); + // FIXME: Avoid duplicated code in SG and MG when updating the counts + if constexpr (multi_gpu) { + auto& comm = handle.get_comms(); + // Get global weak_edgelist + auto global_weak_edgelist_srcs = cugraph::detail::device_allgatherv( + handle, + comm, + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size())); + + auto global_weak_edgelist_dsts = cugraph::detail::device_allgatherv( + handle, + comm, + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); + + // Sort the weak 
edges if they are not already + auto invalid_edgelist = thrust::make_zip_iterator(global_weak_edgelist_srcs.begin(), global_weak_edgelist_dsts.begin()); + thrust::sort(handle.get_thrust_policy(), + invalid_edge_first, + invalid_edge_first + weak_edgelist_srcs.size()); + - auto num_edges_not_overcomp_p_q = - remove_overcompensating_edges( - handle, - q_closing.size(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); - - resize_dataframe_buffer(vertex_pair_buffer_p_q_for_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); - resize_dataframe_buffer(vertex_pair_buffer_q_r_for_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); - - auto num_edges_not_overcomp_q_r = - remove_overcompensating_edges( + auto num_edges_not_overcomp_p_q = + remove_overcompensating_edges( + handle, + q_closing.size(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), + raft::device_span(global_weak_edgelist_srcs.data(), global_weak_edgelist_srcs.size()), + raft::device_span(global_weak_edgelist_dsts.data(), global_weak_edgelist_dsts.size())); + + resize_dataframe_buffer(vertex_pair_buffer_p_q_for_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); + resize_dataframe_buffer(vertex_pair_buffer_q_r_for_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); + + auto num_edges_not_overcomp_q_r = + remove_overcompensating_edges( + handle, + num_edges_not_overcomp_p_q, + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), + raft::device_span(global_weak_edgelist_srcs.data(), global_weak_edgelist_srcs.size()), + raft::device_span(global_weak_edgelist_dsts.data(), global_weak_edgelist_dsts.size())); + + 
resize_dataframe_buffer(vertex_pair_buffer_p_q_for_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + resize_dataframe_buffer(vertex_pair_buffer_q_r_for_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + + rmm::device_uvector pair_p_q_srcs(0, handle.get_stream()); + rmm::device_uvector pair_p_q_dsts(0, handle.get_stream()); + rmm::device_uvector pair_p_r_srcs(0, handle.get_stream()); + rmm::device_uvector pair_p_r_dsts(0, handle.get_stream()); + rmm::device_uvector pair_q_r_srcs(0, handle.get_stream()); + rmm::device_uvector pair_q_r_dsts(0, handle.get_stream()); + + std::tie(pair_p_q_srcs, pair_p_q_dsts, std::ignore, std::ignore, std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<0>(vertex_pair_buffer_p_q_for_p_r)), + std::move(std::get<1>(vertex_pair_buffer_p_q_for_p_r)), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + + std::tie(pair_q_r_srcs, pair_q_r_dsts, std::ignore, std::ignore, std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<0>(vertex_pair_buffer_q_r_for_p_r)), + std::move(std::get<1>(vertex_pair_buffer_q_r_for_p_r)), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + + + update_count( handle, - num_edges_not_overcomp_p_q, - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); - - resize_dataframe_buffer(vertex_pair_buffer_p_q_for_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); - resize_dataframe_buffer(vertex_pair_buffer_q_r_for_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + cur_graph_view, + e_property_triangle_count, + tmp_edge_mask, + 
raft::device_span(pair_p_q_srcs.data(), pair_p_q_srcs.size()), + raft::device_span(pair_p_q_dsts.data(), pair_p_q_dsts.size()) + ); + + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + tmp_edge_mask, + raft::device_span(pair_q_r_srcs.data(), pair_q_r_srcs.size()), + raft::device_span(pair_q_r_dsts.data(), pair_q_r_dsts.size()) + ); + + auto vertex_pair_buffer_p_r = + allocate_dataframe_buffer>(pair_q_r_srcs.size(), + handle.get_stream()); + + std::tie(pair_p_r_srcs, pair_p_r_dsts, std::ignore, std::ignore, std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<0>(vertex_pair_buffer_p_r)), + std::move(std::get<1>(vertex_pair_buffer_p_r)), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + + // Reconstruct (p, r) edges that didn't already have their count updated + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r), + get_dataframe_buffer_end(vertex_pair_buffer_p_r), + [ + vertex_pair_buffer_p_q_for_p_r = get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), + vertex_pair_buffer_q_r_for_p_r = get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r) + ] __device__(auto i) { + return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_for_p_r[i]), thrust::get<1>(vertex_pair_buffer_q_r_for_p_r[i])); + }); - // Reconstruct (p, r) edges that didn't already have their count updated + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + tmp_edge_mask, + raft::device_span(std::get<0>(vertex_pair_buffer_p_r).data(), std::get<0>(vertex_pair_buffer_p_r).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_p_r).data(), std::get<1>(vertex_pair_buffer_p_r).size()) + ); - resize_dataframe_buffer(vertex_pair_buffer_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - 
get_dataframe_buffer_begin(vertex_pair_buffer_p_r), - get_dataframe_buffer_end(vertex_pair_buffer_p_r), - [ - vertex_pair_buffer_p_q_for_p_r = get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), - vertex_pair_buffer_q_r_for_p_r = get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r) - ] __device__(auto i) { - return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_for_p_r[i]), thrust::get<1>(vertex_pair_buffer_q_r_for_p_r[i])); - }); + } else { - update_count( - handle, - cur_graph_view, - e_property_triangle_count, - tmp_edge_mask, - raft::device_span(std::get<0>(vertex_pair_buffer_p_r).data(), std::get<0>(vertex_pair_buffer_p_r).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_p_r).data(), std::get<1>(vertex_pair_buffer_p_r).size()) - ); - - - - update_count( - handle, - cur_graph_view, - e_property_triangle_count, - tmp_edge_mask, - raft::device_span(std::get<0>(vertex_pair_buffer_p_q_for_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_for_p_r).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_p_q_for_p_r).data(), std::get<1>(vertex_pair_buffer_p_q_for_p_r).size()) - ); - - update_count( - handle, - cur_graph_view, - e_property_triangle_count, - tmp_edge_mask, - raft::device_span(std::get<0>(vertex_pair_buffer_q_r_for_p_r).data(), std::get<0>(vertex_pair_buffer_q_r_for_p_r).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_q_r_for_p_r).data(), std::get<1>(vertex_pair_buffer_q_r_for_p_r).size()) - ); + auto num_edges_not_overcomp_p_q = + remove_overcompensating_edges( + handle, + q_closing.size(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); + + resize_dataframe_buffer(vertex_pair_buffer_p_q_for_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); + 
resize_dataframe_buffer(vertex_pair_buffer_q_r_for_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); + + auto num_edges_not_overcomp_q_r = + remove_overcompensating_edges( + handle, + num_edges_not_overcomp_p_q, + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); + + resize_dataframe_buffer(vertex_pair_buffer_p_q_for_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + resize_dataframe_buffer(vertex_pair_buffer_q_r_for_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + + resize_dataframe_buffer(vertex_pair_buffer_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r), + get_dataframe_buffer_end(vertex_pair_buffer_p_r), + [ + vertex_pair_buffer_p_q_for_p_r = get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), + vertex_pair_buffer_q_r_for_p_r = get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r) + ] __device__(auto i) { + return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_for_p_r[i]), thrust::get<1>(vertex_pair_buffer_q_r_for_p_r[i])); + }); + + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + tmp_edge_mask, + raft::device_span(std::get<0>(vertex_pair_buffer_p_r).data(), std::get<0>(vertex_pair_buffer_p_r).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_p_r).data(), std::get<1>(vertex_pair_buffer_p_r).size()) + ); + + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + tmp_edge_mask, + raft::device_span(std::get<0>(vertex_pair_buffer_p_q_for_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_for_p_r).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_p_q_for_p_r).data(), std::get<1>(vertex_pair_buffer_p_q_for_p_r).size()) + ); + + 
update_count( + handle, + cur_graph_view, + e_property_triangle_count, + tmp_edge_mask, + raft::device_span(std::get<0>(vertex_pair_buffer_q_r_for_p_r).data(), std::get<0>(vertex_pair_buffer_q_r_for_p_r).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_q_r_for_p_r).data(), std::get<1>(vertex_pair_buffer_q_r_for_p_r).size()) + ); + } // Mask all the edges that have 0 count cugraph::transform_e( @@ -1324,7 +1469,7 @@ k_truss(raft::handle_t const& handle, false); cur_graph_view.attach_edge_mask(edge_mask.view()); - /* + if (edge_weight_view) { auto [edgelist_srcs, edgelist_dsts, edgelist_count] = extract_transform_e(handle, cur_graph_view, @@ -1356,14 +1501,7 @@ k_truss(raft::handle_t const& handle, cur_graph_view.attach_edge_mask(edge_mask.view()); } - */ - - - thrust::sort_by_key(handle.get_thrust_policy(), - check_edgelist, - check_edgelist + srcs_f.size(), - count_f.begin()); - #endif + } rmm::device_uvector edgelist_srcs(0, handle.get_stream()); From bdac0aa67fd0c3e630ee58fdf3afdc509143c1d7 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 29 May 2024 16:11:11 -0700 Subject: [PATCH 44/93] update cmake --- cpp/src/community/k_truss_mg.cu | 4 ++-- cpp/src/community/k_truss_sg.cu | 3 +-- cpp/tests/CMakeLists.txt | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/cpp/src/community/k_truss_mg.cu b/cpp/src/community/k_truss_mg.cu index 04845d5b73d..8da00f35275 100644 --- a/cpp/src/community/k_truss_mg.cu +++ b/cpp/src/community/k_truss_mg.cu @@ -28,7 +28,7 @@ k_truss(raft::handle_t const& handle, std::optional> edge_weight_view, int32_t k, bool do_expensive_check); -/* + template std::tuple, rmm::device_uvector, std::optional>> @@ -73,6 +73,6 @@ k_truss(raft::handle_t const& handle, std::optional> edge_weight_view, int64_t k, bool do_expensive_check); -*/ + } // namespace cugraph diff --git a/cpp/src/community/k_truss_sg.cu b/cpp/src/community/k_truss_sg.cu index 2899f9fd722..dfea62182f5 100644 --- a/cpp/src/community/k_truss_sg.cu +++ 
b/cpp/src/community/k_truss_sg.cu @@ -29,7 +29,6 @@ k_truss(raft::handle_t const& handle, int32_t k, bool do_expensive_check); -/* template std::tuple, rmm::device_uvector, std::optional>> @@ -74,5 +73,5 @@ k_truss(raft::handle_t const& handle, std::optional> edge_weight_view, int64_t k, bool do_expensive_check); -*/ + } // namespace cugraph diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index b0136c9688f..1b2faa57b43 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -642,7 +642,7 @@ if(BUILD_CUGRAPH_MG_TESTS) ############################################################################################### # - MG TRIANGLE COUNT tests ------------------------------------------------------------------- - #ConfigureTestMG(MG_TRIANGLE_COUNT_TEST community/mg_triangle_count_test.cpp) + ConfigureTestMG(MG_TRIANGLE_COUNT_TEST community/mg_triangle_count_test.cpp) ############################################################################################### # - MG coarsening tests ----------------------------------------------------------------------- From 03edaa99e30b92ff9d998b1c643bcf9f769b2a0c Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 29 May 2024 16:12:49 -0700 Subject: [PATCH 45/93] fix style --- cpp/src/community/k_truss_impl.cuh | 1550 +++++++++-------- cpp/src/community/k_truss_mg.cu | 1 - cpp/tests/community/mg_k_truss_test.cpp | 25 +- .../community/mg_triangle_count_test.cpp | 2 +- 4 files changed, 789 insertions(+), 789 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 985955eddd2..d9493628e81 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -15,21 +15,19 @@ */ #pragma once -#include "prims/extract_transform_v_frontier_outgoing_e.cuh" #include "prims/edge_bucket.cuh" #include "prims/extract_transform_e.cuh" +#include "prims/extract_transform_v_frontier_outgoing_e.cuh" #include "prims/fill_edge_property.cuh" #include 
"prims/transform_e.cuh" #include "prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh" #include "prims/update_edge_src_dst_property.cuh" #include +#include #include #include #include -#include - -#include #include @@ -45,7 +43,11 @@ namespace cugraph { -template +template edge_t remove_overcompensating_edges(raft::handle_t const& handle, size_t buffer_size, EdgeIterator potential_closing_or_incoming_edges, @@ -67,13 +69,15 @@ edge_t remove_overcompensating_edges(raft::handle_t const& handle, invalid_last = thrust::make_zip_iterator(invalid_edgelist_srcs.end(), invalid_edgelist_dsts.end())] __device__(auto e) { auto potential_edge = thrust::get<0>(e); - auto potential_or_incoming_edge = thrust::make_tuple(thrust::get<0>(potential_edge), thrust::get<1>(potential_edge)); + auto potential_or_incoming_edge = + thrust::make_tuple(thrust::get<0>(potential_edge), thrust::get<1>(potential_edge)); if constexpr (is_p_q_edge) { - potential_or_incoming_edge = thrust::make_tuple(thrust::get<1>(potential_edge), thrust::get<0>(potential_edge)); + potential_or_incoming_edge = + thrust::make_tuple(thrust::get<1>(potential_edge), thrust::get<0>(potential_edge)); }; - - auto itr = thrust::lower_bound( - thrust::seq, invalid_first, invalid_last, potential_or_incoming_edge); + + auto itr = + thrust::lower_bound(thrust::seq, invalid_first, invalid_last, potential_or_incoming_edge); return (itr != invalid_last && *itr == potential_or_incoming_edge); }); @@ -91,7 +95,8 @@ struct extract_weak_edges { vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) const { return count < k - 2 - ? thrust::optional>{thrust::make_tuple(src, dst, count)} + ? 
thrust::optional>{thrust::make_tuple( + src, dst, count)} : thrust::nullopt; } }; @@ -99,30 +104,25 @@ struct extract_weak_edges { template struct extract_edges { __device__ thrust::optional> operator()( - + auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) const { return thrust::make_tuple(src, dst, count); } }; - template struct extract_edges_to_q_r { - raft::device_span vertex_q_r{}; __device__ thrust::optional> operator()( - - auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const + + auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const { // FIXME: Replace by lowerbound after validation - auto itr_src = thrust::find( - thrust::seq, vertex_q_r.begin(), vertex_q_r.end(), src); + auto itr_src = thrust::find(thrust::seq, vertex_q_r.begin(), vertex_q_r.end(), src); // FIXME: Replace by lowerbound after validation - auto itr_dst = thrust::find( - thrust::seq, vertex_q_r.begin(), vertex_q_r.end(), dst); - + auto itr_dst = thrust::find(thrust::seq, vertex_q_r.begin(), vertex_q_r.end(), dst); if (itr_src != vertex_q_r.end() && *itr_src == src) { return thrust::optional>{thrust::make_tuple(src, dst)}; @@ -231,8 +231,7 @@ struct extract_q_idx_closing { thrust::nullopt_t) const { edge_t idx = thrust::get<1>(tagged_src); - if (dst == weak_edgelist_dsts[idx]){ - } + if (dst == weak_edgelist_dsts[idx]) {} return dst == weak_edgelist_dsts[idx] ? 
thrust::make_optional(thrust::make_tuple(thrust::get<0>(tagged_src), idx)) : thrust::nullopt; @@ -252,7 +251,7 @@ struct generate_p_q { auto itr = thrust::upper_bound( thrust::seq, intersection_offsets.begin() + 1, intersection_offsets.end(), i); auto idx = thrust::distance(intersection_offsets.begin() + 1, itr); - + return thrust::make_tuple(invalid_srcs[chunk_start + idx], invalid_dsts[chunk_start + idx]); } }; @@ -276,51 +275,53 @@ struct generate_p_q_q_r { __device__ thrust::tuple operator()(edge_t i) const { - if constexpr (generate_p_q) { - return thrust::make_tuple(thrust::get<0>(*(invalid_edge + invalid_edge_idx[i])), q_closing[i]); + return thrust::make_tuple(thrust::get<0>(*(invalid_edge + invalid_edge_idx[i])), + q_closing[i]); } else { - return thrust::make_tuple(q_closing[i], thrust::get<1>(*(invalid_edge + invalid_edge_idx[i]))); + return thrust::make_tuple(q_closing[i], + thrust::get<1>(*(invalid_edge + invalid_edge_idx[i]))); } } }; template -void update_count(raft::handle_t const& handle, - graph_view_t & cur_graph_view, - edge_property_t, edge_t> & e_property_triangle_count, - edge_property_t, bool> const & tmp_edge_mask, - raft::device_span vertex_pair_buffer_src, - raft::device_span vertex_pair_buffer_dst - ) { - +void update_count( + raft::handle_t const& handle, + graph_view_t& cur_graph_view, + edge_property_t, edge_t>& + e_property_triangle_count, + edge_property_t, bool> const& tmp_edge_mask, + raft::device_span vertex_pair_buffer_src, + raft::device_span vertex_pair_buffer_dst) +{ // FIXME: Only for debugging so remove after auto& comm = handle.get_comms(); auto const comm_rank = comm.get_rank(); - auto vertex_pair_buffer_begin = thrust::make_zip_iterator(vertex_pair_buffer_src.begin(), vertex_pair_buffer_dst.begin()); - + auto vertex_pair_buffer_begin = + thrust::make_zip_iterator(vertex_pair_buffer_src.begin(), vertex_pair_buffer_dst.begin()); + thrust::sort(handle.get_thrust_policy(), vertex_pair_buffer_begin, vertex_pair_buffer_begin + 
vertex_pair_buffer_src.size()); - - auto unique_pair_count = thrust::unique_count(handle.get_thrust_policy(), - vertex_pair_buffer_begin, - vertex_pair_buffer_begin + vertex_pair_buffer_src.size()); - + + auto unique_pair_count = + thrust::unique_count(handle.get_thrust_policy(), + vertex_pair_buffer_begin, + vertex_pair_buffer_begin + vertex_pair_buffer_src.size()); + rmm::device_uvector decrease_count(unique_pair_count, handle.get_stream()); rmm::device_uvector decrease_count_tmp(vertex_pair_buffer_src.size(), handle.get_stream()); - - thrust::fill(handle.get_thrust_policy(), - decrease_count_tmp.begin(), - decrease_count_tmp.end(), - size_t{1}); - + + thrust::fill( + handle.get_thrust_policy(), decrease_count_tmp.begin(), decrease_count_tmp.end(), size_t{1}); + auto vertex_pair_buffer_unique = allocate_dataframe_buffer>( - unique_pair_count, handle.get_stream()); - + unique_pair_count, handle.get_stream()); + thrust::reduce_by_key(handle.get_thrust_policy(), vertex_pair_buffer_begin, vertex_pair_buffer_begin + vertex_pair_buffer_src.size(), @@ -329,9 +330,8 @@ void update_count(raft::handle_t const& handle, decrease_count.begin(), thrust::equal_to>{}); - cugraph::edge_bucket_t edges_to_decrement_count(handle); - edges_to_decrement_count.insert(std::get<0>(vertex_pair_buffer_unique).begin(), + edges_to_decrement_count.insert(std::get<0>(vertex_pair_buffer_unique).begin(), std::get<0>(vertex_pair_buffer_unique).end(), std::get<1>(vertex_pair_buffer_unique).begin()); @@ -342,316 +342,314 @@ void update_count(raft::handle_t const& handle, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), e_property_triangle_count.view(), - [ - vertex_pair_buffer_begin = get_dataframe_buffer_begin(vertex_pair_buffer_unique), - vertex_pair_buffer_end = get_dataframe_buffer_end(vertex_pair_buffer_unique), - decrease_count = decrease_count.data() - ] - __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { - + 
[vertex_pair_buffer_begin = get_dataframe_buffer_begin(vertex_pair_buffer_unique), + vertex_pair_buffer_end = get_dataframe_buffer_end(vertex_pair_buffer_unique), + decrease_count = decrease_count.data()] __device__(auto src, + auto dst, + thrust::nullopt_t, + thrust::nullopt_t, + edge_t count) { auto e = thrust::make_tuple(src, dst); - auto itr_pair = thrust::lower_bound( - thrust::seq, vertex_pair_buffer_begin, vertex_pair_buffer_end, e); - + auto itr_pair = + thrust::lower_bound(thrust::seq, vertex_pair_buffer_begin, vertex_pair_buffer_end, e); + if ((itr_pair != vertex_pair_buffer_end) && (*itr_pair == e)) { auto idx_pair = thrust::distance(vertex_pair_buffer_begin, itr_pair); return count - decrease_count[idx_pair]; - } - - return count; + } + return count; }, e_property_triangle_count.mutable_view(), true); }; -template -void find_unroll_p_q_q_r_edges(raft::handle_t const& handle, - graph_view_t & cur_graph_view, - optional_graph_view_t const & graph_q_r, - edge_property_t, edge_t> & e_property_triangle_count, - edge_property_t, bool> & tmp_edge_mask, - raft::device_span weak_edgelist_srcs, - raft::device_span weak_edgelist_dsts, - std::optional> renumber_map, - bool do_expensive_check - ) { - +template +void find_unroll_p_q_q_r_edges( + raft::handle_t const& handle, + graph_view_t& cur_graph_view, + optional_graph_view_t const& graph_q_r, + edge_property_t, edge_t>& + e_property_triangle_count, + edge_property_t, bool>& tmp_edge_mask, + raft::device_span weak_edgelist_srcs, + raft::device_span weak_edgelist_dsts, + std::optional> renumber_map, + bool do_expensive_check) +{ size_t prev_chunk_size = 0; size_t chunk_num_invalid_edges = weak_edgelist_srcs.size(); size_t edges_to_intersect_per_iteration = - static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); + static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); auto num_chunks = raft::div_rounding_up_safe(weak_edgelist_srcs.size(), 
edges_to_intersect_per_iteration); - - auto invalid_edge_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + + auto invalid_edge_first = + thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); for (size_t i = 0; i < num_chunks; ++i) { - auto chunk_size = std::min(edges_to_intersect_per_iteration, chunk_num_invalid_edges); - rmm::device_uvector intersection_offsets(0, handle.get_stream()); - rmm::device_uvector intersection_indices(0, handle.get_stream()); + auto chunk_size = std::min(edges_to_intersect_per_iteration, chunk_num_invalid_edges); + rmm::device_uvector intersection_offsets(0, handle.get_stream()); + rmm::device_uvector intersection_indices(0, handle.get_stream()); + + if constexpr (is_p_q_edge) { + std::tie(intersection_offsets, intersection_indices) = + detail::nbr_intersection(handle, + cur_graph_view, + cugraph::edge_dummy_property_t{}.view(), + invalid_edge_first + prev_chunk_size, + invalid_edge_first + prev_chunk_size + chunk_size, + std::array{true, true}, + // do_expensive_check : FIXME + true); + } else { + std::tie(intersection_offsets, intersection_indices) = + detail::nbr_intersection(handle, + (*graph_q_r).view(), + cugraph::edge_dummy_property_t{}.view(), + invalid_edge_first + prev_chunk_size, + invalid_edge_first + prev_chunk_size + chunk_size, + std::array{true, true}, + // do_expensive_check : FIXME + true); + } - if constexpr (is_p_q_edge) { - std::tie(intersection_offsets, intersection_indices) = - detail::nbr_intersection(handle, - cur_graph_view, - cugraph::edge_dummy_property_t{}.view(), - invalid_edge_first + prev_chunk_size, - invalid_edge_first + prev_chunk_size + chunk_size, - std::array{true, true}, - //do_expensive_check : FIXME - true); - } else { - std::tie(intersection_offsets, intersection_indices) = - detail::nbr_intersection(handle, - (*graph_q_r).view(), - cugraph::edge_dummy_property_t{}.view(), - invalid_edge_first + prev_chunk_size, - 
invalid_edge_first + prev_chunk_size + chunk_size, - std::array{true, true}, - //do_expensive_check : FIXME - true); + // Generate (p, q) edges + // FIXME: Should this array be reduced? an edge can have an intersection size > 1 + auto vertex_pair_buffer_p_q = allocate_dataframe_buffer>( + intersection_indices.size(), handle.get_stream()); + + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q), + get_dataframe_buffer_end(vertex_pair_buffer_p_q), + generate_p_q{ + prev_chunk_size, + raft::device_span(intersection_offsets.data(), intersection_offsets.size()), + raft::device_span(intersection_indices.data(), intersection_indices.size()), + weak_edgelist_srcs, + weak_edgelist_dsts}); + + auto vertex_pair_buffer_p_r_edge_p_q = + allocate_dataframe_buffer>(intersection_indices.size(), + handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), + get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q), + generate_p_r_or_q_r_from_p_q{ + prev_chunk_size, + raft::device_span(intersection_offsets.data(), intersection_offsets.size()), + raft::device_span(intersection_indices.data(), intersection_indices.size()), + weak_edgelist_srcs, + weak_edgelist_dsts}); + + auto vertex_pair_buffer_q_r_edge_p_q = + allocate_dataframe_buffer>(intersection_indices.size(), + handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), + get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q), + generate_p_r_or_q_r_from_p_q{ + prev_chunk_size, + raft::device_span(intersection_offsets.data(), intersection_offsets.size()), + raft::device_span(intersection_indices.data(), intersection_indices.size()), + weak_edgelist_srcs, + weak_edgelist_dsts}); + + if constexpr (!is_p_q_edge) { + if constexpr (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); // FIXME: for 
debugging + // Get global weak_edgelist + auto global_weak_edgelist_srcs = cugraph::detail::device_allgatherv( + handle, comm, raft::device_span(weak_edgelist_srcs)); + + auto global_weak_edgelist_dsts = cugraph::detail::device_allgatherv( + handle, comm, raft::device_span(weak_edgelist_dsts)); + + weak_edgelist_srcs = raft::device_span(global_weak_edgelist_srcs.data(), + global_weak_edgelist_srcs.size()); + weak_edgelist_dsts = raft::device_span(global_weak_edgelist_dsts.data(), + global_weak_edgelist_dsts.size()); + + // Sort the weak edges if they are not already + auto invalid_edgelist = + thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + thrust::sort(handle.get_thrust_policy(), + invalid_edge_first, + invalid_edge_first + weak_edgelist_srcs.size()); } - - // Generate (p, q) edges - // FIXME: Should this array be reduced? an edge can have an intersection size > 1 - auto vertex_pair_buffer_p_q = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); - + } + + if constexpr (is_p_q_edge) { + auto num_edges_not_overcomp = + remove_overcompensating_edges( + handle, + intersection_indices.size(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); + + resize_dataframe_buffer( + vertex_pair_buffer_p_r_edge_p_q, num_edges_not_overcomp, handle.get_stream()); + resize_dataframe_buffer( + vertex_pair_buffer_q_r_edge_p_q, num_edges_not_overcomp, handle.get_stream()); + + // resize initial (q, r) edges + resize_dataframe_buffer(vertex_pair_buffer_p_q, num_edges_not_overcomp, handle.get_stream()); + // Reconstruct (q, r) edges that didn't already have their count updated thrust::tabulate( handle.get_thrust_policy(), get_dataframe_buffer_begin(vertex_pair_buffer_p_q), 
get_dataframe_buffer_end(vertex_pair_buffer_p_q), - generate_p_q{ - prev_chunk_size, - raft::device_span(intersection_offsets.data(), - intersection_offsets.size()), - raft::device_span(intersection_indices.data(), - intersection_indices.size()), - weak_edgelist_srcs, - weak_edgelist_dsts - }); - - auto vertex_pair_buffer_p_r_edge_p_q = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), - get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q), - generate_p_r_or_q_r_from_p_q{ - prev_chunk_size, - raft::device_span(intersection_offsets.data(), - intersection_offsets.size()), - raft::device_span(intersection_indices.data(), - intersection_indices.size()), - weak_edgelist_srcs, - weak_edgelist_dsts}); - - auto vertex_pair_buffer_q_r_edge_p_q = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), - get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q), - generate_p_r_or_q_r_from_p_q{ - prev_chunk_size, - raft::device_span(intersection_offsets.data(), - intersection_offsets.size()), - raft::device_span(intersection_indices.data(), - intersection_indices.size()), - weak_edgelist_srcs, - weak_edgelist_dsts}); - - if constexpr (!is_p_q_edge) { - if constexpr (multi_gpu) { - auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); // FIXME: for debugging - // Get global weak_edgelist - auto global_weak_edgelist_srcs = cugraph::detail::device_allgatherv( - handle, - comm, - raft::device_span(weak_edgelist_srcs)); - - auto global_weak_edgelist_dsts = cugraph::detail::device_allgatherv( - handle, - comm, - raft::device_span(weak_edgelist_dsts)); - - weak_edgelist_srcs = raft::device_span(global_weak_edgelist_srcs.data(), global_weak_edgelist_srcs.size()); - 
weak_edgelist_dsts = raft::device_span(global_weak_edgelist_dsts.data(), global_weak_edgelist_dsts.size()); - - // Sort the weak edges if they are not already - auto invalid_edgelist = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); - thrust::sort(handle.get_thrust_policy(), - invalid_edge_first, - invalid_edge_first + weak_edgelist_srcs.size()); - - } + [vertex_pair_buffer_p_r_edge_p_q = + get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), + vertex_pair_buffer_q_r_edge_p_q = + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q)] __device__(auto i) { + return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_r_edge_p_q[i]), + thrust::get<0>(vertex_pair_buffer_q_r_edge_p_q[i])); + }); + } - } + // Shuffle edges + if constexpr (multi_gpu) { + if constexpr (is_q_r_edge) { + auto vertex_partition_range_lasts = std::make_optional>( + (*graph_q_r).view().vertex_partition_range_lasts()); - if constexpr (is_p_q_edge) { - auto num_edges_not_overcomp = - remove_overcompensating_edges( - handle, - intersection_indices.size(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()) - ); - - resize_dataframe_buffer(vertex_pair_buffer_p_r_edge_p_q, num_edges_not_overcomp, handle.get_stream()); - resize_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_q, num_edges_not_overcomp, handle.get_stream()); - - // resize initial (q, r) edges - resize_dataframe_buffer(vertex_pair_buffer_p_q, num_edges_not_overcomp, handle.get_stream()); - // Reconstruct (q, r) edges that didn't already have their count updated - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q), - get_dataframe_buffer_end(vertex_pair_buffer_p_q), - [ - vertex_pair_buffer_p_r_edge_p_q = 
get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), - vertex_pair_buffer_q_r_edge_p_q = get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q) - ] __device__(auto i) { - return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_r_edge_p_q[i]), thrust::get<0>(vertex_pair_buffer_q_r_edge_p_q[i])); - }); - } + unrenumber_int_vertices( + handle, + std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), + std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size(), + (*renumber_map).data(), + *vertex_partition_range_lasts, + true); - // Shuffle edges - if constexpr (multi_gpu) { - if constexpr (is_q_r_edge) { - - auto vertex_partition_range_lasts = std::make_optional>((*graph_q_r).view().vertex_partition_range_lasts()); - - unrenumber_int_vertices(handle, - std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), - std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size(), - (*renumber_map).data(), - *vertex_partition_range_lasts, - true); - - unrenumber_int_vertices(handle, - std::get<1>(vertex_pair_buffer_p_r_edge_p_q).data(), - std::get<1>(vertex_pair_buffer_p_r_edge_p_q).size(), - (*renumber_map).data(), - *vertex_partition_range_lasts, - true); - - unrenumber_int_vertices(handle, - std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), - std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size(), - (*renumber_map).data(), - *vertex_partition_range_lasts, - true); - - unrenumber_int_vertices(handle, - std::get<1>(vertex_pair_buffer_q_r_edge_p_q).data(), - std::get<1>(vertex_pair_buffer_q_r_edge_p_q).size(), - (*renumber_map).data(), - *vertex_partition_range_lasts, - true); - - unrenumber_int_vertices(handle, - std::get<0>(vertex_pair_buffer_p_q).data(), - std::get<0>(vertex_pair_buffer_p_q).size(), - (*renumber_map).data(), - *vertex_partition_range_lasts, - true); - - unrenumber_int_vertices(handle, - std::get<1>(vertex_pair_buffer_p_q).data(), - std::get<1>(vertex_pair_buffer_p_q).size(), - (*renumber_map).data(), - *vertex_partition_range_lasts, - true); - - } + 
unrenumber_int_vertices( + handle, + std::get<1>(vertex_pair_buffer_p_r_edge_p_q).data(), + std::get<1>(vertex_pair_buffer_p_r_edge_p_q).size(), + (*renumber_map).data(), + *vertex_partition_range_lasts, + true); - rmm::device_uvector pair_p_q_srcs(0, handle.get_stream()); - rmm::device_uvector pair_p_q_dsts(0, handle.get_stream()); - rmm::device_uvector pair_p_r_srcs(0, handle.get_stream()); - rmm::device_uvector pair_p_r_dsts(0, handle.get_stream()); - rmm::device_uvector pair_q_r_srcs(0, handle.get_stream()); - rmm::device_uvector pair_q_r_dsts(0, handle.get_stream()); + unrenumber_int_vertices( + handle, + std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), + std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size(), + (*renumber_map).data(), + *vertex_partition_range_lasts, + true); + + unrenumber_int_vertices( + handle, + std::get<1>(vertex_pair_buffer_q_r_edge_p_q).data(), + std::get<1>(vertex_pair_buffer_q_r_edge_p_q).size(), + (*renumber_map).data(), + *vertex_partition_range_lasts, + true); + + unrenumber_int_vertices(handle, + std::get<0>(vertex_pair_buffer_p_q).data(), + std::get<0>(vertex_pair_buffer_p_q).size(), + (*renumber_map).data(), + *vertex_partition_range_lasts, + true); + + unrenumber_int_vertices(handle, + std::get<1>(vertex_pair_buffer_p_q).data(), + std::get<1>(vertex_pair_buffer_p_q).size(), + (*renumber_map).data(), + *vertex_partition_range_lasts, + true); + } + + rmm::device_uvector pair_p_q_srcs(0, handle.get_stream()); + rmm::device_uvector pair_p_q_dsts(0, handle.get_stream()); + rmm::device_uvector pair_p_r_srcs(0, handle.get_stream()); + rmm::device_uvector pair_p_r_dsts(0, handle.get_stream()); + rmm::device_uvector pair_q_r_srcs(0, handle.get_stream()); + rmm::device_uvector pair_q_r_dsts(0, handle.get_stream()); std::tie(pair_p_q_srcs, pair_p_q_dsts, std::ignore, std::ignore, std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<0>(vertex_pair_buffer_p_q)), - 
std::move(std::get<1>(vertex_pair_buffer_p_q)), - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); - - if constexpr (is_p_q_edge) { + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<0>(vertex_pair_buffer_p_q)), + std::move(std::get<1>(vertex_pair_buffer_p_q)), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + if constexpr (is_p_q_edge) { std::tie(pair_p_r_srcs, pair_p_r_dsts, std::ignore, std::ignore, std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<0>(vertex_pair_buffer_p_r_edge_p_q)), - std::move(std::get<1>(vertex_pair_buffer_p_r_edge_p_q)), - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); - + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<0>(vertex_pair_buffer_p_r_edge_p_q)), + std::move(std::get<1>(vertex_pair_buffer_p_r_edge_p_q)), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + std::tie(pair_q_r_srcs, pair_q_r_dsts, std::ignore, std::ignore, std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<0>(vertex_pair_buffer_q_r_edge_p_q)), - std::move(std::get<1>(vertex_pair_buffer_q_r_edge_p_q)), - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<0>(vertex_pair_buffer_q_r_edge_p_q)), + std::move(std::get<1>(vertex_pair_buffer_q_r_edge_p_q)), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); } else { - std::tie(pair_p_r_srcs, pair_p_r_dsts, std::ignore, std::ignore, std::ignore) = - 
detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<1>(vertex_pair_buffer_p_r_edge_p_q)), - std::move(std::get<0>(vertex_pair_buffer_p_r_edge_p_q)), - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<1>(vertex_pair_buffer_p_r_edge_p_q)), + std::move(std::get<0>(vertex_pair_buffer_p_r_edge_p_q)), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); std::tie(pair_q_r_srcs, pair_q_r_dsts, std::ignore, std::ignore, std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<1>(vertex_pair_buffer_q_r_edge_p_q)), - std::move(std::get<0>(vertex_pair_buffer_q_r_edge_p_q)), - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); - + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<1>(vertex_pair_buffer_q_r_edge_p_q)), + std::move(std::get<0>(vertex_pair_buffer_q_r_edge_p_q)), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); } update_count( @@ -663,78 +661,79 @@ void find_unroll_p_q_q_r_edges(raft::handle_t const& handle, raft::device_span(pair_p_q_dsts.data(), pair_p_q_dsts.size())); update_count( - handle, - cur_graph_view, - e_property_triangle_count, - tmp_edge_mask, - raft::device_span(pair_p_r_srcs.data(), pair_p_r_srcs.size()), - raft::device_span(pair_p_r_dsts.data(), pair_p_r_dsts.size()) - ); + handle, + cur_graph_view, + e_property_triangle_count, + tmp_edge_mask, + raft::device_span(pair_p_r_srcs.data(), pair_p_r_srcs.size()), + raft::device_span(pair_p_r_dsts.data(), pair_p_r_dsts.size())); update_count( - handle, - cur_graph_view, - e_property_triangle_count, - tmp_edge_mask, - 
raft::device_span(pair_q_r_srcs.data(), pair_q_r_srcs.size()), - raft::device_span(pair_q_r_dsts.data(), pair_q_r_dsts.size()) - ); + handle, + cur_graph_view, + e_property_triangle_count, + tmp_edge_mask, + raft::device_span(pair_q_r_srcs.data(), pair_q_r_srcs.size()), + raft::device_span(pair_q_r_dsts.data(), pair_q_r_dsts.size())); - } else { - update_count( + } else { + update_count( handle, cur_graph_view, e_property_triangle_count, tmp_edge_mask, - raft::device_span(std::get<0>(vertex_pair_buffer_p_q).data(), std::get<0>(vertex_pair_buffer_p_q).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_p_q).data(), std::get<1>(vertex_pair_buffer_p_q).size()) - ); - + raft::device_span(std::get<0>(vertex_pair_buffer_p_q).data(), + std::get<0>(vertex_pair_buffer_p_q).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_p_q).data(), + std::get<1>(vertex_pair_buffer_p_q).size())); + if constexpr (is_p_q_edge) { update_count( handle, cur_graph_view, e_property_triangle_count, tmp_edge_mask, - raft::device_span(std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_p_r_edge_p_q).size()) - ); + raft::device_span(std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), + std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_p_r_edge_p_q).data(), + std::get<1>(vertex_pair_buffer_p_r_edge_p_q).size())); } else { update_count( handle, cur_graph_view, e_property_triangle_count, tmp_edge_mask, - raft::device_span(std::get<1>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_p_r_edge_p_q).size()), - raft::device_span(std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size()) - ); + raft::device_span(std::get<1>(vertex_pair_buffer_p_r_edge_p_q).data(), + 
std::get<1>(vertex_pair_buffer_p_r_edge_p_q).size()), + raft::device_span(std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), + std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size())); } - + if constexpr (is_p_q_edge) { update_count( handle, cur_graph_view, e_property_triangle_count, tmp_edge_mask, - raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_q_r_edge_p_q).size()) - ); + raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), + std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_q_r_edge_p_q).data(), + std::get<1>(vertex_pair_buffer_q_r_edge_p_q).size())); } else { update_count( handle, cur_graph_view, e_property_triangle_count, tmp_edge_mask, - raft::device_span(std::get<1>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_q_r_edge_p_q).size()), - raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size()) - ); - } - + raft::device_span(std::get<1>(vertex_pair_buffer_q_r_edge_p_q).data(), + std::get<1>(vertex_pair_buffer_q_r_edge_p_q).size()), + raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), + std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size())); } - - prev_chunk_size += chunk_size; - chunk_num_invalid_edges -= chunk_size; } + prev_chunk_size += chunk_size; + chunk_num_invalid_edges -= chunk_size; + } } } // namespace @@ -942,9 +941,11 @@ k_truss(raft::handle_t const& handle, { auto cur_graph_view = modified_graph_view ? 
*modified_graph_view : graph_view; - auto e_property_triangle_count = edge_triangle_count(handle, cur_graph_view); + auto e_property_triangle_count = + edge_triangle_count(handle, cur_graph_view); - cugraph::edge_property_t, bool> tmp_edge_mask(handle, cur_graph_view); + cugraph::edge_property_t, bool> tmp_edge_mask( + handle, cur_graph_view); cugraph::fill_edge_property(handle, cur_graph_view, true, tmp_edge_mask); cugraph::edge_property_t edge_mask(handle, cur_graph_view); @@ -952,46 +953,49 @@ k_truss(raft::handle_t const& handle, while (true) { // FIXME: Keep it at 1 iteration for debugging - + // extract the edges that have counts less than k - 2. Those edges will be unrolled - // FIXME: extracting 'triangle_count' is not required here. - auto [weak_edgelist_srcs, weak_edgelist_dsts, triangle_count] = extract_transform_e(handle, - cur_graph_view, - edge_src_dummy_property_t{}.view(), - edge_dst_dummy_property_t{}.view(), - e_property_triangle_count.view(), - extract_weak_edges{k}); - + // FIXME: extracting 'triangle_count' is not required here. + auto [weak_edgelist_srcs, weak_edgelist_dsts, triangle_count] = + extract_transform_e(handle, + cur_graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + e_property_triangle_count.view(), + extract_weak_edges{k}); + if constexpr (multi_gpu) { - if (num_invalid_edges == 0) { - done = 0; - } + if (num_invalid_edges == 0) { done = 0; } done = host_scalar_allreduce( handle.get_comms(), done, raft::comms::op_t::MAX, handle.get_stream()); - if (done == 0) { - break; - } - + if (done == 0) { break; } + } else if (weak_edgelist_srcs.size() == 0) { break; } - if (num_invalid_edges == 0 && done == 0) { - break; } + if (num_invalid_edges == 0 && done == 0) { break; } // FIXME: Add a flag checking wether the other ranks have completed their task or // not before exiting. 
- auto invalid_edge_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + auto invalid_edge_first = + thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); thrust::sort(handle.get_thrust_policy(), - invalid_edge_first, - invalid_edge_first + weak_edgelist_srcs.size()); - + invalid_edge_first, + invalid_edge_first + weak_edgelist_srcs.size()); + + edge_property_t modified_triangle_count(handle, + cur_graph_view); - edge_property_t modified_triangle_count(handle, cur_graph_view); - std::optional> dummy_graph{std::nullopt}; - find_unroll_p_q_q_r_edges( + find_unroll_p_q_q_r_edges( handle, cur_graph_view, dummy_graph, @@ -1000,127 +1004,136 @@ k_truss(raft::handle_t const& handle, raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), std::nullopt, - do_expensive_check - ); + do_expensive_check); - auto [srcs, dsts, count] = extract_transform_e(handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - e_property_triangle_count.view(), - extract_edges{}); + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + e_property_triangle_count.view(), + extract_edges{}); // FIXME: memory footprint overhead rmm::device_uvector vertex_q_r(weak_edgelist_srcs.size() * 2, handle.get_stream()); - + // Iterate over unique vertices that appear as either q or r // FIXME: Reduce 'weak_edgelist_srcs' and 'weak_edgelist_srcs' before calling 'set_union' thrust::set_union(handle.get_thrust_policy(), - weak_edgelist_srcs.begin(), - weak_edgelist_srcs.end(), - weak_edgelist_dsts.begin(), - weak_edgelist_dsts.end(), - vertex_q_r.begin()); + weak_edgelist_srcs.begin(), + weak_edgelist_srcs.end(), + weak_edgelist_dsts.begin(), + weak_edgelist_dsts.end(), + vertex_q_r.begin()); thrust::sort(handle.get_thrust_policy(), 
vertex_q_r.begin(), vertex_q_r.end()); - auto invalid_unique_v_end = thrust::unique( - handle.get_thrust_policy(), - vertex_q_r.begin(), - vertex_q_r.end()); - - - vertex_q_r.resize(thrust::distance(vertex_q_r.begin(), invalid_unique_v_end), handle.get_stream()); + auto invalid_unique_v_end = + thrust::unique(handle.get_thrust_policy(), vertex_q_r.begin(), vertex_q_r.end()); - auto invalid_edgelist = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + vertex_q_r.resize(thrust::distance(vertex_q_r.begin(), invalid_unique_v_end), + handle.get_stream()); - auto [srcs_to_q_r, dsts_to_q_r] = extract_transform_e(handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - extract_edges_to_q_r{raft::device_span(vertex_q_r.data(), vertex_q_r.size())}); + auto invalid_edgelist = + thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + auto [srcs_to_q_r, dsts_to_q_r] = + extract_transform_e(handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + extract_edges_to_q_r{raft::device_span( + vertex_q_r.data(), vertex_q_r.size())}); rmm::device_uvector cp_weak_edgelist_srcs(0, handle.get_stream()); rmm::device_uvector cp_weak_edgelist_dsts(0, handle.get_stream()); - - - if constexpr (multi_gpu) { std::tie(dsts_to_q_r, srcs_to_q_r, std::ignore, std::ignore, std::ignore) = detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, std::move(dsts_to_q_r), std::move(srcs_to_q_r), std::nullopt, std::nullopt, std::nullopt); - + edge_t, + weight_t, + int32_t>( + handle, + std::move(dsts_to_q_r), + std::move(srcs_to_q_r), + std::nullopt, + std::nullopt, + std::nullopt); } std::optional> graph_q_r{std::nullopt}; - std::optional> renumber_map_q_r{std::nullopt}; - std::tie(*graph_q_r, 
std::ignore, std::ignore, std::ignore, renumber_map_q_r) = - create_graph_from_edgelist( - handle, - std::nullopt, - std::move(dsts_to_q_r), - std::move(srcs_to_q_r), - std::nullopt, - std::nullopt, - std::nullopt, - cugraph::graph_properties_t{true, graph_view.is_multigraph()}, - true); + std::optional> renumber_map_q_r{std::nullopt}; + std::tie(*graph_q_r, std::ignore, std::ignore, std::ignore, renumber_map_q_r) = + create_graph_from_edgelist( + handle, + std::nullopt, + std::move(dsts_to_q_r), + std::move(srcs_to_q_r), + std::nullopt, + std::nullopt, + std::nullopt, + cugraph::graph_properties_t{true, graph_view.is_multigraph()}, + true); if constexpr (multi_gpu) { - cp_weak_edgelist_srcs.resize(weak_edgelist_srcs.size(), handle.get_stream()); cp_weak_edgelist_dsts.resize(weak_edgelist_dsts.size(), handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - thrust::make_zip_iterator( - weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()), - thrust::make_zip_iterator( - weak_edgelist_srcs.end(), weak_edgelist_dsts.end()), - thrust::make_zip_iterator( - cp_weak_edgelist_srcs.begin(), cp_weak_edgelist_dsts.begin() - )); - + thrust::copy( + handle.get_thrust_policy(), + thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()), + thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end()), + thrust::make_zip_iterator(cp_weak_edgelist_srcs.begin(), cp_weak_edgelist_dsts.begin())); + rmm::device_uvector shuffled_weak_edgelist_srcs{0, handle.get_stream()}; rmm::device_uvector shuffled_weak_edgelist_dsts{0, handle.get_stream()}; - std::tie(cp_weak_edgelist_srcs, cp_weak_edgelist_dsts, std::ignore, std::ignore, std::ignore) = + std::tie( + cp_weak_edgelist_srcs, cp_weak_edgelist_dsts, std::ignore, std::ignore, std::ignore) = detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, std::move(cp_weak_edgelist_srcs), std::move(cp_weak_edgelist_dsts), std::nullopt, std::nullopt, 
std::nullopt); - - renumber_ext_vertices(handle, - cp_weak_edgelist_srcs.data(), - cp_weak_edgelist_srcs.size(), - (*renumber_map_q_r).data(), - (*graph_q_r).view().local_vertex_partition_range_first(), - (*graph_q_r).view().local_vertex_partition_range_last(), - true); - - renumber_ext_vertices(handle, - cp_weak_edgelist_dsts.data(), - cp_weak_edgelist_dsts.size(), - (*renumber_map_q_r).data(), - (*graph_q_r).view().local_vertex_partition_range_first(), - (*graph_q_r).view().local_vertex_partition_range_last(), - true); + edge_t, + weight_t, + int32_t>( + handle, + std::move(cp_weak_edgelist_srcs), + std::move(cp_weak_edgelist_dsts), + std::nullopt, + std::nullopt, + std::nullopt); + renumber_ext_vertices( + handle, + cp_weak_edgelist_srcs.data(), + cp_weak_edgelist_srcs.size(), + (*renumber_map_q_r).data(), + (*graph_q_r).view().local_vertex_partition_range_first(), + (*graph_q_r).view().local_vertex_partition_range_last(), + true); + + renumber_ext_vertices( + handle, + cp_weak_edgelist_dsts.data(), + cp_weak_edgelist_dsts.size(), + (*renumber_map_q_r).data(), + (*graph_q_r).view().local_vertex_partition_range_first(), + (*graph_q_r).view().local_vertex_partition_range_last(), + true); } - invalid_edge_first = thrust::make_zip_iterator(cp_weak_edgelist_srcs.begin(), cp_weak_edgelist_dsts.begin()); + invalid_edge_first = + thrust::make_zip_iterator(cp_weak_edgelist_srcs.begin(), cp_weak_edgelist_dsts.begin()); thrust::sort(handle.get_thrust_policy(), - invalid_edge_first, - invalid_edge_first + cp_weak_edgelist_srcs.size()); - - find_unroll_p_q_q_r_edges( + invalid_edge_first, + invalid_edge_first + cp_weak_edgelist_srcs.size()); + + find_unroll_p_q_q_r_edges( handle, cur_graph_view, graph_q_r, @@ -1129,363 +1142,355 @@ k_truss(raft::handle_t const& handle, raft::device_span(cp_weak_edgelist_srcs.data(), cp_weak_edgelist_srcs.size()), raft::device_span(cp_weak_edgelist_dsts.data(), cp_weak_edgelist_dsts.size()), std::move(renumber_map_q_r), - do_expensive_check - 
); - - + do_expensive_check); // Unrolling p, r edges // create pair invalid_src, invalid_edge_idx // create a dataframe buffer of size invalid_edge_size // FIXME: No need to create a dataframe buffer. We can just zip weak_edgelist_srcs - // with a vector counting from 0 .. - auto vertex_pair_buffer_p_tag = - allocate_dataframe_buffer>(weak_edgelist_srcs.size(), - handle.get_stream()); - - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_tag), - get_dataframe_buffer_end(vertex_pair_buffer_p_tag), - [ - p = weak_edgelist_srcs.begin() - ] __device__(auto idx) { - return thrust::make_tuple(p[idx], idx); - }); - + // with a vector counting from 0 .. + auto vertex_pair_buffer_p_tag = allocate_dataframe_buffer>( + weak_edgelist_srcs.size(), handle.get_stream()); + + thrust::tabulate(handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_tag), + get_dataframe_buffer_end(vertex_pair_buffer_p_tag), + [p = weak_edgelist_srcs.begin()] __device__(auto idx) { + return thrust::make_tuple(p[idx], idx); + }); + vertex_frontier_t vertex_frontier(handle, 1); vertex_frontier.bucket(0).insert( - thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).begin(), std::get<1>(vertex_pair_buffer_p_tag).begin()), - thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).end(), std::get<1>(vertex_pair_buffer_p_tag).end())); + thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).begin(), + std::get<1>(vertex_pair_buffer_p_tag).begin()), + thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).end(), + std::get<1>(vertex_pair_buffer_p_tag).end())); + + auto [q, idx] = cugraph::extract_transform_v_frontier_outgoing_e( + handle, + cur_graph_view, + vertex_frontier.bucket(0), + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + extract_q_idx{}, + do_expensive_check); - auto [q, idx] = - 
cugraph::extract_transform_v_frontier_outgoing_e( - handle, - cur_graph_view, - vertex_frontier.bucket(0), - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - extract_q_idx{}, - do_expensive_check); - vertex_frontier.bucket(0).clear(); - vertex_frontier.bucket(0).insert( - thrust::make_zip_iterator(q.begin(), idx.begin()), - thrust::make_zip_iterator(q.end(), idx.end())); + vertex_frontier.bucket(0).insert(thrust::make_zip_iterator(q.begin(), idx.begin()), + thrust::make_zip_iterator(q.end(), idx.end())); + + // FIXME: Need to mask (p, q) and (q, r) edges before unrolling (p, r) edges to avoid + // overcompensating + auto [q_closing, idx_closing] = cugraph::extract_transform_v_frontier_outgoing_e( + handle, + cur_graph_view, + vertex_frontier.bucket(0), + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + extract_q_idx_closing{ + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())}, + do_expensive_check); - // FIXME: Need to mask (p, q) and (q, r) edges before unrolling (p, r) edges to avoid overcompensating - auto [q_closing, idx_closing] = - cugraph::extract_transform_v_frontier_outgoing_e( - handle, - cur_graph_view, - vertex_frontier.bucket(0), - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - extract_q_idx_closing{raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())}, - do_expensive_check); - // extract pair (p, r) - auto vertex_pair_buffer_p_r = - allocate_dataframe_buffer>(q_closing.size(), - handle.get_stream()); + auto vertex_pair_buffer_p_r = allocate_dataframe_buffer>( + q_closing.size(), handle.get_stream()); // construct pair (p, q) // construct pair (q, r) - thrust::tabulate( - handle.get_thrust_policy(), - 
get_dataframe_buffer_begin(vertex_pair_buffer_p_r), - get_dataframe_buffer_end(vertex_pair_buffer_p_r), - generate_p_r{ - invalid_edgelist, - raft::device_span(idx_closing.data(), - idx_closing.size()) - }); - + thrust::tabulate(handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r), + get_dataframe_buffer_end(vertex_pair_buffer_p_r), + generate_p_r{ + invalid_edgelist, + raft::device_span(idx_closing.data(), idx_closing.size())}); + // construct pair (p, q) auto vertex_pair_buffer_p_q_for_p_r = allocate_dataframe_buffer>(q_closing.size(), - handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), - get_dataframe_buffer_end(vertex_pair_buffer_p_q_for_p_r), - generate_p_q_q_r{ - invalid_edgelist, - raft::device_span(q_closing.data(), - q_closing.size()), - raft::device_span(idx_closing.data(), - idx_closing.size()) - }); + handle.get_stream()); + thrust::tabulate(handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), + get_dataframe_buffer_end(vertex_pair_buffer_p_q_for_p_r), + generate_p_q_q_r{ + invalid_edgelist, + raft::device_span(q_closing.data(), q_closing.size()), + raft::device_span(idx_closing.data(), idx_closing.size())}); // construct pair (q, r) auto vertex_pair_buffer_q_r_for_p_r = allocate_dataframe_buffer>(q_closing.size(), - handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), - get_dataframe_buffer_end(vertex_pair_buffer_q_r_for_p_r), - generate_p_q_q_r{ - invalid_edgelist, - raft::device_span(q_closing.data(), - q_closing.size()), - raft::device_span(idx_closing.data(), - idx_closing.size()) - }); - + handle.get_stream()); + thrust::tabulate(handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), + get_dataframe_buffer_end(vertex_pair_buffer_q_r_for_p_r), + generate_p_q_q_r{ + invalid_edgelist, + 
raft::device_span(q_closing.data(), q_closing.size()), + raft::device_span(idx_closing.data(), idx_closing.size())}); + // FIXME: Avoid duplicated code in SG and MG when updating the counts if constexpr (multi_gpu) { - auto& comm = handle.get_comms(); - // Get global weak_edgelist - auto global_weak_edgelist_srcs = cugraph::detail::device_allgatherv( + auto& comm = handle.get_comms(); + // Get global weak_edgelist + auto global_weak_edgelist_srcs = cugraph::detail::device_allgatherv( + handle, + comm, + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size())); + + auto global_weak_edgelist_dsts = cugraph::detail::device_allgatherv( + handle, + comm, + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); + + // Sort the weak edges if they are not already + auto invalid_edgelist = thrust::make_zip_iterator(global_weak_edgelist_srcs.begin(), + global_weak_edgelist_dsts.begin()); + thrust::sort(handle.get_thrust_policy(), + invalid_edge_first, + invalid_edge_first + weak_edgelist_srcs.size()); + + auto num_edges_not_overcomp_p_q = remove_overcompensating_edges< + vertex_t, + edge_t, + decltype(get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r)), + false, + false>(handle, + q_closing.size(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), + raft::device_span(global_weak_edgelist_srcs.data(), + global_weak_edgelist_srcs.size()), + raft::device_span(global_weak_edgelist_dsts.data(), + global_weak_edgelist_dsts.size())); + + resize_dataframe_buffer( + vertex_pair_buffer_p_q_for_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); + resize_dataframe_buffer( + vertex_pair_buffer_q_r_for_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); + + auto num_edges_not_overcomp_q_r = remove_overcompensating_edges< + vertex_t, + edge_t, + decltype(get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r)), + false, + false>(handle, + 
num_edges_not_overcomp_p_q, + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), + raft::device_span(global_weak_edgelist_srcs.data(), + global_weak_edgelist_srcs.size()), + raft::device_span(global_weak_edgelist_dsts.data(), + global_weak_edgelist_dsts.size())); + + resize_dataframe_buffer( + vertex_pair_buffer_p_q_for_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + resize_dataframe_buffer( + vertex_pair_buffer_q_r_for_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + + rmm::device_uvector pair_p_q_srcs(0, handle.get_stream()); + rmm::device_uvector pair_p_q_dsts(0, handle.get_stream()); + rmm::device_uvector pair_p_r_srcs(0, handle.get_stream()); + rmm::device_uvector pair_p_r_dsts(0, handle.get_stream()); + rmm::device_uvector pair_q_r_srcs(0, handle.get_stream()); + rmm::device_uvector pair_q_r_dsts(0, handle.get_stream()); + + std::tie(pair_p_q_srcs, pair_p_q_dsts, std::ignore, std::ignore, std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( handle, - comm, - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size())); - - auto global_weak_edgelist_dsts = cugraph::detail::device_allgatherv( + std::move(std::get<0>(vertex_pair_buffer_p_q_for_p_r)), + std::move(std::get<1>(vertex_pair_buffer_p_q_for_p_r)), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + + std::tie(pair_q_r_srcs, pair_q_r_dsts, std::ignore, std::ignore, std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( handle, - comm, - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); - - // Sort the weak edges if they are not already - auto invalid_edgelist = thrust::make_zip_iterator(global_weak_edgelist_srcs.begin(), global_weak_edgelist_dsts.begin()); - thrust::sort(handle.get_thrust_policy(), - invalid_edge_first, - invalid_edge_first + 
weak_edgelist_srcs.size()); - - - auto num_edges_not_overcomp_p_q = - remove_overcompensating_edges( - handle, - q_closing.size(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), - raft::device_span(global_weak_edgelist_srcs.data(), global_weak_edgelist_srcs.size()), - raft::device_span(global_weak_edgelist_dsts.data(), global_weak_edgelist_dsts.size())); - - resize_dataframe_buffer(vertex_pair_buffer_p_q_for_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); - resize_dataframe_buffer(vertex_pair_buffer_q_r_for_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); - - auto num_edges_not_overcomp_q_r = - remove_overcompensating_edges( - handle, - num_edges_not_overcomp_p_q, - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), - raft::device_span(global_weak_edgelist_srcs.data(), global_weak_edgelist_srcs.size()), - raft::device_span(global_weak_edgelist_dsts.data(), global_weak_edgelist_dsts.size())); - - resize_dataframe_buffer(vertex_pair_buffer_p_q_for_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); - resize_dataframe_buffer(vertex_pair_buffer_q_r_for_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); - - rmm::device_uvector pair_p_q_srcs(0, handle.get_stream()); - rmm::device_uvector pair_p_q_dsts(0, handle.get_stream()); - rmm::device_uvector pair_p_r_srcs(0, handle.get_stream()); - rmm::device_uvector pair_p_r_dsts(0, handle.get_stream()); - rmm::device_uvector pair_q_r_srcs(0, handle.get_stream()); - rmm::device_uvector pair_q_r_dsts(0, handle.get_stream()); - - std::tie(pair_p_q_srcs, pair_p_q_dsts, std::ignore, std::ignore, std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<0>(vertex_pair_buffer_p_q_for_p_r)), - std::move(std::get<1>(vertex_pair_buffer_p_q_for_p_r)), - std::nullopt, - std::nullopt, - std::nullopt, - 
cur_graph_view.vertex_partition_range_lasts()); - - std::tie(pair_q_r_srcs, pair_q_r_dsts, std::ignore, std::ignore, std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<0>(vertex_pair_buffer_q_r_for_p_r)), - std::move(std::get<1>(vertex_pair_buffer_q_r_for_p_r)), - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); - - - update_count( + std::move(std::get<0>(vertex_pair_buffer_q_r_for_p_r)), + std::move(std::get<1>(vertex_pair_buffer_q_r_for_p_r)), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + + update_count( handle, cur_graph_view, e_property_triangle_count, tmp_edge_mask, raft::device_span(pair_p_q_srcs.data(), pair_p_q_srcs.size()), - raft::device_span(pair_p_q_dsts.data(), pair_p_q_dsts.size()) - ); - - update_count( - handle, - cur_graph_view, - e_property_triangle_count, - tmp_edge_mask, - raft::device_span(pair_q_r_srcs.data(), pair_q_r_srcs.size()), - raft::device_span(pair_q_r_dsts.data(), pair_q_r_dsts.size()) - ); - - auto vertex_pair_buffer_p_r = - allocate_dataframe_buffer>(pair_q_r_srcs.size(), - handle.get_stream()); - - std::tie(pair_p_r_srcs, pair_p_r_dsts, std::ignore, std::ignore, std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<0>(vertex_pair_buffer_p_r)), - std::move(std::get<1>(vertex_pair_buffer_p_r)), - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); - - // Reconstruct (p, r) edges that didn't already have their count updated - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r), - get_dataframe_buffer_end(vertex_pair_buffer_p_r), - [ - vertex_pair_buffer_p_q_for_p_r = get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), - vertex_pair_buffer_q_r_for_p_r = 
get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r) - ] __device__(auto i) { - return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_for_p_r[i]), thrust::get<1>(vertex_pair_buffer_q_r_for_p_r[i])); - }); - - update_count( - handle, - cur_graph_view, - e_property_triangle_count, - tmp_edge_mask, - raft::device_span(std::get<0>(vertex_pair_buffer_p_r).data(), std::get<0>(vertex_pair_buffer_p_r).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_p_r).data(), std::get<1>(vertex_pair_buffer_p_r).size()) - ); + raft::device_span(pair_p_q_dsts.data(), pair_p_q_dsts.size())); - } else { + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + tmp_edge_mask, + raft::device_span(pair_q_r_srcs.data(), pair_q_r_srcs.size()), + raft::device_span(pair_q_r_dsts.data(), pair_q_r_dsts.size())); - auto num_edges_not_overcomp_p_q = - remove_overcompensating_edges( - handle, - q_closing.size(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); - - resize_dataframe_buffer(vertex_pair_buffer_p_q_for_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); - resize_dataframe_buffer(vertex_pair_buffer_q_r_for_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); - - auto num_edges_not_overcomp_q_r = - remove_overcompensating_edges( + auto vertex_pair_buffer_p_r = allocate_dataframe_buffer>( + pair_q_r_srcs.size(), handle.get_stream()); + + std::tie(pair_p_r_srcs, pair_p_r_dsts, std::ignore, std::ignore, std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( handle, - num_edges_not_overcomp_p_q, - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), - raft::device_span(weak_edgelist_srcs.data(), 
weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); - - resize_dataframe_buffer(vertex_pair_buffer_p_q_for_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); - resize_dataframe_buffer(vertex_pair_buffer_q_r_for_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); - - resize_dataframe_buffer(vertex_pair_buffer_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r), - get_dataframe_buffer_end(vertex_pair_buffer_p_r), - [ - vertex_pair_buffer_p_q_for_p_r = get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), - vertex_pair_buffer_q_r_for_p_r = get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r) - ] __device__(auto i) { - return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_for_p_r[i]), thrust::get<1>(vertex_pair_buffer_q_r_for_p_r[i])); - }); + std::move(std::get<0>(vertex_pair_buffer_p_r)), + std::move(std::get<1>(vertex_pair_buffer_p_r)), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + + // Reconstruct (p, r) edges that didn't already have their count updated + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r), + get_dataframe_buffer_end(vertex_pair_buffer_p_r), + [vertex_pair_buffer_p_q_for_p_r = + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), + vertex_pair_buffer_q_r_for_p_r = + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r)] __device__(auto i) { + return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_for_p_r[i]), + thrust::get<1>(vertex_pair_buffer_q_r_for_p_r[i])); + }); update_count( handle, cur_graph_view, e_property_triangle_count, tmp_edge_mask, - raft::device_span(std::get<0>(vertex_pair_buffer_p_r).data(), std::get<0>(vertex_pair_buffer_p_r).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_p_r).data(), 
std::get<1>(vertex_pair_buffer_p_r).size()) - ); - + raft::device_span(std::get<0>(vertex_pair_buffer_p_r).data(), + std::get<0>(vertex_pair_buffer_p_r).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_p_r).data(), + std::get<1>(vertex_pair_buffer_p_r).size())); + + } else { + auto num_edges_not_overcomp_p_q = remove_overcompensating_edges< + vertex_t, + edge_t, + decltype(get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r)), + false, + false>( + handle, + q_closing.size(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); + + resize_dataframe_buffer( + vertex_pair_buffer_p_q_for_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); + resize_dataframe_buffer( + vertex_pair_buffer_q_r_for_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); + + auto num_edges_not_overcomp_q_r = remove_overcompensating_edges< + vertex_t, + edge_t, + decltype(get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r)), + false, + false>( + handle, + num_edges_not_overcomp_p_q, + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); + + resize_dataframe_buffer( + vertex_pair_buffer_p_q_for_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + resize_dataframe_buffer( + vertex_pair_buffer_q_r_for_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + + resize_dataframe_buffer( + vertex_pair_buffer_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r), + get_dataframe_buffer_end(vertex_pair_buffer_p_r), + 
[vertex_pair_buffer_p_q_for_p_r = + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), + vertex_pair_buffer_q_r_for_p_r = + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r)] __device__(auto i) { + return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_for_p_r[i]), + thrust::get<1>(vertex_pair_buffer_q_r_for_p_r[i])); + }); + update_count( handle, cur_graph_view, e_property_triangle_count, tmp_edge_mask, - raft::device_span(std::get<0>(vertex_pair_buffer_p_q_for_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_for_p_r).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_p_q_for_p_r).data(), std::get<1>(vertex_pair_buffer_p_q_for_p_r).size()) - ); - + raft::device_span(std::get<0>(vertex_pair_buffer_p_r).data(), + std::get<0>(vertex_pair_buffer_p_r).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_p_r).data(), + std::get<1>(vertex_pair_buffer_p_r).size())); + + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + tmp_edge_mask, + raft::device_span(std::get<0>(vertex_pair_buffer_p_q_for_p_r).data(), + std::get<0>(vertex_pair_buffer_p_q_for_p_r).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_p_q_for_p_r).data(), + std::get<1>(vertex_pair_buffer_p_q_for_p_r).size())); + update_count( handle, cur_graph_view, e_property_triangle_count, tmp_edge_mask, - raft::device_span(std::get<0>(vertex_pair_buffer_q_r_for_p_r).data(), std::get<0>(vertex_pair_buffer_q_r_for_p_r).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_q_r_for_p_r).data(), std::get<1>(vertex_pair_buffer_q_r_for_p_r).size()) - ); + raft::device_span(std::get<0>(vertex_pair_buffer_q_r_for_p_r).data(), + std::get<0>(vertex_pair_buffer_q_r_for_p_r).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_q_r_for_p_r).data(), + std::get<1>(vertex_pair_buffer_q_r_for_p_r).size())); } // Mask all the edges that have 0 count cugraph::transform_e( + handle, + cur_graph_view, + // is it more efficient to extract edges with 0 count 
first? + // edges_with_no_triangle, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + e_property_triangle_count.view(), + [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { + return count != 0; + }, + edge_mask.mutable_view(), + false); + + cur_graph_view.attach_edge_mask(edge_mask.view()); + + if (edge_weight_view) { + auto [edgelist_srcs, edgelist_dsts, edgelist_count] = extract_transform_e( handle, cur_graph_view, - // is it more efficient to extract edges with 0 count first? - //edges_with_no_triangle, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), + // view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), e_property_triangle_count.view(), - [] __device__( - auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { - return count != 0; - }, - edge_mask.mutable_view(), - false); - - cur_graph_view.attach_edge_mask(edge_mask.view()); - - if (edge_weight_view) { - auto [edgelist_srcs, edgelist_dsts, edgelist_count] = extract_transform_e(handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), - e_property_triangle_count.view(), - extract_edges{}); + extract_edges{}); cugraph::edge_bucket_t edges_with_triangle(handle); // FIXME: Does 'extract_transform_e' yield sorted edges? 
- edges_with_triangle.insert(edgelist_srcs.begin(), - edgelist_srcs.end(), - edgelist_dsts.begin()); + edges_with_triangle.insert( + edgelist_srcs.begin(), edgelist_srcs.end(), edgelist_dsts.begin()); - cugraph::transform_e( handle, cur_graph_view, @@ -1497,11 +1502,10 @@ k_truss(raft::handle_t const& handle, return true; }, edge_mask.mutable_view(), - true); // FIXME: remove expensive check + true); // FIXME: remove expensive check - cur_graph_view.attach_edge_mask(edge_mask.view()); + cur_graph_view.attach_edge_mask(edge_mask.view()); } - } rmm::device_uvector edgelist_srcs(0, handle.get_stream()); @@ -1526,8 +1530,6 @@ k_truss(raft::handle_t const& handle, return std::make_tuple( std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_wgts)); - } - } } // namespace cugraph diff --git a/cpp/src/community/k_truss_mg.cu b/cpp/src/community/k_truss_mg.cu index 8da00f35275..048e3c34198 100644 --- a/cpp/src/community/k_truss_mg.cu +++ b/cpp/src/community/k_truss_mg.cu @@ -74,5 +74,4 @@ k_truss(raft::handle_t const& handle, int64_t k, bool do_expensive_check); - } // namespace cugraph diff --git a/cpp/tests/community/mg_k_truss_test.cpp b/cpp/tests/community/mg_k_truss_test.cpp index b03f7a7452e..51c12423b77 100644 --- a/cpp/tests/community/mg_k_truss_test.cpp +++ b/cpp/tests/community/mg_k_truss_test.cpp @@ -108,17 +108,16 @@ class Tests_MGEdgeTriangleCount auto d_mg_cugraph_results = cugraph::edge_triangle_count(*handle_, mg_graph_view); */ - + auto [d_cugraph_srcs, d_cugraph_dsts, d_cugraph_wgts] = cugraph::k_truss( *handle_, mg_graph_view, - //edge_weight ? std::make_optional((*edge_weight).view()) : std::nullopt, - std::nullopt, // FIXME: test weights - //k_truss_usecase.k_, + // edge_weight ? 
std::make_optional((*edge_weight).view()) : std::nullopt, + std::nullopt, // FIXME: test weights + // k_truss_usecase.k_, 4, false); - if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -129,7 +128,7 @@ class Tests_MGEdgeTriangleCount // 3. Compare SG & MG results - #if 0 +#if 0 if (edge_triangle_count_usecase.check_correctness_) { // 3-1. Convert to SG graph @@ -193,7 +192,7 @@ class Tests_MGEdgeTriangleCount h_sg_edge_triangle_counts.begin())); } } - #endif +#endif } private: @@ -204,7 +203,7 @@ template std::unique_ptr Tests_MGEdgeTriangleCount::handle_ = nullptr; using Tests_MGEdgeTriangleCount_File = Tests_MGEdgeTriangleCount; -//using Tests_MGEdgeTriangleCount_Rmat = Tests_MGEdgeTriangleCount; +// using Tests_MGEdgeTriangleCount_Rmat = Tests_MGEdgeTriangleCount; TEST_P(Tests_MGEdgeTriangleCount_File, CheckInt32Int32) { @@ -240,12 +239,12 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Combine( // enable correctness checks ::testing::Values(EdgeTriangleCount_Usecase{false, false} - //EdgeTriangleCount_Usecase{true, true} + // EdgeTriangleCount_Usecase{true, true} ), - ::testing::Values(cugraph::test::File_Usecase("/raid/jnke/optimize_ktruss/datasets/test_datasets.mtx") - //cugraph::test::File_Usecase("test/datasets/dolphins.mtx") - ))); - + ::testing::Values( + cugraph::test::File_Usecase("/raid/jnke/optimize_ktruss/datasets/test_datasets.mtx") + // cugraph::test::File_Usecase("test/datasets/dolphins.mtx") + ))); #if 0 INSTANTIATE_TEST_SUITE_P( diff --git a/cpp/tests/community/mg_triangle_count_test.cpp b/cpp/tests/community/mg_triangle_count_test.cpp index 297087f875f..932ff5050f1 100644 --- a/cpp/tests/community/mg_triangle_count_test.cpp +++ b/cpp/tests/community/mg_triangle_count_test.cpp @@ -295,4 +295,4 @@ INSTANTIATE_TEST_SUITE_P( TriangleCount_Usecase{1.0, true, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, true, false)))); -CUGRAPH_MG_TEST_PROGRAM_MAIN() \ No 
newline at end of file +CUGRAPH_MG_TEST_PROGRAM_MAIN() From 910075ebfd3f8141922816cf9b6f07a64f45afbb Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 29 May 2024 16:17:33 -0700 Subject: [PATCH 46/93] undo changes to k-truss tests --- cpp/tests/community/k_truss_test.cpp | 37 ++++++++++++++-------------- 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/cpp/tests/community/k_truss_test.cpp b/cpp/tests/community/k_truss_test.cpp index ac8ab6de5d3..c8010422e42 100644 --- a/cpp/tests/community/k_truss_test.cpp +++ b/cpp/tests/community/k_truss_test.cpp @@ -261,22 +261,14 @@ class Tests_KTruss : public ::testing::TestWithParam; -//using Tests_KTruss_Rmat = Tests_KTruss; +using Tests_KTruss_Rmat = Tests_KTruss; TEST_P(Tests_KTruss_File, CheckInt32Int32Float) { run_current_test( override_File_Usecase_with_cmd_line_arguments(GetParam())); } -/* -TEST_P(Tests_KTruss_Rmat, CheckInt32Int32Float) -{ - run_current_test( - override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); -} -*/ -/* TEST_P(Tests_KTruss_File, CheckInt64Int64Float) { run_current_test( @@ -294,19 +286,29 @@ TEST_P(Tests_KTruss_Rmat, CheckInt64Int64Float) run_current_test( override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); } -*/ INSTANTIATE_TEST_SUITE_P( simple_test, Tests_KTruss_File, ::testing::Combine( // enable correctness checks - ::testing::Values(KTruss_Usecase{4, false, true}), - ::testing::Values(cugraph::test::File_Usecase("/raid/jnke/optimize_ktruss/datasets/test_datasets.mtx")))); - - + ::testing::Values(KTruss_Usecase{5, true, false}, + KTruss_Usecase{4, true, false}, + KTruss_Usecase{9, true, true}, + KTruss_Usecase{7, true, true}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/netscience.mtx"), + cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); + +INSTANTIATE_TEST_SUITE_P(rmat_small_test, + Tests_KTruss_Rmat, + // enable correctness checks + ::testing::Combine(::testing::Values(KTruss_Usecase{5, false, true}, + KTruss_Usecase{4, false, 
true}, + KTruss_Usecase{9, true, true}, + KTruss_Usecase{7, true, true}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 10, 16, 0.57, 0.19, 0.19, 0, true, false)))); -#if 0 INSTANTIATE_TEST_SUITE_P( rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with --gtest_filter to select only the rmat_benchmark_test with a specific @@ -317,8 +319,7 @@ INSTANTIATE_TEST_SUITE_P( // disable correctness checks for large graphs // FIXME: High memory footprint. Perform nbr_intersection in chunks. ::testing::Combine( - ::testing::Values(KTruss_Usecase{4, false, true}, KTruss_Usecase{5, false, true}), - ::testing::Values(cugraph::test::Rmat_Usecase(12, 6, 0.57, 0.19, 0.19, 0, true, false)))); -#endif + ::testing::Values(KTruss_Usecase{12, false, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(14, 16, 0.57, 0.19, 0.19, 0, true, false)))); CUGRAPH_TEST_PROGRAM_MAIN() From 31162ecce3e778606cd4dd17c7a913bdfb9be4b9 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 29 May 2024 16:19:58 -0700 Subject: [PATCH 47/93] undo changes --- cpp/tests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 1b2faa57b43..feb8518420c 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -818,7 +818,7 @@ ConfigureCTest(CAPI_DEGREES c_api/degrees_test.c) ConfigureCTest(CAPI_COUNT_MULTI_EDGES c_api/count_multi_edges_test.c) ConfigureCTest(CAPI_EGONET_TEST c_api/egonet_test.c) ConfigureCTest(CAPI_TWO_HOP_NEIGHBORS_TEST c_api/two_hop_neighbors_test.c) -#ConfigureCTest(CAPI_K_TRUSS_TEST c_api/k_truss_test.c) +ConfigureCTest(CAPI_K_TRUSS_TEST c_api/k_truss_test.c) if (BUILD_CUGRAPH_MTMG_TESTS) ################################################################################################### From 566eb2277aad5230bfc463ea34a9968478f53dfa Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 29 May 2024 16:31:01 -0700 Subject: [PATCH 48/93] fix style --- 
cpp/src/community/k_truss_impl.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index d9493628e81..268a89d680d 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -963,9 +963,9 @@ k_truss(raft::handle_t const& handle, edge_dst_dummy_property_t{}.view(), e_property_triangle_count.view(), extract_weak_edges{k}); - + auto done = 1 if constexpr (multi_gpu) { - if (num_invalid_edges == 0) { done = 0; } + if (weak_edgelist_srcs.size() == 0) { done = 0; } done = host_scalar_allreduce( handle.get_comms(), done, raft::comms::op_t::MAX, handle.get_stream()); From 1996afafc68eaac9dd2e44ef87366c8ca2308589 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 29 May 2024 16:31:55 -0700 Subject: [PATCH 49/93] fix style --- cpp/src/community/k_truss_impl.cuh | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 268a89d680d..d092aaa4b77 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -963,15 +963,16 @@ k_truss(raft::handle_t const& handle, edge_dst_dummy_property_t{}.view(), e_property_triangle_count.view(), extract_weak_edges{k}); - auto done = 1 - if constexpr (multi_gpu) { + auto done = 1 if constexpr (multi_gpu) + { if (weak_edgelist_srcs.size() == 0) { done = 0; } done = host_scalar_allreduce( handle.get_comms(), done, raft::comms::op_t::MAX, handle.get_stream()); if (done == 0) { break; } - - } else if (weak_edgelist_srcs.size() == 0) { + } + else if (weak_edgelist_srcs.size() == 0) + { break; } From 287cd160da7207c1c156e601b374688b4bc2ef71 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 4 Jun 2024 11:30:07 -0700 Subject: [PATCH 50/93] rename variable --- cpp/src/community/k_truss_impl.cuh | 1555 ++++++++++++---------------- 1 file changed, 691 insertions(+), 864 deletions(-) diff --git 
a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index d092aaa4b77..5bfe6caa33c 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -15,16 +15,15 @@ */ #pragma once +#include "prims/extract_transform_v_frontier_outgoing_e.cuh" #include "prims/edge_bucket.cuh" #include "prims/extract_transform_e.cuh" -#include "prims/extract_transform_v_frontier_outgoing_e.cuh" #include "prims/fill_edge_property.cuh" #include "prims/transform_e.cuh" #include "prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh" #include "prims/update_edge_src_dst_property.cuh" #include -#include #include #include #include @@ -43,11 +42,7 @@ namespace cugraph { -template +template edge_t remove_overcompensating_edges(raft::handle_t const& handle, size_t buffer_size, EdgeIterator potential_closing_or_incoming_edges, @@ -69,15 +64,15 @@ edge_t remove_overcompensating_edges(raft::handle_t const& handle, invalid_last = thrust::make_zip_iterator(invalid_edgelist_srcs.end(), invalid_edgelist_dsts.end())] __device__(auto e) { auto potential_edge = thrust::get<0>(e); - auto potential_or_incoming_edge = - thrust::make_tuple(thrust::get<0>(potential_edge), thrust::get<1>(potential_edge)); - if constexpr (is_p_q_edge) { - potential_or_incoming_edge = - thrust::make_tuple(thrust::get<1>(potential_edge), thrust::get<0>(potential_edge)); + auto potential_or_incoming_edge = thrust::make_tuple(thrust::get<0>(potential_edge), thrust::get<1>(potential_edge)); + if constexpr (is_q_r_edge) { + potential_or_incoming_edge = thrust::make_tuple(thrust::get<1>(potential_edge), thrust::get<0>(potential_edge)); }; - - auto itr = - thrust::lower_bound(thrust::seq, invalid_first, invalid_last, potential_or_incoming_edge); + + //auto transposed_potential_or_incoming_edge = + // thrust::make_tuple(thrust::get<1>(potential_edge), thrust::get<0>(potential_edge)); + auto itr = thrust::lower_bound( + thrust::seq, invalid_first, invalid_last, 
potential_or_incoming_edge); return (itr != invalid_last && *itr == potential_or_incoming_edge); }); @@ -94,9 +89,9 @@ struct extract_weak_edges { __device__ thrust::optional> operator()( vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) const { + //printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); return count < k - 2 - ? thrust::optional>{thrust::make_tuple( - src, dst, count)} + ? thrust::optional>{thrust::make_tuple(src, dst, count)} : thrust::nullopt; } }; @@ -104,33 +99,46 @@ struct extract_weak_edges { template struct extract_edges { __device__ thrust::optional> operator()( - + auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) const { + //printf("\nchecking the count - src = %d, dst = %d, count = %d\n", src, dst, count); return thrust::make_tuple(src, dst, count); } }; template struct extract_edges_to_q_r { + raft::device_span vertex_q_r{}; __device__ thrust::optional> operator()( - - auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const + + auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const { - // FIXME: Replace by lowerbound after validation - auto itr_src = thrust::find(thrust::seq, vertex_q_r.begin(), vertex_q_r.end(), src); + //printf("\nchecking the count - src = %d, dst = %d, count = %d\n", src, dst, count); + + auto itr_src = thrust::find( + thrust::seq, vertex_q_r.begin(), vertex_q_r.end(), src); - // FIXME: Replace by lowerbound after validation - auto itr_dst = thrust::find(thrust::seq, vertex_q_r.begin(), vertex_q_r.end(), dst); + auto itr_dst = thrust::find( + thrust::seq, vertex_q_r.begin(), vertex_q_r.end(), dst); + // (itr != vertex_q_r.end() && ((*itr_dst == dst) || (*itr_dst == dst)) + + //bool select_edge = false; if (itr_src != vertex_q_r.end() && *itr_src == src) { + //select_edge = true; return thrust::optional>{thrust::make_tuple(src, dst)}; } else if (itr_dst != vertex_q_r.end() && *itr_dst == dst) { return 
thrust::optional>{thrust::make_tuple(src, dst)}; } else { return thrust::nullopt; } + /* + return (itr != vertex_q_r.end() && *itr == dst) + ? thrust::optional>{thrust::make_tuple(src, dst)} + : thrust::nullopt; + */ } }; @@ -215,6 +223,7 @@ struct extract_q_idx { thrust::nullopt_t, thrust::nullopt_t) const { + //printf("\n dst = %d, tag = %d\n", dst, thrust::get<1>(tagged_src)); return thrust::make_optional(thrust::make_tuple(dst, thrust::get<1>(tagged_src))); } }; @@ -230,8 +239,11 @@ struct extract_q_idx_closing { thrust::nullopt_t, thrust::nullopt_t) const { + //printf("\n dst = %d, tag = %d\n", dst, thrust::get<1>(tagged_src)); edge_t idx = thrust::get<1>(tagged_src); - if (dst == weak_edgelist_dsts[idx]) {} + if (dst == weak_edgelist_dsts[idx]){ + //printf("\nsrc = %d --- dst = %d, tag = %d\n", thrust::get<0>(tagged_src), dst, thrust::get<1>(tagged_src)); + } return dst == weak_edgelist_dsts[idx] ? thrust::make_optional(thrust::make_tuple(thrust::get<0>(tagged_src), idx)) : thrust::nullopt; @@ -251,7 +263,7 @@ struct generate_p_q { auto itr = thrust::upper_bound( thrust::seq, intersection_offsets.begin() + 1, intersection_offsets.end(), i); auto idx = thrust::distance(intersection_offsets.begin() + 1, itr); - + return thrust::make_tuple(invalid_srcs[chunk_start + idx], invalid_dsts[chunk_start + idx]); } }; @@ -275,53 +287,60 @@ struct generate_p_q_q_r { __device__ thrust::tuple operator()(edge_t i) const { + if constexpr (generate_p_q) { - return thrust::make_tuple(thrust::get<0>(*(invalid_edge + invalid_edge_idx[i])), - q_closing[i]); + return thrust::make_tuple(thrust::get<0>(*(invalid_edge + invalid_edge_idx[i])), q_closing[i]); } else { - return thrust::make_tuple(q_closing[i], - thrust::get<1>(*(invalid_edge + invalid_edge_idx[i]))); + return thrust::make_tuple(q_closing[i], thrust::get<1>(*(invalid_edge + invalid_edge_idx[i]))); } } }; +// FIXME: remove 'EdgeIterator' template template -void update_count( - raft::handle_t const& handle, - graph_view_t& 
cur_graph_view, - edge_property_t, edge_t>& - e_property_triangle_count, - edge_property_t, bool> const& tmp_edge_mask, - raft::device_span vertex_pair_buffer_src, - raft::device_span vertex_pair_buffer_dst) -{ - // FIXME: Only for debugging so remove after - auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); - - auto vertex_pair_buffer_begin = - thrust::make_zip_iterator(vertex_pair_buffer_src.begin(), vertex_pair_buffer_dst.begin()); - +void update_count(raft::handle_t const& handle, + graph_view_t & cur_graph_view, + //thrust::optional(q_r_graph) + edge_property_t, edge_t> & e_property_triangle_count, + edge_property_t, bool> const & tmp_edge_mask, + raft::device_span vertex_pair_buffer_src, + raft::device_span vertex_pair_buffer_dst + //EdgeIterator vertex_pair_buffer, + //vertex_t buffer_size + ) { + + /* + cugraph::edge_bucket_t edges_to_decrement_count(handle); + edges_to_decrement_count.insert(vertex_pair_buffer_src.begin(), + vertex_pair_buffer_src.end(), + vertex_pair_buffer_dst.begin()); + */ + + // Before updating the count, we need to clear the mask + // cur_graph_view.clear_edge_mask(); + auto vertex_pair_buffer_begin = thrust::make_zip_iterator(vertex_pair_buffer_src.begin(), vertex_pair_buffer_dst.begin()); + thrust::sort(handle.get_thrust_policy(), vertex_pair_buffer_begin, vertex_pair_buffer_begin + vertex_pair_buffer_src.size()); - - auto unique_pair_count = - thrust::unique_count(handle.get_thrust_policy(), - vertex_pair_buffer_begin, - vertex_pair_buffer_begin + vertex_pair_buffer_src.size()); - + + auto unique_pair_count = thrust::unique_count(handle.get_thrust_policy(), + vertex_pair_buffer_begin, + vertex_pair_buffer_begin + vertex_pair_buffer_src.size()); + rmm::device_uvector decrease_count(unique_pair_count, handle.get_stream()); rmm::device_uvector decrease_count_tmp(vertex_pair_buffer_src.size(), handle.get_stream()); - - thrust::fill( - handle.get_thrust_policy(), decrease_count_tmp.begin(), 
decrease_count_tmp.end(), size_t{1}); - + + thrust::fill(handle.get_thrust_policy(), + decrease_count_tmp.begin(), + decrease_count_tmp.end(), + size_t{1}); + auto vertex_pair_buffer_unique = allocate_dataframe_buffer>( - unique_pair_count, handle.get_stream()); - + unique_pair_count, handle.get_stream()); + thrust::reduce_by_key(handle.get_thrust_policy(), vertex_pair_buffer_begin, vertex_pair_buffer_begin + vertex_pair_buffer_src.size(), @@ -329,9 +348,25 @@ void update_count( get_dataframe_buffer_begin(vertex_pair_buffer_unique), decrease_count.begin(), thrust::equal_to>{}); + /* + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(unique_pair_count), + [vertex_pair_buffer_begin = get_dataframe_buffer_begin(vertex_pair_buffer_unique)] __device__(auto i) { + + auto src = thrust::get<0>(*(vertex_pair_buffer_begin + i)); + auto dst = thrust::get<1>(*(vertex_pair_buffer_begin + i)); + if ((src == 394) && (dst = 4)) { + printf("\nfound edge 394 -> 4\n"); + } + + }); + */ + cugraph::edge_bucket_t edges_to_decrement_count(handle); - edges_to_decrement_count.insert(std::get<0>(vertex_pair_buffer_unique).begin(), + edges_to_decrement_count.insert(std::get<0>(vertex_pair_buffer_unique).begin(), std::get<0>(vertex_pair_buffer_unique).end(), std::get<1>(vertex_pair_buffer_unique).begin()); @@ -342,349 +377,204 @@ void update_count( cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), e_property_triangle_count.view(), - [vertex_pair_buffer_begin = get_dataframe_buffer_begin(vertex_pair_buffer_unique), - vertex_pair_buffer_end = get_dataframe_buffer_end(vertex_pair_buffer_unique), - decrease_count = decrease_count.data()] __device__(auto src, - auto dst, - thrust::nullopt_t, - thrust::nullopt_t, - edge_t count) { + [ + vertex_pair_buffer_begin = get_dataframe_buffer_begin(vertex_pair_buffer_unique), + vertex_pair_buffer_end = 
get_dataframe_buffer_end(vertex_pair_buffer_unique), + decrease_count = decrease_count.data() + ] + __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { + auto e = thrust::make_tuple(src, dst); - auto itr_pair = - thrust::lower_bound(thrust::seq, vertex_pair_buffer_begin, vertex_pair_buffer_end, e); - + auto itr_pair = thrust::lower_bound( + thrust::seq, vertex_pair_buffer_begin, vertex_pair_buffer_end, e); + + // FIXME: This check shouldn't be necessary if ((itr_pair != vertex_pair_buffer_end) && (*itr_pair == e)) { auto idx_pair = thrust::distance(vertex_pair_buffer_begin, itr_pair); + //printf("\nupdating the count - src = %d, dst = %d, count = %d\n", src, dst, count); + //cuda::atomic_ref atomic_counter(count); + //auto r = atomic_counter.fetch_sub(edge_t{decrease_count[idx_pair]}, cuda::std::memory_order_relaxed); + //if ((src == 394) && (dst == 4)) { + // auto new_count = count - decrease_count[idx_pair]; + //printf("\nold count for edge 394 -> 4 = %d and new count = %d\n", count, new_count); + //} return count - decrease_count[idx_pair]; - } - + //return count - 1; + //return count; + } + return count; + }, e_property_triangle_count.mutable_view(), - true); + true); // FIXME: set expensive check to False + //cur_graph_view.attach_edge_mask(tmp_edge_mask.view()); + }; -template -void find_unroll_p_q_q_r_edges( - raft::handle_t const& handle, - graph_view_t& cur_graph_view, - optional_graph_view_t const& graph_q_r, - edge_property_t, edge_t>& - e_property_triangle_count, - edge_property_t, bool>& tmp_edge_mask, - raft::device_span weak_edgelist_srcs, - raft::device_span weak_edgelist_dsts, - std::optional> renumber_map, - bool do_expensive_check) -{ + + +// FIXME: Update return to void +template +vertex_t find_unroll_p_q_q_r_edges(raft::handle_t const& handle, + graph_view_t & cur_graph_view, + //thrust::optional(q_r_graph) + thrust::optional> const & graph_q_r, + edge_property_t, edge_t> & e_property_triangle_count, + 
edge_property_t, bool> & tmp_edge_mask, + raft::device_span weak_edgelist_srcs, + raft::device_span weak_edgelist_dsts, + bool do_expensive_check) { + size_t prev_chunk_size = 0; size_t chunk_num_invalid_edges = weak_edgelist_srcs.size(); size_t edges_to_intersect_per_iteration = - static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); + static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); auto num_chunks = raft::div_rounding_up_safe(weak_edgelist_srcs.size(), edges_to_intersect_per_iteration); - - auto invalid_edge_first = - thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); - + + auto weak_edgelist_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + + auto graph_view = graph_q_r ? *graph_q_r : cur_graph_view; + for (size_t i = 0; i < num_chunks; ++i) { - auto chunk_size = std::min(edges_to_intersect_per_iteration, chunk_num_invalid_edges); - rmm::device_uvector intersection_offsets(0, handle.get_stream()); - rmm::device_uvector intersection_indices(0, handle.get_stream()); + auto chunk_size = std::min(edges_to_intersect_per_iteration, chunk_num_invalid_edges); - if constexpr (is_p_q_edge) { - std::tie(intersection_offsets, intersection_indices) = - detail::nbr_intersection(handle, - cur_graph_view, - cugraph::edge_dummy_property_t{}.view(), - invalid_edge_first + prev_chunk_size, - invalid_edge_first + prev_chunk_size + chunk_size, - std::array{true, true}, - // do_expensive_check : FIXME - true); - } else { - std::tie(intersection_offsets, intersection_indices) = + auto [intersection_offsets, intersection_indices] = detail::nbr_intersection(handle, - (*graph_q_r).view(), - cugraph::edge_dummy_property_t{}.view(), - invalid_edge_first + prev_chunk_size, - invalid_edge_first + prev_chunk_size + chunk_size, - std::array{true, true}, - // do_expensive_check : FIXME - true); - } - - // Generate (p, q) edges - // FIXME: Should this array be reduced? 
an edge can have an intersection size > 1 - auto vertex_pair_buffer_p_q = allocate_dataframe_buffer>( - intersection_indices.size(), handle.get_stream()); - - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q), - get_dataframe_buffer_end(vertex_pair_buffer_p_q), - generate_p_q{ - prev_chunk_size, - raft::device_span(intersection_offsets.data(), intersection_offsets.size()), - raft::device_span(intersection_indices.data(), intersection_indices.size()), - weak_edgelist_srcs, - weak_edgelist_dsts}); - - auto vertex_pair_buffer_p_r_edge_p_q = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), - get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q), - generate_p_r_or_q_r_from_p_q{ - prev_chunk_size, - raft::device_span(intersection_offsets.data(), intersection_offsets.size()), - raft::device_span(intersection_indices.data(), intersection_indices.size()), - weak_edgelist_srcs, - weak_edgelist_dsts}); - - auto vertex_pair_buffer_q_r_edge_p_q = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), - get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q), - generate_p_r_or_q_r_from_p_q{ - prev_chunk_size, - raft::device_span(intersection_offsets.data(), intersection_offsets.size()), - raft::device_span(intersection_indices.data(), intersection_indices.size()), - weak_edgelist_srcs, - weak_edgelist_dsts}); - - if constexpr (!is_p_q_edge) { - if constexpr (multi_gpu) { - auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); // FIXME: for debugging - // Get global weak_edgelist - auto global_weak_edgelist_srcs = cugraph::detail::device_allgatherv( - handle, comm, raft::device_span(weak_edgelist_srcs)); - - auto 
global_weak_edgelist_dsts = cugraph::detail::device_allgatherv( - handle, comm, raft::device_span(weak_edgelist_dsts)); - - weak_edgelist_srcs = raft::device_span(global_weak_edgelist_srcs.data(), - global_weak_edgelist_srcs.size()); - weak_edgelist_dsts = raft::device_span(global_weak_edgelist_dsts.data(), - global_weak_edgelist_dsts.size()); - - // Sort the weak edges if they are not already - auto invalid_edgelist = - thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); - thrust::sort(handle.get_thrust_policy(), - invalid_edge_first, - invalid_edge_first + weak_edgelist_srcs.size()); - } - } - - if constexpr (is_p_q_edge) { - auto num_edges_not_overcomp = - remove_overcompensating_edges( - handle, - intersection_indices.size(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); - - resize_dataframe_buffer( - vertex_pair_buffer_p_r_edge_p_q, num_edges_not_overcomp, handle.get_stream()); - resize_dataframe_buffer( - vertex_pair_buffer_q_r_edge_p_q, num_edges_not_overcomp, handle.get_stream()); - - // resize initial (q, r) edges - resize_dataframe_buffer(vertex_pair_buffer_p_q, num_edges_not_overcomp, handle.get_stream()); - // Reconstruct (q, r) edges that didn't already have their count updated + graph_view, + cugraph::edge_dummy_property_t{}.view(), + weak_edgelist_first + prev_chunk_size, + weak_edgelist_first + prev_chunk_size + chunk_size, + std::array{true, true}, + //do_expensive_check : FIXME + true); + + // Generate (p, q) edges + // FIXME: Should this array be reduced? 
an edge can have an intersection size > 1 + auto vertex_pair_buffer_p_q = + allocate_dataframe_buffer>(intersection_indices.size(), + handle.get_stream()); + thrust::tabulate( handle.get_thrust_policy(), get_dataframe_buffer_begin(vertex_pair_buffer_p_q), get_dataframe_buffer_end(vertex_pair_buffer_p_q), - [vertex_pair_buffer_p_r_edge_p_q = - get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), - vertex_pair_buffer_q_r_edge_p_q = - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q)] __device__(auto i) { - return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_r_edge_p_q[i]), - thrust::get<0>(vertex_pair_buffer_q_r_edge_p_q[i])); - }); - } - - // Shuffle edges - if constexpr (multi_gpu) { - if constexpr (is_q_r_edge) { - auto vertex_partition_range_lasts = std::make_optional>( - (*graph_q_r).view().vertex_partition_range_lasts()); - - unrenumber_int_vertices( - handle, - std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), - std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size(), - (*renumber_map).data(), - *vertex_partition_range_lasts, - true); - - unrenumber_int_vertices( - handle, - std::get<1>(vertex_pair_buffer_p_r_edge_p_q).data(), - std::get<1>(vertex_pair_buffer_p_r_edge_p_q).size(), - (*renumber_map).data(), - *vertex_partition_range_lasts, - true); - - unrenumber_int_vertices( - handle, - std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), - std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size(), - (*renumber_map).data(), - *vertex_partition_range_lasts, - true); - - unrenumber_int_vertices( - handle, - std::get<1>(vertex_pair_buffer_q_r_edge_p_q).data(), - std::get<1>(vertex_pair_buffer_q_r_edge_p_q).size(), - (*renumber_map).data(), - *vertex_partition_range_lasts, - true); - - unrenumber_int_vertices(handle, - std::get<0>(vertex_pair_buffer_p_q).data(), - std::get<0>(vertex_pair_buffer_p_q).size(), - (*renumber_map).data(), - *vertex_partition_range_lasts, - true); - - unrenumber_int_vertices(handle, - 
std::get<1>(vertex_pair_buffer_p_q).data(), - std::get<1>(vertex_pair_buffer_p_q).size(), - (*renumber_map).data(), - *vertex_partition_range_lasts, - true); - } - - rmm::device_uvector pair_p_q_srcs(0, handle.get_stream()); - rmm::device_uvector pair_p_q_dsts(0, handle.get_stream()); - rmm::device_uvector pair_p_r_srcs(0, handle.get_stream()); - rmm::device_uvector pair_p_r_dsts(0, handle.get_stream()); - rmm::device_uvector pair_q_r_srcs(0, handle.get_stream()); - rmm::device_uvector pair_q_r_dsts(0, handle.get_stream()); + generate_p_q{ + prev_chunk_size, + raft::device_span(intersection_offsets.data(), + intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + weak_edgelist_srcs, + weak_edgelist_dsts + }); + + //raft::print_device_vector("vertex_pair_buffer_p_q", std::get<0>(vertex_pair_buffer_p_q).data(), std::get<0>(vertex_pair_buffer_p_q).size(), std::cout); + //raft::print_device_vector("vertex_pair_buffer_p_q", std::get<1>(vertex_pair_buffer_p_q).data(), std::get<1>(vertex_pair_buffer_p_q).size(), std::cout); + auto vertex_pair_buffer_p_r_edge_p_q = + allocate_dataframe_buffer>(intersection_indices.size(), + handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), + get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q), + generate_p_r_or_q_r_from_p_q{ + prev_chunk_size, + raft::device_span(intersection_offsets.data(), + intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + weak_edgelist_srcs, + weak_edgelist_dsts}); + + auto vertex_pair_buffer_q_r_edge_p_q = + allocate_dataframe_buffer>(intersection_indices.size(), + handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), + get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q), + generate_p_r_or_q_r_from_p_q{ + prev_chunk_size, + 
raft::device_span(intersection_offsets.data(), + intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + weak_edgelist_srcs, + weak_edgelist_dsts}); + + if constexpr (! is_p_q_edge) { + auto num_edges_not_overcomp = + remove_overcompensating_edges( + handle, + intersection_indices.size(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()) + ); + + resize_dataframe_buffer(vertex_pair_buffer_p_r_edge_p_q, num_edges_not_overcomp, handle.get_stream()); + resize_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_q, num_edges_not_overcomp, handle.get_stream()); + + // Reconstruct (q, r) edges that didn't already have their count updated + // resize initial (q, r) edges + resize_dataframe_buffer(vertex_pair_buffer_p_q, num_edges_not_overcomp, handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q), + get_dataframe_buffer_end(vertex_pair_buffer_p_q), + [ + vertex_pair_buffer_p_r_edge_p_q = get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), + vertex_pair_buffer_q_r_edge_p_q = get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q) + ] __device__(auto i) { + return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_r_edge_p_q[i]), thrust::get<0>(vertex_pair_buffer_q_r_edge_p_q[i])); + }); + /* + printf("\n***************after removing overcompensating edges***************\n"); + raft::print_device_vector("vertex_pair_buffer_p_q", std::get<0>(vertex_pair_buffer_p_q).data(), std::get<0>(vertex_pair_buffer_p_q).size(), std::cout); + raft::print_device_vector("vertex_pair_buffer_p_q", std::get<1>(vertex_pair_buffer_p_q).data(), std::get<1>(vertex_pair_buffer_p_q).size(), std::cout); - std::tie(pair_p_q_srcs, 
pair_p_q_dsts, std::ignore, std::ignore, std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<0>(vertex_pair_buffer_p_q)), - std::move(std::get<1>(vertex_pair_buffer_p_q)), - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); + raft::print_device_vector("vertex_pair_buffer_p_r_edge_p_q", std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size(), std::cout); + raft::print_device_vector("vertex_pair_buffer_p_r_edge_p_q", std::get<1>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_p_r_edge_p_q).size(), std::cout); - if constexpr (is_p_q_edge) { - std::tie(pair_p_r_srcs, pair_p_r_dsts, std::ignore, std::ignore, std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<0>(vertex_pair_buffer_p_r_edge_p_q)), - std::move(std::get<1>(vertex_pair_buffer_p_r_edge_p_q)), - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); - - std::tie(pair_q_r_srcs, pair_q_r_dsts, std::ignore, std::ignore, std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<0>(vertex_pair_buffer_q_r_edge_p_q)), - std::move(std::get<1>(vertex_pair_buffer_q_r_edge_p_q)), - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); - } else { - std::tie(pair_p_r_srcs, pair_p_r_dsts, std::ignore, std::ignore, std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<1>(vertex_pair_buffer_p_r_edge_p_q)), - std::move(std::get<0>(vertex_pair_buffer_p_r_edge_p_q)), - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); - - std::tie(pair_q_r_srcs, pair_q_r_dsts, std::ignore, std::ignore, 
std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<1>(vertex_pair_buffer_q_r_edge_p_q)), - std::move(std::get<0>(vertex_pair_buffer_q_r_edge_p_q)), - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); + raft::print_device_vector("vertex_pair_buffer_q_r_edge_p_q", std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size(), std::cout); + raft::print_device_vector("vertex_pair_buffer_q_r_edge_p_q", std::get<1>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_q_r_edge_p_q).size(), std::cout); + */ } - update_count( - handle, - cur_graph_view, - e_property_triangle_count, - tmp_edge_mask, - raft::device_span(pair_p_q_srcs.data(), pair_p_q_srcs.size()), - raft::device_span(pair_p_q_dsts.data(), pair_p_q_dsts.size())); - - update_count( - handle, - cur_graph_view, - e_property_triangle_count, - tmp_edge_mask, - raft::device_span(pair_p_r_srcs.data(), pair_p_r_srcs.size()), - raft::device_span(pair_p_r_dsts.data(), pair_p_r_dsts.size())); - update_count( - handle, - cur_graph_view, - e_property_triangle_count, - tmp_edge_mask, - raft::device_span(pair_q_r_srcs.data(), pair_q_r_srcs.size()), - raft::device_span(pair_q_r_dsts.data(), pair_q_r_dsts.size())); - } else { + // unroll (p, q) edges + // FIXME: remove 'EdgeIterator' template + //if constexpr (is_p_q_edge) { + update_count( handle, cur_graph_view, e_property_triangle_count, tmp_edge_mask, - raft::device_span(std::get<0>(vertex_pair_buffer_p_q).data(), - std::get<0>(vertex_pair_buffer_p_q).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_p_q).data(), - std::get<1>(vertex_pair_buffer_p_q).size())); + raft::device_span(std::get<0>(vertex_pair_buffer_p_q).data(), std::get<0>(vertex_pair_buffer_p_q).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_p_q).data(), std::get<1>(vertex_pair_buffer_p_q).size()) + ); + + 
//} + /* + else { + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + tmp_edge_mask, + raft::device_span(std::get<1>(vertex_pair_buffer_p_q).data(), std::get<1>(vertex_pair_buffer_p_q).size()), + raft::device_span(std::get<0>(vertex_pair_buffer_p_q).data(), std::get<0>(vertex_pair_buffer_p_q).size()) + ); + } + */ if constexpr (is_p_q_edge) { update_count( @@ -692,48 +582,93 @@ void find_unroll_p_q_q_r_edges( cur_graph_view, e_property_triangle_count, tmp_edge_mask, - raft::device_span(std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), - std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_p_r_edge_p_q).data(), - std::get<1>(vertex_pair_buffer_p_r_edge_p_q).size())); + raft::device_span(std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_p_r_edge_p_q).size()) + ); } else { update_count( handle, cur_graph_view, e_property_triangle_count, tmp_edge_mask, - raft::device_span(std::get<1>(vertex_pair_buffer_p_r_edge_p_q).data(), - std::get<1>(vertex_pair_buffer_p_r_edge_p_q).size()), - raft::device_span(std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), - std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size())); + raft::device_span(std::get<1>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_p_r_edge_p_q).size()), + raft::device_span(std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size()) + ); } - + if constexpr (is_p_q_edge) { update_count( handle, cur_graph_view, e_property_triangle_count, tmp_edge_mask, - raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), - std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_q_r_edge_p_q).data(), - std::get<1>(vertex_pair_buffer_q_r_edge_p_q).size())); + 
raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_q_r_edge_p_q).size()) + ); } else { update_count( handle, cur_graph_view, e_property_triangle_count, tmp_edge_mask, - raft::device_span(std::get<1>(vertex_pair_buffer_q_r_edge_p_q).data(), - std::get<1>(vertex_pair_buffer_q_r_edge_p_q).size()), - raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), - std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size())); + raft::device_span(std::get<1>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_q_r_edge_p_q).size()), + raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size()) + ); } + + // FIXME: Might be a problem when chunking becuase you will be setting the property of all edges to True + //cugraph::edge_property_t, bool> tmp_edge_mask(handle, cur_graph_view); + //if constexpr (is_p_q_edge) { + // FIXME: this temporary mask should also be for (q, r) edges but mask should be cleared before + // before unrolling so that weak edges found can have their count decremented as well + // Note: is this necessary to decrement the count of weak edges we already found. We know their count would + // be zero any ways. + + // FIXME: This might not work when chunking because the invalid (p. q) edges should be + // temporarily masked at the end when completly unrolling (p, q) edges. 
Failing to do + // this might cause some invalid edges (p, q) to not have their count decremented + + //cugraph::edge_property_t, bool> tmp_edge_mask(handle, cur_graph_view); + //cugraph::fill_edge_property(handle, cur_graph_view, true, tmp_edge_mask); + /* + cur_graph_view.attach_edge_mask(tmp_edge_mask.view()); + cugraph::edge_bucket_t edges_to_tmp_mask(handle); + edges_to_tmp_mask.clear(); // Continuously mask (p, q) edges as they are processed in chunks + edges_to_tmp_mask.insert(std::get<0>(vertex_pair_buffer_p_q).begin(), + std::get<0>(vertex_pair_buffer_p_q).end(), + std::get<1>(vertex_pair_buffer_p_q).begin()); + + cugraph::transform_e( + handle, + cur_graph_view, + edges_to_tmp_mask, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto wgt) { + return false; + }, + tmp_edge_mask.mutable_view(), + false); + + cur_graph_view.attach_edge_mask(tmp_edge_mask.view()); + */ + + + + auto [srcs, dsts, count] = extract_transform_e(handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), + e_property_triangle_count.view(), + extract_edges{}); + + prev_chunk_size += chunk_size; + chunk_num_invalid_edges -= chunk_size; } - prev_chunk_size += chunk_size; - chunk_num_invalid_edges -= chunk_size; - } + return 0; } } // namespace @@ -800,6 +735,7 @@ k_truss(raft::handle_t const& handle, } // 3. Find (k-1)-core and exclude edges that do not belong to (k-1)-core + //#if 0 { auto cur_graph_view = modified_graph_view ? 
*modified_graph_view : graph_view; @@ -845,7 +781,7 @@ k_truss(raft::handle_t const& handle, std::nullopt, std::nullopt, cugraph::graph_properties_t{true, graph_view.is_multigraph()}, - true); + false); modified_graph_view = (*modified_graph).view(); @@ -858,6 +794,7 @@ k_truss(raft::handle_t const& handle, } renumber_map = std::move(tmp_renumber_map); } + //#endif // 4. Keep only the edges from a low-degree vertex to a high-degree vertex. @@ -922,7 +859,7 @@ k_truss(raft::handle_t const& handle, std::nullopt, std::nullopt, cugraph::graph_properties_t{false /* now asymmetric */, cur_graph_view.is_multigraph()}, - true); + false); modified_graph_view = (*modified_graph).view(); if (renumber_map) { // collapse renumber_map @@ -940,451 +877,303 @@ k_truss(raft::handle_t const& handle, { auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; - - auto e_property_triangle_count = - edge_triangle_count(handle, cur_graph_view); - - cugraph::edge_property_t, bool> tmp_edge_mask( - handle, cur_graph_view); + /* + Design + 1) create a new graph with with the edge property from which we will iterate + a) Directly update the property of the edges + a) How do you traverse the graph? + */ + + // FIXME: This mask should be intialized in the while loop I think? + auto e_property_triangle_count = edge_triangle_count(handle, cur_graph_view); + cugraph::edge_property_t, bool> tmp_edge_mask(handle, cur_graph_view); cugraph::fill_edge_property(handle, cur_graph_view, true, tmp_edge_mask); cugraph::edge_property_t edge_mask(handle, cur_graph_view); cugraph::fill_edge_property(handle, cur_graph_view, true, edge_mask); + //cugraph::edge_property_t tmp_edge_mask(handle, cur_graph_view); + //cugraph::fill_edge_property(handle, cur_graph_view, true, tmp_edge_mask); + //cur_graph_view.attach_edge_mask(tmp_edge_mask.view()); + + // extract the edges that have counts less than k - 2. 
THose edges will be unrolled + auto iteration = -1; while (true) { - // FIXME: Keep it at 1 iteration for debugging - - // extract the edges that have counts less than k - 2. Those edges will be unrolled - // FIXME: extracting 'triangle_count' is not required here. - auto [weak_edgelist_srcs, weak_edgelist_dsts, triangle_count] = - extract_transform_e(handle, - cur_graph_view, - edge_src_dummy_property_t{}.view(), - edge_dst_dummy_property_t{}.view(), - e_property_triangle_count.view(), - extract_weak_edges{k}); - auto done = 1 if constexpr (multi_gpu) - { - if (weak_edgelist_srcs.size() == 0) { done = 0; } - done = host_scalar_allreduce( - handle.get_comms(), done, raft::comms::op_t::MAX, handle.get_stream()); - - if (done == 0) { break; } - } - else if (weak_edgelist_srcs.size() == 0) - { - break; - } - - if (num_invalid_edges == 0 && done == 0) { break; } - // FIXME: Add a flag checking wether the other ranks have completed their task or - // not before exiting. - auto invalid_edge_first = - thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); - - thrust::sort(handle.get_thrust_policy(), - invalid_edge_first, - invalid_edge_first + weak_edgelist_srcs.size()); - - edge_property_t modified_triangle_count(handle, - cur_graph_view); - - std::optional> dummy_graph{std::nullopt}; - find_unroll_p_q_q_r_edges( + // FIXME: No need to extract the count of invalid edges because we don't use them ************ + iteration += 1; + printf("\n********************************iteration = %d********************************\n", iteration); + auto [weak_edgelist_srcs, weak_edgelist_dsts, triangle_count] = extract_transform_e(handle, + cur_graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + e_property_triangle_count.view(), + extract_weak_edges{k}); + + + //if (iteration == 8) { break; } + if (weak_edgelist_srcs.size() == 0) { break; } + auto weak_edgelist_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), 
weak_edgelist_dsts.begin()); + // FIXME: No need to extract the count of invalid edges because we don't use them ************ + thrust::sort_by_key(handle.get_thrust_policy(), + weak_edgelist_first, + weak_edgelist_first + weak_edgelist_srcs.size(), + triangle_count.begin()); + + //raft::print_device_vector("srcs", weak_edgelist_srcs.data(), weak_edgelist_srcs.size(), std::cout); + //raft::print_device_vector("dsts", weak_edgelist_dsts.data(), weak_edgelist_dsts.size(), std::cout); + //raft::print_device_vector("n_tr", triangle_count.data(), triangle_count.size(), std::cout); + + // Call nbr_intersection unroll (p, q) edges + size_t edges_to_intersect_per_iteration = + static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); + + size_t prev_chunk_size = 0; + size_t chunk_num_invalid_edges = weak_edgelist_srcs.size(); + + auto num_chunks = + raft::div_rounding_up_safe(weak_edgelist_srcs.size(), edges_to_intersect_per_iteration); + + edge_property_t modified_triangle_count(handle, cur_graph_view); + + // find intersection edges + + + find_unroll_p_q_q_r_edges( handle, cur_graph_view, - dummy_graph, + thrust::nullopt, e_property_triangle_count, tmp_edge_mask, - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), - std::nullopt, - do_expensive_check); + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), + do_expensive_check + //weak_edgelist_first, + //weak_edgelist_srcs.size() + ); + auto [srcs, dsts, count] = extract_transform_e(handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - e_property_triangle_count.view(), - extract_edges{}); + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + 
//view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), + e_property_triangle_count.view(), + extract_edges{}); + + // Iterate over unique vertices that appear as either q or r + printf("\nweak_edgelist size = %d\n", weak_edgelist_srcs.size()); + + rmm::device_uvector unique_weak_edgelist_srcs(weak_edgelist_srcs.size(), handle.get_stream()); + rmm::device_uvector unique_weak_edgelist_dsts(weak_edgelist_dsts.size(), handle.get_stream()); + + // Get unique srcs and dsts + thrust::copy(handle.get_thrust_policy(), + weak_edgelist_srcs.begin(), + weak_edgelist_srcs.end(), + unique_weak_edgelist_srcs.begin() + ); + + thrust::copy(handle.get_thrust_policy(), + weak_edgelist_dsts.begin(), + weak_edgelist_dsts.end(), + unique_weak_edgelist_dsts.begin() + ); + + thrust::sort(handle.get_thrust_policy(), unique_weak_edgelist_srcs.begin(), unique_weak_edgelist_srcs.end()); + + thrust::sort(handle.get_thrust_policy(), unique_weak_edgelist_dsts.begin(), unique_weak_edgelist_dsts.end()); + + auto unique_srcs_end = thrust::unique( + handle.get_thrust_policy(), + unique_weak_edgelist_srcs.begin(), + unique_weak_edgelist_srcs.end()); + + auto unique_dsts_end = thrust::unique( + handle.get_thrust_policy(), + unique_weak_edgelist_dsts.begin(), + unique_weak_edgelist_dsts.end()); + + auto num_unique_weak_edgelist_srcs = thrust::distance(unique_weak_edgelist_srcs.begin(), unique_srcs_end); + auto num_unique_weak_edgelist_dsts = thrust::distance(unique_weak_edgelist_dsts.begin(), unique_dsts_end); + unique_weak_edgelist_srcs.resize(num_unique_weak_edgelist_srcs, handle.get_stream()); + unique_weak_edgelist_dsts.resize(num_unique_weak_edgelist_dsts, handle.get_stream()); + + //rmm::device_uvector unique_weak_edgelist_srcs(weak_edgelist_srcs.size(), handle.get_stream()); + //rmm::device_uvector unique_weak_edgelist_dsts(weak_edgelist_dsts.size(), handle.get_stream()); + + rmm::device_uvector vertex_q_r(num_unique_weak_edgelist_srcs + num_unique_weak_edgelist_dsts, 
handle.get_stream()); + - // FIXME: memory footprint overhead - rmm::device_uvector vertex_q_r(weak_edgelist_srcs.size() * 2, handle.get_stream()); - // Iterate over unique vertices that appear as either q or r - // FIXME: Reduce 'weak_edgelist_srcs' and 'weak_edgelist_srcs' before calling 'set_union' thrust::set_union(handle.get_thrust_policy(), - weak_edgelist_srcs.begin(), - weak_edgelist_srcs.end(), - weak_edgelist_dsts.begin(), - weak_edgelist_dsts.end(), - vertex_q_r.begin()); + unique_weak_edgelist_srcs.begin(), + unique_weak_edgelist_srcs.end(), + unique_weak_edgelist_dsts.begin(), + unique_weak_edgelist_dsts.end(), + vertex_q_r.begin()); thrust::sort(handle.get_thrust_policy(), vertex_q_r.begin(), vertex_q_r.end()); - auto invalid_unique_v_end = - thrust::unique(handle.get_thrust_policy(), vertex_q_r.begin(), vertex_q_r.end()); + auto invalid_unique_v_end = thrust::unique( + handle.get_thrust_policy(), + vertex_q_r.begin(), + vertex_q_r.end()); + + vertex_q_r.resize(thrust::distance(vertex_q_r.begin(), invalid_unique_v_end), handle.get_stream()); - vertex_q_r.resize(thrust::distance(vertex_q_r.begin(), invalid_unique_v_end), - handle.get_stream()); + auto invalid_edgelist = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); - auto invalid_edgelist = - thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); - - auto [srcs_to_q_r, dsts_to_q_r] = - extract_transform_e(handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - extract_edges_to_q_r{raft::device_span( - vertex_q_r.data(), vertex_q_r.size())}); - - rmm::device_uvector cp_weak_edgelist_srcs(0, handle.get_stream()); - rmm::device_uvector cp_weak_edgelist_dsts(0, handle.get_stream()); - - if constexpr (multi_gpu) { - std::tie(dsts_to_q_r, srcs_to_q_r, std::ignore, std::ignore, std::ignore) = - 
detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(dsts_to_q_r), - std::move(srcs_to_q_r), - std::nullopt, - std::nullopt, - std::nullopt); - } + auto [srcs_to_q_r, dsts_to_q_r] = extract_transform_e(handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), + cugraph::edge_dummy_property_t{}.view(), + //e_property_triangle_count.view(), + extract_edges_to_q_r{raft::device_span(vertex_q_r.data(), vertex_q_r.size())}); std::optional> graph_q_r{std::nullopt}; - std::optional> renumber_map_q_r{std::nullopt}; - std::tie(*graph_q_r, std::ignore, std::ignore, std::ignore, renumber_map_q_r) = - create_graph_from_edgelist( - handle, - std::nullopt, - std::move(dsts_to_q_r), - std::move(srcs_to_q_r), - std::nullopt, - std::nullopt, - std::nullopt, - cugraph::graph_properties_t{true, graph_view.is_multigraph()}, - true); - - if constexpr (multi_gpu) { - cp_weak_edgelist_srcs.resize(weak_edgelist_srcs.size(), handle.get_stream()); - cp_weak_edgelist_dsts.resize(weak_edgelist_dsts.size(), handle.get_stream()); - - thrust::copy( - handle.get_thrust_policy(), - thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()), - thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end()), - thrust::make_zip_iterator(cp_weak_edgelist_srcs.begin(), cp_weak_edgelist_dsts.begin())); - - rmm::device_uvector shuffled_weak_edgelist_srcs{0, handle.get_stream()}; - rmm::device_uvector shuffled_weak_edgelist_dsts{0, handle.get_stream()}; - - std::tie( - cp_weak_edgelist_srcs, cp_weak_edgelist_dsts, std::ignore, std::ignore, std::ignore) = - detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(cp_weak_edgelist_srcs), - std::move(cp_weak_edgelist_dsts), - std::nullopt, - std::nullopt, - std::nullopt); - - 
renumber_ext_vertices( - handle, - cp_weak_edgelist_srcs.data(), - cp_weak_edgelist_srcs.size(), - (*renumber_map_q_r).data(), - (*graph_q_r).view().local_vertex_partition_range_first(), - (*graph_q_r).view().local_vertex_partition_range_last(), - true); - - renumber_ext_vertices( - handle, - cp_weak_edgelist_dsts.data(), - cp_weak_edgelist_dsts.size(), - (*renumber_map_q_r).data(), - (*graph_q_r).view().local_vertex_partition_range_first(), - (*graph_q_r).view().local_vertex_partition_range_last(), - true); - } - - invalid_edge_first = - thrust::make_zip_iterator(cp_weak_edgelist_srcs.begin(), cp_weak_edgelist_dsts.begin()); - thrust::sort(handle.get_thrust_policy(), - invalid_edge_first, - invalid_edge_first + cp_weak_edgelist_srcs.size()); - - find_unroll_p_q_q_r_edges( + std::optional> renumber_map_q_r{std::nullopt}; + std::tie(*graph_q_r, std::ignore, std::ignore, std::ignore, renumber_map_q_r) = + create_graph_from_edgelist( + handle, + std::nullopt, + std::move(dsts_to_q_r), + std::move(srcs_to_q_r), + std::nullopt, + std::nullopt, + std::nullopt, + cugraph::graph_properties_t{true, graph_view.is_multigraph()}, + false); + + find_unroll_p_q_q_r_edges( handle, cur_graph_view, - graph_q_r, + thrust::make_optional((*graph_q_r).view()), e_property_triangle_count, tmp_edge_mask, - raft::device_span(cp_weak_edgelist_srcs.data(), cp_weak_edgelist_srcs.size()), - raft::device_span(cp_weak_edgelist_dsts.data(), cp_weak_edgelist_dsts.size()), - std::move(renumber_map_q_r), - do_expensive_check); + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), + do_expensive_check + //weak_edgelist_first, + //weak_edgelist_srcs.size() + ); + + auto [srcs__, dsts__] = extract_transform_e(handle, + (*graph_q_r).view(), + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + //view_concat(e_property_triangle_count.view(), 
modified_triangle_count.view()), + cugraph::edge_dummy_property_t{}.view(), + extract_edges_to_q_r{raft::device_span(vertex_q_r.data(), vertex_q_r.size())}); // Unrolling p, r edges // create pair invalid_src, invalid_edge_idx // create a dataframe buffer of size invalid_edge_size // FIXME: No need to create a dataframe buffer. We can just zip weak_edgelist_srcs - // with a vector counting from 0 .. - auto vertex_pair_buffer_p_tag = allocate_dataframe_buffer>( - weak_edgelist_srcs.size(), handle.get_stream()); - - thrust::tabulate(handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_tag), - get_dataframe_buffer_end(vertex_pair_buffer_p_tag), - [p = weak_edgelist_srcs.begin()] __device__(auto idx) { - return thrust::make_tuple(p[idx], idx); - }); - + // with a vector counting from 0 .. + auto vertex_pair_buffer_p_tag = + allocate_dataframe_buffer>(weak_edgelist_srcs.size(), + handle.get_stream()); + + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_tag), + get_dataframe_buffer_end(vertex_pair_buffer_p_tag), + [ + p = weak_edgelist_srcs.begin() + ] __device__(auto idx) { + return thrust::make_tuple(p[idx], idx); + }); + vertex_frontier_t vertex_frontier(handle, 1); vertex_frontier.bucket(0).insert( - thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).begin(), - std::get<1>(vertex_pair_buffer_p_tag).begin()), - thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).end(), - std::get<1>(vertex_pair_buffer_p_tag).end())); - - auto [q, idx] = cugraph::extract_transform_v_frontier_outgoing_e( - handle, - cur_graph_view, - vertex_frontier.bucket(0), - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - extract_q_idx{}, - do_expensive_check); + thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).begin(), std::get<1>(vertex_pair_buffer_p_tag).begin()), + 
thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).end(), std::get<1>(vertex_pair_buffer_p_tag).end())); + auto [q, idx] = + cugraph::extract_transform_v_frontier_outgoing_e( + handle, + cur_graph_view, + vertex_frontier.bucket(0), + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + extract_q_idx{}, + do_expensive_check); + vertex_frontier.bucket(0).clear(); - vertex_frontier.bucket(0).insert(thrust::make_zip_iterator(q.begin(), idx.begin()), - thrust::make_zip_iterator(q.end(), idx.end())); - - // FIXME: Need to mask (p, q) and (q, r) edges before unrolling (p, r) edges to avoid - // overcompensating - auto [q_closing, idx_closing] = cugraph::extract_transform_v_frontier_outgoing_e( - handle, - cur_graph_view, - vertex_frontier.bucket(0), - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - extract_q_idx_closing{ - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())}, - do_expensive_check); + vertex_frontier.bucket(0).insert( + thrust::make_zip_iterator(q.begin(), idx.begin()), + thrust::make_zip_iterator(q.end(), idx.end())); + // FIXME: Need to mask (p, q) and (q, r) edges before unrolling (p, r) edges to avoid overcompensating + auto [q_closing, idx_closing] = + cugraph::extract_transform_v_frontier_outgoing_e( + handle, + cur_graph_view, + vertex_frontier.bucket(0), + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + extract_q_idx_closing{raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())}, + do_expensive_check); + // extract pair (p, r) - auto vertex_pair_buffer_p_r = allocate_dataframe_buffer>( - q_closing.size(), handle.get_stream()); + auto vertex_pair_buffer_p_r = + allocate_dataframe_buffer>(q_closing.size(), + handle.get_stream()); // construct 
pair (p, q) // construct pair (q, r) - thrust::tabulate(handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r), - get_dataframe_buffer_end(vertex_pair_buffer_p_r), - generate_p_r{ - invalid_edgelist, - raft::device_span(idx_closing.data(), idx_closing.size())}); - + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r), + get_dataframe_buffer_end(vertex_pair_buffer_p_r), + generate_p_r{ + invalid_edgelist, + raft::device_span(idx_closing.data(), + idx_closing.size()) + }); + // construct pair (p, q) auto vertex_pair_buffer_p_q_for_p_r = allocate_dataframe_buffer>(q_closing.size(), - handle.get_stream()); - thrust::tabulate(handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), - get_dataframe_buffer_end(vertex_pair_buffer_p_q_for_p_r), - generate_p_q_q_r{ - invalid_edgelist, - raft::device_span(q_closing.data(), q_closing.size()), - raft::device_span(idx_closing.data(), idx_closing.size())}); + handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), + get_dataframe_buffer_end(vertex_pair_buffer_p_q_for_p_r), + generate_p_q_q_r{ + invalid_edgelist, + raft::device_span(q_closing.data(), + q_closing.size()), + raft::device_span(idx_closing.data(), + idx_closing.size()) + }); // construct pair (q, r) auto vertex_pair_buffer_q_r_for_p_r = allocate_dataframe_buffer>(q_closing.size(), - handle.get_stream()); - thrust::tabulate(handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), - get_dataframe_buffer_end(vertex_pair_buffer_q_r_for_p_r), - generate_p_q_q_r{ - invalid_edgelist, - raft::device_span(q_closing.data(), q_closing.size()), - raft::device_span(idx_closing.data(), idx_closing.size())}); - - // FIXME: Avoid duplicated code in SG and MG when updating the counts - if constexpr (multi_gpu) { - auto& comm = handle.get_comms(); - // Get global 
weak_edgelist - auto global_weak_edgelist_srcs = cugraph::detail::device_allgatherv( - handle, - comm, - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size())); - - auto global_weak_edgelist_dsts = cugraph::detail::device_allgatherv( - handle, - comm, - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); - - // Sort the weak edges if they are not already - auto invalid_edgelist = thrust::make_zip_iterator(global_weak_edgelist_srcs.begin(), - global_weak_edgelist_dsts.begin()); - thrust::sort(handle.get_thrust_policy(), - invalid_edge_first, - invalid_edge_first + weak_edgelist_srcs.size()); - - auto num_edges_not_overcomp_p_q = remove_overcompensating_edges< - vertex_t, - edge_t, - decltype(get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r)), - false, - false>(handle, - q_closing.size(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), - raft::device_span(global_weak_edgelist_srcs.data(), - global_weak_edgelist_srcs.size()), - raft::device_span(global_weak_edgelist_dsts.data(), - global_weak_edgelist_dsts.size())); - - resize_dataframe_buffer( - vertex_pair_buffer_p_q_for_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); - resize_dataframe_buffer( - vertex_pair_buffer_q_r_for_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); - - auto num_edges_not_overcomp_q_r = remove_overcompensating_edges< - vertex_t, - edge_t, - decltype(get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r)), - false, - false>(handle, - num_edges_not_overcomp_p_q, - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), - raft::device_span(global_weak_edgelist_srcs.data(), - global_weak_edgelist_srcs.size()), - raft::device_span(global_weak_edgelist_dsts.data(), - global_weak_edgelist_dsts.size())); - - resize_dataframe_buffer( - vertex_pair_buffer_p_q_for_p_r, num_edges_not_overcomp_q_r, 
handle.get_stream()); - resize_dataframe_buffer( - vertex_pair_buffer_q_r_for_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); - - rmm::device_uvector pair_p_q_srcs(0, handle.get_stream()); - rmm::device_uvector pair_p_q_dsts(0, handle.get_stream()); - rmm::device_uvector pair_p_r_srcs(0, handle.get_stream()); - rmm::device_uvector pair_p_r_dsts(0, handle.get_stream()); - rmm::device_uvector pair_q_r_srcs(0, handle.get_stream()); - rmm::device_uvector pair_q_r_dsts(0, handle.get_stream()); - - std::tie(pair_p_q_srcs, pair_p_q_dsts, std::ignore, std::ignore, std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<0>(vertex_pair_buffer_p_q_for_p_r)), - std::move(std::get<1>(vertex_pair_buffer_p_q_for_p_r)), - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); - - std::tie(pair_q_r_srcs, pair_q_r_dsts, std::ignore, std::ignore, std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<0>(vertex_pair_buffer_q_r_for_p_r)), - std::move(std::get<1>(vertex_pair_buffer_q_r_for_p_r)), - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); - - update_count( - handle, - cur_graph_view, - e_property_triangle_count, - tmp_edge_mask, - raft::device_span(pair_p_q_srcs.data(), pair_p_q_srcs.size()), - raft::device_span(pair_p_q_dsts.data(), pair_p_q_dsts.size())); - - update_count( - handle, - cur_graph_view, - e_property_triangle_count, - tmp_edge_mask, - raft::device_span(pair_q_r_srcs.data(), pair_q_r_srcs.size()), - raft::device_span(pair_q_r_dsts.data(), pair_q_r_dsts.size())); - - auto vertex_pair_buffer_p_r = allocate_dataframe_buffer>( - pair_q_r_srcs.size(), handle.get_stream()); - - std::tie(pair_p_r_srcs, pair_p_r_dsts, std::ignore, std::ignore, std::ignore) = - 
detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<0>(vertex_pair_buffer_p_r)), - std::move(std::get<1>(vertex_pair_buffer_p_r)), - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); - - // Reconstruct (p, r) edges that didn't already have their count updated - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r), - get_dataframe_buffer_end(vertex_pair_buffer_p_r), - [vertex_pair_buffer_p_q_for_p_r = - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), - vertex_pair_buffer_q_r_for_p_r = - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r)] __device__(auto i) { - return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_for_p_r[i]), - thrust::get<1>(vertex_pair_buffer_q_r_for_p_r[i])); + handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), + get_dataframe_buffer_end(vertex_pair_buffer_q_r_for_p_r), + generate_p_q_q_r{ + invalid_edgelist, + raft::device_span(q_closing.data(), + q_closing.size()), + raft::device_span(idx_closing.data(), + idx_closing.size()) }); + - update_count( - handle, - cur_graph_view, - e_property_triangle_count, - tmp_edge_mask, - raft::device_span(std::get<0>(vertex_pair_buffer_p_r).data(), - std::get<0>(vertex_pair_buffer_p_r).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_p_r).data(), - std::get<1>(vertex_pair_buffer_p_r).size())); - - } else { - auto num_edges_not_overcomp_p_q = remove_overcompensating_edges< - vertex_t, - edge_t, - decltype(get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r)), - false, - false>( + auto num_edges_not_overcomp_p_q = + remove_overcompensating_edges( handle, q_closing.size(), get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), @@ -1392,106 +1181,105 @@ k_truss(raft::handle_t const& handle, 
raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); - resize_dataframe_buffer( - vertex_pair_buffer_p_q_for_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); - resize_dataframe_buffer( - vertex_pair_buffer_q_r_for_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); - - auto num_edges_not_overcomp_q_r = remove_overcompensating_edges< - vertex_t, - edge_t, - decltype(get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r)), - false, - false>( + resize_dataframe_buffer(vertex_pair_buffer_p_q_for_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); + resize_dataframe_buffer(vertex_pair_buffer_q_r_for_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); + + auto num_edges_not_overcomp_q_r = + remove_overcompensating_edges( handle, num_edges_not_overcomp_p_q, get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); + + resize_dataframe_buffer(vertex_pair_buffer_p_q_for_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + resize_dataframe_buffer(vertex_pair_buffer_q_r_for_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); - resize_dataframe_buffer( - vertex_pair_buffer_p_q_for_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); - resize_dataframe_buffer( - vertex_pair_buffer_q_r_for_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + // Reconstruct (p, r) edges that didn't already have their count updated - resize_dataframe_buffer( - vertex_pair_buffer_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + resize_dataframe_buffer(vertex_pair_buffer_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); thrust::tabulate( handle.get_thrust_policy(), get_dataframe_buffer_begin(vertex_pair_buffer_p_r), 
get_dataframe_buffer_end(vertex_pair_buffer_p_r), - [vertex_pair_buffer_p_q_for_p_r = - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), - vertex_pair_buffer_q_r_for_p_r = - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r)] __device__(auto i) { - return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_for_p_r[i]), - thrust::get<1>(vertex_pair_buffer_q_r_for_p_r[i])); + [ + vertex_pair_buffer_p_q_for_p_r = get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), + vertex_pair_buffer_q_r_for_p_r = get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r) + ] __device__(auto i) { + return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_for_p_r[i]), thrust::get<1>(vertex_pair_buffer_q_r_for_p_r[i])); }); - update_count( - handle, - cur_graph_view, - e_property_triangle_count, - tmp_edge_mask, - raft::device_span(std::get<0>(vertex_pair_buffer_p_r).data(), - std::get<0>(vertex_pair_buffer_p_r).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_p_r).data(), - std::get<1>(vertex_pair_buffer_p_r).size())); - - update_count( - handle, - cur_graph_view, - e_property_triangle_count, - tmp_edge_mask, - raft::device_span(std::get<0>(vertex_pair_buffer_p_q_for_p_r).data(), - std::get<0>(vertex_pair_buffer_p_q_for_p_r).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_p_q_for_p_r).data(), - std::get<1>(vertex_pair_buffer_p_q_for_p_r).size())); - - update_count( - handle, - cur_graph_view, - e_property_triangle_count, - tmp_edge_mask, - raft::device_span(std::get<0>(vertex_pair_buffer_q_r_for_p_r).data(), - std::get<0>(vertex_pair_buffer_q_r_for_p_r).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_q_r_for_p_r).data(), - std::get<1>(vertex_pair_buffer_q_r_for_p_r).size())); - } - - // Mask all the edges that have 0 count - cugraph::transform_e( + update_count( handle, cur_graph_view, - // is it more efficient to extract edges with 0 count first? 
- // edges_with_no_triangle, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - e_property_triangle_count.view(), - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { - return count != 0; - }, - edge_mask.mutable_view(), - false); - - cur_graph_view.attach_edge_mask(edge_mask.view()); + e_property_triangle_count, + tmp_edge_mask, + raft::device_span(std::get<0>(vertex_pair_buffer_p_r).data(), std::get<0>(vertex_pair_buffer_p_r).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_p_r).data(), std::get<1>(vertex_pair_buffer_p_r).size()) + ); + + + + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + tmp_edge_mask, + raft::device_span(std::get<0>(vertex_pair_buffer_p_q_for_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_for_p_r).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_p_q_for_p_r).data(), std::get<1>(vertex_pair_buffer_p_q_for_p_r).size()) + ); + + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + tmp_edge_mask, + raft::device_span(std::get<0>(vertex_pair_buffer_q_r_for_p_r).data(), std::get<0>(vertex_pair_buffer_q_r_for_p_r).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_q_r_for_p_r).data(), std::get<1>(vertex_pair_buffer_q_r_for_p_r).size()) + ); - if (edge_weight_view) { - auto [edgelist_srcs, edgelist_dsts, edgelist_count] = extract_transform_e( + // Mask all the edges that have 0 count + // cur_graph_view.clear_edge_mask(); //FIXME: Make sure the mask is cleared here + + cugraph::transform_e( handle, cur_graph_view, + // is it more efficient to extract edges with 0 count first? 
+ //edges_with_no_triangle, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), - // view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), e_property_triangle_count.view(), - extract_edges{}); + [] __device__( + auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { + // printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); + //if (count == 0) + return count != 0; + }, + edge_mask.mutable_view(), + false); + + cur_graph_view.attach_edge_mask(edge_mask.view()); + /* + if (edge_weight_view) { + auto [edgelist_srcs, edgelist_dsts, edgelist_count] = extract_transform_e(handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), + e_property_triangle_count.view(), + extract_edges{}); cugraph::edge_bucket_t edges_with_triangle(handle); // FIXME: Does 'extract_transform_e' yield sorted edges? 
- edges_with_triangle.insert( - edgelist_srcs.begin(), edgelist_srcs.end(), edgelist_dsts.begin()); + edges_with_triangle.insert(edgelist_srcs.begin(), + edgelist_srcs.end(), + edgelist_dsts.begin()); + cugraph::transform_e( handle, cur_graph_view, @@ -1503,10 +1291,37 @@ k_truss(raft::handle_t const& handle, return true; }, edge_mask.mutable_view(), - true); // FIXME: remove expensive check + true); // FIXME: remove expensive check - cur_graph_view.attach_edge_mask(edge_mask.view()); + cur_graph_view.attach_edge_mask(edge_mask.view()); } + */ + + /* + printf("\n*****************unrolling p, r edges*************\n"); + raft::print_device_vector("vertex_pair_buffer_p_r", std::get<0>(vertex_pair_buffer_p_r).data(), std::get<0>(vertex_pair_buffer_p_r).size(), std::cout); + raft::print_device_vector("vertex_pair_buffer_p_r", std::get<1>(vertex_pair_buffer_p_r).data(), std::get<1>(vertex_pair_buffer_p_r).size(), std::cout); + + raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r", std::get<0>(vertex_pair_buffer_p_q_for_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_for_p_r).size(), std::cout); + raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r", std::get<1>(vertex_pair_buffer_p_q_for_p_r).data(), std::get<1>(vertex_pair_buffer_p_q_for_p_r).size(), std::cout); + + raft::print_device_vector("vertex_pair_buffer_q_r_for_p_r", std::get<0>(vertex_pair_buffer_q_r_for_p_r).data(), std::get<0>(vertex_pair_buffer_q_r_for_p_r).size(), std::cout); + raft::print_device_vector("vertex_pair_buffer_q_r_for_p_r", std::get<1>(vertex_pair_buffer_q_r_for_p_r).data(), std::get<1>(vertex_pair_buffer_q_r_for_p_r).size(), std::cout); + */ + auto [srcs_f, dsts_f, count_f] = extract_transform_e(handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), + e_property_triangle_count.view(), + extract_edges{}); + + auto check_edgelist = 
thrust::make_zip_iterator(srcs_f.begin(), dsts_f.begin()); + + thrust::sort_by_key(handle.get_thrust_policy(), + check_edgelist, + check_edgelist + srcs_f.size(), + count_f.begin()); } rmm::device_uvector edgelist_srcs(0, handle.get_stream()); @@ -1531,6 +1346,18 @@ k_truss(raft::handle_t const& handle, return std::make_tuple( std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_wgts)); + /* + rmm::device_uvector weak_edgelist_srcs(0, handle.get_stream()); + rmm::device_uvector weak_edgelist_dsts(0, handle.get_stream()); + std::optional> edgelist_wgts{std::nullopt}; + return std::make_tuple( + std::move(weak_edgelist_srcs), std::move(weak_edgelist_dsts), std::move(edgelist_wgts)); + */ + + + + } + } } // namespace cugraph From df1fb0ebdf445f8c484f3c0019f3dd199649bdab Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Fri, 7 Jun 2024 01:01:10 -0700 Subject: [PATCH 51/93] restructure k-truss implementation --- cpp/src/community/k_truss_impl.cuh | 958 ++++++++++++++--------------- 1 file changed, 464 insertions(+), 494 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 5bfe6caa33c..e93d7ca713d 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -24,6 +24,7 @@ #include "prims/update_edge_src_dst_property.cuh" #include +#include #include #include #include @@ -68,9 +69,7 @@ edge_t remove_overcompensating_edges(raft::handle_t const& handle, if constexpr (is_q_r_edge) { potential_or_incoming_edge = thrust::make_tuple(thrust::get<1>(potential_edge), thrust::get<0>(potential_edge)); }; - - //auto transposed_potential_or_incoming_edge = - // thrust::make_tuple(thrust::get<1>(potential_edge), thrust::get<0>(potential_edge)); + auto itr = thrust::lower_bound( thrust::seq, invalid_first, invalid_last, potential_or_incoming_edge); return (itr != invalid_last && *itr == potential_or_incoming_edge); @@ -89,7 +88,6 @@ struct extract_weak_edges { __device__ thrust::optional> 
operator()( vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) const { - //printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); return count < k - 2 ? thrust::optional>{thrust::make_tuple(src, dst, count)} : thrust::nullopt; @@ -102,7 +100,6 @@ struct extract_edges { auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) const { - //printf("\nchecking the count - src = %d, dst = %d, count = %d\n", src, dst, count); return thrust::make_tuple(src, dst, count); } }; @@ -115,30 +112,19 @@ struct extract_edges_to_q_r { auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const { - //printf("\nchecking the count - src = %d, dst = %d, count = %d\n", src, dst, count); - auto itr_src = thrust::find( thrust::seq, vertex_q_r.begin(), vertex_q_r.end(), src); auto itr_dst = thrust::find( thrust::seq, vertex_q_r.begin(), vertex_q_r.end(), dst); - // (itr != vertex_q_r.end() && ((*itr_dst == dst) || (*itr_dst == dst)) - - //bool select_edge = false; if (itr_src != vertex_q_r.end() && *itr_src == src) { - //select_edge = true; return thrust::optional>{thrust::make_tuple(src, dst)}; } else if (itr_dst != vertex_q_r.end() && *itr_dst == dst) { return thrust::optional>{thrust::make_tuple(src, dst)}; } else { return thrust::nullopt; } - /* - return (itr != vertex_q_r.end() && *itr == dst) - ? 
thrust::optional>{thrust::make_tuple(src, dst)} - : thrust::nullopt; - */ } }; @@ -223,7 +209,6 @@ struct extract_q_idx { thrust::nullopt_t, thrust::nullopt_t) const { - //printf("\n dst = %d, tag = %d\n", dst, thrust::get<1>(tagged_src)); return thrust::make_optional(thrust::make_tuple(dst, thrust::get<1>(tagged_src))); } }; @@ -239,11 +224,7 @@ struct extract_q_idx_closing { thrust::nullopt_t, thrust::nullopt_t) const { - //printf("\n dst = %d, tag = %d\n", dst, thrust::get<1>(tagged_src)); edge_t idx = thrust::get<1>(tagged_src); - if (dst == weak_edgelist_dsts[idx]){ - //printf("\nsrc = %d --- dst = %d, tag = %d\n", thrust::get<0>(tagged_src), dst, thrust::get<1>(tagged_src)); - } return dst == weak_edgelist_dsts[idx] ? thrust::make_optional(thrust::make_tuple(thrust::get<0>(tagged_src), idx)) : thrust::nullopt; @@ -296,26 +277,14 @@ struct generate_p_q_q_r { } }; -// FIXME: remove 'EdgeIterator' template template void update_count(raft::handle_t const& handle, graph_view_t & cur_graph_view, - //thrust::optional(q_r_graph) edge_property_t, edge_t> & e_property_triangle_count, - edge_property_t, bool> const & tmp_edge_mask, raft::device_span vertex_pair_buffer_src, raft::device_span vertex_pair_buffer_dst - //EdgeIterator vertex_pair_buffer, - //vertex_t buffer_size ) { - /* - cugraph::edge_bucket_t edges_to_decrement_count(handle); - edges_to_decrement_count.insert(vertex_pair_buffer_src.begin(), - vertex_pair_buffer_src.end(), - vertex_pair_buffer_dst.begin()); - */ - // Before updating the count, we need to clear the mask // cur_graph_view.clear_edge_mask(); auto vertex_pair_buffer_begin = thrust::make_zip_iterator(vertex_pair_buffer_src.begin(), vertex_pair_buffer_dst.begin()); @@ -348,28 +317,12 @@ void update_count(raft::handle_t const& handle, get_dataframe_buffer_begin(vertex_pair_buffer_unique), decrease_count.begin(), thrust::equal_to>{}); - /* - thrust::for_each( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - 
thrust::make_counting_iterator(unique_pair_count), - [vertex_pair_buffer_begin = get_dataframe_buffer_begin(vertex_pair_buffer_unique)] __device__(auto i) { - - auto src = thrust::get<0>(*(vertex_pair_buffer_begin + i)); - auto dst = thrust::get<1>(*(vertex_pair_buffer_begin + i)); - if ((src == 394) && (dst = 4)) { - printf("\nfound edge 394 -> 4\n"); - } - - }); - */ - cugraph::edge_bucket_t edges_to_decrement_count(handle); edges_to_decrement_count.insert(std::get<0>(vertex_pair_buffer_unique).begin(), std::get<0>(vertex_pair_buffer_unique).end(), std::get<1>(vertex_pair_buffer_unique).begin()); - + cugraph::transform_e( handle, cur_graph_view, @@ -387,289 +340,92 @@ void update_count(raft::handle_t const& handle, auto e = thrust::make_tuple(src, dst); auto itr_pair = thrust::lower_bound( thrust::seq, vertex_pair_buffer_begin, vertex_pair_buffer_end, e); - - // FIXME: This check shouldn't be necessary - if ((itr_pair != vertex_pair_buffer_end) && (*itr_pair == e)) { - auto idx_pair = thrust::distance(vertex_pair_buffer_begin, itr_pair); - //printf("\nupdating the count - src = %d, dst = %d, count = %d\n", src, dst, count); - //cuda::atomic_ref atomic_counter(count); - //auto r = atomic_counter.fetch_sub(edge_t{decrease_count[idx_pair]}, cuda::std::memory_order_relaxed); - //if ((src == 394) && (dst == 4)) { - // auto new_count = count - decrease_count[idx_pair]; - //printf("\nold count for edge 394 -> 4 = %d and new count = %d\n", count, new_count); - //} - return count - decrease_count[idx_pair]; - //return count - 1; - //return count; - } - - return count; + auto idx_pair = thrust::distance(vertex_pair_buffer_begin, itr_pair); + return count - decrease_count[idx_pair]; }, e_property_triangle_count.mutable_view(), true); // FIXME: set expensive check to False - //cur_graph_view.attach_edge_mask(tmp_edge_mask.view()); }; - - -// FIXME: Update return to void -template -vertex_t find_unroll_p_q_q_r_edges(raft::handle_t const& handle, - graph_view_t & 
cur_graph_view, - //thrust::optional(q_r_graph) - thrust::optional> const & graph_q_r, - edge_property_t, edge_t> & e_property_triangle_count, - edge_property_t, bool> & tmp_edge_mask, - raft::device_span weak_edgelist_srcs, - raft::device_span weak_edgelist_dsts, - bool do_expensive_check) { - - size_t prev_chunk_size = 0; - size_t chunk_num_invalid_edges = weak_edgelist_srcs.size(); - size_t edges_to_intersect_per_iteration = - static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); - - auto num_chunks = - raft::div_rounding_up_safe(weak_edgelist_srcs.size(), edges_to_intersect_per_iteration); +template +std::tuple +accumulate_triangles_p_q_or_q_r(raft::handle_t const& handle, + graph_view_t & graph_view, + raft::device_span weak_edgelist_srcs, + raft::device_span weak_edgelist_dsts, + size_t prev_chunk_size, + size_t chunk_size, + bool do_expensive_check) { auto weak_edgelist_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + + // Call nbr_intersection unroll (p, q) and (q, r) edges + auto [intersection_offsets, intersection_indices] = + detail::nbr_intersection(handle, + graph_view, + cugraph::edge_dummy_property_t{}.view(), + weak_edgelist_first + prev_chunk_size, + weak_edgelist_first + prev_chunk_size + chunk_size, + std::array{true, true}, + //do_expensive_check : FIXME + true); - auto graph_view = graph_q_r ? 
*graph_q_r : cur_graph_view; + // Generate (p, q) edges + auto vertex_pair_buffer_p_q = + allocate_dataframe_buffer>(intersection_indices.size(), + handle.get_stream()); + + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q), + get_dataframe_buffer_end(vertex_pair_buffer_p_q), + generate_p_q{ + prev_chunk_size, + raft::device_span(intersection_offsets.data(), + intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + weak_edgelist_srcs, + weak_edgelist_dsts + }); - for (size_t i = 0; i < num_chunks; ++i) { - auto chunk_size = std::min(edges_to_intersect_per_iteration, chunk_num_invalid_edges); - - auto [intersection_offsets, intersection_indices] = - detail::nbr_intersection(handle, - graph_view, - cugraph::edge_dummy_property_t{}.view(), - weak_edgelist_first + prev_chunk_size, - weak_edgelist_first + prev_chunk_size + chunk_size, - std::array{true, true}, - //do_expensive_check : FIXME - true); - - // Generate (p, q) edges - // FIXME: Should this array be reduced? 
an edge can have an intersection size > 1 - auto vertex_pair_buffer_p_q = + auto vertex_pair_buffer_p_r_edge_p_q = allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); - - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q), - get_dataframe_buffer_end(vertex_pair_buffer_p_q), - generate_p_q{ - prev_chunk_size, - raft::device_span(intersection_offsets.data(), - intersection_offsets.size()), - raft::device_span(intersection_indices.data(), - intersection_indices.size()), - weak_edgelist_srcs, - weak_edgelist_dsts - }); - - //raft::print_device_vector("vertex_pair_buffer_p_q", std::get<0>(vertex_pair_buffer_p_q).data(), std::get<0>(vertex_pair_buffer_p_q).size(), std::cout); - //raft::print_device_vector("vertex_pair_buffer_p_q", std::get<1>(vertex_pair_buffer_p_q).data(), std::get<1>(vertex_pair_buffer_p_q).size(), std::cout); - auto vertex_pair_buffer_p_r_edge_p_q = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), - get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q), - generate_p_r_or_q_r_from_p_q{ - prev_chunk_size, - raft::device_span(intersection_offsets.data(), - intersection_offsets.size()), - raft::device_span(intersection_indices.data(), - intersection_indices.size()), - weak_edgelist_srcs, - weak_edgelist_dsts}); - - auto vertex_pair_buffer_q_r_edge_p_q = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), - get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q), - generate_p_r_or_q_r_from_p_q{ - prev_chunk_size, - raft::device_span(intersection_offsets.data(), - intersection_offsets.size()), - raft::device_span(intersection_indices.data(), - intersection_indices.size()), - 
weak_edgelist_srcs, - weak_edgelist_dsts}); - - if constexpr (! is_p_q_edge) { - auto num_edges_not_overcomp = - remove_overcompensating_edges( - handle, - intersection_indices.size(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()) - ); - - resize_dataframe_buffer(vertex_pair_buffer_p_r_edge_p_q, num_edges_not_overcomp, handle.get_stream()); - resize_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_q, num_edges_not_overcomp, handle.get_stream()); - - // Reconstruct (q, r) edges that didn't already have their count updated - // resize initial (q, r) edges - resize_dataframe_buffer(vertex_pair_buffer_p_q, num_edges_not_overcomp, handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q), - get_dataframe_buffer_end(vertex_pair_buffer_p_q), - [ - vertex_pair_buffer_p_r_edge_p_q = get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), - vertex_pair_buffer_q_r_edge_p_q = get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q) - ] __device__(auto i) { - return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_r_edge_p_q[i]), thrust::get<0>(vertex_pair_buffer_q_r_edge_p_q[i])); - }); - /* - printf("\n***************after removing overcompensating edges***************\n"); - raft::print_device_vector("vertex_pair_buffer_p_q", std::get<0>(vertex_pair_buffer_p_q).data(), std::get<0>(vertex_pair_buffer_p_q).size(), std::cout); - raft::print_device_vector("vertex_pair_buffer_p_q", std::get<1>(vertex_pair_buffer_p_q).data(), std::get<1>(vertex_pair_buffer_p_q).size(), std::cout); - - raft::print_device_vector("vertex_pair_buffer_p_r_edge_p_q", std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size(), std::cout); - 
raft::print_device_vector("vertex_pair_buffer_p_r_edge_p_q", std::get<1>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_p_r_edge_p_q).size(), std::cout); - - raft::print_device_vector("vertex_pair_buffer_q_r_edge_p_q", std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size(), std::cout); - raft::print_device_vector("vertex_pair_buffer_q_r_edge_p_q", std::get<1>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_q_r_edge_p_q).size(), std::cout); - */ - } - - - // unroll (p, q) edges - // FIXME: remove 'EdgeIterator' template - //if constexpr (is_p_q_edge) { - - update_count( - handle, - cur_graph_view, - e_property_triangle_count, - tmp_edge_mask, - raft::device_span(std::get<0>(vertex_pair_buffer_p_q).data(), std::get<0>(vertex_pair_buffer_p_q).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_p_q).data(), std::get<1>(vertex_pair_buffer_p_q).size()) - ); - - //} - /* - else { - update_count( - handle, - cur_graph_view, - e_property_triangle_count, - tmp_edge_mask, - raft::device_span(std::get<1>(vertex_pair_buffer_p_q).data(), std::get<1>(vertex_pair_buffer_p_q).size()), - raft::device_span(std::get<0>(vertex_pair_buffer_p_q).data(), std::get<0>(vertex_pair_buffer_p_q).size()) - ); - } - */ - - if constexpr (is_p_q_edge) { - update_count( - handle, - cur_graph_view, - e_property_triangle_count, - tmp_edge_mask, - raft::device_span(std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_p_r_edge_p_q).size()) - ); - } else { - update_count( - handle, - cur_graph_view, - e_property_triangle_count, - tmp_edge_mask, - raft::device_span(std::get<1>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_p_r_edge_p_q).size()), - raft::device_span(std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), 
std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size()) - ); - } - - if constexpr (is_p_q_edge) { - update_count( - handle, - cur_graph_view, - e_property_triangle_count, - tmp_edge_mask, - raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_q_r_edge_p_q).size()) - ); - } else { - update_count( - handle, - cur_graph_view, - e_property_triangle_count, - tmp_edge_mask, - raft::device_span(std::get<1>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_q_r_edge_p_q).size()), - raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size()) - ); - } - - // FIXME: Might be a problem when chunking becuase you will be setting the property of all edges to True - //cugraph::edge_property_t, bool> tmp_edge_mask(handle, cur_graph_view); - //if constexpr (is_p_q_edge) { - // FIXME: this temporary mask should also be for (q, r) edges but mask should be cleared before - // before unrolling so that weak edges found can have their count decremented as well - // Note: is this necessary to decrement the count of weak edges we already found. We know their count would - // be zero any ways. - - // FIXME: This might not work when chunking because the invalid (p. q) edges should be - // temporarily masked at the end when completly unrolling (p, q) edges. 
Failing to do - // this might cause some invalid edges (p, q) to not have their count decremented - - //cugraph::edge_property_t, bool> tmp_edge_mask(handle, cur_graph_view); - //cugraph::fill_edge_property(handle, cur_graph_view, true, tmp_edge_mask); - /* - cur_graph_view.attach_edge_mask(tmp_edge_mask.view()); - cugraph::edge_bucket_t edges_to_tmp_mask(handle); - edges_to_tmp_mask.clear(); // Continuously mask (p, q) edges as they are processed in chunks - edges_to_tmp_mask.insert(std::get<0>(vertex_pair_buffer_p_q).begin(), - std::get<0>(vertex_pair_buffer_p_q).end(), - std::get<1>(vertex_pair_buffer_p_q).begin()); - - cugraph::transform_e( - handle, - cur_graph_view, - edges_to_tmp_mask, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto wgt) { - return false; - }, - tmp_edge_mask.mutable_view(), - false); - - cur_graph_view.attach_edge_mask(tmp_edge_mask.view()); - */ - - - - auto [srcs, dsts, count] = extract_transform_e(handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), - e_property_triangle_count.view(), - extract_edges{}); + handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), + get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q), + generate_p_r_or_q_r_from_p_q{ + prev_chunk_size, + raft::device_span(intersection_offsets.data(), + intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + weak_edgelist_srcs, + weak_edgelist_dsts}); - prev_chunk_size += chunk_size; - chunk_num_invalid_edges -= chunk_size; - } - - return 0; + auto vertex_pair_buffer_q_r_edge_p_q = + 
allocate_dataframe_buffer>(intersection_indices.size(), + handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), + get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q), + generate_p_r_or_q_r_from_p_q{ + prev_chunk_size, + raft::device_span(intersection_offsets.data(), + intersection_offsets.size()), + raft::device_span(intersection_indices.data(), + intersection_indices.size()), + weak_edgelist_srcs, + weak_edgelist_dsts}); + + return std::make_tuple(std::move(vertex_pair_buffer_p_q), std::move(vertex_pair_buffer_p_r_edge_p_q), std::move(vertex_pair_buffer_q_r_edge_p_q)); } + } // namespace template @@ -735,7 +491,7 @@ k_truss(raft::handle_t const& handle, } // 3. Find (k-1)-core and exclude edges that do not belong to (k-1)-core - //#if 0 + #if 0 { auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; @@ -794,7 +550,7 @@ k_truss(raft::handle_t const& handle, } renumber_map = std::move(tmp_renumber_map); } - //#endif + #endif // 4. Keep only the edges from a low-degree vertex to a high-degree vertex. @@ -877,30 +633,24 @@ k_truss(raft::handle_t const& handle, { auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; - /* - Design - 1) create a new graph with with the edge property from which we will iterate - a) Directly update the property of the edges - a) How do you traverse the graph? - */ - - // FIXME: This mask should be intialized in the while loop I think? - auto e_property_triangle_count = edge_triangle_count(handle, cur_graph_view); - cugraph::edge_property_t, bool> tmp_edge_mask(handle, cur_graph_view); - cugraph::fill_edge_property(handle, cur_graph_view, true, tmp_edge_mask); + + edge_weight_view = + edge_weight ? 
std::make_optional((*edge_weight).view()) + : std::optional>{std::nullopt}; + + auto e_property_triangle_count = edge_triangle_count(handle, cur_graph_view); cugraph::edge_property_t edge_mask(handle, cur_graph_view); cugraph::fill_edge_property(handle, cur_graph_view, true, edge_mask); - - //cugraph::edge_property_t tmp_edge_mask(handle, cur_graph_view); - //cugraph::fill_edge_property(handle, cur_graph_view, true, tmp_edge_mask); - //cur_graph_view.attach_edge_mask(tmp_edge_mask.view()); // extract the edges that have counts less than k - 2. THose edges will be unrolled auto iteration = -1; while (true) { // FIXME: No need to extract the count of invalid edges because we don't use them ************ iteration += 1; + if (iteration == 1) { + break; + } printf("\n********************************iteration = %d********************************\n", iteration); auto [weak_edgelist_srcs, weak_edgelist_dsts, triangle_count] = extract_transform_e(handle, cur_graph_view, @@ -908,50 +658,109 @@ k_truss(raft::handle_t const& handle, edge_dst_dummy_property_t{}.view(), e_property_triangle_count.view(), extract_weak_edges{k}); - - //if (iteration == 8) { break; } - if (weak_edgelist_srcs.size() == 0) { break; } + //if (weak_edgelist_srcs.size() == 0) { break; } + auto done = 1; + if constexpr (multi_gpu) + { + if (weak_edgelist_srcs.size() == 0) { done = 0; } + done = host_scalar_allreduce( + handle.get_comms(), done, raft::comms::op_t::MAX, handle.get_stream()); + + if (done == 0) { break; } + } + else if (weak_edgelist_srcs.size() == 0) + { + break; + } auto weak_edgelist_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); - // FIXME: No need to extract the count of invalid edges because we don't use them ************ - thrust::sort_by_key(handle.get_thrust_policy(), + thrust::sort(handle.get_thrust_policy(), weak_edgelist_first, - weak_edgelist_first + weak_edgelist_srcs.size(), - triangle_count.begin()); + weak_edgelist_first + 
weak_edgelist_srcs.size()); - //raft::print_device_vector("srcs", weak_edgelist_srcs.data(), weak_edgelist_srcs.size(), std::cout); - //raft::print_device_vector("dsts", weak_edgelist_dsts.data(), weak_edgelist_dsts.size(), std::cout); - //raft::print_device_vector("n_tr", triangle_count.data(), triangle_count.size(), std::cout); - - // Call nbr_intersection unroll (p, q) edges - size_t edges_to_intersect_per_iteration = - static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); - + // Find intersection edges size_t prev_chunk_size = 0; size_t chunk_num_invalid_edges = weak_edgelist_srcs.size(); - + size_t edges_to_intersect_per_iteration = + static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); + edges_to_intersect_per_iteration = 100; auto num_chunks = raft::div_rounding_up_safe(weak_edgelist_srcs.size(), edges_to_intersect_per_iteration); - - edge_property_t modified_triangle_count(handle, cur_graph_view); - // find intersection edges + for (size_t i = 0; i < num_chunks; ++i) { + auto chunk_size = std::min(edges_to_intersect_per_iteration, chunk_num_invalid_edges); + auto [vertex_pair_buffer_p_q, vertex_pair_buffer_p_r_edge_p_q, vertex_pair_buffer_q_r_edge_p_q] = accumulate_triangles_p_q_or_q_r>(size_t{0}, handle.get_stream())), multi_gpu>( + handle, + cur_graph_view, + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), + prev_chunk_size, + chunk_size, + do_expensive_check); + + rmm::device_uvector vertex_pair_buffer_p_r_edge_p_q_srcs(0, handle.get_stream()); + rmm::device_uvector vertex_pair_buffer_p_r_edge_p_q_dsts(0, handle.get_stream()); + rmm::device_uvector vertex_pair_buffer_q_r_edge_p_q_srcs(0, handle.get_stream()); + rmm::device_uvector vertex_pair_buffer_q_r_edge_p_q_dsts(0, handle.get_stream()); + // Shuffle edges + if constexpr (multi_gpu) { + // FIXME: Check whether we need to shuffle (p, q) edges + 
std::tie(vertex_pair_buffer_p_r_edge_p_q_srcs, vertex_pair_buffer_p_r_edge_p_q_dsts, std::ignore, std::ignore, std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<0>(vertex_pair_buffer_p_r_edge_p_q)), + std::move(std::get<1>(vertex_pair_buffer_p_r_edge_p_q)), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + + std::tie(vertex_pair_buffer_q_r_edge_p_q_srcs, vertex_pair_buffer_q_r_edge_p_q_dsts, std::ignore, std::ignore, std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<0>(vertex_pair_buffer_q_r_edge_p_q)), + std::move(std::get<1>(vertex_pair_buffer_q_r_edge_p_q)), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + } - - find_unroll_p_q_q_r_edges( - handle, - cur_graph_view, - thrust::nullopt, - e_property_triangle_count, - tmp_edge_mask, - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), - do_expensive_check - //weak_edgelist_first, - //weak_edgelist_srcs.size() - ); + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + raft::device_span(std::get<0>(vertex_pair_buffer_p_q).data(), std::get<0>(vertex_pair_buffer_p_q).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_p_q).data(), std::get<1>(vertex_pair_buffer_p_q).size()) + ); + + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + multi_gpu ? raft::device_span(vertex_pair_buffer_p_r_edge_p_q_srcs.data(), vertex_pair_buffer_p_r_edge_p_q_srcs.size()) : raft::device_span(std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size()), // FIXME: Make sure multi_gpu is properly handles + multi_gpu ? 
raft::device_span(vertex_pair_buffer_p_r_edge_p_q_dsts.data(), vertex_pair_buffer_p_r_edge_p_q_dsts.size()) : raft::device_span(std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size()) // FIXME: Make sure multi_gpu is properly handles + ); + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + multi_gpu ? raft::device_span(vertex_pair_buffer_p_r_edge_p_q_srcs.data(), vertex_pair_buffer_p_r_edge_p_q_srcs.size()) : raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size()), + multi_gpu ? raft::device_span(vertex_pair_buffer_p_r_edge_p_q_dsts.data(), vertex_pair_buffer_p_r_edge_p_q_dsts.size()) : raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size()) + ); + + prev_chunk_size += chunk_size; + chunk_num_invalid_edges -= chunk_size; + } + // FIXME: Remove this as it is only used for debugging auto [srcs, dsts, count] = extract_transform_e(handle, cur_graph_view, cugraph::edge_src_dummy_property_t{}.view(), @@ -959,10 +768,8 @@ k_truss(raft::handle_t const& handle, //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), e_property_triangle_count.view(), extract_edges{}); - - // Iterate over unique vertices that appear as either q or r - printf("\nweak_edgelist size = %d\n", weak_edgelist_srcs.size()); + // Iterate over unique vertices that appear as either q or r rmm::device_uvector unique_weak_edgelist_srcs(weak_edgelist_srcs.size(), handle.get_stream()); rmm::device_uvector unique_weak_edgelist_dsts(weak_edgelist_dsts.size(), handle.get_stream()); @@ -997,14 +804,9 @@ k_truss(raft::handle_t const& handle, auto num_unique_weak_edgelist_dsts = thrust::distance(unique_weak_edgelist_dsts.begin(), unique_dsts_end); unique_weak_edgelist_srcs.resize(num_unique_weak_edgelist_srcs, handle.get_stream()); 
unique_weak_edgelist_dsts.resize(num_unique_weak_edgelist_dsts, handle.get_stream()); - - //rmm::device_uvector unique_weak_edgelist_srcs(weak_edgelist_srcs.size(), handle.get_stream()); - //rmm::device_uvector unique_weak_edgelist_dsts(weak_edgelist_dsts.size(), handle.get_stream()); rmm::device_uvector vertex_q_r(num_unique_weak_edgelist_srcs + num_unique_weak_edgelist_dsts, handle.get_stream()); - - thrust::set_union(handle.get_thrust_policy(), unique_weak_edgelist_srcs.begin(), unique_weak_edgelist_srcs.end(), @@ -1027,46 +829,287 @@ k_truss(raft::handle_t const& handle, cur_graph_view, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), - //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), cugraph::edge_dummy_property_t{}.view(), - //e_property_triangle_count.view(), extract_edges_to_q_r{raft::device_span(vertex_q_r.data(), vertex_q_r.size())}); - + + if constexpr (multi_gpu) { + std::tie(dsts_to_q_r, srcs_to_q_r, std::ignore, std::ignore, std::ignore) = + detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(dsts_to_q_r), + std::move(srcs_to_q_r), + std::nullopt, + std::nullopt, + std::nullopt); + } + std::optional> graph_q_r{std::nullopt}; - std::optional> renumber_map_q_r{std::nullopt}; - std::tie(*graph_q_r, std::ignore, std::ignore, std::ignore, renumber_map_q_r) = - create_graph_from_edgelist( + std::optional> renumber_map_q_r{std::nullopt}; + std::tie(*graph_q_r, std::ignore, std::ignore, std::ignore, renumber_map_q_r) = + create_graph_from_edgelist( + handle, + std::nullopt, + std::move(dsts_to_q_r), + std::move(srcs_to_q_r), + std::nullopt, + std::nullopt, + std::nullopt, + cugraph::graph_properties_t{true, graph_view.is_multigraph()}, + false); + + auto csc_q_r_graph_view = (*graph_q_r).view(); + prev_chunk_size = 0; + chunk_num_invalid_edges = weak_edgelist_srcs.size(); + + num_chunks = + 
raft::div_rounding_up_safe(weak_edgelist_srcs.size(), edges_to_intersect_per_iteration); + + + rmm::device_uvector cp_weak_edgelist_srcs(0, handle.get_stream()); + rmm::device_uvector cp_weak_edgelist_dsts(0, handle.get_stream()); + size_t weak_edgelist_size = weak_edgelist_srcs.size(); + + if constexpr (multi_gpu) { + cp_weak_edgelist_srcs.resize(weak_edgelist_srcs.size(), handle.get_stream()); + cp_weak_edgelist_dsts.resize(weak_edgelist_dsts.size(), handle.get_stream()); + + thrust::copy( + handle.get_thrust_policy(), + thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()), + thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end()), + thrust::make_zip_iterator(cp_weak_edgelist_srcs.begin(), cp_weak_edgelist_dsts.begin())); + + rmm::device_uvector shuffled_weak_edgelist_srcs{0, handle.get_stream()}; + rmm::device_uvector shuffled_weak_edgelist_dsts{0, handle.get_stream()}; + + std::tie( + cp_weak_edgelist_srcs, cp_weak_edgelist_dsts, std::ignore, std::ignore, std::ignore) = + detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(cp_weak_edgelist_srcs), + std::move(cp_weak_edgelist_dsts), + std::nullopt, + std::nullopt, + std::nullopt); + + renumber_ext_vertices( + handle, + cp_weak_edgelist_srcs.data(), + cp_weak_edgelist_srcs.size(), + (*renumber_map_q_r).data(), + csc_q_r_graph_view.local_vertex_partition_range_first(), + csc_q_r_graph_view.local_vertex_partition_range_last(), + true); + + renumber_ext_vertices( + handle, + cp_weak_edgelist_dsts.data(), + cp_weak_edgelist_dsts.size(), + (*renumber_map_q_r).data(), + csc_q_r_graph_view.local_vertex_partition_range_first(), + csc_q_r_graph_view.local_vertex_partition_range_last(), + true); + + weak_edgelist_size = cp_weak_edgelist_srcs.size(); + weak_edgelist_first = + thrust::make_zip_iterator(cp_weak_edgelist_srcs.begin(), cp_weak_edgelist_dsts.begin()); + thrust::sort(handle.get_thrust_policy(), + 
weak_edgelist_first, + weak_edgelist_first + cp_weak_edgelist_srcs.size()); + } + + for (size_t i = 0; i < num_chunks; ++i) { + auto chunk_size = std::min(edges_to_intersect_per_iteration, chunk_num_invalid_edges); + // Find intersection of weak edges + + auto sorted_weak_edgelist_srcs = thrust::get<0>(weak_edgelist_first.get_iterator_tuple()); + auto sorted_weak_edgelist_dsts = thrust::get<1>(weak_edgelist_first.get_iterator_tuple()); + auto [vertex_pair_buffer_q_r, vertex_pair_buffer_p_q_edge_q_r, vertex_pair_buffer_p_r_edge_q_r] = accumulate_triangles_p_q_or_q_r>(size_t{0}, handle.get_stream())), multi_gpu>( + handle, + csc_q_r_graph_view, + raft::device_span(sorted_weak_edgelist_srcs, weak_edgelist_size), + raft::device_span(sorted_weak_edgelist_dsts, weak_edgelist_size), + prev_chunk_size, + chunk_size, + do_expensive_check); + + if constexpr (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); // FIXME: for debugging + // Get global weak_edgelist + // FIXME: Perform all-to-all in chunks + auto global_weak_edgelist_srcs = cugraph::detail::device_allgatherv( + handle, comm, raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size())); + // FIXME: Perform all-to-all in chunks + auto global_weak_edgelist_dsts = cugraph::detail::device_allgatherv( + handle, comm, raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); + + /* + weak_edgelist_srcs = raft::device_span(global_weak_edgelist_srcs.data(), + global_weak_edgelist_srcs.size()); + weak_edgelist_dsts = raft::device_span(global_weak_edgelist_dsts.data(), + global_weak_edgelist_dsts.size()); + */ + weak_edgelist_size = global_weak_edgelist_srcs.size(); + // Sort the weak edges if they are not already + weak_edgelist_first = + thrust::make_zip_iterator(global_weak_edgelist_srcs.begin(), global_weak_edgelist_dsts.begin()); + thrust::sort(handle.get_thrust_policy(), + weak_edgelist_first, + weak_edgelist_first + weak_edgelist_srcs.size()); + } + + 
sorted_weak_edgelist_srcs = thrust::get<0>(weak_edgelist_first.get_iterator_tuple()); + sorted_weak_edgelist_dsts = thrust::get<1>(weak_edgelist_first.get_iterator_tuple()); + auto num_edges_not_overcomp = + remove_overcompensating_edges( handle, - std::nullopt, - std::move(dsts_to_q_r), - std::move(srcs_to_q_r), + size_dataframe_buffer(vertex_pair_buffer_p_q_edge_q_r), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_q_r), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_q_r), + raft::device_span(sorted_weak_edgelist_srcs, weak_edgelist_size), + raft::device_span(sorted_weak_edgelist_dsts, weak_edgelist_size) + ); + + resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_q_r, num_edges_not_overcomp, handle.get_stream()); + resize_dataframe_buffer(vertex_pair_buffer_p_r_edge_q_r, num_edges_not_overcomp, handle.get_stream()); + + // resize initial (q, r) edges + resize_dataframe_buffer(vertex_pair_buffer_q_r, num_edges_not_overcomp, handle.get_stream()); + // Reconstruct (q, r) edges that didn't already have their count updated + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r), + get_dataframe_buffer_end(vertex_pair_buffer_q_r), + [ + vertex_pair_buffer_p_q_edge_q_r = get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_q_r), + vertex_pair_buffer_p_r_edge_q_r = get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_q_r) + ] __device__(auto i) { + return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_edge_q_r[i]), thrust::get<0>(vertex_pair_buffer_p_r_edge_q_r[i])); + }); + + + rmm::device_uvector vertex_pair_buffer_p_q_edge_q_r_srcs(0, handle.get_stream()); + rmm::device_uvector vertex_pair_buffer_p_q_edge_q_r_dsts(0, handle.get_stream()); + rmm::device_uvector vertex_pair_buffer_p_r_edge_q_r_srcs(0, handle.get_stream()); + rmm::device_uvector vertex_pair_buffer_p_r_edge_q_r_dsts(0, handle.get_stream()); + if constexpr (multi_gpu) { + // Unrenumber + auto vertex_partition_range_lasts = 
std::make_optional>( + csc_q_r_graph_view.vertex_partition_range_lasts()); + + unrenumber_int_vertices( + handle, + std::get<0>(vertex_pair_buffer_p_q_edge_q_r).data(), + std::get<0>(vertex_pair_buffer_p_q_edge_q_r).size(), + (*renumber_map).data(), + *vertex_partition_range_lasts, + true); + + unrenumber_int_vertices( + handle, + std::get<1>(vertex_pair_buffer_p_q_edge_q_r).data(), + std::get<1>(vertex_pair_buffer_p_q_edge_q_r).size(), + (*renumber_map).data(), + *vertex_partition_range_lasts, + true); + + unrenumber_int_vertices( + handle, + std::get<0>(vertex_pair_buffer_p_r_edge_q_r).data(), + std::get<0>(vertex_pair_buffer_p_r_edge_q_r).size(), + (*renumber_map).data(), + *vertex_partition_range_lasts, + true); + + unrenumber_int_vertices( + handle, + std::get<1>(vertex_pair_buffer_p_r_edge_q_r).data(), + std::get<1>(vertex_pair_buffer_p_r_edge_q_r).size(), + (*renumber_map).data(), + *vertex_partition_range_lasts, + true); + + unrenumber_int_vertices(handle, + std::get<0>(vertex_pair_buffer_q_r).data(), + std::get<0>(vertex_pair_buffer_q_r).size(), + (*renumber_map).data(), + *vertex_partition_range_lasts, + true); + + unrenumber_int_vertices(handle, + std::get<1>(vertex_pair_buffer_q_r).data(), + std::get<1>(vertex_pair_buffer_q_r).size(), + (*renumber_map).data(), + *vertex_partition_range_lasts, + true); + + // Shuffle + std::tie(vertex_pair_buffer_p_q_edge_q_r_srcs, vertex_pair_buffer_p_q_edge_q_r_dsts, std::ignore, std::ignore, std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<1>(vertex_pair_buffer_p_q_edge_q_r)), + std::move(std::get<0>(vertex_pair_buffer_p_q_edge_q_r)), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + + std::tie(vertex_pair_buffer_p_r_edge_q_r_srcs, vertex_pair_buffer_p_r_edge_q_r_dsts, std::ignore, std::ignore, std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + 
handle, + std::move(std::get<1>(vertex_pair_buffer_p_r_edge_q_r)), + std::move(std::get<0>(vertex_pair_buffer_p_r_edge_q_r)), std::nullopt, std::nullopt, std::nullopt, - cugraph::graph_properties_t{true, graph_view.is_multigraph()}, - false); - - find_unroll_p_q_q_r_edges( - handle, - cur_graph_view, - thrust::make_optional((*graph_q_r).view()), - e_property_triangle_count, - tmp_edge_mask, - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), - do_expensive_check - //weak_edgelist_first, - //weak_edgelist_srcs.size() - ); - - auto [srcs__, dsts__] = extract_transform_e(handle, - (*graph_q_r).view(), - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), - cugraph::edge_dummy_property_t{}.view(), - extract_edges_to_q_r{raft::device_span(vertex_q_r.data(), vertex_q_r.size())}); + cur_graph_view.vertex_partition_range_lasts()); + } + + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + raft::device_span(std::get<0>(vertex_pair_buffer_q_r).data(), std::get<0>(vertex_pair_buffer_q_r).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_q_r).data(), std::get<1>(vertex_pair_buffer_q_r).size()) + ); + + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + raft::device_span(std::get<1>(vertex_pair_buffer_p_q_edge_q_r).data(), std::get<0>(vertex_pair_buffer_p_q_edge_q_r).size()), + raft::device_span(std::get<0>(vertex_pair_buffer_p_q_edge_q_r).data(), std::get<1>(vertex_pair_buffer_p_q_edge_q_r).size()) + ); + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + raft::device_span(std::get<1>(vertex_pair_buffer_p_r_edge_q_r).data(), std::get<0>(vertex_pair_buffer_p_r_edge_q_r).size()), + raft::device_span(std::get<0>(vertex_pair_buffer_p_r_edge_q_r).data(), 
std::get<1>(vertex_pair_buffer_p_r_edge_q_r).size()) + ); + + prev_chunk_size += chunk_size; + chunk_num_invalid_edges -= chunk_size; + + } + #if 0 // Unrolling p, r edges // create pair invalid_src, invalid_edge_idx // create a dataframe buffer of size invalid_edge_size @@ -1124,8 +1167,6 @@ k_truss(raft::handle_t const& handle, auto vertex_pair_buffer_p_r = allocate_dataframe_buffer>(q_closing.size(), handle.get_stream()); - // construct pair (p, q) - // construct pair (q, r) thrust::tabulate( handle.get_thrust_policy(), get_dataframe_buffer_begin(vertex_pair_buffer_p_r), @@ -1200,7 +1241,6 @@ k_truss(raft::handle_t const& handle, resize_dataframe_buffer(vertex_pair_buffer_q_r_for_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); // Reconstruct (p, r) edges that didn't already have their count updated - resize_dataframe_buffer(vertex_pair_buffer_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); thrust::tabulate( handle.get_thrust_policy(), @@ -1217,18 +1257,14 @@ k_truss(raft::handle_t const& handle, handle, cur_graph_view, e_property_triangle_count, - tmp_edge_mask, raft::device_span(std::get<0>(vertex_pair_buffer_p_r).data(), std::get<0>(vertex_pair_buffer_p_r).size()), raft::device_span(std::get<1>(vertex_pair_buffer_p_r).data(), std::get<1>(vertex_pair_buffer_p_r).size()) ); - - update_count( handle, cur_graph_view, e_property_triangle_count, - tmp_edge_mask, raft::device_span(std::get<0>(vertex_pair_buffer_p_q_for_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_for_p_r).size()), raft::device_span(std::get<1>(vertex_pair_buffer_p_q_for_p_r).data(), std::get<1>(vertex_pair_buffer_p_q_for_p_r).size()) ); @@ -1237,106 +1273,50 @@ k_truss(raft::handle_t const& handle, handle, cur_graph_view, e_property_triangle_count, - tmp_edge_mask, raft::device_span(std::get<0>(vertex_pair_buffer_q_r_for_p_r).data(), std::get<0>(vertex_pair_buffer_q_r_for_p_r).size()), raft::device_span(std::get<1>(vertex_pair_buffer_q_r_for_p_r).data(), 
std::get<1>(vertex_pair_buffer_q_r_for_p_r).size()) ); // Mask all the edges that have 0 count - // cur_graph_view.clear_edge_mask(); //FIXME: Make sure the mask is cleared here - cugraph::transform_e( handle, cur_graph_view, - // is it more efficient to extract edges with 0 count first? - //edges_with_no_triangle, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), e_property_triangle_count.view(), [] __device__( auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { - // printf("\nsrc = %d, dst = %d, count = %d\n", src, dst, count); - //if (count == 0) return count != 0; }, edge_mask.mutable_view(), false); cur_graph_view.attach_edge_mask(edge_mask.view()); - /* - if (edge_weight_view) { - auto [edgelist_srcs, edgelist_dsts, edgelist_count] = extract_transform_e(handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), - e_property_triangle_count.view(), - extract_edges{}); - - cugraph::edge_bucket_t edges_with_triangle(handle); - // FIXME: Does 'extract_transform_e' yield sorted edges? 
- edges_with_triangle.insert(edgelist_srcs.begin(), - edgelist_srcs.end(), - edgelist_dsts.begin()); - - cugraph::transform_e( - handle, - cur_graph_view, - edges_with_triangle, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - *edge_weight_view, - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto wgt) { - return true; - }, - edge_mask.mutable_view(), - true); // FIXME: remove expensive check - - cur_graph_view.attach_edge_mask(edge_mask.view()); - } - */ - - /* - printf("\n*****************unrolling p, r edges*************\n"); - raft::print_device_vector("vertex_pair_buffer_p_r", std::get<0>(vertex_pair_buffer_p_r).data(), std::get<0>(vertex_pair_buffer_p_r).size(), std::cout); - raft::print_device_vector("vertex_pair_buffer_p_r", std::get<1>(vertex_pair_buffer_p_r).data(), std::get<1>(vertex_pair_buffer_p_r).size(), std::cout); - - raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r", std::get<0>(vertex_pair_buffer_p_q_for_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_for_p_r).size(), std::cout); - raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r", std::get<1>(vertex_pair_buffer_p_q_for_p_r).data(), std::get<1>(vertex_pair_buffer_p_q_for_p_r).size(), std::cout); - - raft::print_device_vector("vertex_pair_buffer_q_r_for_p_r", std::get<0>(vertex_pair_buffer_q_r_for_p_r).data(), std::get<0>(vertex_pair_buffer_q_r_for_p_r).size(), std::cout); - raft::print_device_vector("vertex_pair_buffer_q_r_for_p_r", std::get<1>(vertex_pair_buffer_q_r_for_p_r).data(), std::get<1>(vertex_pair_buffer_q_r_for_p_r).size(), std::cout); - */ - auto [srcs_f, dsts_f, count_f] = extract_transform_e(handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), - e_property_triangle_count.view(), - extract_edges{}); - - auto check_edgelist = 
thrust::make_zip_iterator(srcs_f.begin(), dsts_f.begin()); - - thrust::sort_by_key(handle.get_thrust_policy(), - check_edgelist, - check_edgelist + srcs_f.size(), - count_f.begin()); + auto [edgelist_srcs_, edgelist_dsts_, edgelist_wgts_, dummy_0, dummy_1] = + decompress_to_edgelist( + handle, + cur_graph_view, + edge_weight_view ? std::make_optional(*edge_weight_view) : std::nullopt, + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + std::optional>(std::nullopt)); + #endif } rmm::device_uvector edgelist_srcs(0, handle.get_stream()); rmm::device_uvector edgelist_dsts(0, handle.get_stream()); std::optional> edgelist_wgts{std::nullopt}; - + #if 0 std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, std::ignore, std::ignore) = decompress_to_edgelist( handle, cur_graph_view, - edge_weight_view ? std::make_optional(*edge_weight_view) : std::nullopt, + edge_weight_view, std::optional>{std::nullopt}, std::optional>{std::nullopt}, std::optional>(std::nullopt)); - + std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts) = symmetrize_edgelist(handle, std::move(edgelist_srcs), @@ -1346,17 +1326,7 @@ k_truss(raft::handle_t const& handle, return std::make_tuple( std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_wgts)); - /* - rmm::device_uvector weak_edgelist_srcs(0, handle.get_stream()); - rmm::device_uvector weak_edgelist_dsts(0, handle.get_stream()); - std::optional> edgelist_wgts{std::nullopt}; - return std::make_tuple( - std::move(weak_edgelist_srcs), std::move(weak_edgelist_dsts), std::move(edgelist_wgts)); - */ - - - - + #endif } } From b86697a0cd86a967e8ee762ad611ae7f9fecf46b Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Mon, 10 Jun 2024 09:23:53 -0700 Subject: [PATCH 52/93] unroll (q, r) edges for MG --- cpp/src/community/k_truss_impl.cuh | 299 ++++++++++++++++------------- 1 file changed, 168 insertions(+), 131 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 
e93d7ca713d..cc8656ca1cb 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -85,11 +85,11 @@ edge_t remove_overcompensating_edges(raft::handle_t const& handle, template struct extract_weak_edges { edge_t k{}; - __device__ thrust::optional> operator()( + __device__ thrust::optional> operator()( vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) const { return count < k - 2 - ? thrust::optional>{thrust::make_tuple(src, dst, count)} + ? thrust::optional>{thrust::make_tuple(src, dst)} : thrust::nullopt; } }; @@ -362,16 +362,21 @@ accumulate_triangles_p_q_or_q_r(raft::handle_t const& handle, auto weak_edgelist_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); // Call nbr_intersection unroll (p, q) and (q, r) edges + //printf("\nbefore nbr_intersection, weak_edgelist_srcs_size = %d, vs %d\n", weak_edgelist_srcs.size(), prev_chunk_size + chunk_size); + //raft::print_device_vector("sorted_weak_edgelist_srcs", weak_edgelist_srcs.data(), weak_edgelist_srcs.size(), std::cout); + //raft::print_device_vector("sorted_weak_edgelist_dsts", weak_edgelist_dsts.data(), weak_edgelist_dsts.size(), std::cout); auto [intersection_offsets, intersection_indices] = detail::nbr_intersection(handle, graph_view, cugraph::edge_dummy_property_t{}.view(), weak_edgelist_first + prev_chunk_size, weak_edgelist_first + prev_chunk_size + chunk_size, + //weak_edgelist_first, + //weak_edgelist_first + weak_edgelist_srcs.size(), std::array{true, true}, //do_expensive_check : FIXME true); - + //printf("\nafter nbr_intersection\n"); // Generate (p, q) edges auto vertex_pair_buffer_p_q = allocate_dataframe_buffer>(intersection_indices.size(), @@ -491,7 +496,7 @@ k_truss(raft::handle_t const& handle, } // 3. Find (k-1)-core and exclude edges that do not belong to (k-1)-core - #if 0 + //#if 0 { auto cur_graph_view = modified_graph_view ? 
*modified_graph_view : graph_view; @@ -537,7 +542,7 @@ k_truss(raft::handle_t const& handle, std::nullopt, std::nullopt, cugraph::graph_properties_t{true, graph_view.is_multigraph()}, - false); + true); modified_graph_view = (*modified_graph).view(); @@ -550,7 +555,7 @@ k_truss(raft::handle_t const& handle, } renumber_map = std::move(tmp_renumber_map); } - #endif + //#endif // 4. Keep only the edges from a low-degree vertex to a high-degree vertex. @@ -615,7 +620,7 @@ k_truss(raft::handle_t const& handle, std::nullopt, std::nullopt, cugraph::graph_properties_t{false /* now asymmetric */, cur_graph_view.is_multigraph()}, - false); + true); modified_graph_view = (*modified_graph).view(); if (renumber_map) { // collapse renumber_map @@ -642,24 +647,21 @@ k_truss(raft::handle_t const& handle, cugraph::edge_property_t edge_mask(handle, cur_graph_view); cugraph::fill_edge_property(handle, cur_graph_view, true, edge_mask); - - // extract the edges that have counts less than k - 2. THose edges will be unrolled + auto iteration = -1; while (true) { - // FIXME: No need to extract the count of invalid edges because we don't use them ************ iteration += 1; if (iteration == 1) { - break; + break; // FIXME: Only for debugging purposes } - printf("\n********************************iteration = %d********************************\n", iteration); - auto [weak_edgelist_srcs, weak_edgelist_dsts, triangle_count] = extract_transform_e(handle, - cur_graph_view, - edge_src_dummy_property_t{}.view(), - edge_dst_dummy_property_t{}.view(), - e_property_triangle_count.view(), - extract_weak_edges{k}); - //if (iteration == 8) { break; } - //if (weak_edgelist_srcs.size() == 0) { break; } + // extract the edges that have counts less than k - 2. 
THose edges will be unrolled + + auto [weak_edgelist_srcs, weak_edgelist_dsts] = extract_transform_e(handle, + cur_graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + e_property_triangle_count.view(), + extract_weak_edges{k}); auto done = 1; if constexpr (multi_gpu) { @@ -760,15 +762,6 @@ k_truss(raft::handle_t const& handle, chunk_num_invalid_edges -= chunk_size; } - // FIXME: Remove this as it is only used for debugging - auto [srcs, dsts, count] = extract_transform_e(handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), - e_property_triangle_count.view(), - extract_edges{}); - // Iterate over unique vertices that appear as either q or r rmm::device_uvector unique_weak_edgelist_srcs(weak_edgelist_srcs.size(), handle.get_stream()); rmm::device_uvector unique_weak_edgelist_dsts(weak_edgelist_dsts.size(), handle.get_stream()); @@ -858,15 +851,9 @@ k_truss(raft::handle_t const& handle, std::nullopt, std::nullopt, cugraph::graph_properties_t{true, graph_view.is_multigraph()}, - false); + true); auto csc_q_r_graph_view = (*graph_q_r).view(); - prev_chunk_size = 0; - chunk_num_invalid_edges = weak_edgelist_srcs.size(); - - num_chunks = - raft::div_rounding_up_safe(weak_edgelist_srcs.size(), edges_to_intersect_per_iteration); - rmm::device_uvector cp_weak_edgelist_srcs(0, handle.get_stream()); rmm::device_uvector cp_weak_edgelist_dsts(0, handle.get_stream()); @@ -923,13 +910,18 @@ k_truss(raft::handle_t const& handle, weak_edgelist_first, weak_edgelist_first + cp_weak_edgelist_srcs.size()); } - + prev_chunk_size = 0; + chunk_num_invalid_edges = weak_edgelist_size; + + num_chunks = + raft::div_rounding_up_safe(weak_edgelist_size, edges_to_intersect_per_iteration); + for (size_t i = 0; i < num_chunks; ++i) { auto chunk_size = std::min(edges_to_intersect_per_iteration, 
chunk_num_invalid_edges); - // Find intersection of weak edges - auto sorted_weak_edgelist_srcs = thrust::get<0>(weak_edgelist_first.get_iterator_tuple()); auto sorted_weak_edgelist_dsts = thrust::get<1>(weak_edgelist_first.get_iterator_tuple()); + + // Find intersection of weak edges auto [vertex_pair_buffer_q_r, vertex_pair_buffer_p_q_edge_q_r, vertex_pair_buffer_p_r_edge_q_r] = accumulate_triangles_p_q_or_q_r>(size_t{0}, handle.get_stream())), multi_gpu>( handle, csc_q_r_graph_view, @@ -939,7 +931,62 @@ k_truss(raft::handle_t const& handle, chunk_size, do_expensive_check); + rmm::device_uvector vertex_pair_buffer_p_q_edge_q_r_srcs(0, handle.get_stream()); + rmm::device_uvector vertex_pair_buffer_p_q_edge_q_r_dsts(0, handle.get_stream()); + rmm::device_uvector vertex_pair_buffer_p_r_edge_q_r_srcs(0, handle.get_stream()); + rmm::device_uvector vertex_pair_buffer_p_r_edge_q_r_dsts(0, handle.get_stream()); if constexpr (multi_gpu) { + + // Unrenumber + auto vertex_partition_range_lasts = std::make_optional>( + csc_q_r_graph_view.vertex_partition_range_lasts()); + + unrenumber_int_vertices( + handle, + std::get<0>(vertex_pair_buffer_p_q_edge_q_r).data(), + std::get<0>(vertex_pair_buffer_p_q_edge_q_r).size(), + (*renumber_map_q_r).data(), + *vertex_partition_range_lasts, + true); + + unrenumber_int_vertices( + handle, + std::get<1>(vertex_pair_buffer_p_q_edge_q_r).data(), + std::get<1>(vertex_pair_buffer_p_q_edge_q_r).size(), + (*renumber_map_q_r).data(), + *vertex_partition_range_lasts, + true); + + unrenumber_int_vertices( + handle, + std::get<0>(vertex_pair_buffer_p_r_edge_q_r).data(), + std::get<0>(vertex_pair_buffer_p_r_edge_q_r).size(), + (*renumber_map_q_r).data(), + *vertex_partition_range_lasts, + true); + + unrenumber_int_vertices( + handle, + std::get<1>(vertex_pair_buffer_p_r_edge_q_r).data(), + std::get<1>(vertex_pair_buffer_p_r_edge_q_r).size(), + (*renumber_map_q_r).data(), + *vertex_partition_range_lasts, + true); + + unrenumber_int_vertices(handle, + 
std::get<0>(vertex_pair_buffer_q_r).data(), + std::get<0>(vertex_pair_buffer_q_r).size(), + (*renumber_map_q_r).data(), + *vertex_partition_range_lasts, + true); + + unrenumber_int_vertices(handle, + std::get<1>(vertex_pair_buffer_q_r).data(), + std::get<1>(vertex_pair_buffer_q_r).size(), + (*renumber_map_q_r).data(), + *vertex_partition_range_lasts, + true); + auto& comm = handle.get_comms(); auto const comm_rank = comm.get_rank(); // FIXME: for debugging // Get global weak_edgelist @@ -949,13 +996,7 @@ k_truss(raft::handle_t const& handle, // FIXME: Perform all-to-all in chunks auto global_weak_edgelist_dsts = cugraph::detail::device_allgatherv( handle, comm, raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); - - /* - weak_edgelist_srcs = raft::device_span(global_weak_edgelist_srcs.data(), - global_weak_edgelist_srcs.size()); - weak_edgelist_dsts = raft::device_span(global_weak_edgelist_dsts.data(), - global_weak_edgelist_dsts.size()); - */ + weak_edgelist_size = global_weak_edgelist_srcs.size(); // Sort the weak edges if they are not already weak_edgelist_first = @@ -964,7 +1005,8 @@ k_truss(raft::handle_t const& handle, weak_edgelist_first, weak_edgelist_first + weak_edgelist_srcs.size()); } - + + sorted_weak_edgelist_srcs = thrust::get<0>(weak_edgelist_first.get_iterator_tuple()); sorted_weak_edgelist_dsts = thrust::get<1>(weak_edgelist_first.get_iterator_tuple()); auto num_edges_not_overcomp = @@ -988,7 +1030,7 @@ k_truss(raft::handle_t const& handle, // Reconstruct (q, r) edges that didn't already have their count updated thrust::tabulate( handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r), // FIXME: Properly reconstruct (p, r) even when there is no overcompensation ************************************ get_dataframe_buffer_end(vertex_pair_buffer_q_r), [ vertex_pair_buffer_p_q_edge_q_r = get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_q_r), @@ 
-996,69 +1038,42 @@ k_truss(raft::handle_t const& handle, ] __device__(auto i) { return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_edge_q_r[i]), thrust::get<0>(vertex_pair_buffer_p_r_edge_q_r[i])); }); - - rmm::device_uvector vertex_pair_buffer_p_q_edge_q_r_srcs(0, handle.get_stream()); - rmm::device_uvector vertex_pair_buffer_p_q_edge_q_r_dsts(0, handle.get_stream()); - rmm::device_uvector vertex_pair_buffer_p_r_edge_q_r_srcs(0, handle.get_stream()); - rmm::device_uvector vertex_pair_buffer_p_r_edge_q_r_dsts(0, handle.get_stream()); - if constexpr (multi_gpu) { - // Unrenumber - auto vertex_partition_range_lasts = std::make_optional>( - csc_q_r_graph_view.vertex_partition_range_lasts()); - - unrenumber_int_vertices( - handle, - std::get<0>(vertex_pair_buffer_p_q_edge_q_r).data(), - std::get<0>(vertex_pair_buffer_p_q_edge_q_r).size(), - (*renumber_map).data(), - *vertex_partition_range_lasts, - true); - - unrenumber_int_vertices( - handle, - std::get<1>(vertex_pair_buffer_p_q_edge_q_r).data(), - std::get<1>(vertex_pair_buffer_p_q_edge_q_r).size(), - (*renumber_map).data(), - *vertex_partition_range_lasts, - true); - - unrenumber_int_vertices( + if constexpr (multi_gpu) { + // Shuffle before updating count + rmm::device_uvector vertex_pair_buffer_q_r_srcs(0, handle.get_stream()); + rmm::device_uvector vertex_pair_buffer_q_r_dsts(0, handle.get_stream()); + + std::tie(vertex_pair_buffer_q_r_srcs, vertex_pair_buffer_q_r_dsts, std::ignore, std::ignore, std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( handle, - std::get<0>(vertex_pair_buffer_p_r_edge_q_r).data(), - std::get<0>(vertex_pair_buffer_p_r_edge_q_r).size(), - (*renumber_map).data(), - *vertex_partition_range_lasts, - true); - - unrenumber_int_vertices( + std::move(std::get<0>(vertex_pair_buffer_q_r)), + std::move(std::get<1>(vertex_pair_buffer_q_r)), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + + 
update_count( handle, - std::get<1>(vertex_pair_buffer_p_r_edge_q_r).data(), - std::get<1>(vertex_pair_buffer_p_r_edge_q_r).size(), - (*renumber_map).data(), - *vertex_partition_range_lasts, - true); - - unrenumber_int_vertices(handle, - std::get<0>(vertex_pair_buffer_q_r).data(), - std::get<0>(vertex_pair_buffer_q_r).size(), - (*renumber_map).data(), - *vertex_partition_range_lasts, - true); + cur_graph_view, + e_property_triangle_count, + raft::device_span(vertex_pair_buffer_q_r_srcs.data(), vertex_pair_buffer_q_r_srcs.size()), + raft::device_span(vertex_pair_buffer_q_r_dsts.data(), vertex_pair_buffer_q_r_dsts.size()) + ); - unrenumber_int_vertices(handle, - std::get<1>(vertex_pair_buffer_q_r).data(), - std::get<1>(vertex_pair_buffer_q_r).size(), - (*renumber_map).data(), - *vertex_partition_range_lasts, - true); + // Shuffle before updating count + rmm::device_uvector vertex_pair_buffer_p_q_edge_q_r_srcs(0, handle.get_stream()); + rmm::device_uvector vertex_pair_buffer_p_q_edge_q_r_dsts(0, handle.get_stream()); - // Shuffle - std::tie(vertex_pair_buffer_p_q_edge_q_r_srcs, vertex_pair_buffer_p_q_edge_q_r_dsts, std::ignore, std::ignore, std::ignore) = + std::tie(vertex_pair_buffer_p_q_edge_q_r_dsts, vertex_pair_buffer_p_q_edge_q_r_srcs, std::ignore, std::ignore, std::ignore) = detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + edge_t, + weight_t, + int32_t>( handle, std::move(std::get<1>(vertex_pair_buffer_p_q_edge_q_r)), std::move(std::get<0>(vertex_pair_buffer_p_q_edge_q_r)), @@ -1067,48 +1082,70 @@ k_truss(raft::handle_t const& handle, std::nullopt, cur_graph_view.vertex_partition_range_lasts()); - std::tie(vertex_pair_buffer_p_r_edge_q_r_srcs, vertex_pair_buffer_p_r_edge_q_r_dsts, std::ignore, std::ignore, std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<1>(vertex_pair_buffer_p_r_edge_q_r)), - std::move(std::get<0>(vertex_pair_buffer_p_r_edge_q_r)), - 
std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); - } - - update_count( - handle, - cur_graph_view, - e_property_triangle_count, - raft::device_span(std::get<0>(vertex_pair_buffer_q_r).data(), std::get<0>(vertex_pair_buffer_q_r).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_q_r).data(), std::get<1>(vertex_pair_buffer_q_r).size()) - ); + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + raft::device_span(vertex_pair_buffer_p_q_edge_q_r_dsts.data(), vertex_pair_buffer_p_q_edge_q_r_dsts.size()), + raft::device_span(vertex_pair_buffer_p_q_edge_q_r_srcs.data(), vertex_pair_buffer_p_q_edge_q_r_srcs.size()) + ); - update_count( + // Shuffle before updating count + rmm::device_uvector vertex_pair_buffer_p_r_edge_q_r_srcs(0, handle.get_stream()); + rmm::device_uvector vertex_pair_buffer_p_r_edge_q_r_dsts(0, handle.get_stream()); + + std::tie(vertex_pair_buffer_p_r_edge_q_r_dsts, vertex_pair_buffer_p_r_edge_q_r_srcs, std::ignore, std::ignore, std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<1>(vertex_pair_buffer_p_r_edge_q_r)), + std::move(std::get<0>(vertex_pair_buffer_p_r_edge_q_r)), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + raft::device_span(vertex_pair_buffer_p_r_edge_q_r_dsts.data(), vertex_pair_buffer_p_r_edge_q_r_dsts.size()), + raft::device_span(vertex_pair_buffer_p_r_edge_q_r_srcs.data(), vertex_pair_buffer_p_r_edge_q_r_srcs.size()) + ); + + } else { + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + raft::device_span(std::get<0>(vertex_pair_buffer_q_r).data(), std::get<0>(vertex_pair_buffer_q_r).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_q_r).data(), std::get<1>(vertex_pair_buffer_q_r).size()) + ); + update_count( handle, 
cur_graph_view, e_property_triangle_count, raft::device_span(std::get<1>(vertex_pair_buffer_p_q_edge_q_r).data(), std::get<0>(vertex_pair_buffer_p_q_edge_q_r).size()), raft::device_span(std::get<0>(vertex_pair_buffer_p_q_edge_q_r).data(), std::get<1>(vertex_pair_buffer_p_q_edge_q_r).size()) - ); - - update_count( + ); + update_count( handle, cur_graph_view, e_property_triangle_count, raft::device_span(std::get<1>(vertex_pair_buffer_p_r_edge_q_r).data(), std::get<0>(vertex_pair_buffer_p_r_edge_q_r).size()), raft::device_span(std::get<0>(vertex_pair_buffer_p_r_edge_q_r).data(), std::get<1>(vertex_pair_buffer_p_r_edge_q_r).size()) ); + } prev_chunk_size += chunk_size; chunk_num_invalid_edges -= chunk_size; } + + weak_edgelist_first = + thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); #if 0 // Unrolling p, r edges // create pair invalid_src, invalid_edge_idx @@ -1324,9 +1361,9 @@ k_truss(raft::handle_t const& handle, std::move(edgelist_wgts), false); + #endif return std::make_tuple( std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_wgts)); - #endif } } From b0d63bbb2fe6a87744ec3d9dc339d61ac6eae742 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Mon, 10 Jun 2024 16:17:12 -0700 Subject: [PATCH 53/93] unroll (p, r) edges for MG --- cpp/src/community/k_truss_impl.cuh | 286 ++++++++++++++++++++--------- 1 file changed, 197 insertions(+), 89 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index cc8656ca1cb..b0eeec7e38e 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -48,22 +48,22 @@ edge_t remove_overcompensating_edges(raft::handle_t const& handle, size_t buffer_size, EdgeIterator potential_closing_or_incoming_edges, EdgeIterator incoming_or_potential_closing_edges, - raft::device_span invalid_edgelist_srcs, - raft::device_span invalid_edgelist_dsts) + raft::device_span weak_edgelist_first_srcs, + raft::device_span 
weak_edgelist_first_dsts) { // To avoid over-compensating, check whether the 'potential_closing_edges' - // are within the invalid edges. If yes, the was already unrolled + // are within the weak edges. If yes, the was already unrolled auto edges_not_overcomp = thrust::remove_if( handle.get_thrust_policy(), thrust::make_zip_iterator(potential_closing_or_incoming_edges, incoming_or_potential_closing_edges), thrust::make_zip_iterator(potential_closing_or_incoming_edges + buffer_size, incoming_or_potential_closing_edges + buffer_size), - [num_invalid_edges = invalid_edgelist_dsts.size(), - invalid_first = - thrust::make_zip_iterator(invalid_edgelist_srcs.begin(), invalid_edgelist_dsts.begin()), - invalid_last = thrust::make_zip_iterator(invalid_edgelist_srcs.end(), - invalid_edgelist_dsts.end())] __device__(auto e) { + [num_weak_edges = weak_edgelist_first_dsts.size(), + weak_first = + thrust::make_zip_iterator(weak_edgelist_first_srcs.begin(), weak_edgelist_first_dsts.begin()), + weak_last = thrust::make_zip_iterator(weak_edgelist_first_srcs.end(), + weak_edgelist_first_dsts.end())] __device__(auto e) { auto potential_edge = thrust::get<0>(e); auto potential_or_incoming_edge = thrust::make_tuple(thrust::get<0>(potential_edge), thrust::get<1>(potential_edge)); if constexpr (is_q_r_edge) { @@ -71,8 +71,8 @@ edge_t remove_overcompensating_edges(raft::handle_t const& handle, }; auto itr = thrust::lower_bound( - thrust::seq, invalid_first, invalid_last, potential_or_incoming_edge); - return (itr != invalid_last && *itr == potential_or_incoming_edge); + thrust::seq, weak_first, weak_last, potential_or_incoming_edge); + return (itr != weak_last && *itr == potential_or_incoming_edge); }); auto dist = thrust::distance(thrust::make_zip_iterator(potential_closing_or_incoming_edges, @@ -181,8 +181,8 @@ struct generate_p_r_or_q_r_from_p_q { size_t chunk_start{}; raft::device_span intersection_offsets{}; raft::device_span intersection_indices{}; - raft::device_span invalid_srcs{}; 
- raft::device_span invalid_dsts{}; + raft::device_span weak_srcs{}; + raft::device_span weak_dsts{}; __device__ thrust::tuple operator()(edge_t i) const { @@ -191,10 +191,10 @@ struct generate_p_r_or_q_r_from_p_q { auto idx = thrust::distance(intersection_offsets.begin() + 1, itr); if constexpr (generate_p_r) { - return thrust::make_tuple(invalid_srcs[chunk_start + idx], intersection_indices[i]); + return thrust::make_tuple(weak_srcs[chunk_start + idx], intersection_indices[i]); } else { - return thrust::make_tuple(invalid_dsts[chunk_start + idx], intersection_indices[i]); + return thrust::make_tuple(weak_dsts[chunk_start + idx], intersection_indices[i]); } } }; @@ -236,8 +236,8 @@ struct generate_p_q { size_t chunk_start{}; raft::device_span intersection_offsets{}; raft::device_span intersection_indices{}; - raft::device_span invalid_srcs{}; - raft::device_span invalid_dsts{}; + raft::device_span weak_srcs{}; + raft::device_span weak_dsts{}; __device__ thrust::tuple operator()(edge_t i) const { @@ -245,34 +245,34 @@ struct generate_p_q { thrust::seq, intersection_offsets.begin() + 1, intersection_offsets.end(), i); auto idx = thrust::distance(intersection_offsets.begin() + 1, itr); - return thrust::make_tuple(invalid_srcs[chunk_start + idx], invalid_dsts[chunk_start + idx]); + return thrust::make_tuple(weak_srcs[chunk_start + idx], weak_dsts[chunk_start + idx]); } }; template struct generate_p_r { - EdgeIterator invalid_edge{}; - raft::device_span invalid_edge_idx{}; + EdgeIterator weak_edge{}; + raft::device_span weak_edge_idx{}; __device__ thrust::tuple operator()(edge_t i) const { - return *(invalid_edge + invalid_edge_idx[i]); + return *(weak_edge + weak_edge_idx[i]); } }; template struct generate_p_q_q_r { - EdgeIterator invalid_edge{}; + EdgeIterator weak_edge{}; raft::device_span q_closing{}; - raft::device_span invalid_edge_idx{}; + raft::device_span weak_edge_idx{}; __device__ thrust::tuple operator()(edge_t i) const { if constexpr (generate_p_q) { - 
return thrust::make_tuple(thrust::get<0>(*(invalid_edge + invalid_edge_idx[i])), q_closing[i]); + return thrust::make_tuple(thrust::get<0>(*(weak_edge + weak_edge_idx[i])), q_closing[i]); } else { - return thrust::make_tuple(q_closing[i], thrust::get<1>(*(invalid_edge + invalid_edge_idx[i]))); + return thrust::make_tuple(q_closing[i], thrust::get<1>(*(weak_edge + weak_edge_idx[i]))); } } }; @@ -362,21 +362,16 @@ accumulate_triangles_p_q_or_q_r(raft::handle_t const& handle, auto weak_edgelist_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); // Call nbr_intersection unroll (p, q) and (q, r) edges - //printf("\nbefore nbr_intersection, weak_edgelist_srcs_size = %d, vs %d\n", weak_edgelist_srcs.size(), prev_chunk_size + chunk_size); - //raft::print_device_vector("sorted_weak_edgelist_srcs", weak_edgelist_srcs.data(), weak_edgelist_srcs.size(), std::cout); - //raft::print_device_vector("sorted_weak_edgelist_dsts", weak_edgelist_dsts.data(), weak_edgelist_dsts.size(), std::cout); auto [intersection_offsets, intersection_indices] = detail::nbr_intersection(handle, graph_view, cugraph::edge_dummy_property_t{}.view(), weak_edgelist_first + prev_chunk_size, weak_edgelist_first + prev_chunk_size + chunk_size, - //weak_edgelist_first, - //weak_edgelist_first + weak_edgelist_srcs.size(), std::array{true, true}, //do_expensive_check : FIXME true); - //printf("\nafter nbr_intersection\n"); + // Generate (p, q) edges auto vertex_pair_buffer_p_q = allocate_dataframe_buffer>(intersection_indices.size(), @@ -682,7 +677,7 @@ k_truss(raft::handle_t const& handle, // Find intersection edges size_t prev_chunk_size = 0; - size_t chunk_num_invalid_edges = weak_edgelist_srcs.size(); + size_t chunk_num_weak_edges = weak_edgelist_srcs.size(); size_t edges_to_intersect_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); edges_to_intersect_per_iteration = 100; @@ -690,7 +685,7 @@ k_truss(raft::handle_t const& 
handle, raft::div_rounding_up_safe(weak_edgelist_srcs.size(), edges_to_intersect_per_iteration); for (size_t i = 0; i < num_chunks; ++i) { - auto chunk_size = std::min(edges_to_intersect_per_iteration, chunk_num_invalid_edges); + auto chunk_size = std::min(edges_to_intersect_per_iteration, chunk_num_weak_edges); auto [vertex_pair_buffer_p_q, vertex_pair_buffer_p_r_edge_p_q, vertex_pair_buffer_q_r_edge_p_q] = accumulate_triangles_p_q_or_q_r>(size_t{0}, handle.get_stream())), multi_gpu>( handle, cur_graph_view, @@ -759,7 +754,7 @@ k_truss(raft::handle_t const& handle, ); prev_chunk_size += chunk_size; - chunk_num_invalid_edges -= chunk_size; + chunk_num_weak_edges -= chunk_size; } // Iterate over unique vertices that appear as either q or r @@ -809,14 +804,14 @@ k_truss(raft::handle_t const& handle, thrust::sort(handle.get_thrust_policy(), vertex_q_r.begin(), vertex_q_r.end()); - auto invalid_unique_v_end = thrust::unique( + auto weak_unique_v_end = thrust::unique( handle.get_thrust_policy(), vertex_q_r.begin(), vertex_q_r.end()); - vertex_q_r.resize(thrust::distance(vertex_q_r.begin(), invalid_unique_v_end), handle.get_stream()); + vertex_q_r.resize(thrust::distance(vertex_q_r.begin(), weak_unique_v_end), handle.get_stream()); - auto invalid_edgelist = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + weak_edgelist_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); // FIXME: is this necessary ? 
auto [srcs_to_q_r, dsts_to_q_r] = extract_transform_e(handle, cur_graph_view, @@ -911,16 +906,17 @@ k_truss(raft::handle_t const& handle, weak_edgelist_first + cp_weak_edgelist_srcs.size()); } prev_chunk_size = 0; - chunk_num_invalid_edges = weak_edgelist_size; + chunk_num_weak_edges = weak_edgelist_size; num_chunks = raft::div_rounding_up_safe(weak_edgelist_size, edges_to_intersect_per_iteration); + + auto sorted_weak_edgelist_srcs = thrust::get<0>(weak_edgelist_first.get_iterator_tuple()); + auto sorted_weak_edgelist_dsts = thrust::get<1>(weak_edgelist_first.get_iterator_tuple()); for (size_t i = 0; i < num_chunks; ++i) { - auto chunk_size = std::min(edges_to_intersect_per_iteration, chunk_num_invalid_edges); - auto sorted_weak_edgelist_srcs = thrust::get<0>(weak_edgelist_first.get_iterator_tuple()); - auto sorted_weak_edgelist_dsts = thrust::get<1>(weak_edgelist_first.get_iterator_tuple()); - + auto chunk_size = std::min(edges_to_intersect_per_iteration, chunk_num_weak_edges); + // Find intersection of weak edges auto [vertex_pair_buffer_q_r, vertex_pair_buffer_p_q_edge_q_r, vertex_pair_buffer_p_r_edge_q_r] = accumulate_triangles_p_q_or_q_r>(size_t{0}, handle.get_stream())), multi_gpu>( handle, @@ -1140,31 +1136,66 @@ k_truss(raft::handle_t const& handle, } prev_chunk_size += chunk_size; - chunk_num_invalid_edges -= chunk_size; + chunk_num_weak_edges -= chunk_size; } weak_edgelist_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); - #if 0 + //#if 0 // Unrolling p, r edges - // create pair invalid_src, invalid_edge_idx - // create a dataframe buffer of size invalid_edge_size + // create pair weak_src, weak_edge_idx + // create a dataframe buffer of size weak_edge_size // FIXME: No need to create a dataframe buffer. We can just zip weak_edgelist_srcs // with a vector counting from 0 .. 
auto vertex_pair_buffer_p_tag = allocate_dataframe_buffer>(weak_edgelist_srcs.size(), handle.get_stream()); + if constexpr (multi_gpu) { + std::vector h_num_weak_edges = {vertex_t{weak_edgelist_srcs.size()}}; + rmm::device_uvector num_weak_edges(1, handle.get_stream()); + + raft::update_device(num_weak_edges.data(), h_num_weak_edges.data(), h_num_weak_edges.size(), handle.get_stream()); + + auto& comm = handle.get_comms(); + auto comm_rank = comm.get_rank(); + // Get global weak_edgelist + auto global_num_weak_edges = cugraph::detail::device_allgatherv( + handle, + comm, + raft::device_span(num_weak_edges.data(), num_weak_edges.size())); + + rmm::device_uvector prefix_sum_global_num_weak_edges(global_num_weak_edges.size(), handle.get_stream()); + thrust::inclusive_scan(handle.get_thrust_policy(), + global_num_weak_edges.begin(), + global_num_weak_edges.end(), + prefix_sum_global_num_weak_edges.begin()); + + thrust::tabulate(handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_tag), + get_dataframe_buffer_end(vertex_pair_buffer_p_tag), + [rank = comm_rank, + num_weak_edges = prefix_sum_global_num_weak_edges.begin(), + p = weak_edgelist_srcs.begin()] __device__(auto idx) { + if (rank != 0) { + auto idx_tag = idx + (num_weak_edges[rank - 1]); + return thrust::make_tuple(p[idx], idx_tag); + } + + return thrust::make_tuple(p[idx], idx); + }); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_tag), - get_dataframe_buffer_end(vertex_pair_buffer_p_tag), - [ - p = weak_edgelist_srcs.begin() - ] __device__(auto idx) { - return thrust::make_tuple(p[idx], idx); - }); + } else { + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_tag), + get_dataframe_buffer_end(vertex_pair_buffer_p_tag), + [ + p = weak_edgelist_srcs.begin() + ] __device__(auto idx) { + return thrust::make_tuple(p[idx], idx); + }); + } vertex_frontier_t vertex_frontier(handle, 1); 
vertex_frontier.bucket(0).insert( @@ -1208,8 +1239,8 @@ k_truss(raft::handle_t const& handle, handle.get_thrust_policy(), get_dataframe_buffer_begin(vertex_pair_buffer_p_r), get_dataframe_buffer_end(vertex_pair_buffer_p_r), - generate_p_r{ - invalid_edgelist, + generate_p_r{ + weak_edgelist_first, raft::device_span(idx_closing.data(), idx_closing.size()) }); @@ -1222,8 +1253,8 @@ k_truss(raft::handle_t const& handle, handle.get_thrust_policy(), get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), get_dataframe_buffer_end(vertex_pair_buffer_p_q_for_p_r), - generate_p_q_q_r{ - invalid_edgelist, + generate_p_q_q_r{ + weak_edgelist_first, raft::device_span(q_closing.data(), q_closing.size()), raft::device_span(idx_closing.data(), @@ -1238,14 +1269,13 @@ k_truss(raft::handle_t const& handle, handle.get_thrust_policy(), get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), get_dataframe_buffer_end(vertex_pair_buffer_q_r_for_p_r), - generate_p_q_q_r{ - invalid_edgelist, + generate_p_q_q_r{ + weak_edgelist_first, raft::device_span(q_closing.data(), q_closing.size()), raft::device_span(idx_closing.data(), idx_closing.size()) }); - auto num_edges_not_overcomp_p_q = remove_overcompensating_edges(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); + raft::device_span(sorted_weak_edgelist_srcs, weak_edgelist_size), + raft::device_span(sorted_weak_edgelist_dsts, weak_edgelist_size)); resize_dataframe_buffer(vertex_pair_buffer_p_q_for_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); resize_dataframe_buffer(vertex_pair_buffer_q_r_for_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); @@ -1271,8 +1301,8 @@ k_truss(raft::handle_t const& handle, num_edges_not_overcomp_p_q, get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - 
raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); + raft::device_span(sorted_weak_edgelist_srcs, weak_edgelist_size), + raft::device_span(sorted_weak_edgelist_dsts, weak_edgelist_size)); resize_dataframe_buffer(vertex_pair_buffer_p_q_for_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); resize_dataframe_buffer(vertex_pair_buffer_q_r_for_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); @@ -1289,30 +1319,109 @@ k_truss(raft::handle_t const& handle, ] __device__(auto i) { return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_for_p_r[i]), thrust::get<1>(vertex_pair_buffer_q_r_for_p_r[i])); }); - - update_count( - handle, - cur_graph_view, - e_property_triangle_count, - raft::device_span(std::get<0>(vertex_pair_buffer_p_r).data(), std::get<0>(vertex_pair_buffer_p_r).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_p_r).data(), std::get<1>(vertex_pair_buffer_p_r).size()) - ); + - update_count( - handle, - cur_graph_view, - e_property_triangle_count, - raft::device_span(std::get<0>(vertex_pair_buffer_p_q_for_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_for_p_r).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_p_q_for_p_r).data(), std::get<1>(vertex_pair_buffer_p_q_for_p_r).size()) - ); + if constexpr (multi_gpu) { + // Shuffle before updating count + rmm::device_uvector vertex_pair_buffer_p_r_srcs(0, handle.get_stream()); + rmm::device_uvector vertex_pair_buffer_p_r_dsts(0, handle.get_stream()); + + std::tie(vertex_pair_buffer_p_r_srcs, vertex_pair_buffer_p_r_dsts, std::ignore, std::ignore, std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<0>(vertex_pair_buffer_p_r)), + std::move(std::get<1>(vertex_pair_buffer_p_r)), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + 
raft::device_span(vertex_pair_buffer_p_r_srcs.data(), vertex_pair_buffer_p_r_srcs.size()), + raft::device_span(vertex_pair_buffer_p_r_dsts.data(), vertex_pair_buffer_p_r_dsts.size()) + ); + + // Shuffle before updating count + rmm::device_uvector vertex_pair_buffer_p_q_edge_p_r_srcs(0, handle.get_stream()); + rmm::device_uvector vertex_pair_buffer_p_q_edge_p_r_dsts(0, handle.get_stream()); + + std::tie(vertex_pair_buffer_p_q_edge_p_r_dsts, vertex_pair_buffer_p_q_edge_p_r_srcs, std::ignore, std::ignore, std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<0>(vertex_pair_buffer_p_q_for_p_r)), // FIXME: rename to vertex_pair_buffer_p_q_edge_p_r for consistency + std::move(std::get<1>(vertex_pair_buffer_p_q_for_p_r)), // FIXME: rename to vertex_pair_buffer_p_q_edge_p_r for consistency + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + raft::device_span(vertex_pair_buffer_p_q_edge_p_r_srcs.data(), vertex_pair_buffer_p_q_edge_p_r_srcs.size()), + raft::device_span(vertex_pair_buffer_p_q_edge_p_r_dsts.data(), vertex_pair_buffer_p_q_edge_p_r_dsts.size()) + ); + + // Shuffle before updating count + rmm::device_uvector vertex_pair_buffer_q_r_edge_p_r_srcs(0, handle.get_stream()); + rmm::device_uvector vertex_pair_buffer_q_r_edge_p_r_dsts(0, handle.get_stream()); + + std::tie(vertex_pair_buffer_q_r_edge_p_r_dsts, vertex_pair_buffer_q_r_edge_p_r_srcs, std::ignore, std::ignore, std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<0>(vertex_pair_buffer_q_r_for_p_r)), // FIXME: rename to vertex_pair_buffer_p_q_edge_p_r for consistency + std::move(std::get<1>(vertex_pair_buffer_q_r_for_p_r)), // FIXME: rename to vertex_pair_buffer_p_q_edge_p_r for consistency + std::nullopt, + std::nullopt, + std::nullopt, + 
cur_graph_view.vertex_partition_range_lasts()); + + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + raft::device_span(vertex_pair_buffer_q_r_edge_p_r_srcs.data(), vertex_pair_buffer_q_r_edge_p_r_srcs.size()), + raft::device_span(vertex_pair_buffer_q_r_edge_p_r_dsts.data(), vertex_pair_buffer_q_r_edge_p_r_dsts.size()) + ); - update_count( - handle, - cur_graph_view, - e_property_triangle_count, - raft::device_span(std::get<0>(vertex_pair_buffer_q_r_for_p_r).data(), std::get<0>(vertex_pair_buffer_q_r_for_p_r).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_q_r_for_p_r).data(), std::get<1>(vertex_pair_buffer_q_r_for_p_r).size()) - ); + } else { + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + raft::device_span(std::get<0>(vertex_pair_buffer_p_r).data(), std::get<0>(vertex_pair_buffer_p_r).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_p_r).data(), std::get<1>(vertex_pair_buffer_p_r).size()) + ); + + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + raft::device_span(std::get<0>(vertex_pair_buffer_p_q_for_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_for_p_r).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_p_q_for_p_r).data(), std::get<1>(vertex_pair_buffer_p_q_for_p_r).size()) + ); + + update_count( + handle, + cur_graph_view, + e_property_triangle_count, + raft::device_span(std::get<0>(vertex_pair_buffer_q_r_for_p_r).data(), std::get<0>(vertex_pair_buffer_q_r_for_p_r).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_q_r_for_p_r).data(), std::get<1>(vertex_pair_buffer_q_r_for_p_r).size()) + ); + } // Mask all the edges that have 0 count cugraph::transform_e( @@ -1337,8 +1446,7 @@ k_truss(raft::handle_t const& handle, edge_weight_view ? 
std::make_optional(*edge_weight_view) : std::nullopt, std::optional>{std::nullopt}, std::optional>{std::nullopt}, - std::optional>(std::nullopt)); - #endif + std::optional>(std::nullopt)); } rmm::device_uvector edgelist_srcs(0, handle.get_stream()); From b34dc4174918da5ff6d8c19e4b95f42d1ad6859e Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 12 Jun 2024 08:31:06 -0700 Subject: [PATCH 54/93] remove unsued variables --- cpp/src/community/k_truss_impl.cuh | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index b0eeec7e38e..c49ba2314e7 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -456,7 +456,6 @@ k_truss(raft::handle_t const& handle, std::optional> renumber_map{std::nullopt}; std::optional, weight_t>> edge_weight{std::nullopt}; - std::optional> wgts{std::nullopt}; if (graph_view.count_self_loops(handle) > edge_t{0}) { auto [srcs, dsts] = extract_transform_e(handle, @@ -507,9 +506,7 @@ k_truss(raft::handle_t const& handle, raft::device_span core_number_span{core_numbers.data(), core_numbers.size()}; - rmm::device_uvector srcs{0, handle.get_stream()}; - rmm::device_uvector dsts{0, handle.get_stream()}; - std::tie(srcs, dsts, wgts) = k_core(handle, + auto [srcs, dsts, wgts] = k_core(handle, cur_graph_view, edge_weight_view, k - 1, From 9d7e9761cfd0e78acb346fe9956e7c6ef04449af Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 12 Jun 2024 08:32:52 -0700 Subject: [PATCH 55/93] add 'fixme' --- cpp/src/community/k_truss_impl.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index c49ba2314e7..37258be8a54 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -677,7 +677,7 @@ k_truss(raft::handle_t const& handle, size_t chunk_num_weak_edges = weak_edgelist_srcs.size(); size_t edges_to_intersect_per_iteration = 
static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); - edges_to_intersect_per_iteration = 100; + edges_to_intersect_per_iteration = 100; // FIXME: For testing auto num_chunks = raft::div_rounding_up_safe(weak_edgelist_srcs.size(), edges_to_intersect_per_iteration); From 811237ca12e8c72ee182e9493cd10c8f0850488a Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 12 Jun 2024 08:36:41 -0700 Subject: [PATCH 56/93] update docstrings --- cpp/src/community/k_truss_impl.cuh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 37258be8a54..84d24462ed7 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -625,8 +625,7 @@ k_truss(raft::handle_t const& handle, renumber_map = std::move(tmp_renumber_map); } - // 5. Decompress the resulting graph to an edges list and ind intersection of edges endpoints - // for each partition using detail::nbr_intersection + // 5. Compute triangle count using nbr_intersection and unroll weak edges { auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; From 1fde8027aceb4c7c75433bef50eaf0782655285f Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 12 Jun 2024 08:38:23 -0700 Subject: [PATCH 57/93] update docstrings --- cpp/src/community/k_truss_impl.cuh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 84d24462ed7..4630c844a87 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -489,7 +489,7 @@ k_truss(raft::handle_t const& handle, modified_graph_view = (*modified_graph).view(); } - // 3. Find (k-1)-core and exclude edges that do not belong to (k-1)-core + // 2. Find (k-1)-core and exclude edges that do not belong to (k-1)-core //#if 0 { auto cur_graph_view = modified_graph_view ? 
*modified_graph_view : graph_view; @@ -549,7 +549,7 @@ k_truss(raft::handle_t const& handle, } //#endif - // 4. Keep only the edges from a low-degree vertex to a high-degree vertex. + // 3. Keep only the edges from a low-degree vertex to a high-degree vertex. { auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; @@ -625,7 +625,7 @@ k_truss(raft::handle_t const& handle, renumber_map = std::move(tmp_renumber_map); } - // 5. Compute triangle count using nbr_intersection and unroll weak edges + // 4. Compute triangle count using nbr_intersection and unroll weak edges { auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; From 707ee27150342b4d798797fd6dc5272803c2fae3 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 12 Jun 2024 08:40:43 -0700 Subject: [PATCH 58/93] reorder stopping condition --- cpp/src/community/k_truss_impl.cuh | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 4630c844a87..e513aeb6638 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -653,23 +653,15 @@ k_truss(raft::handle_t const& handle, edge_dst_dummy_property_t{}.view(), e_property_triangle_count.view(), extract_weak_edges{k}); - auto done = 1; - if constexpr (multi_gpu) - { - if (weak_edgelist_srcs.size() == 0) { done = 0; } - done = host_scalar_allreduce( - handle.get_comms(), done, raft::comms::op_t::MAX, handle.get_stream()); - - if (done == 0) { break; } - } - else if (weak_edgelist_srcs.size() == 0) - { - break; + auto num_weak_edges = weak_edgelist_srcs.size(); + if constexpr (multi_gpu) { + num_weak_edges = host_scalar_allreduce(handle.get_comms(), num_weak_edges, raft::comms::op_t::SUM, handle.get_stream()); } - auto weak_edgelist_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); - thrust::sort(handle.get_thrust_policy(), - 
weak_edgelist_first, - weak_edgelist_first + weak_edgelist_srcs.size()); + if (num_weak_edges == 0) { break; } + auto weak_edgelist_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + thrust::sort(handle.get_thrust_policy(), + weak_edgelist_first, + weak_edgelist_first + weak_edgelist_srcs.size()); // Find intersection edges size_t prev_chunk_size = 0; From e486492ebe5c7cd866a4f3c26d8d453050b6afb5 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sat, 6 Jul 2024 18:19:49 -0700 Subject: [PATCH 59/93] return rx_counts --- .../cugraph/detail/shuffle_wrappers.hpp | 6 +- .../mtmg/detail/per_device_edgelist.hpp | 3 +- cpp/src/c_api/graph_functions.cpp | 2 +- cpp/src/c_api/graph_mg.cpp | 3 +- cpp/src/community/detail/refine_impl.cuh | 1 + .../community/edge_triangle_count_impl.cuh | 2 +- cpp/src/community/k_truss_impl.cuh | 1495 ++++++++++++++--- cpp/src/community/k_truss_mg.cu | 3 +- cpp/src/community/triangle_count_impl.cuh | 2 +- .../weakly_connected_components_impl.cuh | 1 + cpp/src/link_prediction/similarity_impl.cuh | 4 +- cpp/src/structure/coarsen_graph_impl.cuh | 3 +- .../structure/symmetrize_edgelist_impl.cuh | 2 + cpp/src/structure/transpose_graph_impl.cuh | 1 + .../transpose_graph_storage_impl.cuh | 1 + cpp/src/utilities/shuffle_vertex_pairs.cu | 97 +- cpp/tests/CMakeLists.txt | 4 +- cpp/tests/community/k_truss_test.cpp | 34 +- .../link_prediction/mg_similarity_test.cpp | 2 +- .../mg_weighted_similarity_test.cpp | 2 +- ...r_v_pair_transform_dst_nbr_intersection.cu | 1 + ...transform_dst_nbr_weighted_intersection.cu | 1 + ...has_edge_and_compute_multiplicity_test.cpp | 1 + cpp/tests/utilities/test_graphs.hpp | 1 + 24 files changed, 1346 insertions(+), 326 deletions(-) diff --git a/cpp/include/cugraph/detail/shuffle_wrappers.hpp b/cpp/include/cugraph/detail/shuffle_wrappers.hpp index 69d48098a5d..37130bf3c64 100644 --- a/cpp/include/cugraph/detail/shuffle_wrappers.hpp +++ b/cpp/include/cugraph/detail/shuffle_wrappers.hpp @@ 
-53,7 +53,8 @@ std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -93,7 +94,8 @@ std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, diff --git a/cpp/include/cugraph/mtmg/detail/per_device_edgelist.hpp b/cpp/include/cugraph/mtmg/detail/per_device_edgelist.hpp index 63d7fd9685e..61ad833a529 100644 --- a/cpp/include/cugraph/mtmg/detail/per_device_edgelist.hpp +++ b/cpp/include/cugraph/mtmg/detail/per_device_edgelist.hpp @@ -251,7 +251,8 @@ class per_device_edgelist_t { store_transposed ? src_[0] : dst_[0], tmp_wgt, tmp_edge_id, - tmp_edge_type) = + tmp_edge_type, + std::ignore) = cugraph::detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( handle.raft_handle(), store_transposed ? 
std::move(dst_[0]) : std::move(src_[0]), diff --git a/cpp/src/c_api/graph_functions.cpp b/cpp/src/c_api/graph_functions.cpp index 91371b988b3..df741a349d2 100644 --- a/cpp/src/c_api/graph_functions.cpp +++ b/cpp/src/c_api/graph_functions.cpp @@ -72,7 +72,7 @@ struct create_vertex_pairs_functor : public cugraph::c_api::abstract_functor { second_copy.data(), second_->as_type(), second_->size_, handle_.get_stream()); if constexpr (multi_gpu) { - std::tie(first_copy, second_copy, std::ignore, std::ignore, std::ignore) = + std::tie(first_copy, second_copy, std::ignore, std::ignore, std::ignore, std::ignore) = cugraph::detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< vertex_t, edge_t, diff --git a/cpp/src/c_api/graph_mg.cpp b/cpp/src/c_api/graph_mg.cpp index 22ceea3f629..cc4acd31743 100644 --- a/cpp/src/c_api/graph_mg.cpp +++ b/cpp/src/c_api/graph_mg.cpp @@ -167,7 +167,8 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { store_transposed ? edgelist_srcs : edgelist_dsts, edgelist_weights, edgelist_edge_ids, - edgelist_edge_types) = + edgelist_edge_types, + std::ignore) = cugraph::detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( handle_, std::move(store_transposed ? edgelist_dsts : edgelist_srcs), diff --git a/cpp/src/community/detail/refine_impl.cuh b/cpp/src/community/detail/refine_impl.cuh index ef34ad90584..f8b5702c95a 100644 --- a/cpp/src/community/detail/refine_impl.cuh +++ b/cpp/src/community/detail/refine_impl.cuh @@ -623,6 +623,7 @@ refine_clustering( store_transposed ? 
d_srcs : d_dsts, d_weights, std::ignore, + std::ignore, std::ignore) = cugraph::detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< vertex_t, diff --git a/cpp/src/community/edge_triangle_count_impl.cuh b/cpp/src/community/edge_triangle_count_impl.cuh index c4277e240be..225687c4cf0 100644 --- a/cpp/src/community/edge_triangle_count_impl.cuh +++ b/cpp/src/community/edge_triangle_count_impl.cuh @@ -250,7 +250,7 @@ edge_property_t, edge_t> edge_t handle.get_stream()); // There are still multiple copies here but is it worth sorting and reducing again? - std::tie(pair_srcs, pair_dsts, std::ignore, pair_count, std::ignore) = + std::tie(pair_srcs, pair_dsts, std::ignore, pair_count, std::ignore, std::ignore) = shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning +template +// difference something. edge_t remove_overcompensating_edges(raft::handle_t const& handle, size_t buffer_size, - EdgeIterator potential_closing_or_incoming_edges, - EdgeIterator incoming_or_potential_closing_edges, - raft::device_span weak_edgelist_first_srcs, - raft::device_span weak_edgelist_first_dsts) + EdgeIterator set_a_query_edges, // (p, q) edges + EdgeIterator set_b_query_edges, + // rename querry_edge_first + // rename querry_edge_last + raft::device_span set_c_weak_edges_srcs, // FIXME: rename this, no need for first + raft::device_span set_c_weak_edges_dsts, + std::vector vertex_partition_range_lasts) // FIXME: rename this { + // To avoid over-compensating, check whether the 'potential_closing_edges' // are within the weak edges. 
If yes, the was already unrolled - auto edges_not_overcomp = thrust::remove_if( - handle.get_thrust_policy(), - thrust::make_zip_iterator(potential_closing_or_incoming_edges, - incoming_or_potential_closing_edges), - thrust::make_zip_iterator(potential_closing_or_incoming_edges + buffer_size, - incoming_or_potential_closing_edges + buffer_size), - [num_weak_edges = weak_edgelist_first_dsts.size(), - weak_first = - thrust::make_zip_iterator(weak_edgelist_first_srcs.begin(), weak_edgelist_first_dsts.begin()), - weak_last = thrust::make_zip_iterator(weak_edgelist_first_srcs.end(), - weak_edgelist_first_dsts.end())] __device__(auto e) { - auto potential_edge = thrust::get<0>(e); - auto potential_or_incoming_edge = thrust::make_tuple(thrust::get<0>(potential_edge), thrust::get<1>(potential_edge)); - if constexpr (is_q_r_edge) { - potential_or_incoming_edge = thrust::make_tuple(thrust::get<1>(potential_edge), thrust::get<0>(potential_edge)); - }; - - auto itr = thrust::lower_bound( - thrust::seq, weak_first, weak_last, potential_or_incoming_edge); - return (itr != weak_last && *itr == potential_or_incoming_edge); - }); - - auto dist = thrust::distance(thrust::make_zip_iterator(potential_closing_or_incoming_edges, - incoming_or_potential_closing_edges), - edges_not_overcomp); - - return dist; + + // FIXME: thrust::set_difference for SG + // set_difference once for major or minor comm + // rename set_A_last and set B. 
finding the difference + // Make it more general, not k-truss oriented + + rmm::device_uvector set_a_query_edges_srcs(buffer_size, handle.get_stream()); + rmm::device_uvector set_a_query_edges_dsts(buffer_size, handle.get_stream()); + std::vector rx_count{}; + + + if constexpr (multi_gpu) { + + + // FIXME: Just zip src and dst to copy at once for the edges + thrust::copy(handle.get_thrust_policy(), + set_a_query_edges, + set_a_query_edges + buffer_size, + thrust::make_zip_iterator(set_a_query_edges_srcs.begin(), set_a_query_edges_dsts.begin())); + + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + + + + // group_by_count to get the destination of each edges + std::tie(set_a_query_edges_srcs, set_a_query_edges_dsts, std::ignore, std::ignore, std::ignore, rx_count) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, std::move(set_a_query_edges_srcs), std::move(set_a_query_edges_dsts), std::nullopt, std::nullopt, std::nullopt, vertex_partition_range_lasts); + + + rmm::device_uvector has_edge(set_a_query_edges_srcs.size(), handle.get_stream()); // type should be size_t + + auto set_c_weak_edges_first = thrust::make_zip_iterator(set_c_weak_edges_srcs.begin(), set_c_weak_edges_dsts.begin()); // setBedges + auto set_c_weak_edges_last = thrust::make_zip_iterator(set_c_weak_edges_srcs.end(), set_c_weak_edges_dsts.end()); + auto set_a_query_edges_first = thrust::make_zip_iterator(set_a_query_edges_srcs.begin(), set_a_query_edges_dsts.begin()); + + /* + thrust::transform( + handle.get_thrust_policy() + ... 
+ ) + */ + + thrust::tabulate( + handle.get_thrust_policy(), + has_edge.begin(), // FIXME: Properly reconstruct (p, r) even when there is no overcompensation ************************************ + has_edge.end(), + [ + set_c_weak_edges_first, + set_c_weak_edges_last, + set_a_query_edges_first + ] __device__(auto i) { + return thrust::binary_search( + thrust::seq, set_c_weak_edges_first, set_c_weak_edges_last, set_a_query_edges_first[i]); + }); + + + if (comm_rank == 1) { + raft::print_device_vector("has_edge_b_s_v", has_edge.data(), has_edge.size(), std::cout); + } + + std::tie(has_edge, std::ignore) = + shuffle_values(handle.get_comms(), has_edge.begin(), rx_count, handle.get_stream()); + + + //if (comm_rank == 0) { + raft::print_device_vector("has_edge_a_s_v", has_edge.data(), has_edge.size(), std::cout); + //} + + + + if (comm_rank == 1) { + + raft::print_device_vector("set_c_weak_edges_srcs", set_c_weak_edges_srcs.data(), set_c_weak_edges_srcs.size(), std::cout); + raft::print_device_vector("set_c_weak_edges_dsts", set_c_weak_edges_dsts.data(), set_c_weak_edges_dsts.size(), std::cout); + + raft::print_device_vector("set_a_query_edges_srcs", set_a_query_edges_srcs.data(), set_a_query_edges_srcs.size(), std::cout); + raft::print_device_vector("set_a_query_edges_dsts", set_a_query_edges_dsts.data(), set_a_query_edges_dsts.size(), std::cout); + } + + // FIXME: thrust::remove_if (resize). 
No need for sort_by_key and upper_bound + + thrust::sort_by_key(handle.get_thrust_policy(), + has_edge.begin(), + has_edge.end(), + thrust::make_zip_iterator(set_a_query_edges, + set_b_query_edges) + ); + + //raft::print_device_vector("sorted_has_edge_a_s_v", has_edge.data(), has_edge.size(), std::cout); + + // thrust upper_bound 0 + auto itr = thrust::upper_bound( + handle.get_thrust_policy(), has_edge.begin(), has_edge.end(), vertex_t{0}); + + // FIXME: No need to reconstruct the third array because we can zip all 3 edges of the triangle + + //printf("\nnumber of potential weak edges = %d\n", has_edge.size()); + auto dist = thrust::distance(has_edge.begin(), itr); // FIXME: Check whether -1 is necessary + + printf("\ndistance = %d\n", dist); + + return dist; + //return 0; + + + + + + } else { + auto edges_not_overcomp = thrust::remove_if( + handle.get_thrust_policy(), + thrust::make_zip_iterator(set_a_query_edges, + set_b_query_edges), + thrust::make_zip_iterator(set_a_query_edges + buffer_size, + set_b_query_edges + buffer_size), + [num_weak_edges = set_c_weak_edges_dsts.size(), + set_c_weak_edges_first = + thrust::make_zip_iterator(set_c_weak_edges_srcs.begin(), set_c_weak_edges_dsts.begin()), + set_c_weak_edges_last = thrust::make_zip_iterator(set_c_weak_edges_srcs.end(), + set_c_weak_edges_dsts.end())] __device__(auto e) { + auto potential_edge = thrust::get<0>(e); + auto potential_or_incoming_edge = thrust::make_tuple(thrust::get<0>(potential_edge), thrust::get<1>(potential_edge)); + if constexpr (is_q_r_edge) { + potential_or_incoming_edge = thrust::make_tuple(thrust::get<1>(potential_edge), thrust::get<0>(potential_edge)); + }; + + auto itr = thrust::lower_bound( + thrust::seq, set_c_weak_edges_first, set_c_weak_edges_last, potential_or_incoming_edge); + return (itr != set_c_weak_edges_last && *itr == potential_or_incoming_edge); + }); + + auto dist = thrust::distance(thrust::make_zip_iterator(set_a_query_edges, + set_b_query_edges), + edges_not_overcomp); 
+ + printf("\nlegacy - distance = %d\n", dist); + return dist; + } } template @@ -209,14 +330,25 @@ struct extract_q_idx { thrust::nullopt_t, thrust::nullopt_t) const { + + //printf("\nsrc = %d, dst = %d, idx = %d\n", thrust::get<0>(tagged_src), dst, thrust::get<1>(tagged_src)); + /* + if (thrust::get<0>(tagged_src) == 3) { + printf("\nsrc = 3, dst = %d, idx = %d\n", dst, thrust::get<1>(tagged_src)); + } + */ return thrust::make_optional(thrust::make_tuple(dst, thrust::get<1>(tagged_src))); } }; -template +template struct extract_q_idx_closing { - using return_type = thrust::optional>; - raft::device_span weak_edgelist_dsts; + using return_type = thrust::optional>; + EdgeIterator major_weak_edgelist_dsts_tag_first{}; + EdgeIterator major_weak_edgelist_dsts_tag_last{}; + raft::device_span major_weak_edgelist_srcs{}; + raft::device_span weak_edgelist_dsts{}; + raft::device_span weak_edgelist_tags{}; // FIXME: keep this when performing chunking return_type __device__ operator()(thrust::tuple tagged_src, vertex_t dst, @@ -224,10 +356,26 @@ struct extract_q_idx_closing { thrust::nullopt_t, thrust::nullopt_t) const { + + auto itr = thrust::lower_bound( + thrust::seq, + major_weak_edgelist_dsts_tag_first, + major_weak_edgelist_dsts_tag_last, + thrust::make_tuple(dst, thrust::get<1>(tagged_src))); + + auto idx = thrust::distance(major_weak_edgelist_dsts_tag_first, itr); + + return (itr != major_weak_edgelist_dsts_tag_last && *itr == thrust::make_tuple(dst, thrust::get<1>(tagged_src))) + ? thrust::make_optional(thrust::make_tuple(thrust::get<0>(tagged_src), dst, major_weak_edgelist_srcs[idx], thrust::get<1>(tagged_src))) + : thrust::nullopt; + + + /* edge_t idx = thrust::get<1>(tagged_src); return dst == weak_edgelist_dsts[idx] ? 
thrust::make_optional(thrust::make_tuple(thrust::get<0>(tagged_src), idx)) : thrust::nullopt; + */ } }; @@ -251,12 +399,41 @@ struct generate_p_q { template struct generate_p_r { - EdgeIterator weak_edge{}; + EdgeIterator weak_edge_first{}; + EdgeIterator weak_edge_dst_tag_first{}; + EdgeIterator weak_edge_dst_tag_last{}; + EdgeIterator closing_r_tag{}; + raft::device_span weak_edge_idx{}; + raft::device_span chunk_global_weak_edgelist_tags{}; __device__ thrust::tuple operator()(edge_t i) const { - return *(weak_edge + weak_edge_idx[i]); + // FIXME: When performing chunking, run binary search on the idx of the weak + // edges for multi-GPU. similar to the example in 'extract_q_idx_closing'. + // 1) Have the pair(weak_edges, tag) sorted by (tag) + // 2) Perform a binary search on the tag to find if the weak edge exist in the selected + // chunk + // 3) if it exists, return a pair, otherwise, return a thrust::nullopt + + /* + auto itr = thrust::lower_bound( + thrust::seq, chunk_global_weak_edgelist_tags.begin(), chunk_global_weak_edgelist_tags.end(), weak_edge_idx[i]); + */ + + auto itr = thrust::lower_bound( + thrust::seq, weak_edge_dst_tag_first, weak_edge_dst_tag_last, closing_r_tag[i]); + + + auto idx = thrust::distance(weak_edge_dst_tag_first, itr); + + return *(weak_edge_first + idx); + + + + + //return *(weak_edge + weak_edge_idx[i]); + } }; @@ -265,15 +442,40 @@ struct generate_p_q_q_r { EdgeIterator weak_edge{}; raft::device_span q_closing{}; raft::device_span weak_edge_idx{}; + raft::device_span chunk_global_weak_edgelist_tags{}; __device__ thrust::tuple operator()(edge_t i) const { + // FIXME: When performing chunking, run binary search on the idx of the weak + // edges for multi-GPU. similar to the example in 'extract_q_idx_closing'. 
+ // 1) Have the pair(weak_edges, tag) sorted by (tag) + // 2) Perform a binary search on the tag to find if the weak edge exist in the selected + // chunk + // 3) if it exists, return a pair, otherwise, return a thrust::nullopt + + /* + auto itr = thrust::lower_bound( + thrust::seq, chunk_global_weak_edgelist_tags.begin(), chunk_global_weak_edgelist_tags.end(), weak_edge_idx[i]); + + auto idx = thrust::distance(chunk_global_weak_edgelist_tags.begin(), itr); + + if constexpr (generate_p_q) { + return thrust::make_tuple(thrust::get<0>(*(weak_edge + idx)), q_closing[i]); + // FIXME: If single GPU, return this. Add template for multi_gpu + } else { + return thrust::make_tuple(q_closing[i], thrust::get<1>(*(weak_edge + idx))); + } + */ + + if constexpr (generate_p_q) { return thrust::make_tuple(thrust::get<0>(*(weak_edge + weak_edge_idx[i])), q_closing[i]); } else { return thrust::make_tuple(q_closing[i], thrust::get<1>(*(weak_edge + weak_edge_idx[i]))); } + + } }; @@ -361,6 +563,9 @@ accumulate_triangles_p_q_or_q_r(raft::handle_t const& handle, auto weak_edgelist_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + //printf("\nin 'accumulate_triangles_p_q_or_q_r' and size = %d\n", weak_edgelist_srcs.size()); + // Call nbr_intersection unroll (p, q) and (q, r) edges auto [intersection_offsets, intersection_indices] = detail::nbr_intersection(handle, @@ -372,6 +577,7 @@ accumulate_triangles_p_q_or_q_r(raft::handle_t const& handle, //do_expensive_check : FIXME true); + //std::cout<< "The intersection size for (p, q) or (q, r) edges = " << intersection_indices.size() << std::endl; // Generate (p, q) edges auto vertex_pair_buffer_p_q = allocate_dataframe_buffer>(intersection_indices.size(), @@ -456,6 +662,7 @@ k_truss(raft::handle_t const& handle, std::optional> renumber_map{std::nullopt}; std::optional, weight_t>> edge_weight{std::nullopt}; + std::optional> wgts{std::nullopt}; if 
(graph_view.count_self_loops(handle) > edge_t{0}) { auto [srcs, dsts] = extract_transform_e(handle, @@ -466,7 +673,7 @@ k_truss(raft::handle_t const& handle, exclude_self_loop_t{}); if constexpr (multi_gpu) { - std::tie(srcs, dsts, std::ignore, std::ignore, std::ignore) = + std::tie(srcs, dsts, std::ignore, std::ignore, std::ignore, std::ignore) = detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning edge_mask(handle, cur_graph_view); cugraph::fill_edge_property(handle, cur_graph_view, true, edge_mask); - auto iteration = -1; while (true) { - iteration += 1; - if (iteration == 1) { - break; // FIXME: Only for debugging purposes - } - // extract the edges that have counts less than k - 2. THose edges will be unrolled + // extract the edges that have counts less than k - 2. Those edges will be unrolled auto [weak_edgelist_srcs, weak_edgelist_dsts] = extract_transform_e(handle, cur_graph_view, edge_src_dummy_property_t{}.view(), edge_dst_dummy_property_t{}.view(), e_property_triangle_count.view(), extract_weak_edges{k}); + + auto num_weak_edges = weak_edgelist_srcs.size(); if constexpr (multi_gpu) { num_weak_edges = host_scalar_allreduce(handle.get_comms(), num_weak_edges, raft::comms::op_t::SUM, handle.get_stream()); @@ -668,12 +870,37 @@ k_truss(raft::handle_t const& handle, size_t chunk_num_weak_edges = weak_edgelist_srcs.size(); size_t edges_to_intersect_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); - edges_to_intersect_per_iteration = 100; // FIXME: For testing + + auto num_chunks = raft::div_rounding_up_safe(weak_edgelist_srcs.size(), edges_to_intersect_per_iteration); + + if constexpr (multi_gpu) { + num_chunks = host_scalar_allreduce(handle.get_comms(), num_chunks, raft::comms::op_t::SUM, handle.get_stream()); + } + + printf("\nnum_chunks = %d\n", num_chunks); + + // FIXME: In case some ranks have no weak edges to process + // Or simply in the for loop set i <= 0 but need to make sure the 
chunking process is not broken + /* + if (num_chunks == 0) { + num_chunks = 1; + } + */ for (size_t i = 0; i < num_chunks; ++i) { + printf("\n in for loop chunk, i = %d, num_chunks = %d\n", i, num_chunks); auto chunk_size = std::min(edges_to_intersect_per_iteration, chunk_num_weak_edges); + //RAFT_CUDA_TRY(cudaDeviceSynchronize()); + //printf("\ntracking hang\n"); + + //auto x = raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()); + //auto x = weak_edgelist_srcs.size(); + //RAFT_CUDA_TRY(cudaDeviceSynchronize()); + //printf("\nafter tracking hang, size = %d\n", x); + + //#if 0 auto [vertex_pair_buffer_p_q, vertex_pair_buffer_p_r_edge_p_q, vertex_pair_buffer_q_r_edge_p_q] = accumulate_triangles_p_q_or_q_r>(size_t{0}, handle.get_stream())), multi_gpu>( handle, cur_graph_view, @@ -683,6 +910,13 @@ k_truss(raft::handle_t const& handle, chunk_size, do_expensive_check); + //#if 0 + //raft::print_device_vector("vertex_pair_buffer_p_q_srcs", std::get<0>(vertex_pair_buffer_p_q).data(), std::get<0>(vertex_pair_buffer_p_q).size(), std::cout); + //raft::print_device_vector("vertex_pair_buffer_p_q_dsts", std::get<1>(vertex_pair_buffer_p_q).data(), std::get<1>(vertex_pair_buffer_p_q).size(), std::cout); + + //raft::print_device_vector("vertex_pair_buffer_p_r_edge_p_q_srcs", std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size(), std::cout); + //raft::print_device_vector("vertex_pair_buffer_p_r_edge_p_q_dsts", std::get<1>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_p_r_edge_p_q).size(), std::cout); + rmm::device_uvector vertex_pair_buffer_p_r_edge_p_q_srcs(0, handle.get_stream()); rmm::device_uvector vertex_pair_buffer_p_r_edge_p_q_dsts(0, handle.get_stream()); rmm::device_uvector vertex_pair_buffer_q_r_edge_p_q_srcs(0, handle.get_stream()); @@ -690,7 +924,7 @@ k_truss(raft::handle_t const& handle, // Shuffle edges if constexpr (multi_gpu) { // FIXME: Check whether we need to shuffle 
(p, q) edges - std::tie(vertex_pair_buffer_p_r_edge_p_q_srcs, vertex_pair_buffer_p_r_edge_p_q_dsts, std::ignore, std::ignore, std::ignore) = + std::tie(vertex_pair_buffer_p_r_edge_p_q_srcs, vertex_pair_buffer_p_r_edge_p_q_dsts, std::ignore, std::ignore, std::ignore, std::ignore) = detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning(vertex_pair_buffer_p_q).data(), std::get<0>(vertex_pair_buffer_p_q).size(), std::cout); + raft::print_device_vector("vertex_pair_buffer_p_q", std::get<1>(vertex_pair_buffer_p_q).data(), std::get<1>(vertex_pair_buffer_p_q).size(), std::cout); + printf("\n"); + raft::print_device_vector("vertex_pair_buffer_p_r_edge_p_q_srcs", vertex_pair_buffer_p_r_edge_p_q_srcs.data(), vertex_pair_buffer_p_r_edge_p_q_srcs.size(), std::cout); + raft::print_device_vector("vertex_pair_buffer_p_r_edge_p_q_dsts", vertex_pair_buffer_p_r_edge_p_q_dsts.data(), vertex_pair_buffer_p_r_edge_p_q_dsts.size(), std::cout); + printf("\n"); + raft::print_device_vector("vertex_pair_buffer_q_r_edge_p_q_srcs", vertex_pair_buffer_q_r_edge_p_q_srcs.data(), vertex_pair_buffer_q_r_edge_p_q_srcs.size(), std::cout); + raft::print_device_vector("vertex_pair_buffer_q_r_edge_p_q_dsts", vertex_pair_buffer_q_r_edge_p_q_dsts.data(), vertex_pair_buffer_q_r_edge_p_q_dsts.size(), std::cout); + */ update_count( handle, @@ -737,14 +984,35 @@ k_truss(raft::handle_t const& handle, handle, cur_graph_view, e_property_triangle_count, - multi_gpu ? raft::device_span(vertex_pair_buffer_p_r_edge_p_q_srcs.data(), vertex_pair_buffer_p_r_edge_p_q_srcs.size()) : raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size()), - multi_gpu ? raft::device_span(vertex_pair_buffer_p_r_edge_p_q_dsts.data(), vertex_pair_buffer_p_r_edge_p_q_dsts.size()) : raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size()) + multi_gpu ? 
raft::device_span(vertex_pair_buffer_q_r_edge_p_q_srcs.data(), vertex_pair_buffer_q_r_edge_p_q_srcs.size()) : raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size()), + multi_gpu ? raft::device_span(vertex_pair_buffer_q_r_edge_p_q_dsts.data(), vertex_pair_buffer_q_r_edge_p_q_dsts.size()) : raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size()) ); prev_chunk_size += chunk_size; chunk_num_weak_edges -= chunk_size; + + //#endif } + + //#if 0 + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + //printf("\nafter unrolling (p, q) edges\n"); + auto [srcs_0, dsts_0, count_0] = extract_transform_e(handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), + e_property_triangle_count.view(), + extract_edges{}); + /* + raft::print_device_vector("unrolled_srcs", srcs_0.data(), srcs_0.size(), std::cout); + raft::print_device_vector("unrolled_dsts", dsts_0.data(), dsts_0.size(), std::cout); + raft::print_device_vector("unrolled_n_tr", count_0.data(), count_0.size(), std::cout); + */ + + + //#if 0 // Iterate over unique vertices that appear as either q or r rmm::device_uvector unique_weak_edgelist_srcs(weak_edgelist_srcs.size(), handle.get_stream()); rmm::device_uvector unique_weak_edgelist_dsts(weak_edgelist_dsts.size(), handle.get_stream()); @@ -761,11 +1029,11 @@ k_truss(raft::handle_t const& handle, weak_edgelist_dsts.end(), unique_weak_edgelist_dsts.begin() ); - - thrust::sort(handle.get_thrust_policy(), unique_weak_edgelist_srcs.begin(), unique_weak_edgelist_srcs.end()); + + thrust::sort(handle.get_thrust_policy(), unique_weak_edgelist_srcs.begin(), unique_weak_edgelist_srcs.end()); // No need to sort the 'dst' since they are already sorted thrust::sort(handle.get_thrust_policy(), 
unique_weak_edgelist_dsts.begin(), unique_weak_edgelist_dsts.end()); - + auto unique_srcs_end = thrust::unique( handle.get_thrust_policy(), unique_weak_edgelist_srcs.begin(), @@ -783,12 +1051,14 @@ k_truss(raft::handle_t const& handle, rmm::device_uvector vertex_q_r(num_unique_weak_edgelist_srcs + num_unique_weak_edgelist_dsts, handle.get_stream()); - thrust::set_union(handle.get_thrust_policy(), - unique_weak_edgelist_srcs.begin(), - unique_weak_edgelist_srcs.end(), - unique_weak_edgelist_dsts.begin(), - unique_weak_edgelist_dsts.end(), - vertex_q_r.begin()); + auto vertex_q_r_end = thrust::set_union(handle.get_thrust_policy(), + unique_weak_edgelist_srcs.begin(), + unique_weak_edgelist_srcs.end(), + unique_weak_edgelist_dsts.begin(), + unique_weak_edgelist_dsts.end(), + vertex_q_r.begin()); + + vertex_q_r.resize(thrust::distance(vertex_q_r.begin(), vertex_q_r_end), handle.get_stream()); thrust::sort(handle.get_thrust_policy(), vertex_q_r.begin(), vertex_q_r.end()); @@ -799,8 +1069,53 @@ k_truss(raft::handle_t const& handle, vertex_q_r.resize(thrust::distance(vertex_q_r.begin(), weak_unique_v_end), handle.get_stream()); + // FIXME: perform all to all 'vertex_q_r'. ******************************** + // FIXME: Might not be able to perform this in chunk for MG + // e.g: giving 4 weak edges where 2 belongs to the same triangle. If we were to process + // each of these edges in different batches, we might not be able to find a triangle + // Need a view of the whole graph to find triangles. But why was batching working in SG with + // nbr_intersection in case 1 ?(because I already have a view of the whole graph). For + // case 2 we can still process the edges in batches as long as we already created the full csc graph + // Isn't better to just create the csc graph with all edges at this point. + // Cannot create partial CSR, need the full one. Can we create ehe CSC in chunks by adding set of + // edges at a time? 
+ + if constexpr (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); // FIXME: for debugging + // Get global weak_edgelist + // FIXME: Perform all-to-all in chunks + auto global_vertex_q_r = cugraph::detail::device_allgatherv( + handle, comm, raft::device_span(vertex_q_r.data(), vertex_q_r.size())); + + thrust::sort(handle.get_thrust_policy(), global_vertex_q_r.begin(), global_vertex_q_r.end()); + + weak_unique_v_end = thrust::unique( + handle.get_thrust_policy(), + global_vertex_q_r.begin(), + global_vertex_q_r.end()); + + global_vertex_q_r.resize(thrust::distance(global_vertex_q_r.begin(), weak_unique_v_end), handle.get_stream()); + + //raft::print_device_vector("1_global_vertex_q_r", global_vertex_q_r.data(), global_vertex_q_r.size(), std::cout); + + // FIXME: Can be very expensive and increase peak memory + vertex_q_r.resize(global_vertex_q_r.size(), handle.get_stream()); + + thrust::copy( + handle.get_thrust_policy(), + global_vertex_q_r.begin(), + global_vertex_q_r.end(), + vertex_q_r.begin()); + } + + //raft::print_device_vector("2_vertex_q_r", vertex_q_r.data(), vertex_q_r.size(), std::cout); + weak_edgelist_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); // FIXME: is this necessary ? 
+ RAFT_CUDA_TRY(cudaDeviceSynchronize()); + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + //std::cout << "before extracting edges" << std::endl; auto [srcs_to_q_r, dsts_to_q_r] = extract_transform_e(handle, cur_graph_view, cugraph::edge_src_dummy_property_t{}.view(), @@ -808,8 +1123,17 @@ k_truss(raft::handle_t const& handle, cugraph::edge_dummy_property_t{}.view(), extract_edges_to_q_r{raft::device_span(vertex_q_r.data(), vertex_q_r.size())}); + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + /* + std::cout << "arrays to be shuffled" << std::endl; + raft::print_device_vector("vertex_q_r", vertex_q_r.data(), vertex_q_r.size(), std::cout); + //raft::print_device_vector("vertex_q_r", vertex_q_r.data(), vertex_q_r.size(), std::cout); + raft::print_device_vector("srcs_to_q_r", srcs_to_q_r.data(), srcs_to_q_r.size(), std::cout); + raft::print_device_vector("dsts_to_q_r", dsts_to_q_r.data(), dsts_to_q_r.size(), std::cout); + */ if constexpr (multi_gpu) { - std::tie(dsts_to_q_r, srcs_to_q_r, std::ignore, std::ignore, std::ignore) = + std::tie(dsts_to_q_r, srcs_to_q_r, std::ignore, std::ignore, std::ignore, std::ignore) = detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning> graph_q_r{std::nullopt}; std::optional> renumber_map_q_r{std::nullopt}; @@ -838,40 +1165,40 @@ k_truss(raft::handle_t const& handle, auto csc_q_r_graph_view = (*graph_q_r).view(); - rmm::device_uvector cp_weak_edgelist_srcs(0, handle.get_stream()); - rmm::device_uvector cp_weak_edgelist_dsts(0, handle.get_stream()); + rmm::device_uvector renumbered_weak_edgelist_srcs(0, handle.get_stream()); + rmm::device_uvector renumbered_weak_edgelist_dsts(0, handle.get_stream()); size_t weak_edgelist_size = weak_edgelist_srcs.size(); if constexpr (multi_gpu) { - cp_weak_edgelist_srcs.resize(weak_edgelist_srcs.size(), handle.get_stream()); - cp_weak_edgelist_dsts.resize(weak_edgelist_dsts.size(), handle.get_stream()); + renumbered_weak_edgelist_srcs.resize(weak_edgelist_srcs.size(), 
handle.get_stream()); + renumbered_weak_edgelist_dsts.resize(weak_edgelist_dsts.size(), handle.get_stream()); thrust::copy( handle.get_thrust_policy(), thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()), thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end()), - thrust::make_zip_iterator(cp_weak_edgelist_srcs.begin(), cp_weak_edgelist_dsts.begin())); + thrust::make_zip_iterator(renumbered_weak_edgelist_srcs.begin(), renumbered_weak_edgelist_dsts.begin())); rmm::device_uvector shuffled_weak_edgelist_srcs{0, handle.get_stream()}; rmm::device_uvector shuffled_weak_edgelist_dsts{0, handle.get_stream()}; std::tie( - cp_weak_edgelist_srcs, cp_weak_edgelist_dsts, std::ignore, std::ignore, std::ignore) = + renumbered_weak_edgelist_srcs, renumbered_weak_edgelist_dsts, std::ignore, std::ignore, std::ignore, std::ignore) = detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( handle, - std::move(cp_weak_edgelist_srcs), - std::move(cp_weak_edgelist_dsts), + std::move(renumbered_weak_edgelist_srcs), + std::move(renumbered_weak_edgelist_dsts), std::nullopt, std::nullopt, std::nullopt); renumber_ext_vertices( handle, - cp_weak_edgelist_srcs.data(), - cp_weak_edgelist_srcs.size(), + renumbered_weak_edgelist_srcs.data(), + renumbered_weak_edgelist_srcs.size(), (*renumber_map_q_r).data(), csc_q_r_graph_view.local_vertex_partition_range_first(), csc_q_r_graph_view.local_vertex_partition_range_last(), @@ -879,28 +1206,34 @@ k_truss(raft::handle_t const& handle, renumber_ext_vertices( handle, - cp_weak_edgelist_dsts.data(), - cp_weak_edgelist_dsts.size(), + renumbered_weak_edgelist_dsts.data(), + renumbered_weak_edgelist_dsts.size(), (*renumber_map_q_r).data(), csc_q_r_graph_view.local_vertex_partition_range_first(), csc_q_r_graph_view.local_vertex_partition_range_last(), true); - weak_edgelist_size = cp_weak_edgelist_srcs.size(); + weak_edgelist_size = renumbered_weak_edgelist_srcs.size(); 
weak_edgelist_first = - thrust::make_zip_iterator(cp_weak_edgelist_srcs.begin(), cp_weak_edgelist_dsts.begin()); + thrust::make_zip_iterator(renumbered_weak_edgelist_srcs.begin(), renumbered_weak_edgelist_dsts.begin()); thrust::sort(handle.get_thrust_policy(), weak_edgelist_first, - weak_edgelist_first + cp_weak_edgelist_srcs.size()); + weak_edgelist_first + renumbered_weak_edgelist_srcs.size()); } prev_chunk_size = 0; chunk_num_weak_edges = weak_edgelist_size; + // FIXME: No need to recompute this. It's the same value as above when unrolling (p, q) edges num_chunks = raft::div_rounding_up_safe(weak_edgelist_size, edges_to_intersect_per_iteration); - auto sorted_weak_edgelist_srcs = thrust::get<0>(weak_edgelist_first.get_iterator_tuple()); - auto sorted_weak_edgelist_dsts = thrust::get<1>(weak_edgelist_first.get_iterator_tuple()); + if (num_chunks == 0) { + num_chunks = 1; + } + + //auto sorted_weak_edgelist_srcs = thrust::get<0>(weak_edgelist_first.get_iterator_tuple()); // FIXME: Remove this + //auto sorted_weak_edgelist_dsts = thrust::get<1>(weak_edgelist_first.get_iterator_tuple()); + for (size_t i = 0; i < num_chunks; ++i) { auto chunk_size = std::min(edges_to_intersect_per_iteration, chunk_num_weak_edges); @@ -909,8 +1242,10 @@ k_truss(raft::handle_t const& handle, auto [vertex_pair_buffer_q_r, vertex_pair_buffer_p_q_edge_q_r, vertex_pair_buffer_p_r_edge_q_r] = accumulate_triangles_p_q_or_q_r>(size_t{0}, handle.get_stream())), multi_gpu>( handle, csc_q_r_graph_view, - raft::device_span(sorted_weak_edgelist_srcs, weak_edgelist_size), - raft::device_span(sorted_weak_edgelist_dsts, weak_edgelist_size), + //raft::device_span(sorted_weak_edgelist_srcs, weak_edgelist_size), + //raft::device_span(sorted_weak_edgelist_dsts, weak_edgelist_size), + raft::device_span(renumbered_weak_edgelist_srcs.data(), renumbered_weak_edgelist_srcs.size()), + raft::device_span(renumbered_weak_edgelist_dsts.data(), renumbered_weak_edgelist_dsts.size()), prev_chunk_size, chunk_size, 
do_expensive_check); @@ -956,6 +1291,12 @@ k_truss(raft::handle_t const& handle, (*renumber_map_q_r).data(), *vertex_partition_range_lasts, true); + + //printf("\ndebugging (q, r) edges unrolling\n"); + + //raft::print_device_vector("vertex_pair_buffer_q_r_srcs_b_u", std::get<0>(vertex_pair_buffer_q_r).data(), std::get<0>(vertex_pair_buffer_q_r).size(), std::cout); + //raft::print_device_vector("vertex_pair_buffer_q_r_dsts_b_u", std::get<1>(vertex_pair_buffer_q_r).data(), std::get<1>(vertex_pair_buffer_q_r).size(), std::cout); + //printf("(q, r) edge size = %d\n", std::get<0>(vertex_pair_buffer_q_r).size()); unrenumber_int_vertices(handle, std::get<0>(vertex_pair_buffer_q_r).data(), @@ -970,65 +1311,131 @@ k_truss(raft::handle_t const& handle, (*renumber_map_q_r).data(), *vertex_partition_range_lasts, true); + } + + + //printf("\ndebugging (q, r) edges unrolling\n"); + //raft::print_device_vector("vertex_pair_buffer_q_r_srcs_", std::get<0>(vertex_pair_buffer_q_r).data(), std::get<0>(vertex_pair_buffer_q_r).size(), std::cout); + //raft::print_device_vector("vertex_pair_buffer_q_r_dsts_", std::get<1>(vertex_pair_buffer_q_r).data(), std::get<1>(vertex_pair_buffer_q_r).size(), std::cout); + + edge_t num_edges_not_overcomp = 0; + if constexpr (multi_gpu) { + + // Get global weak edges + // FIXME: Retrieve onlu a fraction of the weak edges. 
+ auto& comm = handle.get_comms(); auto const comm_rank = comm.get_rank(); // FIXME: for debugging // Get global weak_edgelist // FIXME: Perform all-to-all in chunks - auto global_weak_edgelist_srcs = cugraph::detail::device_allgatherv( + auto chunk_global_weak_edgelist_srcs = cugraph::detail::device_allgatherv( handle, comm, raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size())); // FIXME: Perform all-to-all in chunks - auto global_weak_edgelist_dsts = cugraph::detail::device_allgatherv( + auto chunk_global_weak_edgelist_dsts = cugraph::detail::device_allgatherv( handle, comm, raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); - - weak_edgelist_size = global_weak_edgelist_srcs.size(); + + + //raft::print_device_vector("chunk_global_weak_edgelist_srcs", chunk_global_weak_edgelist_srcs.data(), chunk_global_weak_edgelist_srcs.size(), std::cout); + //raft::print_device_vector("chunk_global_weak_edgelist_dsts", chunk_global_weak_edgelist_dsts.data(), chunk_global_weak_edgelist_dsts.size(), std::cout); + + // Sort the weak edges if they are not already - weak_edgelist_first = - thrust::make_zip_iterator(global_weak_edgelist_srcs.begin(), global_weak_edgelist_dsts.begin()); + auto chunk_global_weak_edgelist_first = + thrust::make_zip_iterator(chunk_global_weak_edgelist_srcs.begin(), chunk_global_weak_edgelist_dsts.begin()); thrust::sort(handle.get_thrust_policy(), - weak_edgelist_first, - weak_edgelist_first + weak_edgelist_srcs.size()); - } + chunk_global_weak_edgelist_first, + chunk_global_weak_edgelist_first + chunk_global_weak_edgelist_srcs.size()); + + num_edges_not_overcomp = + remove_overcompensating_edges( + handle, + size_dataframe_buffer(vertex_pair_buffer_p_q_edge_q_r), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_q_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed + get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_q_r), // FIXME: cannot be a copy, 
needs to be the original one so overcompensatiing edges can be removed + raft::device_span(chunk_global_weak_edgelist_srcs.data(), chunk_global_weak_edgelist_srcs.size()), + raft::device_span(chunk_global_weak_edgelist_dsts.data(), chunk_global_weak_edgelist_dsts.size()), + cur_graph_view.vertex_partition_range_lasts() + ); + + //std::cout << "num (q, r) edges after removing = " << num_edges_not_overcomp << std::endl; + //printf("\n\n"); + resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_q_r, num_edges_not_overcomp, handle.get_stream()); + resize_dataframe_buffer(vertex_pair_buffer_p_r_edge_q_r, num_edges_not_overcomp, handle.get_stream()); + + // resize initial (q, r) edges + // Note: Once chunking is implemented, reconstruct the (q, r) edges only outside + // FIXME: No need to reconstruct the third array because we can zip all 3 edges of the triangle + // of the chunk's 'for loop' + resize_dataframe_buffer(vertex_pair_buffer_q_r, num_edges_not_overcomp, handle.get_stream()); + + // FIXME: No need to reconstruct the third array because we can zip all 3 edges of the triangle + // Reconstruct (q, r) edges that didn't already have their count updated + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r), // FIXME: Properly reconstruct (p, r) even when there is no overcompensation ************************************ + get_dataframe_buffer_end(vertex_pair_buffer_q_r), + [ + vertex_pair_buffer_p_q_edge_q_r = get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_q_r), + vertex_pair_buffer_p_r_edge_q_r = get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_q_r) + ] __device__(auto i) { + return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_edge_q_r[i]), thrust::get<0>(vertex_pair_buffer_p_r_edge_q_r[i])); + }); + + + + } else { - sorted_weak_edgelist_srcs = thrust::get<0>(weak_edgelist_first.get_iterator_tuple()); - sorted_weak_edgelist_dsts = thrust::get<1>(weak_edgelist_first.get_iterator_tuple()); - 
auto num_edges_not_overcomp = + num_edges_not_overcomp = remove_overcompensating_edges( + true, + false // FIXME: Set it to False for now + >( handle, size_dataframe_buffer(vertex_pair_buffer_p_q_edge_q_r), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_q_r), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_q_r), - raft::device_span(sorted_weak_edgelist_srcs, weak_edgelist_size), - raft::device_span(sorted_weak_edgelist_dsts, weak_edgelist_size) - ); - - resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_q_r, num_edges_not_overcomp, handle.get_stream()); - resize_dataframe_buffer(vertex_pair_buffer_p_r_edge_q_r, num_edges_not_overcomp, handle.get_stream()); + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_q_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed + get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_q_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), + cur_graph_view.vertex_partition_range_lasts() + ); + + //std::cout << "num (q, r) edges after removing = " << num_edges_not_overcomp << std::endl; + + resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_q_r, num_edges_not_overcomp, handle.get_stream()); + resize_dataframe_buffer(vertex_pair_buffer_p_r_edge_q_r, num_edges_not_overcomp, handle.get_stream()); - // resize initial (q, r) edges - resize_dataframe_buffer(vertex_pair_buffer_q_r, num_edges_not_overcomp, handle.get_stream()); - // Reconstruct (q, r) edges that didn't already have their count updated - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r), // FIXME: Properly reconstruct (p, r) even when there is no overcompensation ************************************ - get_dataframe_buffer_end(vertex_pair_buffer_q_r), - [ - 
vertex_pair_buffer_p_q_edge_q_r = get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_q_r), - vertex_pair_buffer_p_r_edge_q_r = get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_q_r) - ] __device__(auto i) { - return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_edge_q_r[i]), thrust::get<0>(vertex_pair_buffer_p_r_edge_q_r[i])); - }); + // resize initial (q, r) edges + resize_dataframe_buffer(vertex_pair_buffer_q_r, num_edges_not_overcomp, handle.get_stream()); + + // Reconstruct (q, r) edges that didn't already have their count updated + // FIXME: No need to reconstruct the third array because we can zip all 3 edges of the triangle + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r), // FIXME: Properly reconstruct (p, r) even when there is no overcompensation ************************************ + get_dataframe_buffer_end(vertex_pair_buffer_q_r), + [ + vertex_pair_buffer_p_q_edge_q_r = get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_q_r), + vertex_pair_buffer_p_r_edge_q_r = get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_q_r) + ] __device__(auto i) { + return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_edge_q_r[i]), thrust::get<0>(vertex_pair_buffer_p_r_edge_q_r[i])); + }); + } if constexpr (multi_gpu) { // Shuffle before updating count rmm::device_uvector vertex_pair_buffer_q_r_srcs(0, handle.get_stream()); rmm::device_uvector vertex_pair_buffer_q_r_dsts(0, handle.get_stream()); - std::tie(vertex_pair_buffer_q_r_srcs, vertex_pair_buffer_q_r_dsts, std::ignore, std::ignore, std::ignore) = + std::tie(vertex_pair_buffer_q_r_srcs, vertex_pair_buffer_q_r_dsts, std::ignore, std::ignore, std::ignore, std::ignore) = detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning vertex_pair_buffer_p_q_edge_q_r_srcs(0, handle.get_stream()); rmm::device_uvector vertex_pair_buffer_p_q_edge_q_r_dsts(0, handle.get_stream()); - 
std::tie(vertex_pair_buffer_p_q_edge_q_r_dsts, vertex_pair_buffer_p_q_edge_q_r_srcs, std::ignore, std::ignore, std::ignore) = + std::tie(vertex_pair_buffer_p_q_edge_q_r_dsts, vertex_pair_buffer_p_q_edge_q_r_srcs, std::ignore, std::ignore, std::ignore, std::ignore) = detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning vertex_pair_buffer_p_r_edge_q_r_srcs(0, handle.get_stream()); rmm::device_uvector vertex_pair_buffer_p_r_edge_q_r_dsts(0, handle.get_stream()); - std::tie(vertex_pair_buffer_p_r_edge_q_r_dsts, vertex_pair_buffer_p_r_edge_q_r_srcs, std::ignore, std::ignore, std::ignore) = + std::tie(vertex_pair_buffer_p_r_edge_q_r_dsts, vertex_pair_buffer_p_r_edge_q_r_srcs, std::ignore, std::ignore, std::ignore, std::ignore) = detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning{}); + + /* + raft::print_device_vector("unrolled_srcs", srcs_1.data(), srcs_1.size(), std::cout); + raft::print_device_vector("unrolled_dsts", dsts_1.data(), dsts_1.size(), std::cout); + raft::print_device_vector("unrolled_n_tr", count_1.data(), count_1.size(), std::cout); + printf("\n"); + */ + + /* + std::cout<< "before zipping edgelist" << std::endl; + raft::print_device_vector("sorted_weak_edgelist_srcs", sorted_weak_edgelist_srcs, weak_edgelist_size, std::cout); + raft::print_device_vector("sorted_weak_edgelist_dsts", sorted_weak_edgelist_dsts, weak_edgelist_size, std::cout); + */ weak_edgelist_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + /* + std::cout<< "after zipping edgelist" << std::endl; + raft::print_device_vector("sorted_weak_edgelist_srcs", sorted_weak_edgelist_srcs, weak_edgelist_size, std::cout); + raft::print_device_vector("sorted_weak_edgelist_dsts", sorted_weak_edgelist_dsts, weak_edgelist_size, std::cout); + */ //#if 0 // Unrolling p, r edges // create pair weak_src, weak_edge_idx // create a dataframe buffer of size weak_edge_size // FIXME: No need to create a dataframe 
buffer. We can just zip weak_edgelist_srcs // with a vector counting from 0 .. + auto vertex_pair_buffer_p_tag = allocate_dataframe_buffer>(weak_edgelist_srcs.size(), handle.get_stream()); - if constexpr (multi_gpu) { - std::vector h_num_weak_edges = {vertex_t{weak_edgelist_srcs.size()}}; - rmm::device_uvector num_weak_edges(1, handle.get_stream()); + //#if 0 + if constexpr (multi_gpu) { + std::vector h_num_weak_edges = {vertex_t{weak_edgelist_srcs.size()}}; + rmm::device_uvector num_weak_edges(1, handle.get_stream()); - raft::update_device(num_weak_edges.data(), h_num_weak_edges.data(), h_num_weak_edges.size(), handle.get_stream()); - - auto& comm = handle.get_comms(); - auto comm_rank = comm.get_rank(); - // Get global weak_edgelist - auto global_num_weak_edges = cugraph::detail::device_allgatherv( - handle, - comm, - raft::device_span(num_weak_edges.data(), num_weak_edges.size())); - - rmm::device_uvector prefix_sum_global_num_weak_edges(global_num_weak_edges.size(), handle.get_stream()); - thrust::inclusive_scan(handle.get_thrust_policy(), - global_num_weak_edges.begin(), - global_num_weak_edges.end(), - prefix_sum_global_num_weak_edges.begin()); + raft::update_device(num_weak_edges.data(), h_num_weak_edges.data(), h_num_weak_edges.size(), handle.get_stream()); + + auto& comm = handle.get_comms(); + auto comm_rank = comm.get_rank(); + // Get global weak_edgelist + auto global_num_weak_edges = cugraph::detail::device_allgatherv( + handle, + comm, + raft::device_span(num_weak_edges.data(), num_weak_edges.size())); + + rmm::device_uvector prefix_sum_global_num_weak_edges(global_num_weak_edges.size(), handle.get_stream()); + thrust::inclusive_scan(handle.get_thrust_policy(), + global_num_weak_edges.begin(), + global_num_weak_edges.end(), + prefix_sum_global_num_weak_edges.begin()); + + /* + std::cout << "weak_edge_list size = " << weak_edgelist_srcs.size() << std::endl; + raft::print_device_vector("sorted_weak_edgelist_srcs", sorted_weak_edgelist_srcs, 
weak_edgelist_size, std::cout); + raft::print_device_vector("sorted_weak_edgelist_dsts", sorted_weak_edgelist_dsts, weak_edgelist_size, std::cout); + raft::print_device_vector("weak_edgelist_srcs", weak_edgelist_srcs.data(), weak_edgelist_srcs.size(), std::cout); + raft::print_device_vector("weak_edgelist_dsts", weak_edgelist_dsts.data(), weak_edgelist_dsts.size(), std::cout); + */ + thrust::tabulate(handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_tag), + get_dataframe_buffer_end(vertex_pair_buffer_p_tag), + [rank = comm_rank, + num_weak_edges = prefix_sum_global_num_weak_edges.begin(), + p = weak_edgelist_srcs.begin()] __device__(auto idx) { + if (rank != 0) { + auto idx_tag = idx + (num_weak_edges[rank - 1]); + return thrust::make_tuple(p[idx], idx_tag); + } + + return thrust::make_tuple(p[idx], idx); + }); - thrust::tabulate(handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_tag), - get_dataframe_buffer_end(vertex_pair_buffer_p_tag), - [rank = comm_rank, - num_weak_edges = prefix_sum_global_num_weak_edges.begin(), - p = weak_edgelist_srcs.begin()] __device__(auto idx) { - if (rank != 0) { - auto idx_tag = idx + (num_weak_edges[rank - 1]); - return thrust::make_tuple(p[idx], idx_tag); - } - - return thrust::make_tuple(p[idx], idx); - }); - - } else { - thrust::tabulate( + } else { + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_tag), + get_dataframe_buffer_end(vertex_pair_buffer_p_tag), + [ + p = weak_edgelist_srcs.begin() + ] __device__(auto idx) { + return thrust::make_tuple(p[idx], idx); + }); + } + //#endif + #if 0 + thrust::tabulate( handle.get_thrust_policy(), get_dataframe_buffer_begin(vertex_pair_buffer_p_tag), get_dataframe_buffer_end(vertex_pair_buffer_p_tag), @@ -1183,14 +1640,64 @@ k_truss(raft::handle_t const& handle, ] __device__(auto idx) { return thrust::make_tuple(p[idx], idx); }); - } + #endif + + 
//raft::print_device_vector("vertex_pair_buffer_src", std::get<0>(vertex_pair_buffer_p_tag).data(), std::get<0>(vertex_pair_buffer_p_tag).size(), std::cout); + //raft::print_device_vector("vertex_pair_buffer_tag", std::get<1>(vertex_pair_buffer_p_tag).data(), std::get<1>(vertex_pair_buffer_p_tag).size(), std::cout); vertex_frontier_t vertex_frontier(handle, 1); - vertex_frontier.bucket(0).insert( - thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).begin(), std::get<1>(vertex_pair_buffer_p_tag).begin()), - thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).end(), std::get<1>(vertex_pair_buffer_p_tag).end())); + rmm::device_uvector tag_cpy(std::get<1>(vertex_pair_buffer_p_tag).size(), handle.get_stream()); + thrust::copy( + handle.get_thrust_policy(), + std::get<1>(vertex_pair_buffer_p_tag).begin(), + std::get<1>(vertex_pair_buffer_p_tag).end(), + tag_cpy.begin()); + //std::cout << "emptying the vertex frontier" << std::endl; + + if constexpr (multi_gpu) { + //printf("\nbefore shuffling\n"); + //raft::print_device_vector("b_vertex_pair_buffer_src", std::get<0>(vertex_pair_buffer_p_tag).data(), std::get<0>(vertex_pair_buffer_p_tag).size(), std::cout); + //raft::print_device_vector("b_vertex_pair_buffer_tag", std::get<1>(vertex_pair_buffer_p_tag).data(), std::get<1>(vertex_pair_buffer_p_tag).size(), std::cout); + + // Shuffle vertices + auto [p_vrtx, p_tag] = + detail::shuffle_int_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( + handle, + std::move(std::get<0>(vertex_pair_buffer_p_tag)), + std::move(std::get<1>(vertex_pair_buffer_p_tag)), + cur_graph_view.vertex_partition_range_lasts()); + + //printf("\nafter shuffling\n"); + raft::print_device_vector("a_vertex_pair_buffer_src", p_vrtx.data(), p_vrtx.size(), std::cout); + raft::print_device_vector("a_vertex_pair_buffer_tag", p_tag.data(), p_tag.size(), std::cout); + + + vertex_frontier.bucket(0).insert( + thrust::make_zip_iterator(p_vrtx.begin(), p_tag.begin()), + 
//thrust::make_zip_iterator(p_vrtx.begin() + 1, p_tag.begin() + 1) + //thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).begin(), std::get<1>(vertex_pair_buffer_p_tag).begin()) + thrust::make_zip_iterator(p_vrtx.end(), p_tag.end()) + ); + } else { + vertex_frontier.bucket(0).insert( + thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).begin(), std::get<1>(vertex_pair_buffer_p_tag).begin()), + //thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).begin() + 10, std::get<1>(vertex_pair_buffer_p_tag).begin() + 10) + thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).end(), std::get<1>(vertex_pair_buffer_p_tag).end()) + ); + + } + + - auto [q, idx] = + rmm::device_uvector q(0, handle.get_stream()); + rmm::device_uvector idx(0, handle.get_stream()); + + //auto [q, idx] = + auto& comm = handle.get_comms(); + auto comm_rank = comm.get_rank(); + //if (comm_rank == 1) { + printf("\nbefore calling 'extract_transform_v_frontier_outgoing_e'\n"); + std::tie(q, idx) = cugraph::extract_transform_v_frontier_outgoing_e( handle, cur_graph_view, @@ -1199,122 +1706,517 @@ k_truss(raft::handle_t const& handle, cugraph::edge_dst_dummy_property_t{}.view(), cugraph::edge_dummy_property_t{}.view(), extract_q_idx{}, - do_expensive_check); + true); + + std::cout << "initial q's size = " << q.size() << std::endl; + + raft::print_device_vector("q", q.data(), q.size(), std::cout); + raft::print_device_vector("i", idx.data(), q.size(), std::cout); vertex_frontier.bucket(0).clear(); + // Shuffle vertices + std::tie(q, idx) = + detail::shuffle_int_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( + handle, + std::move(q), + std::move(idx), + cur_graph_view.vertex_partition_range_lasts()); + vertex_frontier.bucket(0).insert( thrust::make_zip_iterator(q.begin(), idx.begin()), - thrust::make_zip_iterator(q.end(), idx.end())); + //thrust::make_zip_iterator(q.begin() + 1, idx.begin() + 1) + thrust::make_zip_iterator(q.end(), idx.end()) + ); + //} 
+ - // FIXME: Need to mask (p, q) and (q, r) edges before unrolling (p, r) edges to avoid overcompensating - auto [q_closing, idx_closing] = - cugraph::extract_transform_v_frontier_outgoing_e( - handle, - cur_graph_view, - vertex_frontier.bucket(0), - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - extract_q_idx_closing{raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())}, - do_expensive_check); - - // extract pair (p, r) auto vertex_pair_buffer_p_r = - allocate_dataframe_buffer>(q_closing.size(), - handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r), - get_dataframe_buffer_end(vertex_pair_buffer_p_r), - generate_p_r{ - weak_edgelist_first, - raft::device_span(idx_closing.data(), - idx_closing.size()) - }); + allocate_dataframe_buffer>(0, + handle.get_stream()); - // construct pair (p, q) - auto vertex_pair_buffer_p_q_for_p_r = - allocate_dataframe_buffer>(q_closing.size(), - handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), - get_dataframe_buffer_end(vertex_pair_buffer_p_q_for_p_r), - generate_p_q_q_r{ - weak_edgelist_first, - raft::device_span(q_closing.data(), - q_closing.size()), - raft::device_span(idx_closing.data(), - idx_closing.size()) - }); - - // construct pair (q, r) - auto vertex_pair_buffer_q_r_for_p_r = - allocate_dataframe_buffer>(q_closing.size(), - handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), - get_dataframe_buffer_end(vertex_pair_buffer_q_r_for_p_r), - generate_p_q_q_r{ - weak_edgelist_first, - raft::device_span(q_closing.data(), - q_closing.size()), - raft::device_span(idx_closing.data(), - idx_closing.size()) - }); - - auto num_edges_not_overcomp_p_q = - remove_overcompensating_edges( - handle, - 
q_closing.size(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), - raft::device_span(sorted_weak_edgelist_srcs, weak_edgelist_size), - raft::device_span(sorted_weak_edgelist_dsts, weak_edgelist_size)); - - resize_dataframe_buffer(vertex_pair_buffer_p_q_for_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); - resize_dataframe_buffer(vertex_pair_buffer_q_r_for_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); - - auto num_edges_not_overcomp_q_r = - remove_overcompensating_edges( - handle, - num_edges_not_overcomp_p_q, - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), - raft::device_span(sorted_weak_edgelist_srcs, weak_edgelist_size), - raft::device_span(sorted_weak_edgelist_dsts, weak_edgelist_size)); + auto vertex_pair_buffer_p_q_edge_p_r = + allocate_dataframe_buffer>(0, + handle.get_stream()); - resize_dataframe_buffer(vertex_pair_buffer_p_q_for_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); - resize_dataframe_buffer(vertex_pair_buffer_q_r_for_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + auto vertex_pair_buffer_q_r_edge_p_r = + allocate_dataframe_buffer>(0, + handle.get_stream()); - // Reconstruct (p, r) edges that didn't already have their count updated - resize_dataframe_buffer(vertex_pair_buffer_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); - thrust::tabulate( + // Get chunk global weak edges + // FIXME: To avoid copying and performing all-to-all + // when twice when unrolling (q, r) and (p, r) edges, unroll both edge type back to + // back in with the chunk global weak edgelist + + if constexpr (multi_gpu) { + + // Get global weak edges + // FIXME: Retrieve onlu a fraction of the weak edges. 
+ + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); // FIXME: for debugging + + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto chunk_major_weak_edgelist_srcs = cugraph::detail::device_allgatherv( + handle, comm, raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size())); + // FIXME: Perform all-to-all in chunks + auto chunk_major_weak_edgelist_dsts = cugraph::detail::device_allgatherv( + handle, comm, raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); + + auto chunk_major_weak_edgelist_tags = cugraph::detail::device_allgatherv( + handle, comm, raft::device_span(tag_cpy.data(), tag_cpy.size())); + + auto major_weak_edgelist_first = + thrust::make_zip_iterator(chunk_major_weak_edgelist_srcs.begin(), chunk_major_weak_edgelist_dsts.begin()); + + auto major_weak_edgelist_dsts_tags_first = + thrust::make_zip_iterator(chunk_major_weak_edgelist_dsts.begin(), chunk_major_weak_edgelist_tags.begin()); + + thrust::sort_by_key(handle.get_thrust_policy(), + major_weak_edgelist_dsts_tags_first, + major_weak_edgelist_dsts_tags_first + chunk_major_weak_edgelist_dsts.size(), + chunk_major_weak_edgelist_srcs.begin() + ); + + // Get global weak_edgelist + // FIXME: Perform all-to-all in chunks + auto chunk_global_weak_edgelist_srcs = cugraph::detail::device_allgatherv( + handle, comm, raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size())); + // FIXME: Perform all-to-all in chunks + auto chunk_global_weak_edgelist_dsts = cugraph::detail::device_allgatherv( + handle, comm, raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); + + raft::print_device_vector("weak_edgelist_tags", tag_cpy.data(), tag_cpy.size(), std::cout); + auto chunk_global_weak_edgelist_tags = cugraph::detail::device_allgatherv( + handle, comm, raft::device_span(tag_cpy.data(), tag_cpy.size())); + + + + // Sort the weak edges if they are not already + auto 
chunk_global_weak_edgelist_first = + thrust::make_zip_iterator(chunk_global_weak_edgelist_srcs.begin(), chunk_global_weak_edgelist_dsts.begin()); + /* + thrust::sort_by_key(handle.get_thrust_policy(), + chunk_global_weak_edgelist_first, + chunk_global_weak_edgelist_first + chunk_global_weak_edgelist_srcs.size(), + chunk_global_weak_edgelist_tags.begin()); + */ + + /* + thrust::sort_by_key(handle.get_thrust_policy(), + chunk_global_weak_edgelist_tags.begin(), + chunk_global_weak_edgelist_tags.end(), + chunk_global_weak_edgelist_first + ); + */ + + + raft::print_device_vector("chunk_global_weak_edgelist_srcs", chunk_global_weak_edgelist_srcs.data(), chunk_global_weak_edgelist_srcs.size(), std::cout); + raft::print_device_vector("chunk_global_weak_edgelist_dsts", chunk_global_weak_edgelist_dsts.data(), chunk_global_weak_edgelist_dsts.size(), std::cout); + raft::print_device_vector("chunk_global_weak_edgelist_tags", chunk_global_weak_edgelist_tags.data(), chunk_global_weak_edgelist_tags.size(), std::cout); + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + printf("\nnumber of weak edges before 'extract_q_idx_closing' = %d\n", chunk_global_weak_edgelist_dsts.size()); + auto& comm_ = handle.get_comms(); + auto const comm_rank_ = comm.get_rank(); // FIXME: for debugging, remove after + + printf("\nrank %d bucket_size = %d\n", comm_rank_, vertex_frontier.bucket(0).size()); + + //#if 0 + // FIXME: Might not even need the 'idx_closing' anymore - remove it + auto [q_closing, r_closing, p_closing, idx_closing] = + cugraph::extract_transform_v_frontier_outgoing_e( + handle, + cur_graph_view, + vertex_frontier.bucket(0), + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + extract_q_idx_closing{ + major_weak_edgelist_dsts_tags_first, + major_weak_edgelist_dsts_tags_first + chunk_major_weak_edgelist_dsts.size(), + raft::device_span(chunk_major_weak_edgelist_srcs.data(), 
chunk_major_weak_edgelist_srcs.size()), + raft::device_span(chunk_global_weak_edgelist_dsts.data(), chunk_global_weak_edgelist_dsts.size()), // FIXME: Unused, remove afterwards + raft::device_span(chunk_global_weak_edgelist_tags.data(), chunk_global_weak_edgelist_tags.size()) + }, + true); + + //#if 0 + raft::print_device_vector("q_closing", q_closing.data(), q_closing.size(), std::cout); + raft::print_device_vector("idx_closing", idx_closing.data(), idx_closing.size(), std::cout); + + std::cout << "num_closing_edges = " << q_closing.size() << std::endl; + + // extract pair (p, r) + /* + auto vertex_pair_buffer_p_r = + allocate_dataframe_buffer>(q_closing.size(), + handle.get_stream()); + */ + + resize_dataframe_buffer(vertex_pair_buffer_p_r, + q_closing.size(), + handle.get_stream()); + /* + thrust::sort_by_key(handle.get_thrust_policy(), + chunk_global_weak_edgelist_tags.begin(), + chunk_global_weak_edgelist_tags.end(), + chunk_global_weak_edgelist_first + ); + */ + thrust::copy( handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r), - get_dataframe_buffer_end(vertex_pair_buffer_p_r), - [ - vertex_pair_buffer_p_q_for_p_r = get_dataframe_buffer_begin(vertex_pair_buffer_p_q_for_p_r), - vertex_pair_buffer_q_r_for_p_r = get_dataframe_buffer_begin(vertex_pair_buffer_q_r_for_p_r) - ] __device__(auto i) { - return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_for_p_r[i]), thrust::get<1>(vertex_pair_buffer_q_r_for_p_r[i])); - }); - + thrust::make_zip_iterator(p_closing.begin(), r_closing.begin()), + thrust::make_zip_iterator(p_closing.end(), r_closing.end()), + thrust::make_zip_iterator( + std::get<0>(vertex_pair_buffer_p_r).begin(), std::get<1>(vertex_pair_buffer_p_r).begin()) + ); + /* + auto closing_r_tag = thrust::make_zip_iterator(r_closing.begin(), idx_closing.begin()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r), + get_dataframe_buffer_end(vertex_pair_buffer_p_r), + 
generate_p_r{ + //weak_edgelist_first, // FIXME: might need to use + //major_weak_edgelist_first, + //major_weak_edgelist_first + chunk_major_weak_edgelist_tags.size() + //chunk_global_weak_edgelist_first, + major_weak_edgelist_first, + major_weak_edgelist_dsts_tags_first, + major_weak_edgelist_dsts_tags_first + chunk_major_weak_edgelist_tags.size(), + closing_r_tag, + raft::device_span(idx_closing.data(), + idx_closing.size()), + raft::device_span(chunk_global_weak_edgelist_tags.data(), // FIXME: Unused + chunk_global_weak_edgelist_tags.size()) + }); + */ + + //raft::print_device_vector("check_vertex_pair_buffer_p_r_tags", chunk_major_weak_edgelist_tags.data(), chunk_major_weak_edgelist_tags.size(), std::cout); + + //raft::print_device_vector("check_vertex_pair_buffer_p_r_srcs", std::get<0>(vertex_pair_buffer_p_r).data(), std::get<0>(vertex_pair_buffer_p_r).size(), std::cout); + //raft::print_device_vector("check_vertex_pair_buffer_p_r_dsts", std::get<1>(vertex_pair_buffer_p_r).data(), std::get<1>(vertex_pair_buffer_p_r).size(), std::cout); + + + // construct pair (p, q) + /* + auto vertex_pair_buffer_p_q_edge_p_r = + allocate_dataframe_buffer>(q_closing.size(), + handle.get_stream()); + */ + + resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_p_r, + q_closing.size(), + handle.get_stream()); + + thrust::copy( + handle.get_thrust_policy(), + thrust::make_zip_iterator(p_closing.begin(), q_closing.begin()), + thrust::make_zip_iterator(p_closing.end(), q_closing.end()), + thrust::make_zip_iterator( + std::get<0>(vertex_pair_buffer_p_q_edge_p_r).begin(), std::get<1>(vertex_pair_buffer_p_q_edge_p_r).begin()) + ); + + /* + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r), + get_dataframe_buffer_end(vertex_pair_buffer_p_q_edge_p_r), + generate_p_q_q_r{ + //major_weak_edgelist_first, + chunk_global_weak_edgelist_first, + raft::device_span(q_closing.data(), + q_closing.size()), + 
raft::device_span(idx_closing.data(), + idx_closing.size()), + raft::device_span(chunk_global_weak_edgelist_tags.data(), + chunk_global_weak_edgelist_tags.size()) + }); + */ + + + std::cout << "Before remove overcompensating edges when unrolling (p, r) edges" << std::endl; + + raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r_srcs", std::get<0>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_edge_p_r).size(), std::cout); + raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r_dsts", std::get<1>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<1>(vertex_pair_buffer_p_q_edge_p_r).size(), std::cout); + + // construct pair (q, r) + /* + auto vertex_pair_buffer_q_r_edge_p_r = + allocate_dataframe_buffer>(q_closing.size(), + handle.get_stream()); + */ + + printf("\nbefore resizing = %d, after resizing = %d\n", size_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_r), q_closing.size()); + + resize_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_r, + q_closing.size(), + handle.get_stream()); + /* + printf("\nDone resizing\n"); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_r), + get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_r), + generate_p_q_q_r{ + chunk_global_weak_edgelist_first, + raft::device_span(q_closing.data(), + q_closing.size()), + raft::device_span(idx_closing.data(), + idx_closing.size()) + }); + */ + + thrust::copy( + handle.get_thrust_policy(), + thrust::make_zip_iterator(q_closing.begin(), r_closing.begin()), + thrust::make_zip_iterator(q_closing.end(), r_closing.end()), + thrust::make_zip_iterator( + std::get<0>(vertex_pair_buffer_q_r_edge_p_r).begin(), std::get<1>(vertex_pair_buffer_q_r_edge_p_r).begin()) + ); + + printf("\nDone generating 'p_q_q_r'\n"); + raft::print_device_vector("vertex_pair_buffer_q_r_for_p_r_srcs", std::get<0>(vertex_pair_buffer_q_r_edge_p_r).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_r).size(), std::cout); + 
raft::print_device_vector("vertex_pair_buffer_q_r_for_p_r_dsts", std::get<1>(vertex_pair_buffer_q_r_edge_p_r).data(), std::get<1>(vertex_pair_buffer_q_r_edge_p_r).size(), std::cout); + + + + + + std::cout << "num (p, r) edges before removing = " << q_closing.size() << std::endl; + //raft::print_device_vector("sorted_weak_edgelist_srcs", sorted_weak_edgelist_srcs, weak_edgelist_size, std::cout); + //raft::print_device_vector("sorted_weak_edgelist_dsts", sorted_weak_edgelist_dsts, weak_edgelist_size, std::cout); + + // FIXME: Check if neccessary + /* + auto chunk_global_weak_edgelist_first = + thrust::make_zip_iterator(chunk_global_weak_edgelist_srcs.begin(), chunk_global_weak_edgelist_dsts.begin()); + */ + + // Resort the edges. + thrust::sort(handle.get_thrust_policy(), + chunk_global_weak_edgelist_first, + chunk_global_weak_edgelist_first + chunk_global_weak_edgelist_srcs.size()); + + printf("\nDone sorting\n"); + auto num_edges_not_overcomp_p_q = + remove_overcompensating_edges( + handle, + q_closing.size(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed + raft::device_span(chunk_global_weak_edgelist_srcs.data(), chunk_global_weak_edgelist_srcs.size()), + raft::device_span(chunk_global_weak_edgelist_dsts.data(), chunk_global_weak_edgelist_dsts.size()), + //raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + //raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), + cur_graph_view.vertex_partition_range_lasts() + ); + + std::cout << "1) num (p, r) edges after removing = " << num_edges_not_overcomp_p_q << std::endl; + + resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); + 
resize_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); + + raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r_srcs", std::get<0>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_edge_p_r).size(), std::cout); + raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r_dsts", std::get<1>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<1>(vertex_pair_buffer_p_q_edge_p_r).size(), std::cout); + + raft::print_device_vector("vertex_pair_buffer_q_r_for_p_r_srcs", std::get<0>(vertex_pair_buffer_q_r_edge_p_r).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_r).size(), std::cout); + raft::print_device_vector("vertex_pair_buffer_q_r_for_p_r_dsts", std::get<1>(vertex_pair_buffer_q_r_edge_p_r).data(), std::get<1>(vertex_pair_buffer_q_r_edge_p_r).size(), std::cout); + + //break; // FIXME: Break here ************** + std::cout << "Before remove overcompensating edges when unrolling (p, r) edges" << std::endl; + raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r_srcs", std::get<0>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_edge_p_r).size(), std::cout); + raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r_dsts", std::get<1>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<1>(vertex_pair_buffer_p_q_edge_p_r).size(), std::cout); + + //break; // FIXME: Break here ************** + auto num_edges_not_overcomp_q_r = + remove_overcompensating_edges( + handle, + num_edges_not_overcomp_p_q, + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed + raft::device_span(chunk_global_weak_edgelist_srcs.data(), chunk_global_weak_edgelist_srcs.size()), + 
raft::device_span(chunk_global_weak_edgelist_dsts.data(), chunk_global_weak_edgelist_dsts.size()), + cur_graph_view.vertex_partition_range_lasts()); + + std::cout << "2) num (p, r) edges after removing = " << num_edges_not_overcomp_q_r << std::endl; + + resize_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + + // Reconstruct (p, r) edges that didn't already have their count updated + // FIXME: No need to reconstruct the third array because we can zip all 3 edges of the triangle + resize_dataframe_buffer(vertex_pair_buffer_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r), + get_dataframe_buffer_end(vertex_pair_buffer_p_r), + [ + vertex_pair_buffer_p_q_edge_p_r = get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r), + vertex_pair_buffer_q_r_edge_p_r = get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_r) + ] __device__(auto i) { + return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_edge_p_r[i]), thrust::get<1>(vertex_pair_buffer_q_r_edge_p_r[i])); + }); + + std::cout << "after removing overcompensating edges" << std::endl; + raft::print_device_vector("check_vertex_pair_buffer_p_r_srcs", std::get<0>(vertex_pair_buffer_p_r).data(), std::get<0>(vertex_pair_buffer_p_r).size(), std::cout); + raft::print_device_vector("check_vertex_pair_buffer_p_r_dsts", std::get<1>(vertex_pair_buffer_p_r).data(), std::get<1>(vertex_pair_buffer_p_r).size(), std::cout); + raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r_srcs", std::get<0>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_edge_p_r).size(), std::cout); + raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r_dsts", std::get<1>(vertex_pair_buffer_p_q_edge_p_r).data(), 
std::get<1>(vertex_pair_buffer_p_q_edge_p_r).size(), std::cout); + //#endif + + } else { + + // FIXME: refactor SG to use r_closing + auto [q_closing, r_closing, idx_closing] = + cugraph::extract_transform_v_frontier_outgoing_e( + handle, + cur_graph_view, + vertex_frontier.bucket(0), + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + extract_q_idx_closing{raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())}, + do_expensive_check); + + // extract pair (p, r) + /* + auto vertex_pair_buffer_p_r = + allocate_dataframe_buffer>(q_closing.size(), + handle.get_stream()); + */ + resize_dataframe_buffer(vertex_pair_buffer_p_r, + q_closing.size(), + handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r), + get_dataframe_buffer_end(vertex_pair_buffer_p_r), + generate_p_r{ + weak_edgelist_first, + raft::device_span(idx_closing.data(), + idx_closing.size()) + }); + + // construct pair (p, q) + /* + auto vertex_pair_buffer_p_q_edge_p_r = + allocate_dataframe_buffer>(q_closing.size(), + handle.get_stream()); + */ + resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_p_r, + q_closing.size(), + handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r), + get_dataframe_buffer_end(vertex_pair_buffer_p_q_edge_p_r), + generate_p_q_q_r{ + weak_edgelist_first, + raft::device_span(q_closing.data(), + q_closing.size()), + raft::device_span(idx_closing.data(), + idx_closing.size()) + }); + + std::cout << "Before remove overcompensating edges when unrolling (p, r) edges" << std::endl; + raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r_srcs", std::get<0>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_edge_p_r).size(), std::cout); + raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r_dsts", 
std::get<1>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<1>(vertex_pair_buffer_p_q_edge_p_r).size(), std::cout); + + // construct pair (q, r) + /* + auto vertex_pair_buffer_q_r_edge_p_r = + allocate_dataframe_buffer>(q_closing.size(), + handle.get_stream()); + */ + resize_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_r, + q_closing.size(), + handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_r), + get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_r), + generate_p_q_q_r{ + weak_edgelist_first, + raft::device_span(q_closing.data(), + q_closing.size()), + raft::device_span(idx_closing.data(), + idx_closing.size()) + }); + + auto num_edges_not_overcomp_p_q = + remove_overcompensating_edges( + handle, + q_closing.size(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), + cur_graph_view.vertex_partition_range_lasts()); + + resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); + resize_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); + + std::cout << "Before removing overcompensating edges when unrolling (p, r) edges" << std::endl; + raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r_srcs", std::get<0>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_edge_p_r).size(), std::cout); + raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r_dsts", std::get<1>(vertex_pair_buffer_p_q_edge_p_r).data(), 
std::get<1>(vertex_pair_buffer_p_q_edge_p_r).size(), std::cout); + + auto num_edges_not_overcomp_q_r = + remove_overcompensating_edges( + handle, + num_edges_not_overcomp_p_q, + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), + cur_graph_view.vertex_partition_range_lasts()); + + resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + resize_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + + // Reconstruct (p, r) edges that didn't already have their count updated. + // FIXME: No need to reconstruct the third array because we can zip all 3 edges of the triangle + resize_dataframe_buffer(vertex_pair_buffer_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r), + get_dataframe_buffer_end(vertex_pair_buffer_p_r), + [ + vertex_pair_buffer_p_q_edge_p_r = get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r), + vertex_pair_buffer_q_r_edge_p_r = get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_r) + ] __device__(auto i) { + return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_edge_p_r[i]), thrust::get<1>(vertex_pair_buffer_q_r_edge_p_r[i])); + }); + + } if constexpr (multi_gpu) { // Shuffle before updating count rmm::device_uvector vertex_pair_buffer_p_r_srcs(0, handle.get_stream()); rmm::device_uvector vertex_pair_buffer_p_r_dsts(0, handle.get_stream()); - std::tie(vertex_pair_buffer_p_r_srcs, vertex_pair_buffer_p_r_dsts, 
std::ignore, std::ignore, std::ignore) = + std::tie(vertex_pair_buffer_p_r_srcs, vertex_pair_buffer_p_r_dsts, std::ignore, std::ignore, std::ignore, std::ignore) = detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( handle, cur_graph_view, @@ -1335,22 +2241,27 @@ k_truss(raft::handle_t const& handle, raft::device_span(vertex_pair_buffer_p_r_dsts.data(), vertex_pair_buffer_p_r_dsts.size()) ); + std::cout << "updating count_1" < vertex_pair_buffer_p_q_edge_p_r_srcs(0, handle.get_stream()); rmm::device_uvector vertex_pair_buffer_p_q_edge_p_r_dsts(0, handle.get_stream()); - std::tie(vertex_pair_buffer_p_q_edge_p_r_dsts, vertex_pair_buffer_p_q_edge_p_r_srcs, std::ignore, std::ignore, std::ignore) = + std::tie(vertex_pair_buffer_p_q_edge_p_r_srcs, vertex_pair_buffer_p_q_edge_p_r_dsts, std::ignore, std::ignore, std::ignore, std::ignore) = detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( handle, - std::move(std::get<0>(vertex_pair_buffer_p_q_for_p_r)), // FIXME: rename to vertex_pair_buffer_p_q_edge_p_r for consistency - std::move(std::get<1>(vertex_pair_buffer_p_q_for_p_r)), // FIXME: rename to vertex_pair_buffer_p_q_edge_p_r for consistency + std::move(std::get<0>(vertex_pair_buffer_p_q_edge_p_r)), // FIXME: rename to vertex_pair_buffer_p_q_edge_p_r for consistency + std::move(std::get<1>(vertex_pair_buffer_p_q_edge_p_r)), // FIXME: rename to vertex_pair_buffer_p_q_edge_p_r for consistency std::nullopt, std::nullopt, std::nullopt, cur_graph_view.vertex_partition_range_lasts()); + + raft::print_device_vector("vertex_pair_buffer_p_q_edge_p_r_srcs", vertex_pair_buffer_p_q_edge_p_r_srcs.data(), vertex_pair_buffer_p_q_edge_p_r_srcs.size(), std::cout); + raft::print_device_vector("vertex_pair_buffer_p_q_edge_p_r_dsts", vertex_pair_buffer_p_q_edge_p_r_dsts.data(), vertex_pair_buffer_p_q_edge_p_r_dsts.size(), std::cout); update_count( handle, @@ -1360,22 +2271,27 @@ k_truss(raft::handle_t const& handle, 
raft::device_span(vertex_pair_buffer_p_q_edge_p_r_dsts.data(), vertex_pair_buffer_p_q_edge_p_r_dsts.size()) ); + std::cout << "updating count_2" < vertex_pair_buffer_q_r_edge_p_r_srcs(0, handle.get_stream()); rmm::device_uvector vertex_pair_buffer_q_r_edge_p_r_dsts(0, handle.get_stream()); - std::tie(vertex_pair_buffer_q_r_edge_p_r_dsts, vertex_pair_buffer_q_r_edge_p_r_srcs, std::ignore, std::ignore, std::ignore) = + std::tie(vertex_pair_buffer_q_r_edge_p_r_srcs, vertex_pair_buffer_q_r_edge_p_r_dsts, std::ignore, std::ignore, std::ignore, std::ignore) = detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( handle, - std::move(std::get<0>(vertex_pair_buffer_q_r_for_p_r)), // FIXME: rename to vertex_pair_buffer_p_q_edge_p_r for consistency - std::move(std::get<1>(vertex_pair_buffer_q_r_for_p_r)), // FIXME: rename to vertex_pair_buffer_p_q_edge_p_r for consistency + std::move(std::get<0>(vertex_pair_buffer_q_r_edge_p_r)), // FIXME: rename to vertex_pair_buffer_p_q_edge_p_r for consistency + std::move(std::get<1>(vertex_pair_buffer_q_r_edge_p_r)), // FIXME: rename to vertex_pair_buffer_p_q_edge_p_r for consistency std::nullopt, std::nullopt, std::nullopt, cur_graph_view.vertex_partition_range_lasts()); + + raft::print_device_vector("vertex_pair_buffer_q_r_edge_p_r_srcs", vertex_pair_buffer_q_r_edge_p_r_srcs.data(), vertex_pair_buffer_q_r_edge_p_r_srcs.size(), std::cout); + raft::print_device_vector("vertex_pair_buffer_q_r_edge_p_r_dsts", vertex_pair_buffer_q_r_edge_p_r_dsts.data(), vertex_pair_buffer_q_r_edge_p_r_dsts.size(), std::cout); update_count( handle, @@ -1384,6 +2300,8 @@ k_truss(raft::handle_t const& handle, raft::device_span(vertex_pair_buffer_q_r_edge_p_r_srcs.data(), vertex_pair_buffer_q_r_edge_p_r_srcs.size()), raft::device_span(vertex_pair_buffer_q_r_edge_p_r_dsts.data(), vertex_pair_buffer_q_r_edge_p_r_dsts.size()) ); + + std::cout << "Done updating count_2" <( @@ -1398,19 +2316,34 @@ k_truss(raft::handle_t const& handle, 
handle, cur_graph_view, e_property_triangle_count, - raft::device_span(std::get<0>(vertex_pair_buffer_p_q_for_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_for_p_r).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_p_q_for_p_r).data(), std::get<1>(vertex_pair_buffer_p_q_for_p_r).size()) + raft::device_span(std::get<0>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_edge_p_r).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<1>(vertex_pair_buffer_p_q_edge_p_r).size()) ); update_count( handle, cur_graph_view, e_property_triangle_count, - raft::device_span(std::get<0>(vertex_pair_buffer_q_r_for_p_r).data(), std::get<0>(vertex_pair_buffer_q_r_for_p_r).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_q_r_for_p_r).data(), std::get<1>(vertex_pair_buffer_q_r_for_p_r).size()) + raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_r).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_r).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_q_r_edge_p_r).data(), std::get<1>(vertex_pair_buffer_q_r_edge_p_r).size()) ); } + + + printf("\nafter unrolling (p, r) edges\n"); + auto [srcs_2, dsts_2, count_2] = extract_transform_e(handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), + e_property_triangle_count.view(), + extract_edges{}); + + raft::print_device_vector("unrolled_srcs_2", srcs_2.data(), srcs_2.size(), std::cout); + raft::print_device_vector("unrolled_dsts_2", dsts_2.data(), dsts_2.size(), std::cout); + raft::print_device_vector("unrolled_n_tr_2", count_2.data(), count_2.size(), std::cout); + // Mask all the edges that have 0 count cugraph::transform_e( handle, @@ -1427,6 +2360,22 @@ k_truss(raft::handle_t const& handle, cur_graph_view.attach_edge_mask(edge_mask.view()); + auto [srcs_3, dsts_3, count_3] = 
extract_transform_e(handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), + e_property_triangle_count.view(), + extract_edges{}); + + + printf("\nafter removing edges with no count: num_edges = %d\n", srcs_3.size()); + + raft::print_device_vector("unrolled_srcs_3", srcs_3.data(), srcs_3.size(), std::cout); + raft::print_device_vector("unrolled_dsts_3", dsts_3.data(), dsts_3.size(), std::cout); + raft::print_device_vector("unrolled_n_tr_3", count_3.data(), count_3.size(), std::cout); + + /* auto [edgelist_srcs_, edgelist_dsts_, edgelist_wgts_, dummy_0, dummy_1] = decompress_to_edgelist( handle, @@ -1435,8 +2384,20 @@ k_truss(raft::handle_t const& handle, std::optional>{std::nullopt}, std::optional>{std::nullopt}, std::optional>(std::nullopt)); + */ + + //#endif // While loop } + + + + + + + + + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); rmm::device_uvector edgelist_dsts(0, handle.get_stream()); std::optional> edgelist_wgts{std::nullopt}; diff --git a/cpp/src/community/k_truss_mg.cu b/cpp/src/community/k_truss_mg.cu index 048e3c34198..04845d5b73d 100644 --- a/cpp/src/community/k_truss_mg.cu +++ b/cpp/src/community/k_truss_mg.cu @@ -28,7 +28,7 @@ k_truss(raft::handle_t const& handle, std::optional> edge_weight_view, int32_t k, bool do_expensive_check); - +/* template std::tuple, rmm::device_uvector, std::optional>> @@ -73,5 +73,6 @@ k_truss(raft::handle_t const& handle, std::optional> edge_weight_view, int64_t k, bool do_expensive_check); +*/ } // namespace cugraph diff --git a/cpp/src/community/triangle_count_impl.cuh b/cpp/src/community/triangle_count_impl.cuh index 6346aef0736..8c9e4e040e2 100644 --- a/cpp/src/community/triangle_count_impl.cuh +++ b/cpp/src/community/triangle_count_impl.cuh @@ -423,7 +423,7 @@ void triangle_count(raft::handle_t const& handle, extract_low_to_high_degree_edges_t{}); if 
constexpr (multi_gpu) { - std::tie(srcs, dsts, std::ignore, std::ignore, std::ignore) = + std::tie(srcs, dsts, std::ignore, std::ignore, std::ignore, std::ignore) = detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning(edge_buffer), std::ignore, std::ignore, + std::ignore, std::ignore) = detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< vertex_t, diff --git a/cpp/src/link_prediction/similarity_impl.cuh b/cpp/src/link_prediction/similarity_impl.cuh index 00f7bc6cbe7..4d4f0d3b9c7 100644 --- a/cpp/src/link_prediction/similarity_impl.cuh +++ b/cpp/src/link_prediction/similarity_impl.cuh @@ -370,7 +370,7 @@ all_pairs_similarity(raft::handle_t const& handle, // shuffle vertex pairs auto vertex_partition_range_lasts = graph_view.vertex_partition_range_lasts(); - std::tie(v1, v2, std::ignore, std::ignore, std::ignore) = + std::tie(v1, v2, std::ignore, std::ignore, std::ignore, std::ignore) = detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_vertex_pairs_with_values_by_gpu_id_impl( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -175,25 +176,27 @@ shuffle_vertex_pairs_with_values_by_gpu_id_impl( handle.get_stream()); handle.sync_stream(); + std::vector rx_counts{}; + if (mem_frugal_flag) { // trade-off potential parallelism to lower peak memory - std::tie(majors, std::ignore) = + std::tie(majors, rx_counts) = shuffle_values(comm, majors.begin(), h_tx_value_counts, handle.get_stream()); - std::tie(minors, std::ignore) = + std::tie(minors, rx_counts) = shuffle_values(comm, minors.begin(), h_tx_value_counts, handle.get_stream()); if (weights) { - std::tie(weights, std::ignore) = + std::tie(weights, rx_counts) = shuffle_values(comm, (*weights).begin(), h_tx_value_counts, handle.get_stream()); } if (edge_ids) { - std::tie(edge_ids, std::ignore) = + std::tie(edge_ids, 
rx_counts) = shuffle_values(comm, (*edge_ids).begin(), h_tx_value_counts, handle.get_stream()); } if (edge_types) { - std::tie(edge_types, std::ignore) = + std::tie(edge_types, rx_counts) = shuffle_values(comm, (*edge_types).begin(), h_tx_value_counts, handle.get_stream()); } } else { @@ -201,7 +204,7 @@ shuffle_vertex_pairs_with_values_by_gpu_id_impl( if (edge_ids) { if (edge_types) { std::forward_as_tuple(std::tie(majors, minors, weights, edge_ids, edge_types), - std::ignore) = + rx_counts) = shuffle_values(comm, thrust::make_zip_iterator(majors.begin(), minors.begin(), @@ -211,7 +214,7 @@ shuffle_vertex_pairs_with_values_by_gpu_id_impl( h_tx_value_counts, handle.get_stream()); } else { - std::forward_as_tuple(std::tie(majors, minors, weights, edge_ids), std::ignore) = + std::forward_as_tuple(std::tie(majors, minors, weights, edge_ids), rx_counts) = shuffle_values(comm, thrust::make_zip_iterator( majors.begin(), minors.begin(), weights->begin(), edge_ids->begin()), @@ -220,14 +223,14 @@ shuffle_vertex_pairs_with_values_by_gpu_id_impl( } } else { if (edge_types) { - std::forward_as_tuple(std::tie(majors, minors, weights, edge_types), std::ignore) = + std::forward_as_tuple(std::tie(majors, minors, weights, edge_types), rx_counts) = shuffle_values(comm, thrust::make_zip_iterator( majors.begin(), minors.begin(), weights->begin(), edge_types->begin()), h_tx_value_counts, handle.get_stream()); } else { - std::forward_as_tuple(std::tie(majors, minors, weights), std::ignore) = shuffle_values( + std::forward_as_tuple(std::tie(majors, minors, weights), rx_counts) = shuffle_values( comm, thrust::make_zip_iterator(majors.begin(), minors.begin(), weights->begin()), h_tx_value_counts, @@ -237,7 +240,7 @@ shuffle_vertex_pairs_with_values_by_gpu_id_impl( } else { if (edge_ids) { if (edge_types) { - std::forward_as_tuple(std::tie(majors, minors, edge_ids, edge_types), std::ignore) = + std::forward_as_tuple(std::tie(majors, minors, edge_ids, edge_types), rx_counts) = 
shuffle_values( comm, thrust::make_zip_iterator( @@ -245,7 +248,7 @@ shuffle_vertex_pairs_with_values_by_gpu_id_impl( h_tx_value_counts, handle.get_stream()); } else { - std::forward_as_tuple(std::tie(majors, minors, edge_ids), std::ignore) = shuffle_values( + std::forward_as_tuple(std::tie(majors, minors, edge_ids), rx_counts) = shuffle_values( comm, thrust::make_zip_iterator(majors.begin(), minors.begin(), edge_ids->begin()), h_tx_value_counts, @@ -253,13 +256,13 @@ shuffle_vertex_pairs_with_values_by_gpu_id_impl( } } else { if (edge_types) { - std::forward_as_tuple(std::tie(majors, minors, edge_types), std::ignore) = shuffle_values( + std::forward_as_tuple(std::tie(majors, minors, edge_types), rx_counts) = shuffle_values( comm, thrust::make_zip_iterator(majors.begin(), minors.begin(), edge_types->begin()), h_tx_value_counts, handle.get_stream()); } else { - std::forward_as_tuple(std::tie(majors, minors), std::ignore) = + std::forward_as_tuple(std::tie(majors, minors), rx_counts) = shuffle_values(comm, thrust::make_zip_iterator(majors.begin(), minors.begin()), h_tx_value_counts, @@ -273,7 +276,8 @@ shuffle_vertex_pairs_with_values_by_gpu_id_impl( std::move(minors), std::move(weights), std::move(edge_ids), - std::move(edge_types)); + std::move(edge_types), + std::move(rx_counts)); } } // namespace @@ -285,7 +289,8 @@ std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -317,7 +322,8 @@ std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -360,7 +366,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + 
std::vector> shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -373,7 +380,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -386,7 +394,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -399,7 +408,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -412,7 +422,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -425,7 +436,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -438,7 +450,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -452,7 +465,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> 
shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -466,7 +480,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -480,7 +495,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -494,7 +510,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -508,7 +525,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( raft::handle_t const& handle, rmm::device_uvector&& majors, @@ -525,7 +543,8 @@ std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_external_edges(raft::handle_t const& handle, rmm::device_uvector&& edge_srcs, rmm::device_uvector&& edge_dsts, @@ -553,7 +572,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_external_edges(raft::handle_t const& handle, rmm::device_uvector&& majors, rmm::device_uvector&& minors, @@ -565,7 +585,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_external_edges(raft::handle_t const& handle, rmm::device_uvector&& majors, 
rmm::device_uvector&& minors, @@ -577,7 +598,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_external_edges(raft::handle_t const& handle, rmm::device_uvector&& majors, rmm::device_uvector&& minors, @@ -589,7 +611,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_external_edges(raft::handle_t const& handle, rmm::device_uvector&& majors, rmm::device_uvector&& minors, @@ -601,7 +624,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_external_edges(raft::handle_t const& handle, rmm::device_uvector&& majors, rmm::device_uvector&& minors, @@ -613,7 +637,8 @@ template std::tuple, rmm::device_uvector, std::optional>, std::optional>, - std::optional>> + std::optional>, + std::vector> shuffle_external_edges(raft::handle_t const& handle, rmm::device_uvector&& majors, rmm::device_uvector&& minors, diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index feb8518420c..f7c92b0ae0c 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -500,7 +500,7 @@ ConfigureTest(K_CORE_TEST cores/k_core_test.cpp) ############################################################################################### # - K-truss tests -------------------------------------------------------------------------- -ConfigureTest(K_TRUSS_TEST community/k_truss_test.cpp) +#ConfigureTest(K_TRUSS_TEST community/k_truss_test.cpp) ################################################################################################### # - Triangle Count tests -------------------------------------------------------------------------- @@ -818,7 +818,7 @@ ConfigureCTest(CAPI_DEGREES c_api/degrees_test.c) ConfigureCTest(CAPI_COUNT_MULTI_EDGES c_api/count_multi_edges_test.c) ConfigureCTest(CAPI_EGONET_TEST c_api/egonet_test.c) 
ConfigureCTest(CAPI_TWO_HOP_NEIGHBORS_TEST c_api/two_hop_neighbors_test.c) -ConfigureCTest(CAPI_K_TRUSS_TEST c_api/k_truss_test.c) +#ConfigureCTest(CAPI_K_TRUSS_TEST c_api/k_truss_test.c) if (BUILD_CUGRAPH_MTMG_TESTS) ################################################################################################### diff --git a/cpp/tests/community/k_truss_test.cpp b/cpp/tests/community/k_truss_test.cpp index c8010422e42..5945c76bd27 100644 --- a/cpp/tests/community/k_truss_test.cpp +++ b/cpp/tests/community/k_truss_test.cpp @@ -244,13 +244,19 @@ class Tests_KTruss : public ::testing::TestWithParam{ weight_t{1e-3}, weight_t{(weight_t{1} / static_cast((h_cugraph_wgts).size())) * weight_t{1e-3}}}; + EXPECT_TRUE(std::equal((h_cugraph_wgts).begin(), (h_cugraph_wgts).end(), (*h_reference_wgts).begin(), @@ -261,14 +267,14 @@ class Tests_KTruss : public ::testing::TestWithParam; -using Tests_KTruss_Rmat = Tests_KTruss; +//using Tests_KTruss_Rmat = Tests_KTruss; TEST_P(Tests_KTruss_File, CheckInt32Int32Float) { run_current_test( override_File_Usecase_with_cmd_line_arguments(GetParam())); } - +#if 0 TEST_P(Tests_KTruss_File, CheckInt64Int64Float) { run_current_test( @@ -286,14 +292,25 @@ TEST_P(Tests_KTruss_Rmat, CheckInt64Int64Float) run_current_test( override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); } +#endif + +INSTANTIATE_TEST_SUITE_P( + simple_test, + Tests_KTruss_File, + ::testing::Combine( + // enable correctness checks + ::testing::Values(//KTruss_Usecase{4, true, true}, + KTruss_Usecase{4, true, true}), + ::testing::Values(cugraph::test::File_Usecase("/raid/jnke/optimize_ktruss/datasets/test_datasets_.mtx")))); +#if 0 INSTANTIATE_TEST_SUITE_P( simple_test, Tests_KTruss_File, ::testing::Combine( // enable correctness checks - ::testing::Values(KTruss_Usecase{5, true, false}, - KTruss_Usecase{4, true, false}, + ::testing::Values(KTruss_Usecase{5, true, true}, + KTruss_Usecase{4, true, true}, KTruss_Usecase{9, true, true}, KTruss_Usecase{7, true, true}), 
::testing::Values(cugraph::test::File_Usecase("test/datasets/netscience.mtx"), @@ -302,8 +319,8 @@ INSTANTIATE_TEST_SUITE_P( INSTANTIATE_TEST_SUITE_P(rmat_small_test, Tests_KTruss_Rmat, // enable correctness checks - ::testing::Combine(::testing::Values(KTruss_Usecase{5, false, true}, - KTruss_Usecase{4, false, true}, + ::testing::Combine(::testing::Values(KTruss_Usecase{5, true, true}, + KTruss_Usecase{4, true, true}, KTruss_Usecase{9, true, true}, KTruss_Usecase{7, true, true}), ::testing::Values(cugraph::test::Rmat_Usecase( @@ -319,7 +336,8 @@ INSTANTIATE_TEST_SUITE_P( // disable correctness checks for large graphs // FIXME: High memory footprint. Perform nbr_intersection in chunks. ::testing::Combine( - ::testing::Values(KTruss_Usecase{12, false, false}), - ::testing::Values(cugraph::test::Rmat_Usecase(14, 16, 0.57, 0.19, 0.19, 0, true, false)))); + ::testing::Values(KTruss_Usecase{4, false, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(18, 16, 0.57, 0.19, 0.19, 0, true, false)))); +#endif CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/link_prediction/mg_similarity_test.cpp b/cpp/tests/link_prediction/mg_similarity_test.cpp index 3a71f8ee221..c4e1c5954db 100644 --- a/cpp/tests/link_prediction/mg_similarity_test.cpp +++ b/cpp/tests/link_prediction/mg_similarity_test.cpp @@ -106,7 +106,7 @@ class Tests_MGSimilarity auto d_v1 = cugraph::test::to_device(*handle_, h_v1); auto d_v2 = std::move(two_hop_nbrs); - std::tie(d_v1, d_v2, std::ignore, std::ignore, std::ignore) = + std::tie(d_v1, d_v2, std::ignore, std::ignore, std::ignore, std::ignore) = cugraph::detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< vertex_t, edge_t, diff --git a/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp b/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp index 3d891484818..9a4e901fe83 100644 --- a/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp +++ b/cpp/tests/link_prediction/mg_weighted_similarity_test.cpp @@ -108,7 
+108,7 @@ class Tests_MGSimilarity auto d_v1 = cugraph::test::to_device(*handle_, h_v1); auto d_v2 = std::move(two_hop_nbrs); - std::tie(d_v1, d_v2, std::ignore, std::ignore, std::ignore) = + std::tie(d_v1, d_v2, std::ignore, std::ignore, std::ignore, std::ignore) = cugraph::detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< vertex_t, edge_t, diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu index 75b711fbd9c..09955e876e1 100644 --- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu @@ -150,6 +150,7 @@ class Tests_MGPerVPairTransformDstNbrIntersection std::get<1>(mg_vertex_pair_buffer), std::ignore, std::ignore, + std::ignore, std::ignore) = cugraph::detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< vertex_t, diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu index 48bbc6176d8..06a23880d81 100644 --- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu @@ -175,6 +175,7 @@ class Tests_MGPerVPairTransformDstNbrIntersection std::get<1>(mg_vertex_pair_buffer), std::ignore, std::ignore, + std::ignore, std::ignore) = cugraph::detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< vertex_t, diff --git a/cpp/tests/structure/mg_has_edge_and_compute_multiplicity_test.cpp b/cpp/tests/structure/mg_has_edge_and_compute_multiplicity_test.cpp index 3d3d881fb23..b8ad06dd18b 100644 --- a/cpp/tests/structure/mg_has_edge_and_compute_multiplicity_test.cpp +++ b/cpp/tests/structure/mg_has_edge_and_compute_multiplicity_test.cpp @@ -123,6 +123,7 @@ class Tests_MGHasEdgeAndComputeMultiplicity store_transposed ? 
d_mg_edge_srcs : d_mg_edge_dsts, std::ignore, std::ignore, + std::ignore, std::ignore) = cugraph::detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< vertex_t, diff --git a/cpp/tests/utilities/test_graphs.hpp b/cpp/tests/utilities/test_graphs.hpp index 0f3224bfc52..bd570c9b037 100644 --- a/cpp/tests/utilities/test_graphs.hpp +++ b/cpp/tests/utilities/test_graphs.hpp @@ -302,6 +302,7 @@ class Rmat_Usecase : public detail::TranslateGraph_Usecase { store_transposed ? tmp_src_v : tmp_dst_v, tmp_weights_v, std::ignore, + std::ignore, std::ignore) = cugraph::detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< vertex_t, From 97609c4723f9f6d6e4f8bbf0671d233b0e408789 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sat, 6 Jul 2024 18:20:18 -0700 Subject: [PATCH 60/93] update cmake --- cpp/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3e14cc9e5e0..c2e5e65a568 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -209,7 +209,7 @@ set(CUGRAPH_SOURCES src/community/legacy/ecg.cu src/community/egonet_sg.cu src/community/egonet_mg.cu - src/community/k_truss_sg.cu + #src/community/k_truss_sg.cu src/community/k_truss_mg.cu src/sampling/random_walks.cu src/sampling/random_walks_sg.cu @@ -390,7 +390,7 @@ add_library(cugraph_c src/c_api/eigenvector_centrality.cpp src/c_api/betweenness_centrality.cpp src/c_api/core_number.cpp - src/c_api/k_truss.cpp + #src/c_api/k_truss.cpp src/c_api/core_result.cpp src/c_api/extract_ego.cpp src/c_api/ecg.cpp From 09b0b96eca3ef6190cc98044de539d41c3cd7bc3 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sat, 6 Jul 2024 18:45:47 -0700 Subject: [PATCH 61/93] rename functor --- cpp/src/community/k_truss_impl.cuh | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index f01090f8da5..a357e774f0b 100644 --- 
a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -178,8 +178,7 @@ edge_t remove_overcompensating_edges(raft::handle_t const& handle, set_b_query_edges), thrust::make_zip_iterator(set_a_query_edges + buffer_size, set_b_query_edges + buffer_size), - [num_weak_edges = set_c_weak_edges_dsts.size(), - set_c_weak_edges_first = + [set_c_weak_edges_first = thrust::make_zip_iterator(set_c_weak_edges_srcs.begin(), set_c_weak_edges_dsts.begin()), set_c_weak_edges_last = thrust::make_zip_iterator(set_c_weak_edges_srcs.end(), set_c_weak_edges_dsts.end())] __device__(auto e) { @@ -189,9 +188,12 @@ edge_t remove_overcompensating_edges(raft::handle_t const& handle, potential_or_incoming_edge = thrust::make_tuple(thrust::get<1>(potential_edge), thrust::get<0>(potential_edge)); }; + /* auto itr = thrust::lower_bound( thrust::seq, set_c_weak_edges_first, set_c_weak_edges_last, potential_or_incoming_edge); - return (itr != set_c_weak_edges_last && *itr == potential_or_incoming_edge); + */ + return thrust::binary_search( + thrust::seq, set_c_weak_edges_first, set_c_weak_edges_last, potential_or_incoming_edge); }); auto dist = thrust::distance(thrust::make_zip_iterator(set_a_query_edges, @@ -216,7 +218,7 @@ struct extract_weak_edges { }; template -struct extract_edges { +struct extract_edges_and_triangle_counts { __device__ thrust::optional> operator()( auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) const @@ -1003,7 +1005,7 @@ k_truss(raft::handle_t const& handle, cugraph::edge_dst_dummy_property_t{}.view(), //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), e_property_triangle_count.view(), - extract_edges{}); + extract_edges_and_triangle_counts{}); /* raft::print_device_vector("unrolled_srcs", srcs_0.data(), srcs_0.size(), std::cout); raft::print_device_vector("unrolled_dsts", dsts_0.data(), dsts_0.size(), std::cout); @@ -1545,7 +1547,7 @@ k_truss(raft::handle_t const& handle, 
cugraph::edge_dst_dummy_property_t{}.view(), //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), e_property_triangle_count.view(), - extract_edges{}); + extract_edges_and_triangle_counts{}); /* raft::print_device_vector("unrolled_srcs", srcs_1.data(), srcs_1.size(), std::cout); @@ -2338,7 +2340,7 @@ k_truss(raft::handle_t const& handle, cugraph::edge_dst_dummy_property_t{}.view(), //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), e_property_triangle_count.view(), - extract_edges{}); + extract_edges_and_triangle_counts{}); raft::print_device_vector("unrolled_srcs_2", srcs_2.data(), srcs_2.size(), std::cout); raft::print_device_vector("unrolled_dsts_2", dsts_2.data(), dsts_2.size(), std::cout); @@ -2366,7 +2368,7 @@ k_truss(raft::handle_t const& handle, cugraph::edge_dst_dummy_property_t{}.view(), //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), e_property_triangle_count.view(), - extract_edges{}); + extract_edges_and_triangle_counts{}); printf("\nafter removing edges with no count: num_edges = %d\n", srcs_3.size()); From 8eec69166af6669834cac24dca611f0d2bb3b194 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sat, 6 Jul 2024 19:00:32 -0700 Subject: [PATCH 62/93] rename variable --- cpp/src/community/k_truss_impl.cuh | 110 +++++++++++++++-------------- 1 file changed, 56 insertions(+), 54 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index a357e774f0b..c4d9a07c87c 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -482,42 +482,44 @@ struct generate_p_q_q_r { }; template -void update_count(raft::handle_t const& handle, +void decrease_triangle_count(raft::handle_t const& handle, graph_view_t & cur_graph_view, - edge_property_t, edge_t> & e_property_triangle_count, - raft::device_span vertex_pair_buffer_src, - raft::device_span vertex_pair_buffer_dst + edge_property_t, edge_t> & 
edge_triangle_counts, + raft::device_span edge_srcs, + raft::device_span edge_dsts ) { // Before updating the count, we need to clear the mask // cur_graph_view.clear_edge_mask(); - auto vertex_pair_buffer_begin = thrust::make_zip_iterator(vertex_pair_buffer_src.begin(), vertex_pair_buffer_dst.begin()); + auto vertex_pair_buffer_begin = thrust::make_zip_iterator(edge_srcs.begin(), edge_dsts.begin()); thrust::sort(handle.get_thrust_policy(), vertex_pair_buffer_begin, - vertex_pair_buffer_begin + vertex_pair_buffer_src.size()); + vertex_pair_buffer_begin + edge_srcs.size()); auto unique_pair_count = thrust::unique_count(handle.get_thrust_policy(), vertex_pair_buffer_begin, - vertex_pair_buffer_begin + vertex_pair_buffer_src.size()); + vertex_pair_buffer_begin + edge_srcs.size()); rmm::device_uvector decrease_count(unique_pair_count, handle.get_stream()); - rmm::device_uvector decrease_count_tmp(vertex_pair_buffer_src.size(), + /* + rmm::device_uvector decrease_count_tmp(edge_srcs.size(), handle.get_stream()); thrust::fill(handle.get_thrust_policy(), decrease_count_tmp.begin(), decrease_count_tmp.end(), size_t{1}); + */ auto vertex_pair_buffer_unique = allocate_dataframe_buffer>( unique_pair_count, handle.get_stream()); thrust::reduce_by_key(handle.get_thrust_policy(), vertex_pair_buffer_begin, - vertex_pair_buffer_begin + vertex_pair_buffer_src.size(), - decrease_count_tmp.begin(), + vertex_pair_buffer_begin + edge_srcs.size(), + thrust::make_constant_iterator(size_t{1}), get_dataframe_buffer_begin(vertex_pair_buffer_unique), decrease_count.begin(), thrust::equal_to>{}); @@ -533,7 +535,7 @@ void update_count(raft::handle_t const& handle, edges_to_decrement_count, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), - e_property_triangle_count.view(), + edge_triangle_counts.view(), [ vertex_pair_buffer_begin = get_dataframe_buffer_begin(vertex_pair_buffer_unique), vertex_pair_buffer_end = 
get_dataframe_buffer_end(vertex_pair_buffer_unique), @@ -548,7 +550,7 @@ void update_count(raft::handle_t const& handle, auto idx_pair = thrust::distance(vertex_pair_buffer_begin, itr_pair); return count - decrease_count[idx_pair]; }, - e_property_triangle_count.mutable_view(), + edge_triangle_counts.mutable_view(), true); // FIXME: set expensive check to False }; @@ -841,7 +843,7 @@ k_truss(raft::handle_t const& handle, edge_weight ? std::make_optional((*edge_weight).view()) : std::optional>{std::nullopt}; - auto e_property_triangle_count = edge_triangle_count(handle, cur_graph_view); + auto edge_triangle_counts = edge_triangle_count(handle, cur_graph_view); cugraph::edge_property_t edge_mask(handle, cur_graph_view); cugraph::fill_edge_property(handle, cur_graph_view, true, edge_mask); @@ -853,7 +855,7 @@ k_truss(raft::handle_t const& handle, cur_graph_view, edge_src_dummy_property_t{}.view(), edge_dst_dummy_property_t{}.view(), - e_property_triangle_count.view(), + edge_triangle_counts.view(), extract_weak_edges{k}); @@ -966,26 +968,26 @@ k_truss(raft::handle_t const& handle, raft::print_device_vector("vertex_pair_buffer_q_r_edge_p_q_dsts", vertex_pair_buffer_q_r_edge_p_q_dsts.data(), vertex_pair_buffer_q_r_edge_p_q_dsts.size(), std::cout); */ - update_count( + decrease_triangle_count( handle, cur_graph_view, - e_property_triangle_count, + edge_triangle_counts, raft::device_span(std::get<0>(vertex_pair_buffer_p_q).data(), std::get<0>(vertex_pair_buffer_p_q).size()), raft::device_span(std::get<1>(vertex_pair_buffer_p_q).data(), std::get<1>(vertex_pair_buffer_p_q).size()) ); - update_count( + decrease_triangle_count( handle, cur_graph_view, - e_property_triangle_count, + edge_triangle_counts, multi_gpu ? 
raft::device_span(vertex_pair_buffer_p_r_edge_p_q_srcs.data(), vertex_pair_buffer_p_r_edge_p_q_srcs.size()) : raft::device_span(std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size()), // FIXME: Make sure multi_gpu is properly handles multi_gpu ? raft::device_span(vertex_pair_buffer_p_r_edge_p_q_dsts.data(), vertex_pair_buffer_p_r_edge_p_q_dsts.size()) : raft::device_span(std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size()) // FIXME: Make sure multi_gpu is properly handles ); - update_count( + decrease_triangle_count( handle, cur_graph_view, - e_property_triangle_count, + edge_triangle_counts, multi_gpu ? raft::device_span(vertex_pair_buffer_q_r_edge_p_q_srcs.data(), vertex_pair_buffer_q_r_edge_p_q_srcs.size()) : raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size()), multi_gpu ? raft::device_span(vertex_pair_buffer_q_r_edge_p_q_dsts.data(), vertex_pair_buffer_q_r_edge_p_q_dsts.size()) : raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size()) ); @@ -1003,8 +1005,8 @@ k_truss(raft::handle_t const& handle, cur_graph_view, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), - //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), - e_property_triangle_count.view(), + //view_concat(edge_triangle_counts.view(), modified_triangle_count.view()), + edge_triangle_counts.view(), extract_edges_and_triangle_counts{}); /* raft::print_device_vector("unrolled_srcs", srcs_0.data(), srcs_0.size(), std::cout); @@ -1450,10 +1452,10 @@ k_truss(raft::handle_t const& handle, std::nullopt, cur_graph_view.vertex_partition_range_lasts()); - update_count( + decrease_triangle_count( handle, cur_graph_view, - e_property_triangle_count, + edge_triangle_counts, 
raft::device_span(vertex_pair_buffer_q_r_srcs.data(), vertex_pair_buffer_q_r_srcs.size()), raft::device_span(vertex_pair_buffer_q_r_dsts.data(), vertex_pair_buffer_q_r_dsts.size()) ); @@ -1475,10 +1477,10 @@ k_truss(raft::handle_t const& handle, std::nullopt, cur_graph_view.vertex_partition_range_lasts()); - update_count( + decrease_triangle_count( handle, cur_graph_view, - e_property_triangle_count, + edge_triangle_counts, raft::device_span(vertex_pair_buffer_p_q_edge_q_r_dsts.data(), vertex_pair_buffer_p_q_edge_q_r_dsts.size()), raft::device_span(vertex_pair_buffer_p_q_edge_q_r_srcs.data(), vertex_pair_buffer_p_q_edge_q_r_srcs.size()) ); @@ -1500,33 +1502,33 @@ k_truss(raft::handle_t const& handle, std::nullopt, cur_graph_view.vertex_partition_range_lasts()); - update_count( + decrease_triangle_count( handle, cur_graph_view, - e_property_triangle_count, + edge_triangle_counts, raft::device_span(vertex_pair_buffer_p_r_edge_q_r_dsts.data(), vertex_pair_buffer_p_r_edge_q_r_dsts.size()), raft::device_span(vertex_pair_buffer_p_r_edge_q_r_srcs.data(), vertex_pair_buffer_p_r_edge_q_r_srcs.size()) ); } else { - update_count( + decrease_triangle_count( handle, cur_graph_view, - e_property_triangle_count, + edge_triangle_counts, raft::device_span(std::get<0>(vertex_pair_buffer_q_r).data(), std::get<0>(vertex_pair_buffer_q_r).size()), raft::device_span(std::get<1>(vertex_pair_buffer_q_r).data(), std::get<1>(vertex_pair_buffer_q_r).size()) ); - update_count( + decrease_triangle_count( handle, cur_graph_view, - e_property_triangle_count, + edge_triangle_counts, raft::device_span(std::get<1>(vertex_pair_buffer_p_q_edge_q_r).data(), std::get<0>(vertex_pair_buffer_p_q_edge_q_r).size()), raft::device_span(std::get<0>(vertex_pair_buffer_p_q_edge_q_r).data(), std::get<1>(vertex_pair_buffer_p_q_edge_q_r).size()) ); - update_count( + decrease_triangle_count( handle, cur_graph_view, - e_property_triangle_count, + edge_triangle_counts, 
raft::device_span(std::get<1>(vertex_pair_buffer_p_r_edge_q_r).data(), std::get<0>(vertex_pair_buffer_p_r_edge_q_r).size()), raft::device_span(std::get<0>(vertex_pair_buffer_p_r_edge_q_r).data(), std::get<1>(vertex_pair_buffer_p_r_edge_q_r).size()) ); @@ -1545,8 +1547,8 @@ k_truss(raft::handle_t const& handle, cur_graph_view, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), - //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), - e_property_triangle_count.view(), + //view_concat(edge_triangle_counts.view(), modified_triangle_count.view()), + edge_triangle_counts.view(), extract_edges_and_triangle_counts{}); /* @@ -1644,7 +1646,7 @@ k_truss(raft::handle_t const& handle, }); #endif - //raft::print_device_vector("vertex_pair_buffer_src", std::get<0>(vertex_pair_buffer_p_tag).data(), std::get<0>(vertex_pair_buffer_p_tag).size(), std::cout); + //raft::print_device_vector("edge_srcs", std::get<0>(vertex_pair_buffer_p_tag).data(), std::get<0>(vertex_pair_buffer_p_tag).size(), std::cout); //raft::print_device_vector("vertex_pair_buffer_tag", std::get<1>(vertex_pair_buffer_p_tag).data(), std::get<1>(vertex_pair_buffer_p_tag).size(), std::cout); vertex_frontier_t vertex_frontier(handle, 1); @@ -2235,10 +2237,10 @@ k_truss(raft::handle_t const& handle, raft::print_device_vector("vertex_pair_buffer_p_r_srcs", vertex_pair_buffer_p_r_srcs.data(), vertex_pair_buffer_p_r_srcs.size(), std::cout); raft::print_device_vector("vertex_pair_buffer_p_r_dsts", vertex_pair_buffer_p_r_dsts.data(), vertex_pair_buffer_p_r_dsts.size(), std::cout); - update_count( + decrease_triangle_count( handle, cur_graph_view, - e_property_triangle_count, + edge_triangle_counts, raft::device_span(vertex_pair_buffer_p_r_srcs.data(), vertex_pair_buffer_p_r_srcs.size()), raft::device_span(vertex_pair_buffer_p_r_dsts.data(), vertex_pair_buffer_p_r_dsts.size()) ); @@ -2265,10 +2267,10 @@ k_truss(raft::handle_t const& handle, 
raft::print_device_vector("vertex_pair_buffer_p_q_edge_p_r_srcs", vertex_pair_buffer_p_q_edge_p_r_srcs.data(), vertex_pair_buffer_p_q_edge_p_r_srcs.size(), std::cout); raft::print_device_vector("vertex_pair_buffer_p_q_edge_p_r_dsts", vertex_pair_buffer_p_q_edge_p_r_dsts.data(), vertex_pair_buffer_p_q_edge_p_r_dsts.size(), std::cout); - update_count( + decrease_triangle_count( handle, cur_graph_view, - e_property_triangle_count, + edge_triangle_counts, raft::device_span(vertex_pair_buffer_p_q_edge_p_r_srcs.data(), vertex_pair_buffer_p_q_edge_p_r_srcs.size()), raft::device_span(vertex_pair_buffer_p_q_edge_p_r_dsts.data(), vertex_pair_buffer_p_q_edge_p_r_dsts.size()) ); @@ -2295,10 +2297,10 @@ k_truss(raft::handle_t const& handle, raft::print_device_vector("vertex_pair_buffer_q_r_edge_p_r_srcs", vertex_pair_buffer_q_r_edge_p_r_srcs.data(), vertex_pair_buffer_q_r_edge_p_r_srcs.size(), std::cout); raft::print_device_vector("vertex_pair_buffer_q_r_edge_p_r_dsts", vertex_pair_buffer_q_r_edge_p_r_dsts.data(), vertex_pair_buffer_q_r_edge_p_r_dsts.size(), std::cout); - update_count( + decrease_triangle_count( handle, cur_graph_view, - e_property_triangle_count, + edge_triangle_counts, raft::device_span(vertex_pair_buffer_q_r_edge_p_r_srcs.data(), vertex_pair_buffer_q_r_edge_p_r_srcs.size()), raft::device_span(vertex_pair_buffer_q_r_edge_p_r_dsts.data(), vertex_pair_buffer_q_r_edge_p_r_dsts.size()) ); @@ -2306,26 +2308,26 @@ k_truss(raft::handle_t const& handle, std::cout << "Done updating count_2" <( + decrease_triangle_count( handle, cur_graph_view, - e_property_triangle_count, + edge_triangle_counts, raft::device_span(std::get<0>(vertex_pair_buffer_p_r).data(), std::get<0>(vertex_pair_buffer_p_r).size()), raft::device_span(std::get<1>(vertex_pair_buffer_p_r).data(), std::get<1>(vertex_pair_buffer_p_r).size()) ); - update_count( + decrease_triangle_count( handle, cur_graph_view, - e_property_triangle_count, + edge_triangle_counts, 
raft::device_span(std::get<0>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_edge_p_r).size()), raft::device_span(std::get<1>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<1>(vertex_pair_buffer_p_q_edge_p_r).size()) ); - update_count( + decrease_triangle_count( handle, cur_graph_view, - e_property_triangle_count, + edge_triangle_counts, raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_r).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_r).size()), raft::device_span(std::get<1>(vertex_pair_buffer_q_r_edge_p_r).data(), std::get<1>(vertex_pair_buffer_q_r_edge_p_r).size()) ); @@ -2338,8 +2340,8 @@ k_truss(raft::handle_t const& handle, cur_graph_view, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), - //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), - e_property_triangle_count.view(), + //view_concat(edge_triangle_counts.view(), modified_triangle_count.view()), + edge_triangle_counts.view(), extract_edges_and_triangle_counts{}); raft::print_device_vector("unrolled_srcs_2", srcs_2.data(), srcs_2.size(), std::cout); @@ -2352,7 +2354,7 @@ k_truss(raft::handle_t const& handle, cur_graph_view, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), - e_property_triangle_count.view(), + edge_triangle_counts.view(), [] __device__( auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { return count != 0; @@ -2366,8 +2368,8 @@ k_truss(raft::handle_t const& handle, cur_graph_view, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), - //view_concat(e_property_triangle_count.view(), modified_triangle_count.view()), - e_property_triangle_count.view(), + //view_concat(edge_triangle_counts.view(), modified_triangle_count.view()), + edge_triangle_counts.view(), extract_edges_and_triangle_counts{}); From f88f8b02231c067da6b606018d207371700c570e Mon Sep 17 00:00:00 2001 From: jnke2016 
Date: Mon, 8 Jul 2024 04:34:29 -0700 Subject: [PATCH 63/93] rename variable --- cpp/src/community/k_truss_impl.cuh | 45 ++++++++++++++++-------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index c4d9a07c87c..b57cdccb9f4 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -491,15 +491,15 @@ void decrease_triangle_count(raft::handle_t const& handle, // Before updating the count, we need to clear the mask // cur_graph_view.clear_edge_mask(); - auto vertex_pair_buffer_begin = thrust::make_zip_iterator(edge_srcs.begin(), edge_dsts.begin()); + auto edge_buffer_first = thrust::make_zip_iterator(edge_srcs.begin(), edge_dsts.begin()); thrust::sort(handle.get_thrust_policy(), - vertex_pair_buffer_begin, - vertex_pair_buffer_begin + edge_srcs.size()); + edge_buffer_first, + edge_buffer_first + edge_srcs.size()); auto unique_pair_count = thrust::unique_count(handle.get_thrust_policy(), - vertex_pair_buffer_begin, - vertex_pair_buffer_begin + edge_srcs.size()); + edge_buffer_first, + edge_buffer_first + edge_srcs.size()); rmm::device_uvector decrease_count(unique_pair_count, handle.get_stream()); @@ -517,8 +517,8 @@ void decrease_triangle_count(raft::handle_t const& handle, unique_pair_count, handle.get_stream()); thrust::reduce_by_key(handle.get_thrust_policy(), - vertex_pair_buffer_begin, - vertex_pair_buffer_begin + edge_srcs.size(), + edge_buffer_first, + edge_buffer_first + edge_srcs.size(), thrust::make_constant_iterator(size_t{1}), get_dataframe_buffer_begin(vertex_pair_buffer_unique), decrease_count.begin(), @@ -537,17 +537,17 @@ void decrease_triangle_count(raft::handle_t const& handle, cugraph::edge_dst_dummy_property_t{}.view(), edge_triangle_counts.view(), [ - vertex_pair_buffer_begin = get_dataframe_buffer_begin(vertex_pair_buffer_unique), - vertex_pair_buffer_end = get_dataframe_buffer_end(vertex_pair_buffer_unique), + 
edge_buffer_first = get_dataframe_buffer_begin(vertex_pair_buffer_unique), + edge_buffer_last = get_dataframe_buffer_end(vertex_pair_buffer_unique), decrease_count = decrease_count.data() ] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { auto e = thrust::make_tuple(src, dst); auto itr_pair = thrust::lower_bound( - thrust::seq, vertex_pair_buffer_begin, vertex_pair_buffer_end, e); + thrust::seq, edge_buffer_first, edge_buffer_last, e); - auto idx_pair = thrust::distance(vertex_pair_buffer_begin, itr_pair); + auto idx_pair = thrust::distance(edge_buffer_first, itr_pair); return count - decrease_count[idx_pair]; }, edge_triangle_counts.mutable_view(), @@ -856,6 +856,7 @@ k_truss(raft::handle_t const& handle, edge_src_dummy_property_t{}.view(), edge_dst_dummy_property_t{}.view(), edge_triangle_counts.view(), + // FIXME: Replace by lambda function extract_weak_edges{k}); @@ -871,7 +872,7 @@ k_truss(raft::handle_t const& handle, // Find intersection edges size_t prev_chunk_size = 0; - size_t chunk_num_weak_edges = weak_edgelist_srcs.size(); + size_t num_remaining_weak_edges = weak_edgelist_srcs.size(); size_t edges_to_intersect_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); @@ -895,7 +896,7 @@ k_truss(raft::handle_t const& handle, for (size_t i = 0; i < num_chunks; ++i) { printf("\n in for loop chunk, i = %d, num_chunks = %d\n", i, num_chunks); - auto chunk_size = std::min(edges_to_intersect_per_iteration, chunk_num_weak_edges); + auto chunk_size = std::min(edges_to_intersect_per_iteration, num_remaining_weak_edges); //RAFT_CUDA_TRY(cudaDeviceSynchronize()); //printf("\ntracking hang\n"); @@ -993,9 +994,8 @@ k_truss(raft::handle_t const& handle, ); prev_chunk_size += chunk_size; - chunk_num_weak_edges -= chunk_size; + num_remaining_weak_edges -= chunk_size; - //#endif } //#if 0 @@ -1125,9 +1125,14 @@ k_truss(raft::handle_t const& handle, cugraph::edge_src_dummy_property_t{}.view(), 
cugraph::edge_dst_dummy_property_t{}.view(), cugraph::edge_dummy_property_t{}.view(), + // FIXME: Lambda function instead of functor extract_edges_to_q_r{raft::device_span(vertex_q_r.data(), vertex_q_r.size())}); + + + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); /* std::cout << "arrays to be shuffled" << std::endl; @@ -1225,7 +1230,7 @@ k_truss(raft::handle_t const& handle, weak_edgelist_first + renumbered_weak_edgelist_srcs.size()); } prev_chunk_size = 0; - chunk_num_weak_edges = weak_edgelist_size; + num_remaining_weak_edges = weak_edgelist_size; // FIXME: No need to recompute this. It's the same value as above when unrolling (p, q) edges num_chunks = @@ -1240,7 +1245,7 @@ k_truss(raft::handle_t const& handle, for (size_t i = 0; i < num_chunks; ++i) { - auto chunk_size = std::min(edges_to_intersect_per_iteration, chunk_num_weak_edges); + auto chunk_size = std::min(edges_to_intersect_per_iteration, num_remaining_weak_edges); // Find intersection of weak edges auto [vertex_pair_buffer_q_r, vertex_pair_buffer_p_q_edge_q_r, vertex_pair_buffer_p_r_edge_q_r] = accumulate_triangles_p_q_or_q_r>(size_t{0}, handle.get_stream())), multi_gpu>( @@ -1535,7 +1540,7 @@ k_truss(raft::handle_t const& handle, } prev_chunk_size += chunk_size; - chunk_num_weak_edges -= chunk_size; + num_remaining_weak_edges -= chunk_size; } @@ -1633,7 +1638,7 @@ k_truss(raft::handle_t const& handle, return thrust::make_tuple(p[idx], idx); }); } - //#endif + #if 0 thrust::tabulate( handle.get_thrust_policy(), @@ -2078,7 +2083,6 @@ k_truss(raft::handle_t const& handle, raft::print_device_vector("check_vertex_pair_buffer_p_r_dsts", std::get<1>(vertex_pair_buffer_p_r).data(), std::get<1>(vertex_pair_buffer_p_r).size(), std::cout); raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r_srcs", std::get<0>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_edge_p_r).size(), std::cout); raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r_dsts", 
std::get<1>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<1>(vertex_pair_buffer_p_q_edge_p_r).size(), std::cout); - //#endif } else { @@ -2390,7 +2394,6 @@ k_truss(raft::handle_t const& handle, std::optional>(std::nullopt)); */ - //#endif // While loop } From 463ad7c091cc67f172435557733c6acd7cb3c67f Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 10 Jul 2024 07:53:44 -0700 Subject: [PATCH 64/93] remove unnecessary and unused variables --- cpp/src/community/k_truss_impl.cuh | 825 ++++++++--------------------- 1 file changed, 215 insertions(+), 610 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index b57cdccb9f4..c66661830ab 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -43,46 +43,59 @@ namespace cugraph { -template +template // difference something. edge_t remove_overcompensating_edges(raft::handle_t const& handle, size_t buffer_size, - EdgeIterator set_a_query_edges, // (p, q) edges + EdgeIterator set_a_query_edges, EdgeIterator set_b_query_edges, - // rename querry_edge_first - // rename querry_edge_last - raft::device_span set_c_weak_edges_srcs, // FIXME: rename this, no need for first + raft::device_span global_set_c_weak_edges_srcs, + raft::device_span global_set_c_weak_edges_dsts, + raft::device_span set_c_weak_edges_srcs, raft::device_span set_c_weak_edges_dsts, - std::vector vertex_partition_range_lasts) // FIXME: rename this + std::vector vertex_partition_range_lasts) { // To avoid over-compensating, check whether the 'potential_closing_edges' - // are within the weak edges. If yes, the was already unrolled + // are within the weak edges. If yes, those edges were already unrolled - // FIXME: thrust::set_difference for SG - // set_difference once for major or minor comm - // rename set_A_last and set B. 
finding the difference - // Make it more general, not k-truss oriented + if constexpr (global_weak) { + // FIXME: can use thrust::set_difference for SG + auto edges_not_overcomp = thrust::remove_if( + handle.get_thrust_policy(), + thrust::make_zip_iterator(set_a_query_edges, + set_b_query_edges), + thrust::make_zip_iterator(set_a_query_edges + buffer_size, + set_b_query_edges + buffer_size), + [set_c_weak_edges_first = + thrust::make_zip_iterator(global_set_c_weak_edges_srcs.begin(), global_set_c_weak_edges_dsts.begin()), + set_c_weak_edges_last = thrust::make_zip_iterator(global_set_c_weak_edges_srcs.end(), + global_set_c_weak_edges_dsts.end())] __device__(auto e) { - rmm::device_uvector set_a_query_edges_srcs(buffer_size, handle.get_stream()); - rmm::device_uvector set_a_query_edges_dsts(buffer_size, handle.get_stream()); - std::vector rx_count{}; + auto set_a_query_edge = thrust::get<0>(e); + if constexpr (is_q_r_edge) { + set_a_query_edge = thrust::make_tuple(thrust::get<1>(set_a_query_edge), thrust::get<0>(set_a_query_edge)); + }; - if constexpr (multi_gpu) { + return thrust::binary_search( + thrust::seq, set_c_weak_edges_first, set_c_weak_edges_last, set_a_query_edge); + }); - - // FIXME: Just zip src and dst to copy at once for the edges + auto dist = thrust::distance(thrust::make_zip_iterator(set_a_query_edges, + set_b_query_edges), + edges_not_overcomp); + return dist; + } else { + rmm::device_uvector set_a_query_edges_srcs(buffer_size, handle.get_stream()); + rmm::device_uvector set_a_query_edges_dsts(buffer_size, handle.get_stream()); + std::vector rx_count{}; + thrust::copy(handle.get_thrust_policy(), set_a_query_edges, set_a_query_edges + buffer_size, thrust::make_zip_iterator(set_a_query_edges_srcs.begin(), set_a_query_edges_dsts.begin())); - auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); - - - // group_by_count to get the destination of each edges std::tie(set_a_query_edges_srcs, set_a_query_edges_dsts, std::ignore, 
std::ignore, std::ignore, rx_count) = detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning has_edge(set_a_query_edges_srcs.size(), handle.get_stream()); // type should be size_t - auto set_c_weak_edges_first = thrust::make_zip_iterator(set_c_weak_edges_srcs.begin(), set_c_weak_edges_dsts.begin()); // setBedges - auto set_c_weak_edges_last = thrust::make_zip_iterator(set_c_weak_edges_srcs.end(), set_c_weak_edges_dsts.end()); + auto set_c_weak_edges_first = thrust::make_zip_iterator(global_set_c_weak_edges_srcs.begin(), global_set_c_weak_edges_dsts.begin()); // setBedges + auto set_c_weak_edges_last = thrust::make_zip_iterator(global_set_c_weak_edges_srcs.end(), global_set_c_weak_edges_dsts.end()); auto set_a_query_edges_first = thrust::make_zip_iterator(set_a_query_edges_srcs.begin(), set_a_query_edges_dsts.begin()); /* @@ -104,10 +117,10 @@ edge_t remove_overcompensating_edges(raft::handle_t const& handle, ... ) */ - + // FIXME: Use thrust::transform instead thrust::tabulate( handle.get_thrust_policy(), - has_edge.begin(), // FIXME: Properly reconstruct (p, r) even when there is no overcompensation ************************************ + has_edge.begin(), // FIXME: Properly reconstruct (p, r) even when there is no overcompensation has_edge.end(), [ set_c_weak_edges_first, @@ -119,31 +132,32 @@ edge_t remove_overcompensating_edges(raft::handle_t const& handle, }); - if (comm_rank == 1) { - raft::print_device_vector("has_edge_b_s_v", has_edge.data(), has_edge.size(), std::cout); - } std::tie(has_edge, std::ignore) = shuffle_values(handle.get_comms(), has_edge.begin(), rx_count, handle.get_stream()); + /* + auto edges_not_overcomp = thrust::remove_if( + handle.get_thrust_policy(), + thrust::make_zip_iterator(set_a_query_edges, + set_b_query_edges), + thrust::make_zip_iterator(set_a_query_edges + buffer_size, + set_b_query_edges + buffer_size), + [has_edge = raft::device_span(has_edge.data(), has_edge.size()) + ] __device__(auto e) { - //if 
(comm_rank == 0) { - raft::print_device_vector("has_edge_a_s_v", has_edge.data(), has_edge.size(), std::cout); - //} - - - - if (comm_rank == 1) { - - raft::print_device_vector("set_c_weak_edges_srcs", set_c_weak_edges_srcs.data(), set_c_weak_edges_srcs.size(), std::cout); - raft::print_device_vector("set_c_weak_edges_dsts", set_c_weak_edges_dsts.data(), set_c_weak_edges_dsts.size(), std::cout); + auto set_a_query_edge = thrust::get<0>(e); + if constexpr (is_q_r_edge) { + set_a_query_edge = thrust::make_tuple(thrust::get<1>(set_a_query_edge), thrust::get<0>(set_a_query_edge)); - raft::print_device_vector("set_a_query_edges_srcs", set_a_query_edges_srcs.data(), set_a_query_edges_srcs.size(), std::cout); - raft::print_device_vector("set_a_query_edges_dsts", set_a_query_edges_dsts.data(), set_a_query_edges_dsts.size(), std::cout); - } + }; - // FIXME: thrust::remove_if (resize). No need for sort_by_key and upper_bound + return thrust::binary_search( + thrust::seq, set_c_weak_edges_first, set_c_weak_edges_last, set_a_query_edge); + }); + */ + // FIXME: use thrust::remove_if (resize). 
No need for sort_by_key and upper_bound thrust::sort_by_key(handle.get_thrust_policy(), has_edge.begin(), has_edge.end(), @@ -159,49 +173,9 @@ edge_t remove_overcompensating_edges(raft::handle_t const& handle, // FIXME: No need to reconstruct the third array because we can zip all 3 edges of the triangle - //printf("\nnumber of potential weak edges = %d\n", has_edge.size()); - auto dist = thrust::distance(has_edge.begin(), itr); // FIXME: Check whether -1 is necessary - - printf("\ndistance = %d\n", dist); - + auto dist = thrust::distance(has_edge.begin(), itr); return dist; - //return 0; - - - - - - } else { - auto edges_not_overcomp = thrust::remove_if( - handle.get_thrust_policy(), - thrust::make_zip_iterator(set_a_query_edges, - set_b_query_edges), - thrust::make_zip_iterator(set_a_query_edges + buffer_size, - set_b_query_edges + buffer_size), - [set_c_weak_edges_first = - thrust::make_zip_iterator(set_c_weak_edges_srcs.begin(), set_c_weak_edges_dsts.begin()), - set_c_weak_edges_last = thrust::make_zip_iterator(set_c_weak_edges_srcs.end(), - set_c_weak_edges_dsts.end())] __device__(auto e) { - auto potential_edge = thrust::get<0>(e); - auto potential_or_incoming_edge = thrust::make_tuple(thrust::get<0>(potential_edge), thrust::get<1>(potential_edge)); - if constexpr (is_q_r_edge) { - potential_or_incoming_edge = thrust::make_tuple(thrust::get<1>(potential_edge), thrust::get<0>(potential_edge)); - }; - - /* - auto itr = thrust::lower_bound( - thrust::seq, set_c_weak_edges_first, set_c_weak_edges_last, potential_or_incoming_edge); - */ - return thrust::binary_search( - thrust::seq, set_c_weak_edges_first, set_c_weak_edges_last, potential_or_incoming_edge); - }); - auto dist = thrust::distance(thrust::make_zip_iterator(set_a_query_edges, - set_b_query_edges), - edges_not_overcomp); - - printf("\nlegacy - distance = %d\n", dist); - return dist; } } @@ -230,20 +204,20 @@ struct extract_edges_and_triangle_counts { template struct extract_edges_to_q_r { - 
raft::device_span vertex_q_r{}; + raft::device_span vertex_q_r_set{}; __device__ thrust::optional> operator()( auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const { - auto itr_src = thrust::find( - thrust::seq, vertex_q_r.begin(), vertex_q_r.end(), src); + auto has_src = thrust::binary_search( + thrust::seq, vertex_q_r_set.begin(), vertex_q_r_set.end(), src); - auto itr_dst = thrust::find( - thrust::seq, vertex_q_r.begin(), vertex_q_r.end(), dst); + auto has_dst = thrust::binary_search( + thrust::seq, vertex_q_r_set.begin(), vertex_q_r_set.end(), dst); - if (itr_src != vertex_q_r.end() && *itr_src == src) { + if (has_src) { return thrust::optional>{thrust::make_tuple(src, dst)}; - } else if (itr_dst != vertex_q_r.end() && *itr_dst == dst) { + } else if (has_dst) { return thrust::optional>{thrust::make_tuple(src, dst)}; } else { return thrust::nullopt; @@ -349,8 +323,6 @@ struct extract_q_idx_closing { EdgeIterator major_weak_edgelist_dsts_tag_first{}; EdgeIterator major_weak_edgelist_dsts_tag_last{}; raft::device_span major_weak_edgelist_srcs{}; - raft::device_span weak_edgelist_dsts{}; - raft::device_span weak_edgelist_tags{}; // FIXME: keep this when performing chunking return_type __device__ operator()(thrust::tuple tagged_src, vertex_t dst, @@ -567,10 +539,6 @@ accumulate_triangles_p_q_or_q_r(raft::handle_t const& handle, auto weak_edgelist_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - //printf("\nin 'accumulate_triangles_p_q_or_q_r' and size = %d\n", weak_edgelist_srcs.size()); - - // Call nbr_intersection unroll (p, q) and (q, r) edges auto [intersection_offsets, intersection_indices] = detail::nbr_intersection(handle, graph_view, @@ -581,8 +549,6 @@ accumulate_triangles_p_q_or_q_r(raft::handle_t const& handle, //do_expensive_check : FIXME true); - //std::cout<< "The intersection size for (p, q) or (q, r) edges = " << 
intersection_indices.size() << std::endl; - // Generate (p, q) edges auto vertex_pair_buffer_p_q = allocate_dataframe_buffer>(intersection_indices.size(), handle.get_stream()); @@ -849,7 +815,6 @@ k_truss(raft::handle_t const& handle, cugraph::fill_edge_property(handle, cur_graph_view, true, edge_mask); while (true) { - // extract the edges that have counts less than k - 2. Those edges will be unrolled auto [weak_edgelist_srcs, weak_edgelist_dsts] = extract_transform_e(handle, cur_graph_view, @@ -869,7 +834,7 @@ k_truss(raft::handle_t const& handle, thrust::sort(handle.get_thrust_policy(), weak_edgelist_first, weak_edgelist_first + weak_edgelist_srcs.size()); - + // Find intersection edges size_t prev_chunk_size = 0; size_t num_remaining_weak_edges = weak_edgelist_srcs.size(); @@ -883,29 +848,9 @@ k_truss(raft::handle_t const& handle, if constexpr (multi_gpu) { num_chunks = host_scalar_allreduce(handle.get_comms(), num_chunks, raft::comms::op_t::SUM, handle.get_stream()); } - - printf("\nnum_chunks = %d\n", num_chunks); - - // FIXME: In case some ranks have no weak edges to process - // Or simply in the for loop set i <= 0 but need to make sure the chunking process is not broken - /* - if (num_chunks == 0) { - num_chunks = 1; - } - */ - + for (size_t i = 0; i < num_chunks; ++i) { - printf("\n in for loop chunk, i = %d, num_chunks = %d\n", i, num_chunks); auto chunk_size = std::min(edges_to_intersect_per_iteration, num_remaining_weak_edges); - //RAFT_CUDA_TRY(cudaDeviceSynchronize()); - //printf("\ntracking hang\n"); - - //auto x = raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()); - //auto x = weak_edgelist_srcs.size(); - //RAFT_CUDA_TRY(cudaDeviceSynchronize()); - //printf("\nafter tracking hang, size = %d\n", x); - - //#if 0 auto [vertex_pair_buffer_p_q, vertex_pair_buffer_p_r_edge_p_q, vertex_pair_buffer_q_r_edge_p_q] = accumulate_triangles_p_q_or_q_r>(size_t{0}, handle.get_stream())), multi_gpu>( handle, cur_graph_view, @@ -914,13 +859,6 
@@ k_truss(raft::handle_t const& handle, prev_chunk_size, chunk_size, do_expensive_check); - - //#if 0 - //raft::print_device_vector("vertex_pair_buffer_p_q_srcs", std::get<0>(vertex_pair_buffer_p_q).data(), std::get<0>(vertex_pair_buffer_p_q).size(), std::cout); - //raft::print_device_vector("vertex_pair_buffer_p_q_dsts", std::get<1>(vertex_pair_buffer_p_q).data(), std::get<1>(vertex_pair_buffer_p_q).size(), std::cout); - - //raft::print_device_vector("vertex_pair_buffer_p_r_edge_p_q_srcs", std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size(), std::cout); - //raft::print_device_vector("vertex_pair_buffer_p_r_edge_p_q_dsts", std::get<1>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_p_r_edge_p_q).size(), std::cout); rmm::device_uvector vertex_pair_buffer_p_r_edge_p_q_srcs(0, handle.get_stream()); rmm::device_uvector vertex_pair_buffer_p_r_edge_p_q_dsts(0, handle.get_stream()); @@ -955,19 +893,6 @@ k_truss(raft::handle_t const& handle, std::nullopt, cur_graph_view.vertex_partition_range_lasts()); } - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - /* - std::cout << "Unrolling (p, q) edges" << std::endl; - - raft::print_device_vector("vertex_pair_buffer_p_q", std::get<0>(vertex_pair_buffer_p_q).data(), std::get<0>(vertex_pair_buffer_p_q).size(), std::cout); - raft::print_device_vector("vertex_pair_buffer_p_q", std::get<1>(vertex_pair_buffer_p_q).data(), std::get<1>(vertex_pair_buffer_p_q).size(), std::cout); - printf("\n"); - raft::print_device_vector("vertex_pair_buffer_p_r_edge_p_q_srcs", vertex_pair_buffer_p_r_edge_p_q_srcs.data(), vertex_pair_buffer_p_r_edge_p_q_srcs.size(), std::cout); - raft::print_device_vector("vertex_pair_buffer_p_r_edge_p_q_dsts", vertex_pair_buffer_p_r_edge_p_q_dsts.data(), vertex_pair_buffer_p_r_edge_p_q_dsts.size(), std::cout); - printf("\n"); - raft::print_device_vector("vertex_pair_buffer_q_r_edge_p_q_srcs", vertex_pair_buffer_q_r_edge_p_q_srcs.data(), 
vertex_pair_buffer_q_r_edge_p_q_srcs.size(), std::cout); - raft::print_device_vector("vertex_pair_buffer_q_r_edge_p_q_dsts", vertex_pair_buffer_q_r_edge_p_q_dsts.data(), vertex_pair_buffer_q_r_edge_p_q_dsts.size(), std::cout); - */ decrease_triangle_count( handle, @@ -997,27 +922,8 @@ k_truss(raft::handle_t const& handle, num_remaining_weak_edges -= chunk_size; } - - //#if 0 - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - //printf("\nafter unrolling (p, q) edges\n"); - auto [srcs_0, dsts_0, count_0] = extract_transform_e(handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - //view_concat(edge_triangle_counts.view(), modified_triangle_count.view()), - edge_triangle_counts.view(), - extract_edges_and_triangle_counts{}); - /* - raft::print_device_vector("unrolled_srcs", srcs_0.data(), srcs_0.size(), std::cout); - raft::print_device_vector("unrolled_dsts", dsts_0.data(), dsts_0.size(), std::cout); - raft::print_device_vector("unrolled_n_tr", count_0.data(), count_0.size(), std::cout); - */ - - - //#if 0 - // Iterate over unique vertices that appear as either q or r + // Iterate over unique weak edges' endpoints that appear as either q or r rmm::device_uvector unique_weak_edgelist_srcs(weak_edgelist_srcs.size(), handle.get_stream()); rmm::device_uvector unique_weak_edgelist_dsts(weak_edgelist_dsts.size(), handle.get_stream()); @@ -1053,110 +959,75 @@ k_truss(raft::handle_t const& handle, unique_weak_edgelist_srcs.resize(num_unique_weak_edgelist_srcs, handle.get_stream()); unique_weak_edgelist_dsts.resize(num_unique_weak_edgelist_dsts, handle.get_stream()); - rmm::device_uvector vertex_q_r(num_unique_weak_edgelist_srcs + num_unique_weak_edgelist_dsts, handle.get_stream()); + // Create a vertex set composed of edge endpoints that are either in the q or r set + rmm::device_uvector vertex_q_r_set(num_unique_weak_edgelist_srcs + num_unique_weak_edgelist_dsts, handle.get_stream()); auto vertex_q_r_end = 
thrust::set_union(handle.get_thrust_policy(), unique_weak_edgelist_srcs.begin(), unique_weak_edgelist_srcs.end(), unique_weak_edgelist_dsts.begin(), unique_weak_edgelist_dsts.end(), - vertex_q_r.begin()); + vertex_q_r_set.begin()); - vertex_q_r.resize(thrust::distance(vertex_q_r.begin(), vertex_q_r_end), handle.get_stream()); + vertex_q_r_set.resize(thrust::distance(vertex_q_r_set.begin(), vertex_q_r_end), handle.get_stream()); - thrust::sort(handle.get_thrust_policy(), vertex_q_r.begin(), vertex_q_r.end()); + thrust::sort(handle.get_thrust_policy(), vertex_q_r_set.begin(), vertex_q_r_set.end()); auto weak_unique_v_end = thrust::unique( handle.get_thrust_policy(), - vertex_q_r.begin(), - vertex_q_r.end()); + vertex_q_r_set.begin(), + vertex_q_r_set.end()); - vertex_q_r.resize(thrust::distance(vertex_q_r.begin(), weak_unique_v_end), handle.get_stream()); - - // FIXME: perform all to all 'vertex_q_r'. ******************************** - // FIXME: Might not be able to perform this in chunk for MG - // e.g: giving 4 weak edges where 2 belongs to the same triangle. If we were to process - // each of these edges in different batches, we might not be able to find a triangle - // Need a view of the whole graph to find triangles. But why was batching working in SG with - // nbr_intersection in case 1 ?(because I already have a view of the whole graph). For - // case 2 we can still process the edges in batches as long as we already created the full csc graph - // Isn't better to just create the csc graph with all edges at this point. - // Cannot create partial CSR, need the full one. Can we create ehe CSC in chunks by adding set of - // edges at a time? 
+ vertex_q_r_set.resize(thrust::distance(vertex_q_r_set.begin(), weak_unique_v_end), handle.get_stream()); if constexpr (multi_gpu) { - auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); // FIXME: for debugging - // Get global weak_edgelist - // FIXME: Perform all-to-all in chunks - auto global_vertex_q_r = cugraph::detail::device_allgatherv( - handle, comm, raft::device_span(vertex_q_r.data(), vertex_q_r.size())); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + // Perform all-to-all in chunks across minor comm + auto minor_vertex_q_r_set = cugraph::detail::device_allgatherv( + handle, minor_comm, raft::device_span(vertex_q_r_set.data(), vertex_q_r_set.size())); - thrust::sort(handle.get_thrust_policy(), global_vertex_q_r.begin(), global_vertex_q_r.end()); + thrust::sort(handle.get_thrust_policy(), minor_vertex_q_r_set.begin(), minor_vertex_q_r_set.end()); weak_unique_v_end = thrust::unique( handle.get_thrust_policy(), - global_vertex_q_r.begin(), - global_vertex_q_r.end()); + minor_vertex_q_r_set.begin(), + minor_vertex_q_r_set.end()); - global_vertex_q_r.resize(thrust::distance(global_vertex_q_r.begin(), weak_unique_v_end), handle.get_stream()); + minor_vertex_q_r_set.resize(thrust::distance(minor_vertex_q_r_set.begin(), weak_unique_v_end), handle.get_stream()); - //raft::print_device_vector("1_global_vertex_q_r", global_vertex_q_r.data(), global_vertex_q_r.size(), std::cout); - - // FIXME: Can be very expensive and increase peak memory - vertex_q_r.resize(global_vertex_q_r.size(), handle.get_stream()); + vertex_q_r_set.resize(minor_vertex_q_r_set.size(), handle.get_stream()); thrust::copy( handle.get_thrust_policy(), - global_vertex_q_r.begin(), - global_vertex_q_r.end(), - vertex_q_r.begin()); + minor_vertex_q_r_set.begin(), + minor_vertex_q_r_set.end(), + vertex_q_r_set.begin()); } - //raft::print_device_vector("2_vertex_q_r", vertex_q_r.data(), vertex_q_r.size(), std::cout); - 
weak_edgelist_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); // FIXME: is this necessary ? - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - //std::cout << "before extracting edges" << std::endl; - auto [srcs_to_q_r, dsts_to_q_r] = extract_transform_e(handle, + auto [srcs_in_q_r_set, dsts_in_q_r_set] = extract_transform_e(handle, cur_graph_view, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), cugraph::edge_dummy_property_t{}.view(), // FIXME: Lambda function instead of functor - extract_edges_to_q_r{raft::device_span(vertex_q_r.data(), vertex_q_r.size())}); + extract_edges_to_q_r{raft::device_span(vertex_q_r_set.data(), vertex_q_r_set.size())}); - - - - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - /* - std::cout << "arrays to be shuffled" << std::endl; - raft::print_device_vector("vertex_q_r", vertex_q_r.data(), vertex_q_r.size(), std::cout); - //raft::print_device_vector("vertex_q_r", vertex_q_r.data(), vertex_q_r.size(), std::cout); - raft::print_device_vector("srcs_to_q_r", srcs_to_q_r.data(), srcs_to_q_r.size(), std::cout); - raft::print_device_vector("dsts_to_q_r", dsts_to_q_r.data(), dsts_to_q_r.size(), std::cout); - */ if constexpr (multi_gpu) { - std::tie(dsts_to_q_r, srcs_to_q_r, std::ignore, std::ignore, std::ignore, std::ignore) = + std::tie(dsts_in_q_r_set, srcs_in_q_r_set, std::ignore, std::ignore, std::ignore, std::ignore) = detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( handle, - std::move(dsts_to_q_r), - std::move(srcs_to_q_r), + std::move(dsts_in_q_r_set), + std::move(srcs_in_q_r_set), std::nullopt, std::nullopt, std::nullopt); } - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - //printf("\ndone shuffling\n"); std::optional> graph_q_r{std::nullopt}; std::optional> renumber_map_q_r{std::nullopt}; @@ -1164,8 +1035,8 @@ k_truss(raft::handle_t const& handle, create_graph_from_edgelist( handle, 
std::nullopt, - std::move(dsts_to_q_r), - std::move(srcs_to_q_r), + std::move(dsts_in_q_r_set), + std::move(srcs_in_q_r_set), std::nullopt, std::nullopt, std::nullopt, @@ -1231,40 +1102,24 @@ k_truss(raft::handle_t const& handle, } prev_chunk_size = 0; num_remaining_weak_edges = weak_edgelist_size; - - // FIXME: No need to recompute this. It's the same value as above when unrolling (p, q) edges - num_chunks = - raft::div_rounding_up_safe(weak_edgelist_size, edges_to_intersect_per_iteration); - if (num_chunks == 0) { - num_chunks = 1; + if constexpr (multi_gpu) { + num_chunks = host_scalar_allreduce(handle.get_comms(), num_chunks, raft::comms::op_t::SUM, handle.get_stream()); } - - //auto sorted_weak_edgelist_srcs = thrust::get<0>(weak_edgelist_first.get_iterator_tuple()); // FIXME: Remove this - //auto sorted_weak_edgelist_dsts = thrust::get<1>(weak_edgelist_first.get_iterator_tuple()); - for (size_t i = 0; i < num_chunks; ++i) { auto chunk_size = std::min(edges_to_intersect_per_iteration, num_remaining_weak_edges); - // Find intersection of weak edges auto [vertex_pair_buffer_q_r, vertex_pair_buffer_p_q_edge_q_r, vertex_pair_buffer_p_r_edge_q_r] = accumulate_triangles_p_q_or_q_r>(size_t{0}, handle.get_stream())), multi_gpu>( handle, csc_q_r_graph_view, - //raft::device_span(sorted_weak_edgelist_srcs, weak_edgelist_size), - //raft::device_span(sorted_weak_edgelist_dsts, weak_edgelist_size), raft::device_span(renumbered_weak_edgelist_srcs.data(), renumbered_weak_edgelist_srcs.size()), raft::device_span(renumbered_weak_edgelist_dsts.data(), renumbered_weak_edgelist_dsts.size()), prev_chunk_size, chunk_size, do_expensive_check); - rmm::device_uvector vertex_pair_buffer_p_q_edge_q_r_srcs(0, handle.get_stream()); - rmm::device_uvector vertex_pair_buffer_p_q_edge_q_r_dsts(0, handle.get_stream()); - rmm::device_uvector vertex_pair_buffer_p_r_edge_q_r_srcs(0, handle.get_stream()); - rmm::device_uvector vertex_pair_buffer_p_r_edge_q_r_dsts(0, handle.get_stream()); if 
constexpr (multi_gpu) { - // Unrenumber auto vertex_partition_range_lasts = std::make_optional>( csc_q_r_graph_view.vertex_partition_range_lasts()); @@ -1300,12 +1155,6 @@ k_truss(raft::handle_t const& handle, (*renumber_map_q_r).data(), *vertex_partition_range_lasts, true); - - //printf("\ndebugging (q, r) edges unrolling\n"); - - //raft::print_device_vector("vertex_pair_buffer_q_r_srcs_b_u", std::get<0>(vertex_pair_buffer_q_r).data(), std::get<0>(vertex_pair_buffer_q_r).size(), std::cout); - //raft::print_device_vector("vertex_pair_buffer_q_r_dsts_b_u", std::get<1>(vertex_pair_buffer_q_r).data(), std::get<1>(vertex_pair_buffer_q_r).size(), std::cout); - //printf("(q, r) edge size = %d\n", std::get<0>(vertex_pair_buffer_q_r).size()); unrenumber_int_vertices(handle, std::get<0>(vertex_pair_buffer_q_r).data(), @@ -1322,63 +1171,55 @@ k_truss(raft::handle_t const& handle, true); } - - //printf("\ndebugging (q, r) edges unrolling\n"); - //raft::print_device_vector("vertex_pair_buffer_q_r_srcs_", std::get<0>(vertex_pair_buffer_q_r).data(), std::get<0>(vertex_pair_buffer_q_r).size(), std::cout); - //raft::print_device_vector("vertex_pair_buffer_q_r_dsts_", std::get<1>(vertex_pair_buffer_q_r).data(), std::get<1>(vertex_pair_buffer_q_r).size(), std::cout); - edge_t num_edges_not_overcomp = 0; if constexpr (multi_gpu) { // Get global weak edges - // FIXME: Retrieve onlu a fraction of the weak edges. - auto& comm = handle.get_comms(); auto const comm_rank = comm.get_rank(); // FIXME: for debugging + // Get global weak_edgelist - // FIXME: Perform all-to-all in chunks - auto chunk_global_weak_edgelist_srcs = cugraph::detail::device_allgatherv( + // FIXME: This operation is too expensive (memory) hence shuffle the weak edges instead to + // the appropriate GPU, check for existance as being part of the weak edge list and shuffle + // the result back. The operation below is only meant for validation purposes and should be + // remove once the statement is validated. 
+ auto global_weak_edgelist_srcs = cugraph::detail::device_allgatherv( handle, comm, raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size())); - // FIXME: Perform all-to-all in chunks - auto chunk_global_weak_edgelist_dsts = cugraph::detail::device_allgatherv( - handle, comm, raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); - - //raft::print_device_vector("chunk_global_weak_edgelist_srcs", chunk_global_weak_edgelist_srcs.data(), chunk_global_weak_edgelist_srcs.size(), std::cout); - //raft::print_device_vector("chunk_global_weak_edgelist_dsts", chunk_global_weak_edgelist_dsts.data(), chunk_global_weak_edgelist_dsts.size(), std::cout); - + auto global_weak_edgelist_dsts = cugraph::detail::device_allgatherv( + handle, comm, raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); // Sort the weak edges if they are not already auto chunk_global_weak_edgelist_first = - thrust::make_zip_iterator(chunk_global_weak_edgelist_srcs.begin(), chunk_global_weak_edgelist_dsts.begin()); + thrust::make_zip_iterator(global_weak_edgelist_srcs.begin(), global_weak_edgelist_dsts.begin()); thrust::sort(handle.get_thrust_policy(), chunk_global_weak_edgelist_first, - chunk_global_weak_edgelist_first + chunk_global_weak_edgelist_srcs.size()); - + chunk_global_weak_edgelist_first + global_weak_edgelist_srcs.size()); num_edges_not_overcomp = remove_overcompensating_edges( handle, size_dataframe_buffer(vertex_pair_buffer_p_q_edge_q_r), get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_q_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_q_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed - raft::device_span(chunk_global_weak_edgelist_srcs.data(), chunk_global_weak_edgelist_srcs.size()), - raft::device_span(chunk_global_weak_edgelist_dsts.data(), 
chunk_global_weak_edgelist_dsts.size()), + raft::device_span(global_weak_edgelist_srcs.data(), global_weak_edgelist_srcs.size()), + raft::device_span(global_weak_edgelist_dsts.data(), global_weak_edgelist_dsts.size()), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), cur_graph_view.vertex_partition_range_lasts() ); - - //std::cout << "num (q, r) edges after removing = " << num_edges_not_overcomp << std::endl; - //printf("\n\n"); + resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_q_r, num_edges_not_overcomp, handle.get_stream()); resize_dataframe_buffer(vertex_pair_buffer_p_r_edge_q_r, num_edges_not_overcomp, handle.get_stream()); - // resize initial (q, r) edges + // Resize initial (q, r) edges // Note: Once chunking is implemented, reconstruct the (q, r) edges only outside // FIXME: No need to reconstruct the third array because we can zip all 3 edges of the triangle // of the chunk's 'for loop' @@ -1397,8 +1238,6 @@ k_truss(raft::handle_t const& handle, return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_edge_q_r[i]), thrust::get<0>(vertex_pair_buffer_p_r_edge_q_r[i])); }); - - } else { num_edges_not_overcomp = @@ -1406,7 +1245,8 @@ k_truss(raft::handle_t const& handle, edge_t, decltype(get_dataframe_buffer_begin(vertex_pair_buffer_q_r)), true, - false // FIXME: Set it to False for now + multi_gpu, + false >( handle, size_dataframe_buffer(vertex_pair_buffer_p_q_edge_q_r), @@ -1414,11 +1254,11 @@ k_truss(raft::handle_t const& handle, get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_q_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), - cur_graph_view.vertex_partition_range_lasts() + raft::device_span(weak_edgelist_srcs.data(), 
weak_edgelist_srcs.size()), // FIXME: Only for MG validation purposes + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), // FIXME: Only for MG validation purposes + cur_graph_view.vertex_partition_range_lasts() // Not needed for SG ); - //std::cout << "num (q, r) edges after removing = " << num_edges_not_overcomp << std::endl; - resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_q_r, num_edges_not_overcomp, handle.get_stream()); resize_dataframe_buffer(vertex_pair_buffer_p_r_edge_q_r, num_edges_not_overcomp, handle.get_stream()); @@ -1439,6 +1279,11 @@ k_truss(raft::handle_t const& handle, }); } + rmm::device_uvector vertex_pair_buffer_p_q_edge_q_r_srcs(0, handle.get_stream()); + rmm::device_uvector vertex_pair_buffer_p_q_edge_q_r_dsts(0, handle.get_stream()); + rmm::device_uvector vertex_pair_buffer_p_r_edge_q_r_srcs(0, handle.get_stream()); + rmm::device_uvector vertex_pair_buffer_p_r_edge_q_r_dsts(0, handle.get_stream()); + if constexpr (multi_gpu) { // Shuffle before updating count rmm::device_uvector vertex_pair_buffer_q_r_srcs(0, handle.get_stream()); @@ -1545,9 +1390,6 @@ k_truss(raft::handle_t const& handle, } - - - //printf("\nafter unrolling (q, r) edges\n"); auto [srcs_1, dsts_1, count_1] = extract_transform_e(handle, cur_graph_view, cugraph::edge_src_dummy_property_t{}.view(), @@ -1556,28 +1398,11 @@ k_truss(raft::handle_t const& handle, edge_triangle_counts.view(), extract_edges_and_triangle_counts{}); - /* - raft::print_device_vector("unrolled_srcs", srcs_1.data(), srcs_1.size(), std::cout); - raft::print_device_vector("unrolled_dsts", dsts_1.data(), dsts_1.size(), std::cout); - raft::print_device_vector("unrolled_n_tr", count_1.data(), count_1.size(), std::cout); - printf("\n"); - */ - - /* - std::cout<< "before zipping edgelist" << std::endl; - raft::print_device_vector("sorted_weak_edgelist_srcs", sorted_weak_edgelist_srcs, weak_edgelist_size, std::cout); - raft::print_device_vector("sorted_weak_edgelist_dsts", 
sorted_weak_edgelist_dsts, weak_edgelist_size, std::cout); - */ weak_edgelist_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); - /* - std::cout<< "after zipping edgelist" << std::endl; - raft::print_device_vector("sorted_weak_edgelist_srcs", sorted_weak_edgelist_srcs, weak_edgelist_size, std::cout); - raft::print_device_vector("sorted_weak_edgelist_dsts", sorted_weak_edgelist_dsts, weak_edgelist_size, std::cout); - */ - //#if 0 + // Unrolling p, r edges - // create pair weak_src, weak_edge_idx + // create pair weak_src, weak_edge_idx (unique) // create a dataframe buffer of size weak_edge_size // FIXME: No need to create a dataframe buffer. We can just zip weak_edgelist_srcs // with a vector counting from 0 .. @@ -1585,62 +1410,43 @@ k_truss(raft::handle_t const& handle, auto vertex_pair_buffer_p_tag = allocate_dataframe_buffer>(weak_edgelist_srcs.size(), handle.get_stream()); - //#if 0 - if constexpr (multi_gpu) { - std::vector h_num_weak_edges = {vertex_t{weak_edgelist_srcs.size()}}; - rmm::device_uvector num_weak_edges(1, handle.get_stream()); - raft::update_device(num_weak_edges.data(), h_num_weak_edges.data(), h_num_weak_edges.size(), handle.get_stream()); - - auto& comm = handle.get_comms(); - auto comm_rank = comm.get_rank(); - // Get global weak_edgelist - auto global_num_weak_edges = cugraph::detail::device_allgatherv( - handle, - comm, - raft::device_span(num_weak_edges.data(), num_weak_edges.size())); - - rmm::device_uvector prefix_sum_global_num_weak_edges(global_num_weak_edges.size(), handle.get_stream()); - thrust::inclusive_scan(handle.get_thrust_policy(), - global_num_weak_edges.begin(), - global_num_weak_edges.end(), - prefix_sum_global_num_weak_edges.begin()); - - /* - std::cout << "weak_edge_list size = " << weak_edgelist_srcs.size() << std::endl; - raft::print_device_vector("sorted_weak_edgelist_srcs", sorted_weak_edgelist_srcs, weak_edgelist_size, std::cout); - 
raft::print_device_vector("sorted_weak_edgelist_dsts", sorted_weak_edgelist_dsts, weak_edgelist_size, std::cout); - raft::print_device_vector("weak_edgelist_srcs", weak_edgelist_srcs.data(), weak_edgelist_srcs.size(), std::cout); - raft::print_device_vector("weak_edgelist_dsts", weak_edgelist_dsts.data(), weak_edgelist_dsts.size(), std::cout); - */ - thrust::tabulate(handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_tag), - get_dataframe_buffer_end(vertex_pair_buffer_p_tag), - [rank = comm_rank, - num_weak_edges = prefix_sum_global_num_weak_edges.begin(), - p = weak_edgelist_srcs.begin()] __device__(auto idx) { - if (rank != 0) { - auto idx_tag = idx + (num_weak_edges[rank - 1]); - return thrust::make_tuple(p[idx], idx_tag); - } - - return thrust::make_tuple(p[idx], idx); - }); - - } else { - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_tag), - get_dataframe_buffer_end(vertex_pair_buffer_p_tag), - [ - p = weak_edgelist_srcs.begin() - ] __device__(auto idx) { - return thrust::make_tuple(p[idx], idx); - }); - } + if constexpr (multi_gpu) { + std::vector h_num_weak_edges = {vertex_t{weak_edgelist_srcs.size()}}; + rmm::device_uvector num_weak_edges(1, handle.get_stream()); - #if 0 - thrust::tabulate( + raft::update_device(num_weak_edges.data(), h_num_weak_edges.data(), h_num_weak_edges.size(), handle.get_stream()); + + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + // Get global weak_edgelist + auto global_num_weak_edges = cugraph::detail::device_allgatherv( + handle, + comm, + raft::device_span(num_weak_edges.data(), num_weak_edges.size())); + + rmm::device_uvector prefix_sum_global_num_weak_edges(global_num_weak_edges.size(), handle.get_stream()); + thrust::inclusive_scan(handle.get_thrust_policy(), + global_num_weak_edges.begin(), + global_num_weak_edges.end(), + prefix_sum_global_num_weak_edges.begin()); + + thrust::tabulate(handle.get_thrust_policy(), + 
get_dataframe_buffer_begin(vertex_pair_buffer_p_tag), + get_dataframe_buffer_end(vertex_pair_buffer_p_tag), + [rank = comm_rank, + num_weak_edges = prefix_sum_global_num_weak_edges.begin(), + p = weak_edgelist_srcs.begin()] __device__(auto idx) { + if (rank != 0) { + auto idx_tag = idx + (num_weak_edges[rank - 1]); + return thrust::make_tuple(p[idx], idx_tag); + } + + return thrust::make_tuple(p[idx], idx); + }); + + } else { + thrust::tabulate( handle.get_thrust_policy(), get_dataframe_buffer_begin(vertex_pair_buffer_p_tag), get_dataframe_buffer_end(vertex_pair_buffer_p_tag), @@ -1649,10 +1455,7 @@ k_truss(raft::handle_t const& handle, ] __device__(auto idx) { return thrust::make_tuple(p[idx], idx); }); - #endif - - //raft::print_device_vector("edge_srcs", std::get<0>(vertex_pair_buffer_p_tag).data(), std::get<0>(vertex_pair_buffer_p_tag).size(), std::cout); - //raft::print_device_vector("vertex_pair_buffer_tag", std::get<1>(vertex_pair_buffer_p_tag).data(), std::get<1>(vertex_pair_buffer_p_tag).size(), std::cout); + } vertex_frontier_t vertex_frontier(handle, 1); rmm::device_uvector tag_cpy(std::get<1>(vertex_pair_buffer_p_tag).size(), handle.get_stream()); @@ -1661,13 +1464,8 @@ k_truss(raft::handle_t const& handle, std::get<1>(vertex_pair_buffer_p_tag).begin(), std::get<1>(vertex_pair_buffer_p_tag).end(), tag_cpy.begin()); - //std::cout << "emptying the vertex frontier" << std::endl; if constexpr (multi_gpu) { - //printf("\nbefore shuffling\n"); - //raft::print_device_vector("b_vertex_pair_buffer_src", std::get<0>(vertex_pair_buffer_p_tag).data(), std::get<0>(vertex_pair_buffer_p_tag).size(), std::cout); - //raft::print_device_vector("b_vertex_pair_buffer_tag", std::get<1>(vertex_pair_buffer_p_tag).data(), std::get<1>(vertex_pair_buffer_p_tag).size(), std::cout); - // Shuffle vertices auto [p_vrtx, p_tag] = detail::shuffle_int_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( @@ -1676,36 +1474,23 @@ k_truss(raft::handle_t const& handle, 
std::move(std::get<1>(vertex_pair_buffer_p_tag)), cur_graph_view.vertex_partition_range_lasts()); - //printf("\nafter shuffling\n"); - raft::print_device_vector("a_vertex_pair_buffer_src", p_vrtx.data(), p_vrtx.size(), std::cout); - raft::print_device_vector("a_vertex_pair_buffer_tag", p_tag.data(), p_tag.size(), std::cout); - vertex_frontier.bucket(0).insert( thrust::make_zip_iterator(p_vrtx.begin(), p_tag.begin()), - //thrust::make_zip_iterator(p_vrtx.begin() + 1, p_tag.begin() + 1) - //thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).begin(), std::get<1>(vertex_pair_buffer_p_tag).begin()) thrust::make_zip_iterator(p_vrtx.end(), p_tag.end()) ); } else { vertex_frontier.bucket(0).insert( thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).begin(), std::get<1>(vertex_pair_buffer_p_tag).begin()), - //thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).begin() + 10, std::get<1>(vertex_pair_buffer_p_tag).begin() + 10) thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).end(), std::get<1>(vertex_pair_buffer_p_tag).end()) ); } - - rmm::device_uvector q(0, handle.get_stream()); rmm::device_uvector idx(0, handle.get_stream()); + auto& comm = handle.get_comms(); // FIXME: remove after debugging - //auto [q, idx] = - auto& comm = handle.get_comms(); - auto comm_rank = comm.get_rank(); - //if (comm_rank == 1) { - printf("\nbefore calling 'extract_transform_v_frontier_outgoing_e'\n"); std::tie(q, idx) = cugraph::extract_transform_v_frontier_outgoing_e( handle, @@ -1716,12 +1501,7 @@ k_truss(raft::handle_t const& handle, cugraph::edge_dummy_property_t{}.view(), extract_q_idx{}, true); - - std::cout << "initial q's size = " << q.size() << std::endl; - - raft::print_device_vector("q", q.data(), q.size(), std::cout); - raft::print_device_vector("i", idx.data(), q.size(), std::cout); - + vertex_frontier.bucket(0).clear(); // Shuffle vertices @@ -1734,11 +1514,8 @@ k_truss(raft::handle_t const& handle, 
vertex_frontier.bucket(0).insert( thrust::make_zip_iterator(q.begin(), idx.begin()), - //thrust::make_zip_iterator(q.begin() + 1, idx.begin() + 1) thrust::make_zip_iterator(q.end(), idx.end()) ); - //} - auto vertex_pair_buffer_p_r = allocate_dataframe_buffer>(0, @@ -1758,81 +1535,30 @@ k_truss(raft::handle_t const& handle, // back in with the chunk global weak edgelist if constexpr (multi_gpu) { - - // Get global weak edges - // FIXME: Retrieve onlu a fraction of the weak edges. - - auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); // FIXME: for debugging - - auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); - auto chunk_major_weak_edgelist_srcs = cugraph::detail::device_allgatherv( - handle, comm, raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size())); + // Get minor weak edges + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto minor_weak_edgelist_srcs = cugraph::detail::device_allgatherv( + handle, minor_comm, raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size())); // FIXME: Perform all-to-all in chunks - auto chunk_major_weak_edgelist_dsts = cugraph::detail::device_allgatherv( - handle, comm, raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); + auto minor_weak_edgelist_dsts = cugraph::detail::device_allgatherv( + handle, minor_comm, raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); - auto chunk_major_weak_edgelist_tags = cugraph::detail::device_allgatherv( - handle, comm, raft::device_span(tag_cpy.data(), tag_cpy.size())); + auto minor_weak_edgelist_tags = cugraph::detail::device_allgatherv( + handle, minor_comm, raft::device_span(tag_cpy.data(), tag_cpy.size())); auto major_weak_edgelist_first = - thrust::make_zip_iterator(chunk_major_weak_edgelist_srcs.begin(), chunk_major_weak_edgelist_dsts.begin()); + thrust::make_zip_iterator(minor_weak_edgelist_srcs.begin(), 
minor_weak_edgelist_dsts.begin()); auto major_weak_edgelist_dsts_tags_first = - thrust::make_zip_iterator(chunk_major_weak_edgelist_dsts.begin(), chunk_major_weak_edgelist_tags.begin()); + thrust::make_zip_iterator(minor_weak_edgelist_dsts.begin(), minor_weak_edgelist_tags.begin()); thrust::sort_by_key(handle.get_thrust_policy(), major_weak_edgelist_dsts_tags_first, - major_weak_edgelist_dsts_tags_first + chunk_major_weak_edgelist_dsts.size(), - chunk_major_weak_edgelist_srcs.begin() + major_weak_edgelist_dsts_tags_first + minor_weak_edgelist_dsts.size(), + minor_weak_edgelist_srcs.begin() ); - // Get global weak_edgelist - // FIXME: Perform all-to-all in chunks - auto chunk_global_weak_edgelist_srcs = cugraph::detail::device_allgatherv( - handle, comm, raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size())); - // FIXME: Perform all-to-all in chunks - auto chunk_global_weak_edgelist_dsts = cugraph::detail::device_allgatherv( - handle, comm, raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); - - raft::print_device_vector("weak_edgelist_tags", tag_cpy.data(), tag_cpy.size(), std::cout); - auto chunk_global_weak_edgelist_tags = cugraph::detail::device_allgatherv( - handle, comm, raft::device_span(tag_cpy.data(), tag_cpy.size())); - - - - // Sort the weak edges if they are not already - auto chunk_global_weak_edgelist_first = - thrust::make_zip_iterator(chunk_global_weak_edgelist_srcs.begin(), chunk_global_weak_edgelist_dsts.begin()); - /* - thrust::sort_by_key(handle.get_thrust_policy(), - chunk_global_weak_edgelist_first, - chunk_global_weak_edgelist_first + chunk_global_weak_edgelist_srcs.size(), - chunk_global_weak_edgelist_tags.begin()); - */ - - /* - thrust::sort_by_key(handle.get_thrust_policy(), - chunk_global_weak_edgelist_tags.begin(), - chunk_global_weak_edgelist_tags.end(), - chunk_global_weak_edgelist_first - ); - */ - - - raft::print_device_vector("chunk_global_weak_edgelist_srcs", 
chunk_global_weak_edgelist_srcs.data(), chunk_global_weak_edgelist_srcs.size(), std::cout); - raft::print_device_vector("chunk_global_weak_edgelist_dsts", chunk_global_weak_edgelist_dsts.data(), chunk_global_weak_edgelist_dsts.size(), std::cout); - raft::print_device_vector("chunk_global_weak_edgelist_tags", chunk_global_weak_edgelist_tags.data(), chunk_global_weak_edgelist_tags.size(), std::cout); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - printf("\nnumber of weak edges before 'extract_q_idx_closing' = %d\n", chunk_global_weak_edgelist_dsts.size()); - auto& comm_ = handle.get_comms(); - auto const comm_rank_ = comm.get_rank(); // FIXME: for debugging, remove after - - printf("\nrank %d bucket_size = %d\n", comm_rank_, vertex_frontier.bucket(0).size()); - - //#if 0 - // FIXME: Might not even need the 'idx_closing' anymore - remove it + // FIXME: 'idx_closing' no longer needed - remove it auto [q_closing, r_closing, p_closing, idx_closing] = cugraph::extract_transform_v_frontier_outgoing_e( handle, @@ -1841,38 +1567,18 @@ k_truss(raft::handle_t const& handle, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), cugraph::edge_dummy_property_t{}.view(), - extract_q_idx_closing{ + extract_q_idx_closing{ major_weak_edgelist_dsts_tags_first, - major_weak_edgelist_dsts_tags_first + chunk_major_weak_edgelist_dsts.size(), - raft::device_span(chunk_major_weak_edgelist_srcs.data(), chunk_major_weak_edgelist_srcs.size()), - raft::device_span(chunk_global_weak_edgelist_dsts.data(), chunk_global_weak_edgelist_dsts.size()), // FIXME: Unused, remove afterwards - raft::device_span(chunk_global_weak_edgelist_tags.data(), chunk_global_weak_edgelist_tags.size()) + major_weak_edgelist_dsts_tags_first + minor_weak_edgelist_dsts.size(), + raft::device_span(minor_weak_edgelist_srcs.data(), minor_weak_edgelist_srcs.size()), }, true); - //#if 0 - raft::print_device_vector("q_closing", q_closing.data(), q_closing.size(), std::cout); - 
raft::print_device_vector("idx_closing", idx_closing.data(), idx_closing.size(), std::cout); - - std::cout << "num_closing_edges = " << q_closing.size() << std::endl; - - // extract pair (p, r) - /* - auto vertex_pair_buffer_p_r = - allocate_dataframe_buffer>(q_closing.size(), - handle.get_stream()); - */ resize_dataframe_buffer(vertex_pair_buffer_p_r, q_closing.size(), handle.get_stream()); - /* - thrust::sort_by_key(handle.get_thrust_policy(), - chunk_global_weak_edgelist_tags.begin(), - chunk_global_weak_edgelist_tags.end(), - chunk_global_weak_edgelist_first - ); - */ + thrust::copy( handle.get_thrust_policy(), thrust::make_zip_iterator(p_closing.begin(), r_closing.begin()), @@ -1880,41 +1586,7 @@ k_truss(raft::handle_t const& handle, thrust::make_zip_iterator( std::get<0>(vertex_pair_buffer_p_r).begin(), std::get<1>(vertex_pair_buffer_p_r).begin()) ); - /* - auto closing_r_tag = thrust::make_zip_iterator(r_closing.begin(), idx_closing.begin()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r), - get_dataframe_buffer_end(vertex_pair_buffer_p_r), - generate_p_r{ - //weak_edgelist_first, // FIXME: might need to use - //major_weak_edgelist_first, - //major_weak_edgelist_first + chunk_major_weak_edgelist_tags.size() - //chunk_global_weak_edgelist_first, - major_weak_edgelist_first, - major_weak_edgelist_dsts_tags_first, - major_weak_edgelist_dsts_tags_first + chunk_major_weak_edgelist_tags.size(), - closing_r_tag, - raft::device_span(idx_closing.data(), - idx_closing.size()), - raft::device_span(chunk_global_weak_edgelist_tags.data(), // FIXME: Unused - chunk_global_weak_edgelist_tags.size()) - }); - */ - - //raft::print_device_vector("check_vertex_pair_buffer_p_r_tags", chunk_major_weak_edgelist_tags.data(), chunk_major_weak_edgelist_tags.size(), std::cout); - - //raft::print_device_vector("check_vertex_pair_buffer_p_r_srcs", std::get<0>(vertex_pair_buffer_p_r).data(), 
std::get<0>(vertex_pair_buffer_p_r).size(), std::cout); - //raft::print_device_vector("check_vertex_pair_buffer_p_r_dsts", std::get<1>(vertex_pair_buffer_p_r).data(), std::get<1>(vertex_pair_buffer_p_r).size(), std::cout); - - // construct pair (p, q) - /* - auto vertex_pair_buffer_p_q_edge_p_r = - allocate_dataframe_buffer>(q_closing.size(), - handle.get_stream()); - */ - resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_p_r, q_closing.size(), handle.get_stream()); @@ -1927,55 +1599,10 @@ k_truss(raft::handle_t const& handle, std::get<0>(vertex_pair_buffer_p_q_edge_p_r).begin(), std::get<1>(vertex_pair_buffer_p_q_edge_p_r).begin()) ); - /* - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r), - get_dataframe_buffer_end(vertex_pair_buffer_p_q_edge_p_r), - generate_p_q_q_r{ - //major_weak_edgelist_first, - chunk_global_weak_edgelist_first, - raft::device_span(q_closing.data(), - q_closing.size()), - raft::device_span(idx_closing.data(), - idx_closing.size()), - raft::device_span(chunk_global_weak_edgelist_tags.data(), - chunk_global_weak_edgelist_tags.size()) - }); - */ - - - std::cout << "Before remove overcompensating edges when unrolling (p, r) edges" << std::endl; - - raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r_srcs", std::get<0>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_edge_p_r).size(), std::cout); - raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r_dsts", std::get<1>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<1>(vertex_pair_buffer_p_q_edge_p_r).size(), std::cout); - - // construct pair (q, r) - /* - auto vertex_pair_buffer_q_r_edge_p_r = - allocate_dataframe_buffer>(q_closing.size(), - handle.get_stream()); - */ - - printf("\nbefore resizing = %d, after resizing = %d\n", size_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_r), q_closing.size()); - + resize_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_r, q_closing.size(), 
handle.get_stream()); - /* - printf("\nDone resizing\n"); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_r), - get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_r), - generate_p_q_q_r{ - chunk_global_weak_edgelist_first, - raft::device_span(q_closing.data(), - q_closing.size()), - raft::device_span(idx_closing.data(), - idx_closing.size()) - }); - */ thrust::copy( handle.get_thrust_policy(), @@ -1984,83 +1611,61 @@ k_truss(raft::handle_t const& handle, thrust::make_zip_iterator( std::get<0>(vertex_pair_buffer_q_r_edge_p_r).begin(), std::get<1>(vertex_pair_buffer_q_r_edge_p_r).begin()) ); - - printf("\nDone generating 'p_q_q_r'\n"); - raft::print_device_vector("vertex_pair_buffer_q_r_for_p_r_srcs", std::get<0>(vertex_pair_buffer_q_r_edge_p_r).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_r).size(), std::cout); - raft::print_device_vector("vertex_pair_buffer_q_r_for_p_r_dsts", std::get<1>(vertex_pair_buffer_q_r_edge_p_r).data(), std::get<1>(vertex_pair_buffer_q_r_edge_p_r).size(), std::cout); - - - + auto& comm = handle.get_comms(); // FIXME: Only using global comm for testing purposes + // Get global weak_edgelist + // FIXME: Perform all-to-all in chunks + auto global_weak_edgelist_srcs = cugraph::detail::device_allgatherv( + handle, comm, raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size())); + // FIXME: Perform all-to-all in chunks + auto global_weak_edgelist_dsts = cugraph::detail::device_allgatherv( + handle, comm, raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); - std::cout << "num (p, r) edges before removing = " << q_closing.size() << std::endl; - //raft::print_device_vector("sorted_weak_edgelist_srcs", sorted_weak_edgelist_srcs, weak_edgelist_size, std::cout); - //raft::print_device_vector("sorted_weak_edgelist_dsts", sorted_weak_edgelist_dsts, weak_edgelist_size, std::cout); - - // FIXME: Check if neccessary - /* + // Sort the weak edges if 
they are not already auto chunk_global_weak_edgelist_first = - thrust::make_zip_iterator(chunk_global_weak_edgelist_srcs.begin(), chunk_global_weak_edgelist_dsts.begin()); - */ + thrust::make_zip_iterator(global_weak_edgelist_srcs.begin(), global_weak_edgelist_dsts.begin()); - // Resort the edges. thrust::sort(handle.get_thrust_policy(), chunk_global_weak_edgelist_first, - chunk_global_weak_edgelist_first + chunk_global_weak_edgelist_srcs.size()); - - printf("\nDone sorting\n"); + chunk_global_weak_edgelist_first + global_weak_edgelist_srcs.size()); + auto num_edges_not_overcomp_p_q = remove_overcompensating_edges( handle, q_closing.size(), get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed - raft::device_span(chunk_global_weak_edgelist_srcs.data(), chunk_global_weak_edgelist_srcs.size()), - raft::device_span(chunk_global_weak_edgelist_dsts.data(), chunk_global_weak_edgelist_dsts.size()), - //raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - //raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), + raft::device_span(global_weak_edgelist_srcs.data(), global_weak_edgelist_srcs.size()), + raft::device_span(global_weak_edgelist_dsts.data(), global_weak_edgelist_dsts.size()), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), cur_graph_view.vertex_partition_range_lasts() ); - std::cout << "1) num (p, r) edges after removing = " << num_edges_not_overcomp_p_q << std::endl; - - resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); - resize_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_r, num_edges_not_overcomp_p_q, 
handle.get_stream()); - - raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r_srcs", std::get<0>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_edge_p_r).size(), std::cout); - raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r_dsts", std::get<1>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<1>(vertex_pair_buffer_p_q_edge_p_r).size(), std::cout); - - raft::print_device_vector("vertex_pair_buffer_q_r_for_p_r_srcs", std::get<0>(vertex_pair_buffer_q_r_edge_p_r).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_r).size(), std::cout); - raft::print_device_vector("vertex_pair_buffer_q_r_for_p_r_dsts", std::get<1>(vertex_pair_buffer_q_r_edge_p_r).data(), std::get<1>(vertex_pair_buffer_q_r_edge_p_r).size(), std::cout); - - //break; // FIXME: Break here ************** - std::cout << "Before remove overcompensating edges when unrolling (p, r) edges" << std::endl; - raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r_srcs", std::get<0>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_edge_p_r).size(), std::cout); - raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r_dsts", std::get<1>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<1>(vertex_pair_buffer_p_q_edge_p_r).size(), std::cout); - - //break; // FIXME: Break here ************** auto num_edges_not_overcomp_q_r = remove_overcompensating_edges( handle, num_edges_not_overcomp_p_q, get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed - raft::device_span(chunk_global_weak_edgelist_srcs.data(), chunk_global_weak_edgelist_srcs.size()), - raft::device_span(chunk_global_weak_edgelist_dsts.data(), chunk_global_weak_edgelist_dsts.size()), + 
raft::device_span(global_weak_edgelist_srcs.data(), global_weak_edgelist_srcs.size()), + raft::device_span(global_weak_edgelist_dsts.data(), global_weak_edgelist_dsts.size()), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), cur_graph_view.vertex_partition_range_lasts()); - std::cout << "2) num (p, r) edges after removing = " << num_edges_not_overcomp_q_r << std::endl; - resize_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); @@ -2078,12 +1683,6 @@ k_truss(raft::handle_t const& handle, return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_edge_p_r[i]), thrust::get<1>(vertex_pair_buffer_q_r_edge_p_r[i])); }); - std::cout << "after removing overcompensating edges" << std::endl; - raft::print_device_vector("check_vertex_pair_buffer_p_r_srcs", std::get<0>(vertex_pair_buffer_p_r).data(), std::get<0>(vertex_pair_buffer_p_r).size(), std::cout); - raft::print_device_vector("check_vertex_pair_buffer_p_r_dsts", std::get<1>(vertex_pair_buffer_p_r).data(), std::get<1>(vertex_pair_buffer_p_r).size(), std::cout); - raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r_srcs", std::get<0>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_edge_p_r).size(), std::cout); - raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r_dsts", std::get<1>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<1>(vertex_pair_buffer_p_q_edge_p_r).size(), std::cout); - } else { // FIXME: refactor SG to use r_closing @@ -2168,7 +1767,8 @@ k_truss(raft::handle_t const& handle, edge_t, decltype(get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r)), false, - false // FIXME: Set it to False for now + multi_gpu, + false >( handle, q_closing.size(), @@ -2176,6 +1776,8 @@ k_truss(raft::handle_t const& 
handle, get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), // FIXME: Only for MG validation purposes + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), // FIXME: Only for MG validation purposes cur_graph_view.vertex_partition_range_lasts()); resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); @@ -2190,7 +1792,8 @@ k_truss(raft::handle_t const& handle, edge_t, decltype(get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r)), false, - false // FIXME: Set it to False for now + multi_gpu, + false >( handle, num_edges_not_overcomp_p_q, @@ -2198,6 +1801,8 @@ k_truss(raft::handle_t const& handle, get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), // FIXME: Only for MG validation purposes + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), // FIXME: Only for MG validation purposes cur_graph_view.vertex_partition_range_lasts()); resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); From 05873bc88f6d67a4b5b13302e26be76d84c83dab Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 10 Jul 2024 15:22:57 -0700 Subject: [PATCH 65/93] update thrust function used --- cpp/CMakeLists.txt | 2 +- cpp/src/community/k_truss_impl.cuh | 345 ++++++--------------------- 
cpp/tests/CMakeLists.txt | 2 +- cpp/tests/community/k_truss_test.cpp | 2 +- 4 files changed, 79 insertions(+), 272 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index a7096f3f722..64a5fb0f17b 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -284,7 +284,7 @@ set(CUGRAPH_SOURCES src/community/egonet_mg_v32_e32.cu src/community/egonet_mg_v32_e64.cu #src/community/k_truss_sg_v64_e64.cu - #src/community/k_truss_sg_v32_e32.cu + src/community/k_truss_sg_v32_e32.cu #src/community/k_truss_sg_v32_e64.cu #src/community/k_truss_mg_v64_e64.cu src/community/k_truss_mg_v32_e32.cu diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index c66661830ab..93d83480df6 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -44,7 +44,6 @@ namespace cugraph { template -// difference something. edge_t remove_overcompensating_edges(raft::handle_t const& handle, size_t buffer_size, EdgeIterator set_a_query_edges, @@ -111,16 +110,10 @@ edge_t remove_overcompensating_edges(raft::handle_t const& handle, auto set_c_weak_edges_last = thrust::make_zip_iterator(global_set_c_weak_edges_srcs.end(), global_set_c_weak_edges_dsts.end()); auto set_a_query_edges_first = thrust::make_zip_iterator(set_a_query_edges_srcs.begin(), set_a_query_edges_dsts.begin()); - /* - thrust::transform( - handle.get_thrust_policy() - ... - ) - */ - // FIXME: Use thrust::transform instead + // FIXME: Was recommended to use thrust::transform instead but how ? 
thrust::tabulate( handle.get_thrust_policy(), - has_edge.begin(), // FIXME: Properly reconstruct (p, r) even when there is no overcompensation + has_edge.begin(), has_edge.end(), [ set_c_weak_edges_first, @@ -130,50 +123,33 @@ edge_t remove_overcompensating_edges(raft::handle_t const& handle, return thrust::binary_search( thrust::seq, set_c_weak_edges_first, set_c_weak_edges_last, set_a_query_edges_first[i]); }); - - std::tie(has_edge, std::ignore) = shuffle_values(handle.get_comms(), has_edge.begin(), rx_count, handle.get_stream()); - /* + auto set_a_and_b_query_edges_first = thrust::make_zip_iterator(set_a_query_edges, set_b_query_edges); + auto set_a_and_b_query_edges_last = thrust::make_zip_iterator( + set_a_query_edges + buffer_size, set_b_query_edges + buffer_size); + auto edges_not_overcomp = thrust::remove_if( handle.get_thrust_policy(), - thrust::make_zip_iterator(set_a_query_edges, - set_b_query_edges), - thrust::make_zip_iterator(set_a_query_edges + buffer_size, - set_b_query_edges + buffer_size), - [has_edge = raft::device_span(has_edge.data(), has_edge.size()) - ] __device__(auto e) { - - auto set_a_query_edge = thrust::get<0>(e); - if constexpr (is_q_r_edge) { - set_a_query_edge = thrust::make_tuple(thrust::get<1>(set_a_query_edge), thrust::get<0>(set_a_query_edge)); - - }; - - return thrust::binary_search( - thrust::seq, set_c_weak_edges_first, set_c_weak_edges_last, set_a_query_edge); + set_a_and_b_query_edges_first, + set_a_and_b_query_edges_last, + [ + set_a_and_b_query_edges_first, + set_a_and_b_query_edges_last, + has_edge = raft::device_span(has_edge.data(), has_edge.size()) + ] __device__(auto pair_set) { + auto itr = thrust::lower_bound( + thrust::seq, set_a_and_b_query_edges_first, set_a_and_b_query_edges_last, pair_set); + + auto idx = thrust::distance(set_a_and_b_query_edges_first, itr); + return has_edge[idx]; + }); - */ - // FIXME: use thrust::remove_if (resize). 
No need for sort_by_key and upper_bound - thrust::sort_by_key(handle.get_thrust_policy(), - has_edge.begin(), - has_edge.end(), - thrust::make_zip_iterator(set_a_query_edges, - set_b_query_edges) - ); - - //raft::print_device_vector("sorted_has_edge_a_s_v", has_edge.data(), has_edge.size(), std::cout); - - // thrust upper_bound 0 - auto itr = thrust::upper_bound( - handle.get_thrust_policy(), has_edge.begin(), has_edge.end(), vertex_t{0}); - - // FIXME: No need to reconstruct the third array because we can zip all 3 edges of the triangle - - auto dist = thrust::distance(has_edge.begin(), itr); + auto dist = thrust::distance(thrust::make_zip_iterator(set_a_query_edges, + set_b_query_edges), edges_not_overcomp); return dist; } @@ -306,13 +282,6 @@ struct extract_q_idx { thrust::nullopt_t, thrust::nullopt_t) const { - - //printf("\nsrc = %d, dst = %d, idx = %d\n", thrust::get<0>(tagged_src), dst, thrust::get<1>(tagged_src)); - /* - if (thrust::get<0>(tagged_src) == 3) { - printf("\nsrc = 3, dst = %d, idx = %d\n", dst, thrust::get<1>(tagged_src)); - } - */ return thrust::make_optional(thrust::make_tuple(dst, thrust::get<1>(tagged_src))); } }; @@ -342,14 +311,6 @@ struct extract_q_idx_closing { return (itr != major_weak_edgelist_dsts_tag_last && *itr == thrust::make_tuple(dst, thrust::get<1>(tagged_src))) ? thrust::make_optional(thrust::make_tuple(thrust::get<0>(tagged_src), dst, major_weak_edgelist_srcs[idx], thrust::get<1>(tagged_src))) : thrust::nullopt; - - - /* - edge_t idx = thrust::get<1>(tagged_src); - return dst == weak_edgelist_dsts[idx] - ? thrust::make_optional(thrust::make_tuple(thrust::get<0>(tagged_src), idx)) - : thrust::nullopt; - */ } }; @@ -383,31 +344,11 @@ struct generate_p_r { __device__ thrust::tuple operator()(edge_t i) const { - // FIXME: When performing chunking, run binary search on the idx of the weak - // edges for multi-GPU. similar to the example in 'extract_q_idx_closing'. 
- // 1) Have the pair(weak_edges, tag) sorted by (tag) - // 2) Perform a binary search on the tag to find if the weak edge exist in the selected - // chunk - // 3) if it exists, return a pair, otherwise, return a thrust::nullopt - - /* - auto itr = thrust::lower_bound( - thrust::seq, chunk_global_weak_edgelist_tags.begin(), chunk_global_weak_edgelist_tags.end(), weak_edge_idx[i]); - */ - auto itr = thrust::lower_bound( thrust::seq, weak_edge_dst_tag_first, weak_edge_dst_tag_last, closing_r_tag[i]); - auto idx = thrust::distance(weak_edge_dst_tag_first, itr); - return *(weak_edge_first + idx); - - - - - //return *(weak_edge + weak_edge_idx[i]); - } }; @@ -420,36 +361,12 @@ struct generate_p_q_q_r { __device__ thrust::tuple operator()(edge_t i) const { - // FIXME: When performing chunking, run binary search on the idx of the weak - // edges for multi-GPU. similar to the example in 'extract_q_idx_closing'. - // 1) Have the pair(weak_edges, tag) sorted by (tag) - // 2) Perform a binary search on the tag to find if the weak edge exist in the selected - // chunk - // 3) if it exists, return a pair, otherwise, return a thrust::nullopt - - /* - auto itr = thrust::lower_bound( - thrust::seq, chunk_global_weak_edgelist_tags.begin(), chunk_global_weak_edgelist_tags.end(), weak_edge_idx[i]); - - auto idx = thrust::distance(chunk_global_weak_edgelist_tags.begin(), itr); - - if constexpr (generate_p_q) { - return thrust::make_tuple(thrust::get<0>(*(weak_edge + idx)), q_closing[i]); - // FIXME: If single GPU, return this. 
Add template for multi_gpu - } else { - return thrust::make_tuple(q_closing[i], thrust::get<1>(*(weak_edge + idx))); - } - */ - - if constexpr (generate_p_q) { return thrust::make_tuple(thrust::get<0>(*(weak_edge + weak_edge_idx[i])), q_closing[i]); } else { return thrust::make_tuple(q_closing[i], thrust::get<1>(*(weak_edge + weak_edge_idx[i]))); } - - } }; @@ -462,7 +379,6 @@ void decrease_triangle_count(raft::handle_t const& handle, ) { // Before updating the count, we need to clear the mask - // cur_graph_view.clear_edge_mask(); auto edge_buffer_first = thrust::make_zip_iterator(edge_srcs.begin(), edge_dsts.begin()); thrust::sort(handle.get_thrust_policy(), @@ -474,16 +390,6 @@ void decrease_triangle_count(raft::handle_t const& handle, edge_buffer_first + edge_srcs.size()); rmm::device_uvector decrease_count(unique_pair_count, handle.get_stream()); - - /* - rmm::device_uvector decrease_count_tmp(edge_srcs.size(), - handle.get_stream()); - - thrust::fill(handle.get_thrust_policy(), - decrease_count_tmp.begin(), - decrease_count_tmp.end(), - size_t{1}); - */ auto vertex_pair_buffer_unique = allocate_dataframe_buffer>( unique_pair_count, handle.get_stream()); @@ -1169,11 +1075,6 @@ k_truss(raft::handle_t const& handle, (*renumber_map_q_r).data(), *vertex_partition_range_lasts, true); - } - - edge_t num_edges_not_overcomp = 0; - - if constexpr (multi_gpu) { // Get global weak edges auto& comm = handle.get_comms(); @@ -1197,7 +1098,7 @@ k_truss(raft::handle_t const& handle, chunk_global_weak_edgelist_first, chunk_global_weak_edgelist_first + global_weak_edgelist_srcs.size()); - num_edges_not_overcomp = + auto num_edges_not_overcomp = remove_overcompensating_edges vertex_pair_buffer_p_r_edge_q_r_srcs(0, handle.get_stream()); rmm::device_uvector vertex_pair_buffer_p_r_edge_q_r_dsts(0, handle.get_stream()); - if constexpr (multi_gpu) { + if constexpr (multi_gpu) { // Shuffle before updating count rmm::device_uvector vertex_pair_buffer_q_r_srcs(0, handle.get_stream()); 
rmm::device_uvector vertex_pair_buffer_q_r_dsts(0, handle.get_stream()); @@ -1389,14 +1290,6 @@ k_truss(raft::handle_t const& handle, } - - auto [srcs_1, dsts_1, count_1] = extract_transform_e(handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - //view_concat(edge_triangle_counts.view(), modified_triangle_count.view()), - edge_triangle_counts.view(), - extract_edges_and_triangle_counts{}); weak_edgelist_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); @@ -1458,14 +1351,17 @@ k_truss(raft::handle_t const& handle, } vertex_frontier_t vertex_frontier(handle, 1); - rmm::device_uvector tag_cpy(std::get<1>(vertex_pair_buffer_p_tag).size(), handle.get_stream()); - thrust::copy( + rmm::device_uvector tag_cpy(0, handle.get_stream()); + + if constexpr (multi_gpu) { + tag_cpy.resize( + std::get<1>(vertex_pair_buffer_p_tag).size(), handle.get_stream()); + // Need a copy before shuffling the original tag + thrust::copy( handle.get_thrust_policy(), std::get<1>(vertex_pair_buffer_p_tag).begin(), std::get<1>(vertex_pair_buffer_p_tag).end(), tag_cpy.begin()); - - if constexpr (multi_gpu) { // Shuffle vertices auto [p_vrtx, p_tag] = detail::shuffle_int_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( @@ -1547,7 +1443,7 @@ k_truss(raft::handle_t const& handle, handle, minor_comm, raft::device_span(tag_cpy.data(), tag_cpy.size())); auto major_weak_edgelist_first = - thrust::make_zip_iterator(minor_weak_edgelist_srcs.begin(), minor_weak_edgelist_dsts.begin()); + thrust::make_zip_iterator(minor_weak_edgelist_srcs.begin(), minor_weak_edgelist_dsts.begin()); // FIXME: remove as it is unused auto major_weak_edgelist_dsts_tags_first = thrust::make_zip_iterator(minor_weak_edgelist_dsts.begin(), minor_weak_edgelist_tags.begin()); @@ -1639,8 +1535,8 @@ k_truss(raft::handle_t const& handle, >( handle, q_closing.size(), - 
get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r), // cannot be a copy, needs to be the original one so overcompensatiing edges can be removed + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_r), // cannot be a copy, needs to be the original one so overcompensatiing edges can be removed raft::device_span(global_weak_edgelist_srcs.data(), global_weak_edgelist_srcs.size()), raft::device_span(global_weak_edgelist_dsts.data(), global_weak_edgelist_dsts.size()), raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), @@ -1686,7 +1582,10 @@ k_truss(raft::handle_t const& handle, } else { // FIXME: refactor SG to use r_closing - auto [q_closing, r_closing, idx_closing] = + auto weak_edgelist_dsts_tags_first = thrust::make_zip_iterator( + weak_edgelist_dsts.begin(), std::get<1>(vertex_pair_buffer_p_tag).begin() + ); + auto [q_closing, r_closing, p_closing, idx_closing] = cugraph::extract_transform_v_frontier_outgoing_e( handle, cur_graph_view, @@ -1694,73 +1593,51 @@ k_truss(raft::handle_t const& handle, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), cugraph::edge_dummy_property_t{}.view(), - extract_q_idx_closing{raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())}, + extract_q_idx_closing{ + weak_edgelist_dsts_tags_first, + weak_edgelist_dsts_tags_first + weak_edgelist_dsts.size(), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + }, do_expensive_check); + // FIXME: Move the 3 copies to a function as it is also performed for MG // extract pair (p, r) - /* - auto vertex_pair_buffer_p_r = - 
allocate_dataframe_buffer>(q_closing.size(), - handle.get_stream()); - */ resize_dataframe_buffer(vertex_pair_buffer_p_r, q_closing.size(), handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r), - get_dataframe_buffer_end(vertex_pair_buffer_p_r), - generate_p_r{ - weak_edgelist_first, - raft::device_span(idx_closing.data(), - idx_closing.size()) - }); + thrust::copy( + handle.get_thrust_policy(), + thrust::make_zip_iterator(p_closing.begin(), r_closing.begin()), + thrust::make_zip_iterator(p_closing.end(), r_closing.end()), + thrust::make_zip_iterator( + std::get<0>(vertex_pair_buffer_p_r).begin(), std::get<1>(vertex_pair_buffer_p_r).begin()) + ); - // construct pair (p, q) - /* - auto vertex_pair_buffer_p_q_edge_p_r = - allocate_dataframe_buffer>(q_closing.size(), - handle.get_stream()); - */ resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_p_r, - q_closing.size(), - handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r), - get_dataframe_buffer_end(vertex_pair_buffer_p_q_edge_p_r), - generate_p_q_q_r{ - weak_edgelist_first, - raft::device_span(q_closing.data(), - q_closing.size()), - raft::device_span(idx_closing.data(), - idx_closing.size()) - }); + q_closing.size(), + handle.get_stream()); - std::cout << "Before remove overcompensating edges when unrolling (p, r) edges" << std::endl; - raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r_srcs", std::get<0>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_edge_p_r).size(), std::cout); - raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r_dsts", std::get<1>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<1>(vertex_pair_buffer_p_q_edge_p_r).size(), std::cout); - - // construct pair (q, r) - /* - auto vertex_pair_buffer_q_r_edge_p_r = - allocate_dataframe_buffer>(q_closing.size(), - handle.get_stream()); - */ + // extract pair 
(p, q) + thrust::copy( + handle.get_thrust_policy(), + thrust::make_zip_iterator(p_closing.begin(), q_closing.begin()), + thrust::make_zip_iterator(p_closing.end(), q_closing.end()), + thrust::make_zip_iterator( + std::get<0>(vertex_pair_buffer_p_q_edge_p_r).begin(), std::get<1>(vertex_pair_buffer_p_q_edge_p_r).begin()) + ); + + // extract pair (q, r) resize_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_r, - q_closing.size(), - handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_r), - get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_r), - generate_p_q_q_r{ - weak_edgelist_first, - raft::device_span(q_closing.data(), - q_closing.size()), - raft::device_span(idx_closing.data(), - idx_closing.size()) - }); + q_closing.size(), + handle.get_stream()); + + thrust::copy( + handle.get_thrust_policy(), + thrust::make_zip_iterator(q_closing.begin(), r_closing.begin()), + thrust::make_zip_iterator(q_closing.end(), r_closing.end()), + thrust::make_zip_iterator( + std::get<0>(vertex_pair_buffer_q_r_edge_p_r).begin(), std::get<1>(vertex_pair_buffer_q_r_edge_p_r).begin()) + ); auto num_edges_not_overcomp_p_q = remove_overcompensating_edges(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_edge_p_r).size(), std::cout); - raft::print_device_vector("vertex_pair_buffer_p_q_for_p_r_dsts", std::get<1>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<1>(vertex_pair_buffer_p_q_edge_p_r).size(), std::cout); - auto num_edges_not_overcomp_q_r = remove_overcompensating_edges( handle, cur_graph_view, @@ -1854,8 +1723,6 @@ k_truss(raft::handle_t const& handle, raft::device_span(vertex_pair_buffer_p_r_dsts.data(), vertex_pair_buffer_p_r_dsts.size()) ); - std::cout << "updating count_1" < vertex_pair_buffer_p_q_edge_p_r_srcs(0, handle.get_stream()); rmm::device_uvector vertex_pair_buffer_p_q_edge_p_r_dsts(0, handle.get_stream()); @@ -1873,9 +1740,6 @@ k_truss(raft::handle_t const& 
handle, std::nullopt, cur_graph_view.vertex_partition_range_lasts()); - raft::print_device_vector("vertex_pair_buffer_p_q_edge_p_r_srcs", vertex_pair_buffer_p_q_edge_p_r_srcs.data(), vertex_pair_buffer_p_q_edge_p_r_srcs.size(), std::cout); - raft::print_device_vector("vertex_pair_buffer_p_q_edge_p_r_dsts", vertex_pair_buffer_p_q_edge_p_r_dsts.data(), vertex_pair_buffer_p_q_edge_p_r_dsts.size(), std::cout); - decrease_triangle_count( handle, cur_graph_view, @@ -1884,8 +1748,6 @@ k_truss(raft::handle_t const& handle, raft::device_span(vertex_pair_buffer_p_q_edge_p_r_dsts.data(), vertex_pair_buffer_p_q_edge_p_r_dsts.size()) ); - std::cout << "updating count_2" < vertex_pair_buffer_q_r_edge_p_r_srcs(0, handle.get_stream()); rmm::device_uvector vertex_pair_buffer_q_r_edge_p_r_dsts(0, handle.get_stream()); @@ -1902,9 +1764,6 @@ k_truss(raft::handle_t const& handle, std::nullopt, std::nullopt, cur_graph_view.vertex_partition_range_lasts()); - - raft::print_device_vector("vertex_pair_buffer_q_r_edge_p_r_srcs", vertex_pair_buffer_q_r_edge_p_r_srcs.data(), vertex_pair_buffer_q_r_edge_p_r_srcs.size(), std::cout); - raft::print_device_vector("vertex_pair_buffer_q_r_edge_p_r_dsts", vertex_pair_buffer_q_r_edge_p_r_dsts.data(), vertex_pair_buffer_q_r_edge_p_r_dsts.size(), std::cout); decrease_triangle_count( handle, @@ -1913,8 +1772,6 @@ k_truss(raft::handle_t const& handle, raft::device_span(vertex_pair_buffer_q_r_edge_p_r_srcs.data(), vertex_pair_buffer_q_r_edge_p_r_srcs.size()), raft::device_span(vertex_pair_buffer_q_r_edge_p_r_dsts.data(), vertex_pair_buffer_q_r_edge_p_r_dsts.size()) ); - - std::cout << "Done updating count_2" <( @@ -1942,21 +1799,6 @@ k_truss(raft::handle_t const& handle, ); } - - - printf("\nafter unrolling (p, r) edges\n"); - auto [srcs_2, dsts_2, count_2] = extract_transform_e(handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - //view_concat(edge_triangle_counts.view(), 
modified_triangle_count.view()), - edge_triangle_counts.view(), - extract_edges_and_triangle_counts{}); - - raft::print_device_vector("unrolled_srcs_2", srcs_2.data(), srcs_2.size(), std::cout); - raft::print_device_vector("unrolled_dsts_2", dsts_2.data(), dsts_2.size(), std::cout); - raft::print_device_vector("unrolled_n_tr_2", count_2.data(), count_2.size(), std::cout); - // Mask all the edges that have 0 count cugraph::transform_e( handle, @@ -1972,48 +1814,12 @@ k_truss(raft::handle_t const& handle, false); cur_graph_view.attach_edge_mask(edge_mask.view()); - - auto [srcs_3, dsts_3, count_3] = extract_transform_e(handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - //view_concat(edge_triangle_counts.view(), modified_triangle_count.view()), - edge_triangle_counts.view(), - extract_edges_and_triangle_counts{}); - - - printf("\nafter removing edges with no count: num_edges = %d\n", srcs_3.size()); - - raft::print_device_vector("unrolled_srcs_3", srcs_3.data(), srcs_3.size(), std::cout); - raft::print_device_vector("unrolled_dsts_3", dsts_3.data(), dsts_3.size(), std::cout); - raft::print_device_vector("unrolled_n_tr_3", count_3.data(), count_3.size(), std::cout); - - /* - auto [edgelist_srcs_, edgelist_dsts_, edgelist_wgts_, dummy_0, dummy_1] = - decompress_to_edgelist( - handle, - cur_graph_view, - edge_weight_view ? 
std::make_optional(*edge_weight_view) : std::nullopt, - std::optional>{std::nullopt}, - std::optional>{std::nullopt}, - std::optional>(std::nullopt)); - */ - } - - - - - - - - - rmm::device_uvector edgelist_srcs(0, handle.get_stream()); rmm::device_uvector edgelist_dsts(0, handle.get_stream()); std::optional> edgelist_wgts{std::nullopt}; - #if 0 + std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts, std::ignore, std::ignore) = decompress_to_edgelist( handle, @@ -2022,6 +1828,8 @@ k_truss(raft::handle_t const& handle, std::optional>{std::nullopt}, std::optional>{std::nullopt}, std::optional>(std::nullopt)); + + std::cout << "k_truss num_edges = " << edgelist_srcs.size() << std::endl; std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts) = symmetrize_edgelist(handle, @@ -2030,7 +1838,6 @@ k_truss(raft::handle_t const& handle, std::move(edgelist_wgts), false); - #endif return std::make_tuple( std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_wgts)); } diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index d60a2ef45d6..97e09b607aa 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -505,7 +505,7 @@ ConfigureTest(K_CORE_TEST cores/k_core_test.cpp) ############################################################################################### # - K-truss tests -------------------------------------------------------------------------- -#ConfigureTest(K_TRUSS_TEST community/k_truss_test.cpp) +ConfigureTest(K_TRUSS_TEST community/k_truss_test.cpp) ################################################################################################### # - Triangle Count tests -------------------------------------------------------------------------- diff --git a/cpp/tests/community/k_truss_test.cpp b/cpp/tests/community/k_truss_test.cpp index 2d33210514e..1896474062a 100644 --- a/cpp/tests/community/k_truss_test.cpp +++ b/cpp/tests/community/k_truss_test.cpp @@ -302,7 +302,7 @@ INSTANTIATE_TEST_SUITE_P( // enable 
correctness checks ::testing::Values(//KTruss_Usecase{4, true, true}, KTruss_Usecase{4, true, true}), - ::testing::Values(cugraph::test::File_Usecase("/raid/jnke/optimize_ktruss/datasets/test_datasets_.mtx")))); + ::testing::Values(cugraph::test::File_Usecase("/raid/jnke/optimize_ktruss/datasets/test_datasets.mtx")))); #if 0 INSTANTIATE_TEST_SUITE_P( From 83bca54111174e99a280a2ac257df8e0921fdeaf Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 10 Jul 2024 15:29:09 -0700 Subject: [PATCH 66/93] update fixme --- cpp/src/community/k_truss_impl.cuh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 93d83480df6..908bf0c5e02 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -1104,7 +1104,7 @@ k_truss(raft::handle_t const& handle, decltype(get_dataframe_buffer_begin(vertex_pair_buffer_q_r)), true, multi_gpu, - true // FIXME: Use global weak edges for validation purposes + true // FIXME: Currently using global weak edges for validation purposes >( handle, size_dataframe_buffer(vertex_pair_buffer_p_q_edge_q_r), @@ -1531,7 +1531,7 @@ k_truss(raft::handle_t const& handle, decltype(get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r)), false, multi_gpu, - true // FIXME: Use global weak edges for validation purposes + true // FIXME: Currently using global weak edges for validation purposes >( handle, q_closing.size(), @@ -1550,7 +1550,7 @@ k_truss(raft::handle_t const& handle, decltype(get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r)), false, multi_gpu, - true // FIXME: Use global weak edges for validation purposes + true // FIXME: Currently using global weak edges for validation purposes >( handle, num_edges_not_overcomp_p_q, From f4c2fcf5747f9412fdd48777ca34101dd41ec4e1 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 11 Jul 2024 13:10:15 -0700 Subject: [PATCH 67/93] fix bug by using major comms --- 
cpp/src/community/k_truss_impl.cuh | 48 +++++++++++++++--------------- 1 file changed, 24 insertions(+), 24 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 908bf0c5e02..ad9cab80c84 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -887,26 +887,26 @@ k_truss(raft::handle_t const& handle, vertex_q_r_set.resize(thrust::distance(vertex_q_r_set.begin(), weak_unique_v_end), handle.get_stream()); if constexpr (multi_gpu) { - auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); // Perform all-to-all in chunks across minor comm - auto minor_vertex_q_r_set = cugraph::detail::device_allgatherv( - handle, minor_comm, raft::device_span(vertex_q_r_set.data(), vertex_q_r_set.size())); + auto major_vertex_q_r_set = cugraph::detail::device_allgatherv( + handle, major_comm, raft::device_span(vertex_q_r_set.data(), vertex_q_r_set.size())); - thrust::sort(handle.get_thrust_policy(), minor_vertex_q_r_set.begin(), minor_vertex_q_r_set.end()); + thrust::sort(handle.get_thrust_policy(), major_vertex_q_r_set.begin(), major_vertex_q_r_set.end()); weak_unique_v_end = thrust::unique( handle.get_thrust_policy(), - minor_vertex_q_r_set.begin(), - minor_vertex_q_r_set.end()); + major_vertex_q_r_set.begin(), + major_vertex_q_r_set.end()); - minor_vertex_q_r_set.resize(thrust::distance(minor_vertex_q_r_set.begin(), weak_unique_v_end), handle.get_stream()); + major_vertex_q_r_set.resize(thrust::distance(major_vertex_q_r_set.begin(), weak_unique_v_end), handle.get_stream()); - vertex_q_r_set.resize(minor_vertex_q_r_set.size(), handle.get_stream()); + vertex_q_r_set.resize(major_vertex_q_r_set.size(), handle.get_stream()); thrust::copy( handle.get_thrust_policy(), - minor_vertex_q_r_set.begin(), - minor_vertex_q_r_set.end(), + major_vertex_q_r_set.begin(), + major_vertex_q_r_set.end(), 
vertex_q_r_set.begin()); } @@ -1432,26 +1432,26 @@ k_truss(raft::handle_t const& handle, if constexpr (multi_gpu) { // Get minor weak edges - auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - auto minor_weak_edgelist_srcs = cugraph::detail::device_allgatherv( - handle, minor_comm, raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size())); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto major_weak_edgelist_srcs = cugraph::detail::device_allgatherv( + handle, major_comm, raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size())); // FIXME: Perform all-to-all in chunks - auto minor_weak_edgelist_dsts = cugraph::detail::device_allgatherv( - handle, minor_comm, raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); + auto major_weak_edgelist_dsts = cugraph::detail::device_allgatherv( + handle, major_comm, raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); - auto minor_weak_edgelist_tags = cugraph::detail::device_allgatherv( - handle, minor_comm, raft::device_span(tag_cpy.data(), tag_cpy.size())); + auto major_weak_edgelist_tags = cugraph::detail::device_allgatherv( + handle, major_comm, raft::device_span(tag_cpy.data(), tag_cpy.size())); auto major_weak_edgelist_first = - thrust::make_zip_iterator(minor_weak_edgelist_srcs.begin(), minor_weak_edgelist_dsts.begin()); // FIXME: remove as it is unused + thrust::make_zip_iterator(major_weak_edgelist_srcs.begin(), major_weak_edgelist_dsts.begin()); // FIXME: remove as it is unused auto major_weak_edgelist_dsts_tags_first = - thrust::make_zip_iterator(minor_weak_edgelist_dsts.begin(), minor_weak_edgelist_tags.begin()); + thrust::make_zip_iterator(major_weak_edgelist_dsts.begin(), major_weak_edgelist_tags.begin()); thrust::sort_by_key(handle.get_thrust_policy(), major_weak_edgelist_dsts_tags_first, - major_weak_edgelist_dsts_tags_first + minor_weak_edgelist_dsts.size(), - 
minor_weak_edgelist_srcs.begin() + major_weak_edgelist_dsts_tags_first + major_weak_edgelist_dsts.size(), + major_weak_edgelist_srcs.begin() ); // FIXME: 'idx_closing' no longer needed - remove it @@ -1465,8 +1465,8 @@ k_truss(raft::handle_t const& handle, cugraph::edge_dummy_property_t{}.view(), extract_q_idx_closing{ major_weak_edgelist_dsts_tags_first, - major_weak_edgelist_dsts_tags_first + minor_weak_edgelist_dsts.size(), - raft::device_span(minor_weak_edgelist_srcs.data(), minor_weak_edgelist_srcs.size()), + major_weak_edgelist_dsts_tags_first + major_weak_edgelist_dsts.size(), + raft::device_span(major_weak_edgelist_srcs.data(), major_weak_edgelist_srcs.size()), }, true); @@ -1843,4 +1843,4 @@ k_truss(raft::handle_t const& handle, } } -} // namespace cugraph +} // namespace cugraph \ No newline at end of file From d956d2252c73ee0e1b9bc3e0660739a0d05f4027 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 11 Jul 2024 13:12:54 -0700 Subject: [PATCH 68/93] fix typo --- cpp/src/community/k_truss_impl.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index ad9cab80c84..696d79e8dc6 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -1431,7 +1431,7 @@ k_truss(raft::handle_t const& handle, // back in with the chunk global weak edgelist if constexpr (multi_gpu) { - // Get minor weak edges + // Get major weak edges auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); auto major_weak_edgelist_srcs = cugraph::detail::device_allgatherv( handle, major_comm, raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size())); From dc6136b7188ea24184f006201a62e645ff80dd80 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Fri, 12 Jul 2024 20:31:15 -0700 Subject: [PATCH 69/93] unroll edges without using global comms --- cpp/src/community/k_truss_impl.cuh | 107 ++++++++++++++++++++++++----- 1 file changed, 90 
insertions(+), 17 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 696d79e8dc6..cc6099df2ae 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -52,6 +52,7 @@ edge_t remove_overcompensating_edges(raft::handle_t const& handle, raft::device_span global_set_c_weak_edges_dsts, raft::device_span set_c_weak_edges_srcs, raft::device_span set_c_weak_edges_dsts, + vertex_t number_of_local_edge_partitions, std::vector vertex_partition_range_lasts) { @@ -86,9 +87,13 @@ edge_t remove_overcompensating_edges(raft::handle_t const& handle, edges_not_overcomp); return dist; } else { + + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + rmm::device_uvector set_a_query_edges_srcs(buffer_size, handle.get_stream()); rmm::device_uvector set_a_query_edges_dsts(buffer_size, handle.get_stream()); - std::vector rx_count{}; + std::vector rx_counts{}; thrust::copy(handle.get_thrust_policy(), set_a_query_edges, @@ -96,7 +101,7 @@ edge_t remove_overcompensating_edges(raft::handle_t const& handle, thrust::make_zip_iterator(set_a_query_edges_srcs.begin(), set_a_query_edges_dsts.begin())); // group_by_count to get the destination of each edges - std::tie(set_a_query_edges_srcs, set_a_query_edges_dsts, std::ignore, std::ignore, std::ignore, rx_count) = + std::tie(set_a_query_edges_srcs, set_a_query_edges_dsts, std::ignore, std::ignore, std::ignore, rx_counts) = detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning has_edge(set_a_query_edges_srcs.size(), handle.get_stream()); // type should be size_t - auto set_c_weak_edges_first = thrust::make_zip_iterator(global_set_c_weak_edges_srcs.begin(), global_set_c_weak_edges_dsts.begin()); // setBedges - auto set_c_weak_edges_last = thrust::make_zip_iterator(global_set_c_weak_edges_srcs.end(), global_set_c_weak_edges_dsts.end()); + auto set_c_weak_edges_first = 
thrust::make_zip_iterator(set_c_weak_edges_srcs.begin(), set_c_weak_edges_dsts.begin()); // setBedges + auto set_c_weak_edges_last = thrust::make_zip_iterator(set_c_weak_edges_srcs.end(), set_c_weak_edges_dsts.end()); auto set_a_query_edges_first = thrust::make_zip_iterator(set_a_query_edges_srcs.begin(), set_a_query_edges_dsts.begin()); - // FIXME: Was recommended to use thrust::transform instead but how ? + // FIXME: Use thrust::transform instead thrust::tabulate( handle.get_thrust_policy(), has_edge.begin(), @@ -124,32 +129,80 @@ edge_t remove_overcompensating_edges(raft::handle_t const& handle, thrust::seq, set_c_weak_edges_first, set_c_weak_edges_last, set_a_query_edges_first[i]); }); + //auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + + rmm::device_uvector d_vertex_partition_range_lasts(vertex_partition_range_lasts.size(), + handle.get_stream()); + + raft::update_device(d_vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.data(), + vertex_partition_range_lasts.size(), + handle.get_stream()); + + auto func = cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + comm_size, + major_comm_size, + minor_comm_size}; + + auto d_tx_counts = cugraph::groupby_and_count( + thrust::make_zip_iterator(set_a_query_edges_srcs.begin(), set_a_query_edges_dsts.begin()), + thrust::make_zip_iterator(set_a_query_edges_srcs.end(), set_a_query_edges_dsts.end()), + [func, major_comm_size]__device__(auto val) { + return func(val) % major_comm_size; + }, + major_comm_size, + std::numeric_limits::max(), + handle.get_stream()); + + std::vector 
h_tx_counts{d_tx_counts.size()}; + + raft::update_host(h_tx_counts.data(), + d_tx_counts.data(), + d_tx_counts.size(), + handle.get_stream()); + std::tie(has_edge, std::ignore) = - shuffle_values(handle.get_comms(), has_edge.begin(), rx_count, handle.get_stream()); + shuffle_values(handle.get_comms(), has_edge.begin(), h_tx_counts, handle.get_stream()); auto set_a_and_b_query_edges_first = thrust::make_zip_iterator(set_a_query_edges, set_b_query_edges); auto set_a_and_b_query_edges_last = thrust::make_zip_iterator( set_a_query_edges + buffer_size, set_b_query_edges + buffer_size); + + thrust::sort_by_key(handle.get_thrust_policy(), + set_a_query_edges, + set_a_query_edges + buffer_size, + thrust::make_zip_iterator(set_b_query_edges, has_edge.begin()) + ); + + + auto edges_not_overcomp = thrust::remove_if( handle.get_thrust_policy(), set_a_and_b_query_edges_first, set_a_and_b_query_edges_last, [ - set_a_and_b_query_edges_first, - set_a_and_b_query_edges_last, + set_a_query_edges, + buffer_size, has_edge = raft::device_span(has_edge.data(), has_edge.size()) ] __device__(auto pair_set) { + //auto set_a_query_edge = thrust::get<0>(pair_set) auto itr = thrust::lower_bound( - thrust::seq, set_a_and_b_query_edges_first, set_a_and_b_query_edges_last, pair_set); + thrust::seq, set_a_query_edges, set_a_query_edges + buffer_size, thrust::get<0>(pair_set)); - auto idx = thrust::distance(set_a_and_b_query_edges_first, itr); + auto idx = thrust::distance(set_a_query_edges, itr); return has_edge[idx]; }); - auto dist = thrust::distance(thrust::make_zip_iterator(set_a_query_edges, - set_b_query_edges), edges_not_overcomp); + auto dist = thrust::distance(set_a_and_b_query_edges_first, edges_not_overcomp); return dist; } @@ -167,6 +220,16 @@ struct extract_weak_edges { } }; +template +struct extract_edges { // FIXME: ******************************Remove this functor. 
For testing purposes only******************* + __device__ thrust::optional> operator()( + + auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) const + { + return thrust::make_tuple(src, dst, count); + } +}; + template struct extract_edges_and_triangle_counts { __device__ thrust::optional> operator()( @@ -890,7 +953,7 @@ k_truss(raft::handle_t const& handle, auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); // Perform all-to-all in chunks across minor comm auto major_vertex_q_r_set = cugraph::detail::device_allgatherv( - handle, major_comm, raft::device_span(vertex_q_r_set.data(), vertex_q_r_set.size())); + handle, handle.get_comms(), raft::device_span(vertex_q_r_set.data(), vertex_q_r_set.size())); thrust::sort(handle.get_thrust_policy(), major_vertex_q_r_set.begin(), major_vertex_q_r_set.end()); @@ -1114,6 +1177,7 @@ k_truss(raft::handle_t const& handle, raft::device_span(global_weak_edgelist_dsts.data(), global_weak_edgelist_dsts.size()), raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), + cur_graph_view.number_of_local_edge_partitions(), cur_graph_view.vertex_partition_range_lasts() ); @@ -1147,7 +1211,7 @@ k_truss(raft::handle_t const& handle, decltype(get_dataframe_buffer_begin(vertex_pair_buffer_q_r)), true, multi_gpu, - false + true >( handle, size_dataframe_buffer(vertex_pair_buffer_p_q_edge_q_r), @@ -1157,6 +1221,7 @@ k_truss(raft::handle_t const& handle, raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), // FIXME: Only for MG validation purposes raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), // FIXME: Only for MG validation purposes + cur_graph_view.number_of_local_edge_partitions(), cur_graph_view.vertex_partition_range_lasts() // Not needed for SG ); @@ -1524,7 +1589,7 @@ 
k_truss(raft::handle_t const& handle, thrust::sort(handle.get_thrust_policy(), chunk_global_weak_edgelist_first, chunk_global_weak_edgelist_first + global_weak_edgelist_srcs.size()); - + auto num_edges_not_overcomp_p_q = remove_overcompensating_edges(global_weak_edgelist_dsts.data(), global_weak_edgelist_dsts.size()), raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), + cur_graph_view.number_of_local_edge_partitions(), cur_graph_view.vertex_partition_range_lasts() ); + + // FIXME: No need to resize the dataframes buffer now. + resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); + resize_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); auto num_edges_not_overcomp_q_r = remove_overcompensating_edges(global_weak_edgelist_dsts.data(), global_weak_edgelist_dsts.size()), raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), + cur_graph_view.number_of_local_edge_partitions(), cur_graph_view.vertex_partition_range_lasts()); resize_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); @@ -1645,7 +1716,7 @@ k_truss(raft::handle_t const& handle, decltype(get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r)), false, multi_gpu, - false + true >( handle, q_closing.size(), @@ -1655,6 +1726,7 @@ k_truss(raft::handle_t const& handle, raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), // FIXME: Only for MG validation purposes raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), // FIXME: Only for MG validation purposes + cur_graph_view.number_of_local_edge_partitions(), cur_graph_view.vertex_partition_range_lasts()); 
resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); @@ -1666,7 +1738,7 @@ k_truss(raft::handle_t const& handle, decltype(get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r)), false, multi_gpu, - false + true >( handle, num_edges_not_overcomp_p_q, @@ -1676,6 +1748,7 @@ k_truss(raft::handle_t const& handle, raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), // FIXME: Only for MG validation purposes raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), // FIXME: Only for MG validation purposes + cur_graph_view.number_of_local_edge_partitions(), cur_graph_view.vertex_partition_range_lasts()); resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); From a83971d8803cda8fae3af99a3e0e5e4c2ae12e6f Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Mon, 15 Jul 2024 00:57:01 -0700 Subject: [PATCH 70/93] decompress the edges with the renumber_map --- cpp/src/community/k_truss_impl.cuh | 215 +++++++++++++++-------------- 1 file changed, 108 insertions(+), 107 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index cc6099df2ae..1db5a0fb7d5 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -464,7 +464,7 @@ void decrease_triangle_count(raft::handle_t const& handle, get_dataframe_buffer_begin(vertex_pair_buffer_unique), decrease_count.begin(), thrust::equal_to>{}); - + cugraph::edge_bucket_t edges_to_decrement_count(handle); edges_to_decrement_count.insert(std::get<0>(vertex_pair_buffer_unique).begin(), std::get<0>(vertex_pair_buffer_unique).end(), @@ -636,6 +636,7 @@ k_truss(raft::handle_t const& handle, } // 2. Find (k-1)-core and exclude edges that do not belong to (k-1)-core + #if 0 { auto cur_graph_view = modified_graph_view ? 
*modified_graph_view : graph_view; @@ -692,6 +693,7 @@ k_truss(raft::handle_t const& handle, } renumber_map = std::move(tmp_renumber_map); } + #endif // 3. Keep only the edges from a low-degree vertex to a high-degree vertex. @@ -793,7 +795,7 @@ k_truss(raft::handle_t const& handle, // FIXME: Replace by lambda function extract_weak_edges{k}); - + auto num_weak_edges = weak_edgelist_srcs.size(); if constexpr (multi_gpu) { num_weak_edges = host_scalar_allreduce(handle.get_comms(), num_weak_edges, raft::comms::op_t::SUM, handle.get_stream()); @@ -833,6 +835,9 @@ k_truss(raft::handle_t const& handle, rmm::device_uvector vertex_pair_buffer_p_r_edge_p_q_dsts(0, handle.get_stream()); rmm::device_uvector vertex_pair_buffer_q_r_edge_p_q_srcs(0, handle.get_stream()); rmm::device_uvector vertex_pair_buffer_q_r_edge_p_q_dsts(0, handle.get_stream()); + + + // Shuffle edges if constexpr (multi_gpu) { // FIXME: Check whether we need to shuffle (p, q) edges @@ -863,6 +868,7 @@ k_truss(raft::handle_t const& handle, cur_graph_view.vertex_partition_range_lasts()); } + decrease_triangle_count( handle, cur_graph_view, @@ -876,7 +882,7 @@ k_truss(raft::handle_t const& handle, cur_graph_view, edge_triangle_counts, multi_gpu ? raft::device_span(vertex_pair_buffer_p_r_edge_p_q_srcs.data(), vertex_pair_buffer_p_r_edge_p_q_srcs.size()) : raft::device_span(std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size()), // FIXME: Make sure multi_gpu is properly handles - multi_gpu ? raft::device_span(vertex_pair_buffer_p_r_edge_p_q_dsts.data(), vertex_pair_buffer_p_r_edge_p_q_dsts.size()) : raft::device_span(std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size()) // FIXME: Make sure multi_gpu is properly handles + multi_gpu ? 
raft::device_span(vertex_pair_buffer_p_r_edge_p_q_dsts.data(), vertex_pair_buffer_p_r_edge_p_q_dsts.size()) : raft::device_span(std::get<1>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_p_r_edge_p_q).size()) // FIXME: Make sure multi_gpu is properly handles ); decrease_triangle_count( @@ -884,7 +890,7 @@ k_truss(raft::handle_t const& handle, cur_graph_view, edge_triangle_counts, multi_gpu ? raft::device_span(vertex_pair_buffer_q_r_edge_p_q_srcs.data(), vertex_pair_buffer_q_r_edge_p_q_srcs.size()) : raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size()), - multi_gpu ? raft::device_span(vertex_pair_buffer_q_r_edge_p_q_dsts.data(), vertex_pair_buffer_q_r_edge_p_q_dsts.size()) : raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size()) + multi_gpu ? raft::device_span(vertex_pair_buffer_q_r_edge_p_q_dsts.data(), vertex_pair_buffer_q_r_edge_p_q_dsts.size()) : raft::device_span(std::get<1>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_q_r_edge_p_q).size()) ); prev_chunk_size += chunk_size; @@ -1014,23 +1020,18 @@ k_truss(raft::handle_t const& handle, auto csc_q_r_graph_view = (*graph_q_r).view(); - rmm::device_uvector renumbered_weak_edgelist_srcs(0, handle.get_stream()); - rmm::device_uvector renumbered_weak_edgelist_dsts(0, handle.get_stream()); - size_t weak_edgelist_size = weak_edgelist_srcs.size(); - - if constexpr (multi_gpu) { - renumbered_weak_edgelist_srcs.resize(weak_edgelist_srcs.size(), handle.get_stream()); - renumbered_weak_edgelist_dsts.resize(weak_edgelist_dsts.size(), handle.get_stream()); + rmm::device_uvector renumbered_weak_edgelist_srcs( + weak_edgelist_srcs.size(), handle.get_stream()); + rmm::device_uvector renumbered_weak_edgelist_dsts( + weak_edgelist_srcs.size(), handle.get_stream()); - thrust::copy( - handle.get_thrust_policy(), - 
thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()), - thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end()), - thrust::make_zip_iterator(renumbered_weak_edgelist_srcs.begin(), renumbered_weak_edgelist_dsts.begin())); - - rmm::device_uvector shuffled_weak_edgelist_srcs{0, handle.get_stream()}; - rmm::device_uvector shuffled_weak_edgelist_dsts{0, handle.get_stream()}; + thrust::copy( + handle.get_thrust_policy(), + thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()), + thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end()), + thrust::make_zip_iterator(renumbered_weak_edgelist_srcs.begin(), renumbered_weak_edgelist_dsts.begin())); + if constexpr (multi_gpu) { std::tie( renumbered_weak_edgelist_srcs, renumbered_weak_edgelist_dsts, std::ignore, std::ignore, std::ignore, std::ignore) = detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - renumbered_weak_edgelist_srcs.data(), - renumbered_weak_edgelist_srcs.size(), - (*renumber_map_q_r).data(), - csc_q_r_graph_view.local_vertex_partition_range_first(), - csc_q_r_graph_view.local_vertex_partition_range_last(), - true); + renumber_ext_vertices( + handle, + renumbered_weak_edgelist_srcs.data(), + renumbered_weak_edgelist_srcs.size(), + (*renumber_map_q_r).data(), + csc_q_r_graph_view.local_vertex_partition_range_first(), + csc_q_r_graph_view.local_vertex_partition_range_last(), + true); - renumber_ext_vertices( - handle, - renumbered_weak_edgelist_dsts.data(), - renumbered_weak_edgelist_dsts.size(), - (*renumber_map_q_r).data(), - csc_q_r_graph_view.local_vertex_partition_range_first(), - csc_q_r_graph_view.local_vertex_partition_range_last(), - true); - - weak_edgelist_size = renumbered_weak_edgelist_srcs.size(); - weak_edgelist_first = - thrust::make_zip_iterator(renumbered_weak_edgelist_srcs.begin(), renumbered_weak_edgelist_dsts.begin()); - 
thrust::sort(handle.get_thrust_policy(), - weak_edgelist_first, - weak_edgelist_first + renumbered_weak_edgelist_srcs.size()); - } + renumber_ext_vertices( + handle, + renumbered_weak_edgelist_dsts.data(), + renumbered_weak_edgelist_dsts.size(), + (*renumber_map_q_r).data(), + csc_q_r_graph_view.local_vertex_partition_range_first(), + csc_q_r_graph_view.local_vertex_partition_range_last(), + true); + + auto weak_edgelist_size = renumbered_weak_edgelist_srcs.size(); + weak_edgelist_first = + thrust::make_zip_iterator(renumbered_weak_edgelist_srcs.begin(), renumbered_weak_edgelist_dsts.begin()); + thrust::sort(handle.get_thrust_policy(), + weak_edgelist_first, + weak_edgelist_first + renumbered_weak_edgelist_srcs.size()); + prev_chunk_size = 0; num_remaining_weak_edges = weak_edgelist_size; @@ -1087,58 +1089,58 @@ k_truss(raft::handle_t const& handle, prev_chunk_size, chunk_size, do_expensive_check); + + // Unrenumber + auto vertex_partition_range_lasts = std::make_optional>( + csc_q_r_graph_view.vertex_partition_range_lasts()); - if constexpr (multi_gpu) { - // Unrenumber - auto vertex_partition_range_lasts = std::make_optional>( - csc_q_r_graph_view.vertex_partition_range_lasts()); - - unrenumber_int_vertices( - handle, - std::get<0>(vertex_pair_buffer_p_q_edge_q_r).data(), - std::get<0>(vertex_pair_buffer_p_q_edge_q_r).size(), - (*renumber_map_q_r).data(), - *vertex_partition_range_lasts, - true); + unrenumber_int_vertices( + handle, + std::get<0>(vertex_pair_buffer_p_q_edge_q_r).data(), + std::get<0>(vertex_pair_buffer_p_q_edge_q_r).size(), + (*renumber_map_q_r).data(), + *vertex_partition_range_lasts, + true); - unrenumber_int_vertices( - handle, - std::get<1>(vertex_pair_buffer_p_q_edge_q_r).data(), - std::get<1>(vertex_pair_buffer_p_q_edge_q_r).size(), - (*renumber_map_q_r).data(), - *vertex_partition_range_lasts, - true); + unrenumber_int_vertices( + handle, + std::get<1>(vertex_pair_buffer_p_q_edge_q_r).data(), + 
std::get<1>(vertex_pair_buffer_p_q_edge_q_r).size(), + (*renumber_map_q_r).data(), + *vertex_partition_range_lasts, + true); - unrenumber_int_vertices( - handle, - std::get<0>(vertex_pair_buffer_p_r_edge_q_r).data(), - std::get<0>(vertex_pair_buffer_p_r_edge_q_r).size(), - (*renumber_map_q_r).data(), - *vertex_partition_range_lasts, - true); + unrenumber_int_vertices( + handle, + std::get<0>(vertex_pair_buffer_p_r_edge_q_r).data(), + std::get<0>(vertex_pair_buffer_p_r_edge_q_r).size(), + (*renumber_map_q_r).data(), + *vertex_partition_range_lasts, + true); - unrenumber_int_vertices( - handle, - std::get<1>(vertex_pair_buffer_p_r_edge_q_r).data(), - std::get<1>(vertex_pair_buffer_p_r_edge_q_r).size(), - (*renumber_map_q_r).data(), - *vertex_partition_range_lasts, - true); - - unrenumber_int_vertices(handle, - std::get<0>(vertex_pair_buffer_q_r).data(), - std::get<0>(vertex_pair_buffer_q_r).size(), - (*renumber_map_q_r).data(), - *vertex_partition_range_lasts, - true); - - unrenumber_int_vertices(handle, - std::get<1>(vertex_pair_buffer_q_r).data(), - std::get<1>(vertex_pair_buffer_q_r).size(), - (*renumber_map_q_r).data(), - *vertex_partition_range_lasts, - true); + unrenumber_int_vertices( + handle, + std::get<1>(vertex_pair_buffer_p_r_edge_q_r).data(), + std::get<1>(vertex_pair_buffer_p_r_edge_q_r).size(), + (*renumber_map_q_r).data(), + *vertex_partition_range_lasts, + true); + unrenumber_int_vertices(handle, + std::get<0>(vertex_pair_buffer_q_r).data(), + std::get<0>(vertex_pair_buffer_q_r).size(), + (*renumber_map_q_r).data(), + *vertex_partition_range_lasts, + true); + + unrenumber_int_vertices(handle, + std::get<1>(vertex_pair_buffer_q_r).data(), + std::get<1>(vertex_pair_buffer_q_r).size(), + (*renumber_map_q_r).data(), + *vertex_partition_range_lasts, + true); + + if constexpr (multi_gpu) { // Get global weak edges auto& comm = handle.get_comms(); auto const comm_rank = comm.get_rank(); // FIXME: for debugging @@ -1204,7 +1206,6 @@ k_truss(raft::handle_t 
const& handle, }); } else { - auto num_edges_not_overcomp = remove_overcompensating_edges q(0, handle.get_stream()); rmm::device_uvector idx(0, handle.get_stream()); - auto& comm = handle.get_comms(); // FIXME: remove after debugging std::tie(q, idx) = cugraph::extract_transform_v_frontier_outgoing_e( @@ -1465,13 +1468,15 @@ k_truss(raft::handle_t const& handle, vertex_frontier.bucket(0).clear(); - // Shuffle vertices - std::tie(q, idx) = - detail::shuffle_int_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( - handle, - std::move(q), - std::move(idx), - cur_graph_view.vertex_partition_range_lasts()); + if constexpr (multi_gpu) { + // Shuffle vertices + std::tie(q, idx) = + detail::shuffle_int_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( + handle, + std::move(q), + std::move(idx), + cur_graph_view.vertex_partition_range_lasts()); + } vertex_frontier.bucket(0).insert( thrust::make_zip_iterator(q.begin(), idx.begin()), @@ -1491,10 +1496,6 @@ k_truss(raft::handle_t const& handle, handle.get_stream()); // Get chunk global weak edges - // FIXME: To avoid copying and performing all-to-all - // when twice when unrolling (q, r) and (p, r) edges, unroll both edge type back to - // back in with the chunk global weak edgelist - if constexpr (multi_gpu) { // Get major weak edges auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); @@ -1885,6 +1886,7 @@ k_truss(raft::handle_t const& handle, }, edge_mask.mutable_view(), false); + cur_graph_view.attach_edge_mask(edge_mask.view()); } @@ -1900,9 +1902,8 @@ k_truss(raft::handle_t const& handle, edge_weight_view, std::optional>{std::nullopt}, std::optional>{std::nullopt}, - std::optional>(std::nullopt)); - - std::cout << "k_truss num_edges = " << edgelist_srcs.size() << std::endl; + std::make_optional(raft::device_span((*renumber_map).data(), (*renumber_map).size())) + ); std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts) = symmetrize_edgelist(handle, From 
bc558ffbe271eed6a3fbb1efbb5c216728e487ce Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 16 Jul 2024 18:40:36 -0700 Subject: [PATCH 71/93] remove unnecessary shuffle function --- cpp/src/community/k_truss_impl.cuh | 101 ++++++++++++++++------------- 1 file changed, 55 insertions(+), 46 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 1db5a0fb7d5..084348bcbbc 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -44,6 +44,8 @@ namespace cugraph { template +// Remname set difference +// break 'remove_overcompensating_edges' to only take set_q_querry edges to find the difference edge_t remove_overcompensating_edges(raft::handle_t const& handle, size_t buffer_size, EdgeIterator set_a_query_edges, @@ -90,45 +92,15 @@ edge_t remove_overcompensating_edges(raft::handle_t const& handle, auto& comm = handle.get_comms(); auto const comm_rank = comm.get_rank(); - + rmm::device_uvector set_a_query_edges_srcs(buffer_size, handle.get_stream()); rmm::device_uvector set_a_query_edges_dsts(buffer_size, handle.get_stream()); - std::vector rx_counts{}; - + thrust::copy(handle.get_thrust_policy(), set_a_query_edges, set_a_query_edges + buffer_size, thrust::make_zip_iterator(set_a_query_edges_srcs.begin(), set_a_query_edges_dsts.begin())); - - // group_by_count to get the destination of each edges - std::tie(set_a_query_edges_srcs, set_a_query_edges_dsts, std::ignore, std::ignore, std::ignore, rx_counts) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, std::move(set_a_query_edges_srcs), std::move(set_a_query_edges_dsts), std::nullopt, std::nullopt, std::nullopt, vertex_partition_range_lasts); - - - rmm::device_uvector has_edge(set_a_query_edges_srcs.size(), handle.get_stream()); // type should be size_t - - auto set_c_weak_edges_first = thrust::make_zip_iterator(set_c_weak_edges_srcs.begin(), set_c_weak_edges_dsts.begin()); // setBedges - auto 
set_c_weak_edges_last = thrust::make_zip_iterator(set_c_weak_edges_srcs.end(), set_c_weak_edges_dsts.end()); - auto set_a_query_edges_first = thrust::make_zip_iterator(set_a_query_edges_srcs.begin(), set_a_query_edges_dsts.begin()); - - // FIXME: Use thrust::transform instead - thrust::tabulate( - handle.get_thrust_policy(), - has_edge.begin(), - has_edge.end(), - [ - set_c_weak_edges_first, - set_c_weak_edges_last, - set_a_query_edges_first - ] __device__(auto i) { - return thrust::binary_search( - thrust::seq, set_c_weak_edges_first, set_c_weak_edges_last, set_a_query_edges_first[i]); - }); - + //auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); @@ -150,26 +122,54 @@ edge_t remove_overcompensating_edges(raft::handle_t const& handle, comm_size, major_comm_size, minor_comm_size}; - + auto d_tx_counts = cugraph::groupby_and_count( thrust::make_zip_iterator(set_a_query_edges_srcs.begin(), set_a_query_edges_dsts.begin()), thrust::make_zip_iterator(set_a_query_edges_srcs.end(), set_a_query_edges_dsts.end()), [func, major_comm_size]__device__(auto val) { - return func(val) % major_comm_size; + return func(val); //% major_comm_size; }, - major_comm_size, + comm_size, + //major_comm_size, std::numeric_limits::max(), handle.get_stream()); std::vector h_tx_counts{d_tx_counts.size()}; - + std::vector h_rx_counts{}; + raft::update_host(h_tx_counts.data(), d_tx_counts.data(), d_tx_counts.size(), handle.get_stream()); + std::tie(set_a_query_edges_srcs, h_rx_counts) = + shuffle_values(handle.get_comms(), set_a_query_edges_srcs.begin(), h_tx_counts, handle.get_stream()); + + std::tie(set_a_query_edges_dsts, std::ignore) = + shuffle_values(handle.get_comms(), set_a_query_edges_dsts.begin(), h_tx_counts, handle.get_stream()); + + rmm::device_uvector has_edge(set_a_query_edges_srcs.size(), handle.get_stream()); // type should be size_t + + auto set_c_weak_edges_first = 
thrust::make_zip_iterator(set_c_weak_edges_srcs.begin(), set_c_weak_edges_dsts.begin()); // setBedges + auto set_c_weak_edges_last = thrust::make_zip_iterator(set_c_weak_edges_srcs.end(), set_c_weak_edges_dsts.end()); + auto set_a_query_edges_first = thrust::make_zip_iterator(set_a_query_edges_srcs.begin(), set_a_query_edges_dsts.begin()); + + // FIXME: Use thrust::transform instead + thrust::tabulate( + handle.get_thrust_policy(), + has_edge.begin(), + has_edge.end(), + [ + set_c_weak_edges_first, + set_c_weak_edges_last, + set_a_query_edges_first + ] __device__(auto i) { + return thrust::binary_search( + thrust::seq, set_c_weak_edges_first, set_c_weak_edges_last, set_a_query_edges_first[i]); + }); + std::tie(has_edge, std::ignore) = - shuffle_values(handle.get_comms(), has_edge.begin(), h_tx_counts, handle.get_stream()); + shuffle_values(handle.get_comms(), has_edge.begin(), h_rx_counts, handle.get_stream()); auto set_a_and_b_query_edges_first = thrust::make_zip_iterator(set_a_query_edges, set_b_query_edges); auto set_a_and_b_query_edges_last = thrust::make_zip_iterator( @@ -182,8 +182,6 @@ edge_t remove_overcompensating_edges(raft::handle_t const& handle, thrust::make_zip_iterator(set_b_query_edges, has_edge.begin()) ); - - auto edges_not_overcomp = thrust::remove_if( handle.get_thrust_policy(), set_a_and_b_query_edges_first, @@ -583,6 +581,7 @@ k_truss(raft::handle_t const& handle, edge_t k, bool do_expensive_check) { + // 1. Check input arguments. CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); @@ -636,7 +635,7 @@ k_truss(raft::handle_t const& handle, } // 2. Find (k-1)-core and exclude edges that do not belong to (k-1)-core - #if 0 + { auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; @@ -683,6 +682,11 @@ k_truss(raft::handle_t const& handle, true); modified_graph_view = (*modified_graph).view(); + /* + edge_weight_view = + edge_weight ? 
std::make_optional((*edge_weight).view()) + : std::optional>{std::nullopt}; + */ if (renumber_map) { // collapse renumber_map unrenumber_int_vertices(handle, @@ -691,9 +695,9 @@ k_truss(raft::handle_t const& handle, (*renumber_map).data(), *vertex_partition_range_lasts); } + renumber_map = std::move(tmp_renumber_map); } - #endif // 3. Keep only the edges from a low-degree vertex to a high-degree vertex. @@ -795,7 +799,6 @@ k_truss(raft::handle_t const& handle, // FIXME: Replace by lambda function extract_weak_edges{k}); - auto num_weak_edges = weak_edgelist_srcs.size(); if constexpr (multi_gpu) { num_weak_edges = host_scalar_allreduce(handle.get_comms(), num_weak_edges, raft::comms::op_t::SUM, handle.get_stream()); @@ -867,7 +870,6 @@ k_truss(raft::handle_t const& handle, std::nullopt, cur_graph_view.vertex_partition_range_lasts()); } - decrease_triangle_count( handle, @@ -1597,7 +1599,7 @@ k_truss(raft::handle_t const& handle, decltype(get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r)), false, multi_gpu, - true // FIXME: Currently using global weak edges for validation purposes + false // FIXME: Currently using global weak edges for validation purposes >( handle, q_closing.size(), @@ -1611,6 +1613,13 @@ k_truss(raft::handle_t const& handle, cur_graph_view.vertex_partition_range_lasts() ); + + + // FIXME: No need to resize the dataframes buffer now. + resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); + resize_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); + + // FIXME: No need to resize the dataframes buffer now. 
resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); resize_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); @@ -1899,7 +1908,7 @@ k_truss(raft::handle_t const& handle, decompress_to_edgelist( handle, cur_graph_view, - edge_weight_view, + edge_weight_view ? std::make_optional(*edge_weight_view) : std::nullopt, std::optional>{std::nullopt}, std::optional>{std::nullopt}, std::make_optional(raft::device_span((*renumber_map).data(), (*renumber_map).size())) From c76746b49b417560670e1c5f4bb9006048988e9f Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 18 Jul 2024 13:56:59 -0700 Subject: [PATCH 72/93] update mg tests for ktruss --- cpp/tests/community/mg_k_truss_test.cpp | 236 ++++++++++++++---------- 1 file changed, 136 insertions(+), 100 deletions(-) diff --git a/cpp/tests/community/mg_k_truss_test.cpp b/cpp/tests/community/mg_k_truss_test.cpp index 51c12423b77..28d018019a9 100644 --- a/cpp/tests/community/mg_k_truss_test.cpp +++ b/cpp/tests/community/mg_k_truss_test.cpp @@ -39,16 +39,19 @@ #include -struct EdgeTriangleCount_Usecase { +struct KTruss_Usecase { + int32_t k_{3}; + bool test_weighted_{false}; + // FIXME: test edge mask bool edge_masking_{false}; bool check_correctness_{true}; }; template -class Tests_MGEdgeTriangleCount - : public ::testing::TestWithParam> { +class Tests_MGKTruss + : public ::testing::TestWithParam> { public: - Tests_MGEdgeTriangleCount() {} + Tests_MGKTruss() {} static void SetUpTestCase() { handle_ = cugraph::test::initialize_mg_handle(); } @@ -57,9 +60,9 @@ class Tests_MGEdgeTriangleCount virtual void SetUp() {} virtual void TearDown() {} - // Compare the results of running EdgeTriangleCount on multiple GPUs to that of a single-GPU run + // Compare the results of running KTruss on multiple GPUs to that of a single-GPU run template - void run_current_test(EdgeTriangleCount_Usecase const& edge_triangle_count_usecase, + void 
run_current_test(KTruss_Usecase const& k_truss_usecase, input_usecase_t const& input_usecase) { using weight_t = float; @@ -74,11 +77,9 @@ class Tests_MGEdgeTriangleCount hr_timer.start("MG Construct graph"); } - cugraph::graph_t mg_graph(*handle_); - std::optional> mg_renumber_map{std::nullopt}; - std::tie(mg_graph, std::ignore, mg_renumber_map) = + auto [mg_graph, edge_weight, mg_renumber_map] = cugraph::test::construct_graph( - *handle_, input_usecase, false, true, false, true); + *handle_, input_usecase, k_truss_usecase.test_weighted_, true, false, true); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -90,33 +91,27 @@ class Tests_MGEdgeTriangleCount auto mg_graph_view = mg_graph.view(); std::optional> edge_mask{std::nullopt}; - if (edge_triangle_count_usecase.edge_masking_) { + if (k_truss_usecase.edge_masking_) { edge_mask = cugraph::test::generate::edge_property( *handle_, mg_graph_view, 2); mg_graph_view.attach_edge_mask((*edge_mask).view()); } - // 2. run MG EdgeTriangleCount + // 2. run MG KTruss if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement handle_->get_comms().barrier(); - hr_timer.start("MG EdgeTriangleCount"); + hr_timer.start("MG KTruss"); } - /* - auto d_mg_cugraph_results = - cugraph::edge_triangle_count(*handle_, mg_graph_view); - */ - + auto mg_edge_weight_view = edge_weight ? std::make_optional((*edge_weight).view()) : std::nullopt; auto [d_cugraph_srcs, d_cugraph_dsts, d_cugraph_wgts] = cugraph::k_truss( *handle_, mg_graph_view, - // edge_weight ? std::make_optional((*edge_weight).view()) : std::nullopt, - std::nullopt, // FIXME: test weights - // k_truss_usecase.k_, - 4, + mg_edge_weight_view, + k_truss_usecase.k_, false); if (cugraph::test::g_perf) { @@ -124,75 +119,120 @@ class Tests_MGEdgeTriangleCount handle_->get_comms().barrier(); hr_timer.stop(); hr_timer.display_and_clear(std::cout); - } + } // 3. 
Compare SG & MG results -#if 0 - if (edge_triangle_count_usecase.check_correctness_) { - // 3-1. Convert to SG graph + if (k_truss_usecase.check_correctness_) { + cugraph::unrenumber_int_vertices( + *handle_, + d_cugraph_srcs.data(), + d_cugraph_srcs.size(), + (*mg_renumber_map).data(), + mg_graph_view.vertex_partition_range_lasts()); - cugraph::graph_t sg_graph(*handle_); - std::optional< - cugraph::edge_property_t, edge_t>> - d_sg_cugraph_results{std::nullopt}; - std::tie(sg_graph, std::ignore, d_sg_cugraph_results, std::ignore) = + cugraph::unrenumber_int_vertices( + *handle_, + d_cugraph_dsts.data(), + d_cugraph_dsts.size(), + (*mg_renumber_map).data(), + mg_graph_view.vertex_partition_range_lasts()); + + auto global_d_cugraph_srcs = cugraph::test::device_gatherv( + *handle_, raft::device_span(d_cugraph_srcs.data(), d_cugraph_srcs.size())); + + auto global_d_cugraph_dsts = cugraph::test::device_gatherv( + *handle_, raft::device_span(d_cugraph_dsts.data(), d_cugraph_srcs.size())); + + rmm::device_uvector d_sorted_cugraph_srcs{0, handle_->get_stream()}; + rmm::device_uvector d_sorted_cugraph_dsts{0, handle_->get_stream()}; + rmm::device_uvector d_sorted_cugraph_wgts{0, handle_->get_stream()}; + + if (edge_weight) { + auto global_d_cugraph_wgts = cugraph::test::device_gatherv( + *handle_, raft::device_span((*d_cugraph_wgts).data(), (*d_cugraph_wgts).size())); + + std::tie(d_sorted_cugraph_srcs, d_sorted_cugraph_dsts, d_sorted_cugraph_wgts) = + cugraph::test::sort_by_key( + *handle_, global_d_cugraph_srcs, global_d_cugraph_dsts, global_d_cugraph_wgts); + + } else { + std::tie(d_sorted_cugraph_srcs, d_sorted_cugraph_dsts) = + cugraph::test::sort(*handle_, global_d_cugraph_srcs, global_d_cugraph_dsts); + } + + // 3-1. 
Convert to SG graph + auto [sg_graph, sg_edge_weights, sg_edge_ids, sg_number_map] = cugraph::test::mg_graph_to_sg_graph( *handle_, mg_graph_view, - std::optional>{std::nullopt}, - // FIXME: Update 'create_graph_from_edgelist' to support int32_t and int64_t values - std::make_optional(d_mg_cugraph_results.view()), + mg_edge_weight_view, + std::optional>{std::nullopt}, std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); + - if (handle_->get_comms().get_rank() == int{0}) { - // 3-2. Convert the MG triangle counts stored as 'edge_property_t' to device vector - - auto [edgelist_srcs, - edgelist_dsts, - d_edgelist_weights, - d_edge_triangle_counts, - d_edgelist_type] = - cugraph::decompress_to_edgelist( - *handle_, - sg_graph.view(), - std::optional>{std::nullopt}, - // FIXME: Update 'decompress_edgelist' to support int32_t and int64_t values - std::make_optional((*d_sg_cugraph_results).view()), - std::optional>{std::nullopt}, - std::optional>{ - std::nullopt}); // FIXME: No longer needed - - // 3-3. Run SG EdgeTriangleCount - - auto ref_d_sg_cugraph_results = - cugraph::edge_triangle_count(*handle_, sg_graph.view()); - auto [ref_edgelist_srcs, - ref_edgelist_dsts, - ref_d_edgelist_weights, - ref_d_edge_triangle_counts] = - cugraph::decompress_to_edgelist( - *handle_, - sg_graph.view(), - std::optional>{std::nullopt}, - std::make_optional(ref_d_sg_cugraph_results.view()), - std::optional>{ - std::nullopt}); // FIXME: No longer needed + auto sg_edge_weight_view = sg_edge_weights ? std::make_optional((*sg_edge_weights).view()) : std::nullopt; - // 3-4. 
Compare - - auto h_mg_edge_triangle_counts = cugraph::test::to_host(*handle_, *d_edge_triangle_counts); - auto h_sg_edge_triangle_counts = - cugraph::test::to_host(*handle_, *ref_d_edge_triangle_counts); + if (handle_->get_comms().get_rank() == int{0}) { + auto sg_graph_view = sg_graph.view(); - ASSERT_TRUE(std::equal(h_mg_edge_triangle_counts.begin(), - h_mg_edge_triangle_counts.end(), - h_sg_edge_triangle_counts.begin())); + // 3-2. Run SG KTruss + auto [ref_d_cugraph_srcs, ref_d_cugraph_dsts, ref_d_cugraph_wgts] = + cugraph::k_truss( + *handle_, + sg_graph_view, + sg_edge_weight_view, + k_truss_usecase.k_, + false); + + rmm::device_uvector d_sorted_ref_cugraph_srcs{0, handle_->get_stream()}; + rmm::device_uvector d_sorted_ref_cugraph_dsts{0, handle_->get_stream()}; + rmm::device_uvector d_sorted_ref_cugraph_wgts{0, handle_->get_stream()}; + + if (edge_weight) { + std::tie(d_sorted_ref_cugraph_srcs, d_sorted_ref_cugraph_dsts, d_sorted_ref_cugraph_wgts) = + cugraph::test::sort_by_key( + *handle_, ref_d_cugraph_srcs, ref_d_cugraph_dsts, *ref_d_cugraph_wgts); + + } else { + std::tie(d_sorted_ref_cugraph_srcs, d_sorted_ref_cugraph_dsts) = + cugraph::test::sort( + *handle_, ref_d_cugraph_srcs, ref_d_cugraph_dsts); + } + + // 3-3. 
Compare + auto h_cugraph_srcs = cugraph::test::to_host(*handle_, d_sorted_cugraph_srcs); + auto h_cugraph_dsts = cugraph::test::to_host(*handle_, d_sorted_cugraph_dsts); + auto ref_h_cugraph_srcs = + cugraph::test::to_host(*handle_, d_sorted_ref_cugraph_srcs); + auto ref_h_cugraph_dsts = + cugraph::test::to_host(*handle_, d_sorted_ref_cugraph_dsts); + + ASSERT_TRUE(std::equal(h_cugraph_srcs.begin(), + h_cugraph_srcs.end(), + ref_h_cugraph_srcs.begin())); + + ASSERT_TRUE(std::equal(h_cugraph_dsts.begin(), + h_cugraph_dsts.end(), + ref_h_cugraph_dsts.begin())); + + if (edge_weight) { + auto ref_h_cugraph_wgts = + cugraph::test::to_host(*handle_, d_sorted_ref_cugraph_wgts); + + auto h_cugraph_wgts = + cugraph::test::to_host(*handle_, d_sorted_cugraph_wgts); + + ASSERT_TRUE(std::equal(h_cugraph_wgts.begin(), + h_cugraph_wgts.end(), + ref_h_cugraph_wgts.begin())); + } + } } -#endif + } private: @@ -200,60 +240,57 @@ class Tests_MGEdgeTriangleCount }; template -std::unique_ptr Tests_MGEdgeTriangleCount::handle_ = nullptr; +std::unique_ptr Tests_MGKTruss::handle_ = nullptr; -using Tests_MGEdgeTriangleCount_File = Tests_MGEdgeTriangleCount; -// using Tests_MGEdgeTriangleCount_Rmat = Tests_MGEdgeTriangleCount; +using Tests_MGKTruss_File = Tests_MGKTruss; +//using Tests_MGKTruss_Rmat = Tests_MGKTruss; -TEST_P(Tests_MGEdgeTriangleCount_File, CheckInt32Int32) +TEST_P(Tests_MGKTruss_File, CheckInt32Int32) { auto param = GetParam(); run_current_test(std::get<0>(param), std::get<1>(param)); } -#if 0 -TEST_P(Tests_MGEdgeTriangleCount_Rmat, CheckInt32Int32) + +TEST_P(Tests_MGKTruss_Rmat, CheckInt32Int32) { auto param = GetParam(); run_current_test( std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } -TEST_P(Tests_MGEdgeTriangleCount_Rmat, CheckInt32Int64) +TEST_P(Tests_MGKTruss_Rmat, CheckInt32Int64) { auto param = GetParam(); run_current_test( std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } 
-TEST_P(Tests_MGEdgeTriangleCount_Rmat, CheckInt64Int64) +TEST_P(Tests_MGKTruss_Rmat, CheckInt64Int64) { auto param = GetParam(); run_current_test( std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } -#endif INSTANTIATE_TEST_SUITE_P( file_tests, - Tests_MGEdgeTriangleCount_File, + Tests_MGKTruss_File, ::testing::Combine( // enable correctness checks - ::testing::Values(EdgeTriangleCount_Usecase{false, false} - // EdgeTriangleCount_Usecase{true, true} + ::testing::Values(KTruss_Usecase{4, false, false, true}, + KTruss_Usecase{5, true, false, true} ), - ::testing::Values( - cugraph::test::File_Usecase("/raid/jnke/optimize_ktruss/datasets/test_datasets.mtx") - // cugraph::test::File_Usecase("test/datasets/dolphins.mtx") + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/dolphins.mtx") ))); -#if 0 INSTANTIATE_TEST_SUITE_P( rmat_small_tests, - Tests_MGEdgeTriangleCount_Rmat, + Tests_MGKTruss_Rmat, ::testing::Combine( - ::testing::Values(EdgeTriangleCount_Usecase{false, true}, - EdgeTriangleCount_Usecase{true, true}), - ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); + ::testing::Values(KTruss_Usecase{8, false, false, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(20, 16, 0.57, 0.19, 0.19, 0, true, false)))); + INSTANTIATE_TEST_SUITE_P( rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with @@ -261,11 +298,10 @@ INSTANTIATE_TEST_SUITE_P( vertex & edge type combination) by command line arguments and do not include more than one Rmat_Usecase that differ only in scale or edge factor (to avoid running same benchmarks more than once) */ - Tests_MGEdgeTriangleCount_Rmat, + Tests_MGKTruss_Rmat, ::testing::Combine( - ::testing::Values(EdgeTriangleCount_Usecase{false, false}, - EdgeTriangleCount_Usecase{true, false}), + ::testing::Values(KTruss_Usecase{4, false, false, false}, + 
KTruss_Usecase{5, false, false, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, true, false)))); -#endif CUGRAPH_MG_TEST_PROGRAM_MAIN() From 3c817bd95897bb3f1677bc2389ee475e6747b293 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 18 Jul 2024 13:58:14 -0700 Subject: [PATCH 73/93] undo changes to test --- cpp/tests/community/k_truss_test.cpp | 36 +++++++--------------------- 1 file changed, 9 insertions(+), 27 deletions(-) diff --git a/cpp/tests/community/k_truss_test.cpp b/cpp/tests/community/k_truss_test.cpp index 1896474062a..95e06d2e2d3 100644 --- a/cpp/tests/community/k_truss_test.cpp +++ b/cpp/tests/community/k_truss_test.cpp @@ -245,19 +245,13 @@ class Tests_KTruss : public ::testing::TestWithParam{ weight_t{1e-3}, weight_t{(weight_t{1} / static_cast((h_cugraph_wgts).size())) * weight_t{1e-3}}}; - EXPECT_TRUE(std::equal((h_cugraph_wgts).begin(), (h_cugraph_wgts).end(), (*h_reference_wgts).begin(), @@ -268,14 +262,14 @@ class Tests_KTruss : public ::testing::TestWithParam; -//using Tests_KTruss_Rmat = Tests_KTruss; +using Tests_KTruss_Rmat = Tests_KTruss; TEST_P(Tests_KTruss_File, CheckInt32Int32Float) { run_current_test( override_File_Usecase_with_cmd_line_arguments(GetParam())); } -#if 0 + TEST_P(Tests_KTruss_File, CheckInt64Int64Float) { run_current_test( @@ -293,25 +287,14 @@ TEST_P(Tests_KTruss_Rmat, CheckInt64Int64Float) run_current_test( override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); } -#endif - -INSTANTIATE_TEST_SUITE_P( - simple_test, - Tests_KTruss_File, - ::testing::Combine( - // enable correctness checks - ::testing::Values(//KTruss_Usecase{4, true, true}, - KTruss_Usecase{4, true, true}), - ::testing::Values(cugraph::test::File_Usecase("/raid/jnke/optimize_ktruss/datasets/test_datasets.mtx")))); -#if 0 INSTANTIATE_TEST_SUITE_P( simple_test, Tests_KTruss_File, ::testing::Combine( // enable correctness checks - ::testing::Values(KTruss_Usecase{5, true, true}, - KTruss_Usecase{4, true, true}, + 
::testing::Values(KTruss_Usecase{5, true, false}, + KTruss_Usecase{4, true, false}, KTruss_Usecase{9, true, true}, KTruss_Usecase{7, true, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/netscience.mtx"), @@ -320,8 +303,8 @@ INSTANTIATE_TEST_SUITE_P( INSTANTIATE_TEST_SUITE_P(rmat_small_test, Tests_KTruss_Rmat, // enable correctness checks - ::testing::Combine(::testing::Values(KTruss_Usecase{5, true, true}, - KTruss_Usecase{4, true, true}, + ::testing::Combine(::testing::Values(KTruss_Usecase{5, false, true}, + KTruss_Usecase{4, false, true}, KTruss_Usecase{9, true, true}, KTruss_Usecase{7, true, true}), ::testing::Values(cugraph::test::Rmat_Usecase( @@ -337,8 +320,7 @@ INSTANTIATE_TEST_SUITE_P( // disable correctness checks for large graphs // FIXME: High memory footprint. Perform nbr_intersection in chunks. ::testing::Combine( - ::testing::Values(KTruss_Usecase{4, false, false}), - ::testing::Values(cugraph::test::Rmat_Usecase(18, 16, 0.57, 0.19, 0.19, 0, true, false)))); -#endif + ::testing::Values(KTruss_Usecase{12, false, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(14, 16, 0.57, 0.19, 0.19, 0, true, false)))); -CUGRAPH_TEST_PROGRAM_MAIN() +CUGRAPH_TEST_PROGRAM_MAIN() \ No newline at end of file From b16e37cabe5814a1f5a08d1dba7b14f3450d86e8 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 18 Jul 2024 14:00:06 -0700 Subject: [PATCH 74/93] add fixme and remove unused arguments --- cpp/src/community/k_truss_impl.cuh | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 084348bcbbc..ce06ccf8a62 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -347,6 +347,7 @@ struct extract_q_idx { } }; +// FIXME: Remove multi_gpu as it is not used. 
template struct extract_q_idx_closing { using return_type = thrust::optional>; @@ -682,11 +683,6 @@ k_truss(raft::handle_t const& handle, true); modified_graph_view = (*modified_graph).view(); - /* - edge_weight_view = - edge_weight ? std::make_optional((*edge_weight).view()) - : std::optional>{std::nullopt}; - */ if (renumber_map) { // collapse renumber_map unrenumber_int_vertices(handle, @@ -799,6 +795,7 @@ k_truss(raft::handle_t const& handle, // FIXME: Replace by lambda function extract_weak_edges{k}); + auto num_weak_edges = weak_edgelist_srcs.size(); if constexpr (multi_gpu) { num_weak_edges = host_scalar_allreduce(handle.get_comms(), num_weak_edges, raft::comms::op_t::SUM, handle.get_stream()); @@ -1599,7 +1596,7 @@ k_truss(raft::handle_t const& handle, decltype(get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r)), false, multi_gpu, - false // FIXME: Currently using global weak edges for validation purposes + true // FIXME: Currently using global weak edges for validation purposes >( handle, q_closing.size(), @@ -1663,9 +1660,18 @@ k_truss(raft::handle_t const& handle, } else { // FIXME: refactor SG to use r_closing + auto weak_edgelist_dsts_tags_first = thrust::make_zip_iterator( weak_edgelist_dsts.begin(), std::get<1>(vertex_pair_buffer_p_tag).begin() ); + + thrust::sort_by_key(handle.get_thrust_policy(), + weak_edgelist_dsts_tags_first, + weak_edgelist_dsts_tags_first + weak_edgelist_dsts.size(), + //major_weak_edgelist_srcs.begin() + weak_edgelist_srcs.begin() + ); + auto [q_closing, r_closing, p_closing, idx_closing] = cugraph::extract_transform_v_frontier_outgoing_e( handle, @@ -1720,6 +1726,12 @@ k_truss(raft::handle_t const& handle, std::get<0>(vertex_pair_buffer_q_r_edge_p_r).begin(), std::get<1>(vertex_pair_buffer_q_r_edge_p_r).begin()) ); + // weak_edgelist_first + thrust::sort(handle.get_thrust_policy(), + weak_edgelist_first, + weak_edgelist_first + weak_edgelist_dsts.size() + ); + auto num_edges_not_overcomp_p_q = 
remove_overcompensating_edges Date: Thu, 18 Jul 2024 14:02:19 -0700 Subject: [PATCH 75/93] instantiate type combinations --- cpp/src/community/k_truss_mg_v32_e32.cu | 41 ++----------------------- cpp/src/community/k_truss_mg_v32_e64.cu | 41 +++++++++++++++++++++++++ cpp/src/community/k_truss_mg_v64_e64.cu | 41 +++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 39 deletions(-) create mode 100644 cpp/src/community/k_truss_mg_v32_e64.cu create mode 100644 cpp/src/community/k_truss_mg_v64_e64.cu diff --git a/cpp/src/community/k_truss_mg_v32_e32.cu b/cpp/src/community/k_truss_mg_v32_e32.cu index 04845d5b73d..69690e3c3e6 100644 --- a/cpp/src/community/k_truss_mg_v32_e32.cu +++ b/cpp/src/community/k_truss_mg_v32_e32.cu @@ -18,7 +18,7 @@ namespace cugraph { -// SG instantiation +// MG instantiation template std::tuple, rmm::device_uvector, @@ -28,7 +28,7 @@ k_truss(raft::handle_t const& handle, std::optional> edge_weight_view, int32_t k, bool do_expensive_check); -/* + template std::tuple, rmm::device_uvector, std::optional>> @@ -38,41 +38,4 @@ k_truss(raft::handle_t const& handle, int32_t k, bool do_expensive_check); -template std::tuple, - rmm::device_uvector, - std::optional>> -k_truss(raft::handle_t const& handle, - graph_view_t const& graph_view, - std::optional> edge_weight_view, - int64_t k, - bool do_expensive_check); - -template std::tuple, - rmm::device_uvector, - std::optional>> -k_truss(raft::handle_t const& handle, - graph_view_t const& graph_view, - std::optional> edge_weight_view, - int64_t k, - bool do_expensive_check); - -template std::tuple, - rmm::device_uvector, - std::optional>> -k_truss(raft::handle_t const& handle, - graph_view_t const& graph_view, - std::optional> edge_weight_view, - int64_t k, - bool do_expensive_check); - -template std::tuple, - rmm::device_uvector, - std::optional>> -k_truss(raft::handle_t const& handle, - graph_view_t const& graph_view, - std::optional> edge_weight_view, - int64_t k, - bool do_expensive_check); -*/ - } 
// namespace cugraph diff --git a/cpp/src/community/k_truss_mg_v32_e64.cu b/cpp/src/community/k_truss_mg_v32_e64.cu new file mode 100644 index 00000000000..639269efc97 --- /dev/null +++ b/cpp/src/community/k_truss_mg_v32_e64.cu @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "community/k_truss_impl.cuh" + +namespace cugraph { + +// MG instantiation + +template std::tuple, + rmm::device_uvector, + std::optional>> +k_truss(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int64_t k, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>> +k_truss(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int64_t k, + bool do_expensive_check); + +} // namespace cugraph diff --git a/cpp/src/community/k_truss_mg_v64_e64.cu b/cpp/src/community/k_truss_mg_v64_e64.cu new file mode 100644 index 00000000000..3fda694f342 --- /dev/null +++ b/cpp/src/community/k_truss_mg_v64_e64.cu @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "community/k_truss_impl.cuh" + +namespace cugraph { + +// MG instantiation + +template std::tuple, + rmm::device_uvector, + std::optional>> +k_truss(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int64_t k, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>> +k_truss(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + int64_t k, + bool do_expensive_check); + +} // namespace cugraph From e6698f3f12d3de34fc67ddd2879efb6a47ab872f Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 18 Jul 2024 14:03:48 -0700 Subject: [PATCH 76/93] update cmake list --- cpp/CMakeLists.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 64a5fb0f17b..c78108a4b42 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -283,12 +283,12 @@ set(CUGRAPH_SOURCES src/community/egonet_mg_v64_e64.cu src/community/egonet_mg_v32_e32.cu src/community/egonet_mg_v32_e64.cu - #src/community/k_truss_sg_v64_e64.cu + src/community/k_truss_sg_v64_e64.cu src/community/k_truss_sg_v32_e32.cu - #src/community/k_truss_sg_v32_e64.cu - #src/community/k_truss_mg_v64_e64.cu + src/community/k_truss_sg_v32_e64.cu + src/community/k_truss_mg_v64_e64.cu src/community/k_truss_mg_v32_e32.cu - #src/community/k_truss_mg_v32_e64.cu + src/community/k_truss_mg_v32_e64.cu src/lookup/lookup_src_dst_mg_v32_e32.cu src/lookup/lookup_src_dst_mg_v32_e64.cu 
src/lookup/lookup_src_dst_mg_v64_e64.cu From 1cd35c0ae8d92b13ae542d14360b24c840f3a2a2 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 18 Jul 2024 14:06:28 -0700 Subject: [PATCH 77/93] fix style --- cpp/src/community/k_truss_impl.cuh | 2003 ++++++++++++---------- cpp/src/structure/coarsen_graph_impl.cuh | 3 +- cpp/tests/community/k_truss_test.cpp | 2 +- cpp/tests/community/mg_k_truss_test.cpp | 122 +- 4 files changed, 1128 insertions(+), 1002 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index ce06ccf8a62..5d98286698a 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -15,9 +15,9 @@ */ #pragma once -#include "prims/extract_transform_v_frontier_outgoing_e.cuh" #include "prims/edge_bucket.cuh" #include "prims/extract_transform_e.cuh" +#include "prims/extract_transform_v_frontier_outgoing_e.cuh" #include "prims/fill_edge_property.cuh" #include "prims/transform_e.cuh" #include "prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh" @@ -43,8 +43,13 @@ namespace cugraph { -template -// Remname set difference +template +// Remname set difference // break 'remove_overcompensating_edges' to only take set_q_querry edges to find the difference edge_t remove_overcompensating_edges(raft::handle_t const& handle, size_t buffer_size, @@ -57,7 +62,6 @@ edge_t remove_overcompensating_edges(raft::handle_t const& handle, vertex_t number_of_local_edge_partitions, std::vector vertex_partition_range_lasts) { - // To avoid over-compensating, check whether the 'potential_closing_edges' // are within the weak edges. 
If yes, those edges were already unrolled @@ -65,51 +69,48 @@ edge_t remove_overcompensating_edges(raft::handle_t const& handle, // FIXME: can use thrust::set_difference for SG auto edges_not_overcomp = thrust::remove_if( handle.get_thrust_policy(), - thrust::make_zip_iterator(set_a_query_edges, - set_b_query_edges), - thrust::make_zip_iterator(set_a_query_edges + buffer_size, - set_b_query_edges + buffer_size), - [set_c_weak_edges_first = - thrust::make_zip_iterator(global_set_c_weak_edges_srcs.begin(), global_set_c_weak_edges_dsts.begin()), - set_c_weak_edges_last = thrust::make_zip_iterator(global_set_c_weak_edges_srcs.end(), - global_set_c_weak_edges_dsts.end())] __device__(auto e) { - + thrust::make_zip_iterator(set_a_query_edges, set_b_query_edges), + thrust::make_zip_iterator(set_a_query_edges + buffer_size, set_b_query_edges + buffer_size), + [set_c_weak_edges_first = thrust::make_zip_iterator(global_set_c_weak_edges_srcs.begin(), + global_set_c_weak_edges_dsts.begin()), + set_c_weak_edges_last = + thrust::make_zip_iterator(global_set_c_weak_edges_srcs.end(), + global_set_c_weak_edges_dsts.end())] __device__(auto e) { auto set_a_query_edge = thrust::get<0>(e); if constexpr (is_q_r_edge) { - set_a_query_edge = thrust::make_tuple(thrust::get<1>(set_a_query_edge), thrust::get<0>(set_a_query_edge)); - + set_a_query_edge = + thrust::make_tuple(thrust::get<1>(set_a_query_edge), thrust::get<0>(set_a_query_edge)); }; return thrust::binary_search( thrust::seq, set_c_weak_edges_first, set_c_weak_edges_last, set_a_query_edge); }); - auto dist = thrust::distance(thrust::make_zip_iterator(set_a_query_edges, - set_b_query_edges), - edges_not_overcomp); + auto dist = thrust::distance(thrust::make_zip_iterator(set_a_query_edges, set_b_query_edges), + edges_not_overcomp); return dist; } else { - auto& comm = handle.get_comms(); auto const comm_rank = comm.get_rank(); - + rmm::device_uvector set_a_query_edges_srcs(buffer_size, handle.get_stream()); rmm::device_uvector 
set_a_query_edges_dsts(buffer_size, handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - set_a_query_edges, - set_a_query_edges + buffer_size, - thrust::make_zip_iterator(set_a_query_edges_srcs.begin(), set_a_query_edges_dsts.begin())); - - //auto& comm = handle.get_comms(); + thrust::copy( + handle.get_thrust_policy(), + set_a_query_edges, + set_a_query_edges + buffer_size, + thrust::make_zip_iterator(set_a_query_edges_srcs.begin(), set_a_query_edges_dsts.begin())); + + // auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); auto const major_comm_size = major_comm.get_size(); auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto const minor_comm_size = minor_comm.get_size(); - rmm::device_uvector d_vertex_partition_range_lasts(vertex_partition_range_lasts.size(), - handle.get_stream()); + rmm::device_uvector d_vertex_partition_range_lasts( + vertex_partition_range_lasts.size(), handle.get_stream()); raft::update_device(d_vertex_partition_range_lasts.data(), vertex_partition_range_lasts.data(), @@ -126,83 +127,79 @@ edge_t remove_overcompensating_edges(raft::handle_t const& handle, auto d_tx_counts = cugraph::groupby_and_count( thrust::make_zip_iterator(set_a_query_edges_srcs.begin(), set_a_query_edges_dsts.begin()), thrust::make_zip_iterator(set_a_query_edges_srcs.end(), set_a_query_edges_dsts.end()), - [func, major_comm_size]__device__(auto val) { - return func(val); //% major_comm_size; + [func, major_comm_size] __device__(auto val) { + return func(val); //% major_comm_size; }, comm_size, - //major_comm_size, + // major_comm_size, std::numeric_limits::max(), handle.get_stream()); - + std::vector h_tx_counts{d_tx_counts.size()}; std::vector h_rx_counts{}; - - raft::update_host(h_tx_counts.data(), - d_tx_counts.data(), - d_tx_counts.size(), - handle.get_stream()); - - std::tie(set_a_query_edges_srcs, 
h_rx_counts) = - shuffle_values(handle.get_comms(), set_a_query_edges_srcs.begin(), h_tx_counts, handle.get_stream()); - - std::tie(set_a_query_edges_dsts, std::ignore) = - shuffle_values(handle.get_comms(), set_a_query_edges_dsts.begin(), h_tx_counts, handle.get_stream()); - - rmm::device_uvector has_edge(set_a_query_edges_srcs.size(), handle.get_stream()); // type should be size_t - - auto set_c_weak_edges_first = thrust::make_zip_iterator(set_c_weak_edges_srcs.begin(), set_c_weak_edges_dsts.begin()); // setBedges - auto set_c_weak_edges_last = thrust::make_zip_iterator(set_c_weak_edges_srcs.end(), set_c_weak_edges_dsts.end()); - auto set_a_query_edges_first = thrust::make_zip_iterator(set_a_query_edges_srcs.begin(), set_a_query_edges_dsts.begin()); + + raft::update_host( + h_tx_counts.data(), d_tx_counts.data(), d_tx_counts.size(), handle.get_stream()); + + std::tie(set_a_query_edges_srcs, h_rx_counts) = shuffle_values( + handle.get_comms(), set_a_query_edges_srcs.begin(), h_tx_counts, handle.get_stream()); + + std::tie(set_a_query_edges_dsts, std::ignore) = shuffle_values( + handle.get_comms(), set_a_query_edges_dsts.begin(), h_tx_counts, handle.get_stream()); + + rmm::device_uvector has_edge(set_a_query_edges_srcs.size(), + handle.get_stream()); // type should be size_t + + auto set_c_weak_edges_first = thrust::make_zip_iterator( + set_c_weak_edges_srcs.begin(), set_c_weak_edges_dsts.begin()); // setBedges + auto set_c_weak_edges_last = + thrust::make_zip_iterator(set_c_weak_edges_srcs.end(), set_c_weak_edges_dsts.end()); + auto set_a_query_edges_first = + thrust::make_zip_iterator(set_a_query_edges_srcs.begin(), set_a_query_edges_dsts.begin()); // FIXME: Use thrust::transform instead thrust::tabulate( - handle.get_thrust_policy(), - has_edge.begin(), - has_edge.end(), - [ - set_c_weak_edges_first, - set_c_weak_edges_last, - set_a_query_edges_first - ] __device__(auto i) { - return thrust::binary_search( - thrust::seq, set_c_weak_edges_first, 
set_c_weak_edges_last, set_a_query_edges_first[i]); - }); + handle.get_thrust_policy(), + has_edge.begin(), + has_edge.end(), + [set_c_weak_edges_first, set_c_weak_edges_last, set_a_query_edges_first] __device__(auto i) { + return thrust::binary_search( + thrust::seq, set_c_weak_edges_first, set_c_weak_edges_last, set_a_query_edges_first[i]); + }); std::tie(has_edge, std::ignore) = shuffle_values(handle.get_comms(), has_edge.begin(), h_rx_counts, handle.get_stream()); - - auto set_a_and_b_query_edges_first = thrust::make_zip_iterator(set_a_query_edges, set_b_query_edges); - auto set_a_and_b_query_edges_last = thrust::make_zip_iterator( - set_a_query_edges + buffer_size, set_b_query_edges + buffer_size); - - - thrust::sort_by_key(handle.get_thrust_policy(), - set_a_query_edges, - set_a_query_edges + buffer_size, - thrust::make_zip_iterator(set_b_query_edges, has_edge.begin()) - ); - - auto edges_not_overcomp = thrust::remove_if( - handle.get_thrust_policy(), - set_a_and_b_query_edges_first, - set_a_and_b_query_edges_last, - [ - set_a_query_edges, - buffer_size, - has_edge = raft::device_span(has_edge.data(), has_edge.size()) - ] __device__(auto pair_set) { - //auto set_a_query_edge = thrust::get<0>(pair_set) - auto itr = thrust::lower_bound( - thrust::seq, set_a_query_edges, set_a_query_edges + buffer_size, thrust::get<0>(pair_set)); - - auto idx = thrust::distance(set_a_query_edges, itr); - return has_edge[idx]; - - }); - auto dist = thrust::distance(set_a_and_b_query_edges_first, edges_not_overcomp); - return dist; + auto set_a_and_b_query_edges_first = + thrust::make_zip_iterator(set_a_query_edges, set_b_query_edges); + auto set_a_and_b_query_edges_last = + thrust::make_zip_iterator(set_a_query_edges + buffer_size, set_b_query_edges + buffer_size); + thrust::sort_by_key(handle.get_thrust_policy(), + set_a_query_edges, + set_a_query_edges + buffer_size, + thrust::make_zip_iterator(set_b_query_edges, has_edge.begin())); + + auto edges_not_overcomp = + 
thrust::remove_if(handle.get_thrust_policy(), + set_a_and_b_query_edges_first, + set_a_and_b_query_edges_last, + [set_a_query_edges, + buffer_size, + has_edge = raft::device_span( + has_edge.data(), has_edge.size())] __device__(auto pair_set) { + // auto set_a_query_edge = thrust::get<0>(pair_set) + auto itr = thrust::lower_bound(thrust::seq, + set_a_query_edges, + set_a_query_edges + buffer_size, + thrust::get<0>(pair_set)); + + auto idx = thrust::distance(set_a_query_edges, itr); + return has_edge[idx]; + }); + + auto dist = thrust::distance(set_a_and_b_query_edges_first, edges_not_overcomp); + return dist; } } @@ -219,9 +216,10 @@ struct extract_weak_edges { }; template -struct extract_edges { // FIXME: ******************************Remove this functor. For testing purposes only******************* +struct extract_edges { // FIXME: ******************************Remove this functor. For testing + // purposes only******************* __device__ thrust::optional> operator()( - + auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) const { return thrust::make_tuple(src, dst, count); @@ -231,7 +229,7 @@ struct extract_edges { // FIXME: ******************************Remove this func template struct extract_edges_and_triangle_counts { __device__ thrust::optional> operator()( - + auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) const { return thrust::make_tuple(src, dst, count); @@ -240,17 +238,16 @@ struct extract_edges_and_triangle_counts { template struct extract_edges_to_q_r { - raft::device_span vertex_q_r_set{}; __device__ thrust::optional> operator()( - - auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const + + auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const { - auto has_src = thrust::binary_search( - thrust::seq, vertex_q_r_set.begin(), vertex_q_r_set.end(), src); + auto has_src = + thrust::binary_search(thrust::seq, vertex_q_r_set.begin(), vertex_q_r_set.end(), 
src); - auto has_dst = thrust::binary_search( - thrust::seq, vertex_q_r_set.begin(), vertex_q_r_set.end(), dst); + auto has_dst = + thrust::binary_search(thrust::seq, vertex_q_r_set.begin(), vertex_q_r_set.end(), dst); if (has_src) { return thrust::optional>{thrust::make_tuple(src, dst)}; @@ -361,18 +358,20 @@ struct extract_q_idx_closing { thrust::nullopt_t, thrust::nullopt_t) const { + auto itr = thrust::lower_bound(thrust::seq, + major_weak_edgelist_dsts_tag_first, + major_weak_edgelist_dsts_tag_last, + thrust::make_tuple(dst, thrust::get<1>(tagged_src))); - auto itr = thrust::lower_bound( - thrust::seq, - major_weak_edgelist_dsts_tag_first, - major_weak_edgelist_dsts_tag_last, - thrust::make_tuple(dst, thrust::get<1>(tagged_src))); - auto idx = thrust::distance(major_weak_edgelist_dsts_tag_first, itr); - - return (itr != major_weak_edgelist_dsts_tag_last && *itr == thrust::make_tuple(dst, thrust::get<1>(tagged_src))) - ? thrust::make_optional(thrust::make_tuple(thrust::get<0>(tagged_src), dst, major_weak_edgelist_srcs[idx], thrust::get<1>(tagged_src))) - : thrust::nullopt; + + return (itr != major_weak_edgelist_dsts_tag_last && + *itr == thrust::make_tuple(dst, thrust::get<1>(tagged_src))) + ? 
thrust::make_optional(thrust::make_tuple(thrust::get<0>(tagged_src), + dst, + major_weak_edgelist_srcs[idx], + thrust::get<1>(tagged_src))) + : thrust::nullopt; } }; @@ -389,7 +388,7 @@ struct generate_p_q { auto itr = thrust::upper_bound( thrust::seq, intersection_offsets.begin() + 1, intersection_offsets.end(), i); auto idx = thrust::distance(intersection_offsets.begin() + 1, itr); - + return thrust::make_tuple(weak_srcs[chunk_start + idx], weak_dsts[chunk_start + idx]); } }; @@ -402,7 +401,7 @@ struct generate_p_r { EdgeIterator closing_r_tag{}; raft::device_span weak_edge_idx{}; - raft::device_span chunk_global_weak_edgelist_tags{}; + raft::device_span chunk_global_weak_edgelist_tags{}; __device__ thrust::tuple operator()(edge_t i) const { @@ -419,11 +418,10 @@ struct generate_p_q_q_r { EdgeIterator weak_edge{}; raft::device_span q_closing{}; raft::device_span weak_edge_idx{}; - raft::device_span chunk_global_weak_edgelist_tags{}; + raft::device_span chunk_global_weak_edgelist_tags{}; __device__ thrust::tuple operator()(edge_t i) const { - if constexpr (generate_p_q) { return thrust::make_tuple(thrust::get<0>(*(weak_edge + weak_edge_idx[i])), q_closing[i]); } else { @@ -433,29 +431,26 @@ struct generate_p_q_q_r { }; template -void decrease_triangle_count(raft::handle_t const& handle, - graph_view_t & cur_graph_view, - edge_property_t, edge_t> & edge_triangle_counts, - raft::device_span edge_srcs, - raft::device_span edge_dsts - ) { - +void decrease_triangle_count( + raft::handle_t const& handle, + graph_view_t& cur_graph_view, + edge_property_t, edge_t>& edge_triangle_counts, + raft::device_span edge_srcs, + raft::device_span edge_dsts) +{ // Before updating the count, we need to clear the mask auto edge_buffer_first = thrust::make_zip_iterator(edge_srcs.begin(), edge_dsts.begin()); - - thrust::sort(handle.get_thrust_policy(), - edge_buffer_first, - edge_buffer_first + edge_srcs.size()); - - auto unique_pair_count = 
thrust::unique_count(handle.get_thrust_policy(), - edge_buffer_first, - edge_buffer_first + edge_srcs.size()); - + + thrust::sort(handle.get_thrust_policy(), edge_buffer_first, edge_buffer_first + edge_srcs.size()); + + auto unique_pair_count = thrust::unique_count( + handle.get_thrust_policy(), edge_buffer_first, edge_buffer_first + edge_srcs.size()); + rmm::device_uvector decrease_count(unique_pair_count, handle.get_stream()); - + auto vertex_pair_buffer_unique = allocate_dataframe_buffer>( - unique_pair_count, handle.get_stream()); - + unique_pair_count, handle.get_stream()); + thrust::reduce_by_key(handle.get_thrust_policy(), edge_buffer_first, edge_buffer_first + edge_srcs.size(), @@ -463,12 +458,12 @@ void decrease_triangle_count(raft::handle_t const& handle, get_dataframe_buffer_begin(vertex_pair_buffer_unique), decrease_count.begin(), thrust::equal_to>{}); - + cugraph::edge_bucket_t edges_to_decrement_count(handle); - edges_to_decrement_count.insert(std::get<0>(vertex_pair_buffer_unique).begin(), + edges_to_decrement_count.insert(std::get<0>(vertex_pair_buffer_unique).begin(), std::get<0>(vertex_pair_buffer_unique).end(), std::get<1>(vertex_pair_buffer_unique).begin()); - + cugraph::transform_e( handle, cur_graph_view, @@ -476,36 +471,39 @@ void decrease_triangle_count(raft::handle_t const& handle, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), edge_triangle_counts.view(), - [ - edge_buffer_first = get_dataframe_buffer_begin(vertex_pair_buffer_unique), - edge_buffer_last = get_dataframe_buffer_end(vertex_pair_buffer_unique), - decrease_count = decrease_count.data() - ] - __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) { - - auto e = thrust::make_tuple(src, dst); - auto itr_pair = thrust::lower_bound( - thrust::seq, edge_buffer_first, edge_buffer_last, e); + [edge_buffer_first = get_dataframe_buffer_begin(vertex_pair_buffer_unique), + edge_buffer_last = 
get_dataframe_buffer_end(vertex_pair_buffer_unique), + decrease_count = decrease_count.data()] __device__(auto src, + auto dst, + thrust::nullopt_t, + thrust::nullopt_t, + edge_t count) { + auto e = thrust::make_tuple(src, dst); + auto itr_pair = thrust::lower_bound(thrust::seq, edge_buffer_first, edge_buffer_last, e); auto idx_pair = thrust::distance(edge_buffer_first, itr_pair); return count - decrease_count[idx_pair]; }, edge_triangle_counts.mutable_view(), - true); // FIXME: set expensive check to False - + true); // FIXME: set expensive check to False }; -template -std::tuple -accumulate_triangles_p_q_or_q_r(raft::handle_t const& handle, - graph_view_t & graph_view, - raft::device_span weak_edgelist_srcs, - raft::device_span weak_edgelist_dsts, - size_t prev_chunk_size, - size_t chunk_size, - bool do_expensive_check) { - - auto weak_edgelist_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); +template +std::tuple accumulate_triangles_p_q_or_q_r( + raft::handle_t const& handle, + graph_view_t& graph_view, + raft::device_span weak_edgelist_srcs, + raft::device_span weak_edgelist_dsts, + size_t prev_chunk_size, + size_t chunk_size, + bool do_expensive_check) +{ + auto weak_edgelist_first = + thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); auto [intersection_offsets, intersection_indices] = detail::nbr_intersection(handle, @@ -514,12 +512,11 @@ accumulate_triangles_p_q_or_q_r(raft::handle_t const& handle, weak_edgelist_first + prev_chunk_size, weak_edgelist_first + prev_chunk_size + chunk_size, std::array{true, true}, - //do_expensive_check : FIXME - true); + // do_expensive_check : FIXME + true); - auto vertex_pair_buffer_p_q = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); + auto vertex_pair_buffer_p_q = allocate_dataframe_buffer>( + intersection_indices.size(), handle.get_stream()); thrust::tabulate( handle.get_thrust_policy(), @@ -527,47 +524,42 @@ 
accumulate_triangles_p_q_or_q_r(raft::handle_t const& handle, get_dataframe_buffer_end(vertex_pair_buffer_p_q), generate_p_q{ prev_chunk_size, - raft::device_span(intersection_offsets.data(), - intersection_offsets.size()), - raft::device_span(intersection_indices.data(), - intersection_indices.size()), + raft::device_span(intersection_offsets.data(), intersection_offsets.size()), + raft::device_span(intersection_indices.data(), intersection_indices.size()), weak_edgelist_srcs, - weak_edgelist_dsts - }); - + weak_edgelist_dsts}); + auto vertex_pair_buffer_p_r_edge_p_q = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); + allocate_dataframe_buffer>(intersection_indices.size(), + handle.get_stream()); thrust::tabulate( handle.get_thrust_policy(), get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q), generate_p_r_or_q_r_from_p_q{ prev_chunk_size, - raft::device_span(intersection_offsets.data(), - intersection_offsets.size()), - raft::device_span(intersection_indices.data(), - intersection_indices.size()), + raft::device_span(intersection_offsets.data(), intersection_offsets.size()), + raft::device_span(intersection_indices.data(), intersection_indices.size()), weak_edgelist_srcs, weak_edgelist_dsts}); - + auto vertex_pair_buffer_q_r_edge_p_q = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); + allocate_dataframe_buffer>(intersection_indices.size(), + handle.get_stream()); thrust::tabulate( handle.get_thrust_policy(), get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q), generate_p_r_or_q_r_from_p_q{ prev_chunk_size, - raft::device_span(intersection_offsets.data(), - intersection_offsets.size()), - raft::device_span(intersection_indices.data(), - intersection_indices.size()), + raft::device_span(intersection_offsets.data(), intersection_offsets.size()), + 
raft::device_span(intersection_indices.data(), intersection_indices.size()), weak_edgelist_srcs, weak_edgelist_dsts}); - return std::make_tuple(std::move(vertex_pair_buffer_p_q), std::move(vertex_pair_buffer_p_r_edge_p_q), std::move(vertex_pair_buffer_q_r_edge_p_q)); + return std::make_tuple(std::move(vertex_pair_buffer_p_q), + std::move(vertex_pair_buffer_p_r_edge_p_q), + std::move(vertex_pair_buffer_q_r_edge_p_q)); } } // namespace @@ -582,7 +574,6 @@ k_truss(raft::handle_t const& handle, edge_t k, bool do_expensive_check) { - // 1. Check input arguments. CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); @@ -653,11 +644,11 @@ k_truss(raft::handle_t const& handle, raft::device_span core_number_span{core_numbers.data(), core_numbers.size()}; auto [srcs, dsts, wgts] = k_core(handle, - cur_graph_view, - edge_weight_view, - k - 1, - std::make_optional(k_core_degree_type_t::OUT), - std::make_optional(core_number_span)); + cur_graph_view, + edge_weight_view, + k - 1, + std::make_optional(k_core_degree_type_t::OUT), + std::make_optional(core_number_span)); if constexpr (multi_gpu) { std::tie(srcs, dsts, std::ignore, std::ignore, std::ignore, std::ignore) = @@ -779,86 +770,106 @@ k_truss(raft::handle_t const& handle, edge_weight_view = edge_weight ? std::make_optional((*edge_weight).view()) : std::optional>{std::nullopt}; - - auto edge_triangle_counts = edge_triangle_count(handle, cur_graph_view); + + auto edge_triangle_counts = + edge_triangle_count(handle, cur_graph_view); cugraph::edge_property_t edge_mask(handle, cur_graph_view); cugraph::fill_edge_property(handle, cur_graph_view, true, edge_mask); while (true) { // extract the edges that have counts less than k - 2. 
Those edges will be unrolled - auto [weak_edgelist_srcs, weak_edgelist_dsts] = extract_transform_e(handle, - cur_graph_view, - edge_src_dummy_property_t{}.view(), - edge_dst_dummy_property_t{}.view(), - edge_triangle_counts.view(), - // FIXME: Replace by lambda function - extract_weak_edges{k}); - - + auto [weak_edgelist_srcs, weak_edgelist_dsts] = + extract_transform_e(handle, + cur_graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + // FIXME: Replace by lambda function + extract_weak_edges{k}); + auto num_weak_edges = weak_edgelist_srcs.size(); if constexpr (multi_gpu) { - num_weak_edges = host_scalar_allreduce(handle.get_comms(), num_weak_edges, raft::comms::op_t::SUM, handle.get_stream()); + num_weak_edges = host_scalar_allreduce( + handle.get_comms(), num_weak_edges, raft::comms::op_t::SUM, handle.get_stream()); } if (num_weak_edges == 0) { break; } - auto weak_edgelist_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); - thrust::sort(handle.get_thrust_policy(), - weak_edgelist_first, - weak_edgelist_first + weak_edgelist_srcs.size()); + auto weak_edgelist_first = + thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + thrust::sort(handle.get_thrust_policy(), + weak_edgelist_first, + weak_edgelist_first + weak_edgelist_srcs.size()); // Find intersection edges - size_t prev_chunk_size = 0; + size_t prev_chunk_size = 0; size_t num_remaining_weak_edges = weak_edgelist_srcs.size(); size_t edges_to_intersect_per_iteration = - static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); - + static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); auto num_chunks = raft::div_rounding_up_safe(weak_edgelist_srcs.size(), edges_to_intersect_per_iteration); if constexpr (multi_gpu) { - num_chunks = host_scalar_allreduce(handle.get_comms(), num_chunks, raft::comms::op_t::SUM, handle.get_stream()); + 
num_chunks = host_scalar_allreduce( + handle.get_comms(), num_chunks, raft::comms::op_t::SUM, handle.get_stream()); } for (size_t i = 0; i < num_chunks; ++i) { auto chunk_size = std::min(edges_to_intersect_per_iteration, num_remaining_weak_edges); - auto [vertex_pair_buffer_p_q, vertex_pair_buffer_p_r_edge_p_q, vertex_pair_buffer_q_r_edge_p_q] = accumulate_triangles_p_q_or_q_r>(size_t{0}, handle.get_stream())), multi_gpu>( - handle, - cur_graph_view, - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), - prev_chunk_size, - chunk_size, - do_expensive_check); - + auto [vertex_pair_buffer_p_q, + vertex_pair_buffer_p_r_edge_p_q, + vertex_pair_buffer_q_r_edge_p_q] = + accumulate_triangles_p_q_or_q_r< + vertex_t, + edge_t, + weight_t, + decltype(allocate_dataframe_buffer>( + size_t{0}, handle.get_stream())), + multi_gpu>( + handle, + cur_graph_view, + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), + prev_chunk_size, + chunk_size, + do_expensive_check); + rmm::device_uvector vertex_pair_buffer_p_r_edge_p_q_srcs(0, handle.get_stream()); rmm::device_uvector vertex_pair_buffer_p_r_edge_p_q_dsts(0, handle.get_stream()); rmm::device_uvector vertex_pair_buffer_q_r_edge_p_q_srcs(0, handle.get_stream()); rmm::device_uvector vertex_pair_buffer_q_r_edge_p_q_dsts(0, handle.get_stream()); - - // Shuffle edges if constexpr (multi_gpu) { // FIXME: Check whether we need to shuffle (p, q) edges - std::tie(vertex_pair_buffer_p_r_edge_p_q_srcs, vertex_pair_buffer_p_r_edge_p_q_dsts, std::ignore, std::ignore, std::ignore, std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<0>(vertex_pair_buffer_p_r_edge_p_q)), - std::move(std::get<1>(vertex_pair_buffer_p_r_edge_p_q)), - std::nullopt, - std::nullopt, - std::nullopt, - 
cur_graph_view.vertex_partition_range_lasts()); + std::tie(vertex_pair_buffer_p_r_edge_p_q_srcs, + vertex_pair_buffer_p_r_edge_p_q_dsts, + std::ignore, + std::ignore, + std::ignore, + std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<0>(vertex_pair_buffer_p_r_edge_p_q)), + std::move(std::get<1>(vertex_pair_buffer_p_r_edge_p_q)), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); - std::tie(vertex_pair_buffer_q_r_edge_p_q_srcs, vertex_pair_buffer_q_r_edge_p_q_dsts, std::ignore, std::ignore, std::ignore, std::ignore) = + std::tie(vertex_pair_buffer_q_r_edge_p_q_srcs, + vertex_pair_buffer_q_r_edge_p_q_dsts, + std::ignore, + std::ignore, + std::ignore, + std::ignore) = detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + edge_t, + weight_t, + int32_t>( handle, std::move(std::get<0>(vertex_pair_buffer_q_r_edge_p_q)), std::move(std::get<1>(vertex_pair_buffer_q_r_edge_p_q)), @@ -867,130 +878,157 @@ k_truss(raft::handle_t const& handle, std::nullopt, cur_graph_view.vertex_partition_range_lasts()); } - + decrease_triangle_count( handle, cur_graph_view, edge_triangle_counts, - raft::device_span(std::get<0>(vertex_pair_buffer_p_q).data(), std::get<0>(vertex_pair_buffer_p_q).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_p_q).data(), std::get<1>(vertex_pair_buffer_p_q).size()) - ); + raft::device_span(std::get<0>(vertex_pair_buffer_p_q).data(), + std::get<0>(vertex_pair_buffer_p_q).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_p_q).data(), + std::get<1>(vertex_pair_buffer_p_q).size())); decrease_triangle_count( - handle, - cur_graph_view, - edge_triangle_counts, - multi_gpu ? 
raft::device_span(vertex_pair_buffer_p_r_edge_p_q_srcs.data(), vertex_pair_buffer_p_r_edge_p_q_srcs.size()) : raft::device_span(std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_p_r_edge_p_q).size()), // FIXME: Make sure multi_gpu is properly handles - multi_gpu ? raft::device_span(vertex_pair_buffer_p_r_edge_p_q_dsts.data(), vertex_pair_buffer_p_r_edge_p_q_dsts.size()) : raft::device_span(std::get<1>(vertex_pair_buffer_p_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_p_r_edge_p_q).size()) // FIXME: Make sure multi_gpu is properly handles - ); + handle, + cur_graph_view, + edge_triangle_counts, + multi_gpu ? raft::device_span(vertex_pair_buffer_p_r_edge_p_q_srcs.data(), + vertex_pair_buffer_p_r_edge_p_q_srcs.size()) + : raft::device_span( + std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), + std::get<0>(vertex_pair_buffer_p_r_edge_p_q) + .size()), // FIXME: Make sure multi_gpu is properly handles + multi_gpu ? raft::device_span(vertex_pair_buffer_p_r_edge_p_q_dsts.data(), + vertex_pair_buffer_p_r_edge_p_q_dsts.size()) + : raft::device_span( + std::get<1>(vertex_pair_buffer_p_r_edge_p_q).data(), + std::get<1>(vertex_pair_buffer_p_r_edge_p_q) + .size()) // FIXME: Make sure multi_gpu is properly handles + ); decrease_triangle_count( - handle, - cur_graph_view, - edge_triangle_counts, - multi_gpu ? raft::device_span(vertex_pair_buffer_q_r_edge_p_q_srcs.data(), vertex_pair_buffer_q_r_edge_p_q_srcs.size()) : raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size()), - multi_gpu ? raft::device_span(vertex_pair_buffer_q_r_edge_p_q_dsts.data(), vertex_pair_buffer_q_r_edge_p_q_dsts.size()) : raft::device_span(std::get<1>(vertex_pair_buffer_q_r_edge_p_q).data(), std::get<1>(vertex_pair_buffer_q_r_edge_p_q).size()) - ); - + handle, + cur_graph_view, + edge_triangle_counts, + multi_gpu + ? 
raft::device_span(vertex_pair_buffer_q_r_edge_p_q_srcs.data(), + vertex_pair_buffer_q_r_edge_p_q_srcs.size()) + : raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), + std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size()), + multi_gpu + ? raft::device_span(vertex_pair_buffer_q_r_edge_p_q_dsts.data(), + vertex_pair_buffer_q_r_edge_p_q_dsts.size()) + : raft::device_span(std::get<1>(vertex_pair_buffer_q_r_edge_p_q).data(), + std::get<1>(vertex_pair_buffer_q_r_edge_p_q).size())); + prev_chunk_size += chunk_size; num_remaining_weak_edges -= chunk_size; - } // Iterate over unique weak edges' endpoints that appear as either q or r - rmm::device_uvector unique_weak_edgelist_srcs(weak_edgelist_srcs.size(), handle.get_stream()); - rmm::device_uvector unique_weak_edgelist_dsts(weak_edgelist_dsts.size(), handle.get_stream()); - + rmm::device_uvector unique_weak_edgelist_srcs(weak_edgelist_srcs.size(), + handle.get_stream()); + rmm::device_uvector unique_weak_edgelist_dsts(weak_edgelist_dsts.size(), + handle.get_stream()); + // Get unique srcs and dsts thrust::copy(handle.get_thrust_policy(), weak_edgelist_srcs.begin(), weak_edgelist_srcs.end(), - unique_weak_edgelist_srcs.begin() - ); + unique_weak_edgelist_srcs.begin()); thrust::copy(handle.get_thrust_policy(), weak_edgelist_dsts.begin(), weak_edgelist_dsts.end(), - unique_weak_edgelist_dsts.begin() - ); - - thrust::sort(handle.get_thrust_policy(), unique_weak_edgelist_srcs.begin(), unique_weak_edgelist_srcs.end()); // No need to sort the 'dst' since they are already sorted - - thrust::sort(handle.get_thrust_policy(), unique_weak_edgelist_dsts.begin(), unique_weak_edgelist_dsts.end()); - - auto unique_srcs_end = thrust::unique( - handle.get_thrust_policy(), - unique_weak_edgelist_srcs.begin(), - unique_weak_edgelist_srcs.end()); - - auto unique_dsts_end = thrust::unique( - handle.get_thrust_policy(), - unique_weak_edgelist_dsts.begin(), - unique_weak_edgelist_dsts.end()); - - auto 
num_unique_weak_edgelist_srcs = thrust::distance(unique_weak_edgelist_srcs.begin(), unique_srcs_end); - auto num_unique_weak_edgelist_dsts = thrust::distance(unique_weak_edgelist_dsts.begin(), unique_dsts_end); + unique_weak_edgelist_dsts.begin()); + + thrust::sort(handle.get_thrust_policy(), + unique_weak_edgelist_srcs.begin(), + unique_weak_edgelist_srcs + .end()); // No need to sort the 'dst' since they are already sorted + + thrust::sort(handle.get_thrust_policy(), + unique_weak_edgelist_dsts.begin(), + unique_weak_edgelist_dsts.end()); + + auto unique_srcs_end = thrust::unique(handle.get_thrust_policy(), + unique_weak_edgelist_srcs.begin(), + unique_weak_edgelist_srcs.end()); + + auto unique_dsts_end = thrust::unique(handle.get_thrust_policy(), + unique_weak_edgelist_dsts.begin(), + unique_weak_edgelist_dsts.end()); + + auto num_unique_weak_edgelist_srcs = + thrust::distance(unique_weak_edgelist_srcs.begin(), unique_srcs_end); + auto num_unique_weak_edgelist_dsts = + thrust::distance(unique_weak_edgelist_dsts.begin(), unique_dsts_end); unique_weak_edgelist_srcs.resize(num_unique_weak_edgelist_srcs, handle.get_stream()); unique_weak_edgelist_dsts.resize(num_unique_weak_edgelist_dsts, handle.get_stream()); // Create a vertex set composed of edge endpoints that are either in the q or r set - rmm::device_uvector vertex_q_r_set(num_unique_weak_edgelist_srcs + num_unique_weak_edgelist_dsts, handle.get_stream()); + rmm::device_uvector vertex_q_r_set( + num_unique_weak_edgelist_srcs + num_unique_weak_edgelist_dsts, handle.get_stream()); auto vertex_q_r_end = thrust::set_union(handle.get_thrust_policy(), - unique_weak_edgelist_srcs.begin(), - unique_weak_edgelist_srcs.end(), - unique_weak_edgelist_dsts.begin(), - unique_weak_edgelist_dsts.end(), - vertex_q_r_set.begin()); - - vertex_q_r_set.resize(thrust::distance(vertex_q_r_set.begin(), vertex_q_r_end), handle.get_stream()); + unique_weak_edgelist_srcs.begin(), + unique_weak_edgelist_srcs.end(), + 
unique_weak_edgelist_dsts.begin(), + unique_weak_edgelist_dsts.end(), + vertex_q_r_set.begin()); + + vertex_q_r_set.resize(thrust::distance(vertex_q_r_set.begin(), vertex_q_r_end), + handle.get_stream()); thrust::sort(handle.get_thrust_policy(), vertex_q_r_set.begin(), vertex_q_r_set.end()); - auto weak_unique_v_end = thrust::unique( - handle.get_thrust_policy(), - vertex_q_r_set.begin(), - vertex_q_r_set.end()); - - vertex_q_r_set.resize(thrust::distance(vertex_q_r_set.begin(), weak_unique_v_end), handle.get_stream()); + auto weak_unique_v_end = + thrust::unique(handle.get_thrust_policy(), vertex_q_r_set.begin(), vertex_q_r_set.end()); + + vertex_q_r_set.resize(thrust::distance(vertex_q_r_set.begin(), weak_unique_v_end), + handle.get_stream()); if constexpr (multi_gpu) { auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); // Perform all-to-all in chunks across minor comm auto major_vertex_q_r_set = cugraph::detail::device_allgatherv( - handle, handle.get_comms(), raft::device_span(vertex_q_r_set.data(), vertex_q_r_set.size())); - - thrust::sort(handle.get_thrust_policy(), major_vertex_q_r_set.begin(), major_vertex_q_r_set.end()); + handle, + handle.get_comms(), + raft::device_span(vertex_q_r_set.data(), vertex_q_r_set.size())); + + thrust::sort( + handle.get_thrust_policy(), major_vertex_q_r_set.begin(), major_vertex_q_r_set.end()); weak_unique_v_end = thrust::unique( - handle.get_thrust_policy(), - major_vertex_q_r_set.begin(), - major_vertex_q_r_set.end()); - - major_vertex_q_r_set.resize(thrust::distance(major_vertex_q_r_set.begin(), weak_unique_v_end), handle.get_stream()); + handle.get_thrust_policy(), major_vertex_q_r_set.begin(), major_vertex_q_r_set.end()); + + major_vertex_q_r_set.resize( + thrust::distance(major_vertex_q_r_set.begin(), weak_unique_v_end), handle.get_stream()); vertex_q_r_set.resize(major_vertex_q_r_set.size(), handle.get_stream()); - - thrust::copy( - handle.get_thrust_policy(), - 
major_vertex_q_r_set.begin(), - major_vertex_q_r_set.end(), - vertex_q_r_set.begin()); + + thrust::copy(handle.get_thrust_policy(), + major_vertex_q_r_set.begin(), + major_vertex_q_r_set.end(), + vertex_q_r_set.begin()); } - weak_edgelist_first = thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); // FIXME: is this necessary ? + weak_edgelist_first = thrust::make_zip_iterator( + weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); // FIXME: is this necessary ? - auto [srcs_in_q_r_set, dsts_in_q_r_set] = extract_transform_e(handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - // FIXME: Lambda function instead of functor - extract_edges_to_q_r{raft::device_span(vertex_q_r_set.data(), vertex_q_r_set.size())}); - + auto [srcs_in_q_r_set, dsts_in_q_r_set] = + extract_transform_e(handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + // FIXME: Lambda function instead of functor + extract_edges_to_q_r{raft::device_span( + vertex_q_r_set.data(), vertex_q_r_set.size())}); if constexpr (multi_gpu) { - std::tie(dsts_in_q_r_set, srcs_in_q_r_set, std::ignore, std::ignore, std::ignore, std::ignore) = + std::tie( + dsts_in_q_r_set, srcs_in_q_r_set, std::ignore, std::ignore, std::ignore, std::ignore) = detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning> graph_q_r{std::nullopt}; std::optional> renumber_map_q_r{std::nullopt}; std::tie(*graph_q_r, std::ignore, std::ignore, std::ignore, renumber_map_q_r) = @@ -1019,20 +1057,25 @@ k_truss(raft::handle_t const& handle, auto csc_q_r_graph_view = (*graph_q_r).view(); - rmm::device_uvector renumbered_weak_edgelist_srcs( - weak_edgelist_srcs.size(), handle.get_stream()); - rmm::device_uvector renumbered_weak_edgelist_dsts( - weak_edgelist_srcs.size(), 
handle.get_stream()); + rmm::device_uvector renumbered_weak_edgelist_srcs(weak_edgelist_srcs.size(), + handle.get_stream()); + rmm::device_uvector renumbered_weak_edgelist_dsts(weak_edgelist_srcs.size(), + handle.get_stream()); thrust::copy( handle.get_thrust_policy(), thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()), thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end()), - thrust::make_zip_iterator(renumbered_weak_edgelist_srcs.begin(), renumbered_weak_edgelist_dsts.begin())); + thrust::make_zip_iterator(renumbered_weak_edgelist_srcs.begin(), + renumbered_weak_edgelist_dsts.begin())); if constexpr (multi_gpu) { - std::tie( - renumbered_weak_edgelist_srcs, renumbered_weak_edgelist_dsts, std::ignore, std::ignore, std::ignore, std::ignore) = + std::tie(renumbered_weak_edgelist_srcs, + renumbered_weak_edgelist_dsts, + std::ignore, + std::ignore, + std::ignore, + std::ignore) = detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning>(size_t{0}, handle.get_stream())), multi_gpu>( - handle, - csc_q_r_graph_view, - raft::device_span(renumbered_weak_edgelist_srcs.data(), renumbered_weak_edgelist_srcs.size()), - raft::device_span(renumbered_weak_edgelist_dsts.data(), renumbered_weak_edgelist_dsts.size()), - prev_chunk_size, - chunk_size, - do_expensive_check); - + auto [vertex_pair_buffer_q_r, + vertex_pair_buffer_p_q_edge_q_r, + vertex_pair_buffer_p_r_edge_q_r] = + accumulate_triangles_p_q_or_q_r< + vertex_t, + edge_t, + weight_t, + decltype(allocate_dataframe_buffer>( + size_t{0}, handle.get_stream())), + multi_gpu>(handle, + csc_q_r_graph_view, + raft::device_span(renumbered_weak_edgelist_srcs.data(), + renumbered_weak_edgelist_srcs.size()), + raft::device_span(renumbered_weak_edgelist_dsts.data(), + renumbered_weak_edgelist_dsts.size()), + prev_chunk_size, + chunk_size, + do_expensive_check); + // Unrenumber auto vertex_partition_range_lasts = std::make_optional>( - 
csc_q_r_graph_view.vertex_partition_range_lasts()); + csc_q_r_graph_view.vertex_partition_range_lasts()); unrenumber_int_vertices( handle, @@ -1126,19 +1180,19 @@ k_truss(raft::handle_t const& handle, true); unrenumber_int_vertices(handle, - std::get<0>(vertex_pair_buffer_q_r).data(), - std::get<0>(vertex_pair_buffer_q_r).size(), - (*renumber_map_q_r).data(), - *vertex_partition_range_lasts, - true); + std::get<0>(vertex_pair_buffer_q_r).data(), + std::get<0>(vertex_pair_buffer_q_r).size(), + (*renumber_map_q_r).data(), + *vertex_partition_range_lasts, + true); unrenumber_int_vertices(handle, - std::get<1>(vertex_pair_buffer_q_r).data(), - std::get<1>(vertex_pair_buffer_q_r).size(), - (*renumber_map_q_r).data(), - *vertex_partition_range_lasts, - true); - + std::get<1>(vertex_pair_buffer_q_r).data(), + std::get<1>(vertex_pair_buffer_q_r).size(), + (*renumber_map_q_r).data(), + *vertex_partition_range_lasts, + true); + if constexpr (multi_gpu) { // Get global weak edges auto& comm = handle.get_comms(); @@ -1146,105 +1200,137 @@ k_truss(raft::handle_t const& handle, // Get global weak_edgelist // FIXME: This operation is too expensive (memory) hence shuffle the weak edges instead to - // the appropriate GPU, check for existance as being part of the weak edge list and shuffle - // the result back. The operation below is only meant for validation purposes and should be - // remove once the statement is validated. + // the appropriate GPU, check for existance as being part of the weak edge list and + // shuffle the result back. The operation below is only meant for validation purposes and + // should be remove once the statement is validated. 
auto global_weak_edgelist_srcs = cugraph::detail::device_allgatherv( - handle, comm, raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size())); + handle, + comm, + raft::device_span(weak_edgelist_srcs.data(), + weak_edgelist_srcs.size())); auto global_weak_edgelist_dsts = cugraph::detail::device_allgatherv( - handle, comm, raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); - + handle, + comm, + raft::device_span(weak_edgelist_dsts.data(), + weak_edgelist_dsts.size())); + // Sort the weak edges if they are not already - auto chunk_global_weak_edgelist_first = - thrust::make_zip_iterator(global_weak_edgelist_srcs.begin(), global_weak_edgelist_dsts.begin()); + auto chunk_global_weak_edgelist_first = thrust::make_zip_iterator( + global_weak_edgelist_srcs.begin(), global_weak_edgelist_dsts.begin()); thrust::sort(handle.get_thrust_policy(), chunk_global_weak_edgelist_first, chunk_global_weak_edgelist_first + global_weak_edgelist_srcs.size()); - auto num_edges_not_overcomp = - remove_overcompensating_edges( - handle, - size_dataframe_buffer(vertex_pair_buffer_p_q_edge_q_r), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_q_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed - get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_q_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed - raft::device_span(global_weak_edgelist_srcs.data(), global_weak_edgelist_srcs.size()), - raft::device_span(global_weak_edgelist_dsts.data(), global_weak_edgelist_dsts.size()), - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), - cur_graph_view.number_of_local_edge_partitions(), - cur_graph_view.vertex_partition_range_lasts() - ); - - resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_q_r, num_edges_not_overcomp, handle.get_stream()); - 
resize_dataframe_buffer(vertex_pair_buffer_p_r_edge_q_r, num_edges_not_overcomp, handle.get_stream()); + auto num_edges_not_overcomp = remove_overcompensating_edges< + vertex_t, + edge_t, + decltype(get_dataframe_buffer_begin(vertex_pair_buffer_q_r)), + true, + multi_gpu, + true // FIXME: Currently using global weak edges for validation purposes + >( + handle, + size_dataframe_buffer(vertex_pair_buffer_p_q_edge_q_r), + get_dataframe_buffer_begin( + vertex_pair_buffer_p_q_edge_q_r), // FIXME: cannot be a copy, needs to be the + // original one so overcompensatiing edges can be + // removed + get_dataframe_buffer_begin( + vertex_pair_buffer_p_r_edge_q_r), // FIXME: cannot be a copy, needs to be the + // original one so overcompensatiing edges can be + // removed + raft::device_span(global_weak_edgelist_srcs.data(), + global_weak_edgelist_srcs.size()), + raft::device_span(global_weak_edgelist_dsts.data(), + global_weak_edgelist_dsts.size()), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), + cur_graph_view.number_of_local_edge_partitions(), + cur_graph_view.vertex_partition_range_lasts()); + + resize_dataframe_buffer( + vertex_pair_buffer_p_q_edge_q_r, num_edges_not_overcomp, handle.get_stream()); + resize_dataframe_buffer( + vertex_pair_buffer_p_r_edge_q_r, num_edges_not_overcomp, handle.get_stream()); // Resize initial (q, r) edges // Note: Once chunking is implemented, reconstruct the (q, r) edges only outside - // FIXME: No need to reconstruct the third array because we can zip all 3 edges of the triangle - // of the chunk's 'for loop' - resize_dataframe_buffer(vertex_pair_buffer_q_r, num_edges_not_overcomp, handle.get_stream()); + // FIXME: No need to reconstruct the third array because we can zip all 3 edges of the + // triangle of the chunk's 'for loop' + resize_dataframe_buffer( + vertex_pair_buffer_q_r, num_edges_not_overcomp, handle.get_stream()); - // FIXME: 
No need to reconstruct the third array because we can zip all 3 edges of the triangle - // Reconstruct (q, r) edges that didn't already have their count updated + // FIXME: No need to reconstruct the third array because we can zip all 3 edges of the + // triangle Reconstruct (q, r) edges that didn't already have their count updated thrust::tabulate( handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r), // FIXME: Properly reconstruct (p, r) even when there is no overcompensation ************************************ + get_dataframe_buffer_begin( + vertex_pair_buffer_q_r), // FIXME: Properly reconstruct (p, r) even when there is no + // overcompensation ************************************ get_dataframe_buffer_end(vertex_pair_buffer_q_r), - [ - vertex_pair_buffer_p_q_edge_q_r = get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_q_r), - vertex_pair_buffer_p_r_edge_q_r = get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_q_r) - ] __device__(auto i) { - return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_edge_q_r[i]), thrust::get<0>(vertex_pair_buffer_p_r_edge_q_r[i])); + [vertex_pair_buffer_p_q_edge_q_r = + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_q_r), + vertex_pair_buffer_p_r_edge_q_r = + get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_q_r)] __device__(auto i) { + return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_edge_q_r[i]), + thrust::get<0>(vertex_pair_buffer_p_r_edge_q_r[i])); }); } else { - auto num_edges_not_overcomp = - remove_overcompensating_edges( - handle, - size_dataframe_buffer(vertex_pair_buffer_p_q_edge_q_r), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_q_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed - get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_q_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed - 
raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), // FIXME: Only for MG validation purposes - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), // FIXME: Only for MG validation purposes - cur_graph_view.number_of_local_edge_partitions(), - cur_graph_view.vertex_partition_range_lasts() // Not needed for SG - ); - - resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_q_r, num_edges_not_overcomp, handle.get_stream()); - resize_dataframe_buffer(vertex_pair_buffer_p_r_edge_q_r, num_edges_not_overcomp, handle.get_stream()); - - // resize initial (q, r) edges - resize_dataframe_buffer(vertex_pair_buffer_q_r, num_edges_not_overcomp, handle.get_stream()); + auto num_edges_not_overcomp = remove_overcompensating_edges< + vertex_t, + edge_t, + decltype(get_dataframe_buffer_begin(vertex_pair_buffer_q_r)), + true, + multi_gpu, + true>( + handle, + size_dataframe_buffer(vertex_pair_buffer_p_q_edge_q_r), + get_dataframe_buffer_begin( + vertex_pair_buffer_p_q_edge_q_r), // FIXME: cannot be a copy, needs to be the + // original one so overcompensatiing edges can be + // removed + get_dataframe_buffer_begin( + vertex_pair_buffer_p_r_edge_q_r), // FIXME: cannot be a copy, needs to be the + // original one so overcompensatiing edges can be + // removed + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), + raft::device_span( + weak_edgelist_srcs.data(), + weak_edgelist_srcs.size()), // FIXME: Only for MG validation purposes + raft::device_span( + weak_edgelist_dsts.data(), + weak_edgelist_dsts.size()), // FIXME: Only for MG validation purposes + cur_graph_view.number_of_local_edge_partitions(), + cur_graph_view.vertex_partition_range_lasts() // Not needed for SG + ); + resize_dataframe_buffer( + 
vertex_pair_buffer_p_q_edge_q_r, num_edges_not_overcomp, handle.get_stream()); + resize_dataframe_buffer( + vertex_pair_buffer_p_r_edge_q_r, num_edges_not_overcomp, handle.get_stream()); + // resize initial (q, r) edges + resize_dataframe_buffer( + vertex_pair_buffer_q_r, num_edges_not_overcomp, handle.get_stream()); - // Reconstruct (q, r) edges that didn't already have their count updated - // FIXME: No need to reconstruct the third array because we can zip all 3 edges of the triangle + // FIXME: No need to reconstruct the third array because we can zip all 3 edges of the + // triangle thrust::tabulate( handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r), // FIXME: Properly reconstruct (p, r) even when there is no overcompensation ************************************ + get_dataframe_buffer_begin( + vertex_pair_buffer_q_r), // FIXME: Properly reconstruct (p, r) even when there is no + // overcompensation ************************************ get_dataframe_buffer_end(vertex_pair_buffer_q_r), - [ - vertex_pair_buffer_p_q_edge_q_r = get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_q_r), - vertex_pair_buffer_p_r_edge_q_r = get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_q_r) - ] __device__(auto i) { - return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_edge_q_r[i]), thrust::get<0>(vertex_pair_buffer_p_r_edge_q_r[i])); + [vertex_pair_buffer_p_q_edge_q_r = + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_q_r), + vertex_pair_buffer_p_r_edge_q_r = + get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_q_r)] __device__(auto i) { + return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_edge_q_r[i]), + thrust::get<0>(vertex_pair_buffer_p_r_edge_q_r[i])); }); } @@ -1253,555 +1339,592 @@ k_truss(raft::handle_t const& handle, rmm::device_uvector vertex_pair_buffer_p_r_edge_q_r_srcs(0, handle.get_stream()); rmm::device_uvector vertex_pair_buffer_p_r_edge_q_r_dsts(0, handle.get_stream()); - if constexpr 
(multi_gpu) { + if constexpr (multi_gpu) { // Shuffle before updating count rmm::device_uvector vertex_pair_buffer_q_r_srcs(0, handle.get_stream()); rmm::device_uvector vertex_pair_buffer_q_r_dsts(0, handle.get_stream()); - - std::tie(vertex_pair_buffer_q_r_srcs, vertex_pair_buffer_q_r_dsts, std::ignore, std::ignore, std::ignore, std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<0>(vertex_pair_buffer_q_r)), - std::move(std::get<1>(vertex_pair_buffer_q_r)), - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); - + + std::tie(vertex_pair_buffer_q_r_srcs, + vertex_pair_buffer_q_r_dsts, + std::ignore, + std::ignore, + std::ignore, + std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<0>(vertex_pair_buffer_q_r)), + std::move(std::get<1>(vertex_pair_buffer_q_r)), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + decrease_triangle_count( handle, cur_graph_view, edge_triangle_counts, - raft::device_span(vertex_pair_buffer_q_r_srcs.data(), vertex_pair_buffer_q_r_srcs.size()), - raft::device_span(vertex_pair_buffer_q_r_dsts.data(), vertex_pair_buffer_q_r_dsts.size()) - ); + raft::device_span(vertex_pair_buffer_q_r_srcs.data(), + vertex_pair_buffer_q_r_srcs.size()), + raft::device_span(vertex_pair_buffer_q_r_dsts.data(), + vertex_pair_buffer_q_r_dsts.size())); // Shuffle before updating count - rmm::device_uvector vertex_pair_buffer_p_q_edge_q_r_srcs(0, handle.get_stream()); - rmm::device_uvector vertex_pair_buffer_p_q_edge_q_r_dsts(0, handle.get_stream()); - - std::tie(vertex_pair_buffer_p_q_edge_q_r_dsts, vertex_pair_buffer_p_q_edge_q_r_srcs, std::ignore, std::ignore, std::ignore, std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - 
std::move(std::get<1>(vertex_pair_buffer_p_q_edge_q_r)), - std::move(std::get<0>(vertex_pair_buffer_p_q_edge_q_r)), - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); + rmm::device_uvector vertex_pair_buffer_p_q_edge_q_r_srcs(0, + handle.get_stream()); + rmm::device_uvector vertex_pair_buffer_p_q_edge_q_r_dsts(0, + handle.get_stream()); + + std::tie(vertex_pair_buffer_p_q_edge_q_r_dsts, + vertex_pair_buffer_p_q_edge_q_r_srcs, + std::ignore, + std::ignore, + std::ignore, + std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<1>(vertex_pair_buffer_p_q_edge_q_r)), + std::move(std::get<0>(vertex_pair_buffer_p_q_edge_q_r)), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); decrease_triangle_count( handle, cur_graph_view, edge_triangle_counts, - raft::device_span(vertex_pair_buffer_p_q_edge_q_r_dsts.data(), vertex_pair_buffer_p_q_edge_q_r_dsts.size()), - raft::device_span(vertex_pair_buffer_p_q_edge_q_r_srcs.data(), vertex_pair_buffer_p_q_edge_q_r_srcs.size()) - ); + raft::device_span(vertex_pair_buffer_p_q_edge_q_r_dsts.data(), + vertex_pair_buffer_p_q_edge_q_r_dsts.size()), + raft::device_span(vertex_pair_buffer_p_q_edge_q_r_srcs.data(), + vertex_pair_buffer_p_q_edge_q_r_srcs.size())); // Shuffle before updating count - rmm::device_uvector vertex_pair_buffer_p_r_edge_q_r_srcs(0, handle.get_stream()); - rmm::device_uvector vertex_pair_buffer_p_r_edge_q_r_dsts(0, handle.get_stream()); - - std::tie(vertex_pair_buffer_p_r_edge_q_r_dsts, vertex_pair_buffer_p_r_edge_q_r_srcs, std::ignore, std::ignore, std::ignore, std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<1>(vertex_pair_buffer_p_r_edge_q_r)), - std::move(std::get<0>(vertex_pair_buffer_p_r_edge_q_r)), - std::nullopt, - std::nullopt, - std::nullopt, - 
cur_graph_view.vertex_partition_range_lasts()); - + rmm::device_uvector vertex_pair_buffer_p_r_edge_q_r_srcs(0, + handle.get_stream()); + rmm::device_uvector vertex_pair_buffer_p_r_edge_q_r_dsts(0, + handle.get_stream()); + + std::tie(vertex_pair_buffer_p_r_edge_q_r_dsts, + vertex_pair_buffer_p_r_edge_q_r_srcs, + std::ignore, + std::ignore, + std::ignore, + std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<1>(vertex_pair_buffer_p_r_edge_q_r)), + std::move(std::get<0>(vertex_pair_buffer_p_r_edge_q_r)), + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + decrease_triangle_count( handle, cur_graph_view, edge_triangle_counts, - raft::device_span(vertex_pair_buffer_p_r_edge_q_r_dsts.data(), vertex_pair_buffer_p_r_edge_q_r_dsts.size()), - raft::device_span(vertex_pair_buffer_p_r_edge_q_r_srcs.data(), vertex_pair_buffer_p_r_edge_q_r_srcs.size()) - ); - + raft::device_span(vertex_pair_buffer_p_r_edge_q_r_dsts.data(), + vertex_pair_buffer_p_r_edge_q_r_dsts.size()), + raft::device_span(vertex_pair_buffer_p_r_edge_q_r_srcs.data(), + vertex_pair_buffer_p_r_edge_q_r_srcs.size())); + } else { decrease_triangle_count( handle, cur_graph_view, edge_triangle_counts, - raft::device_span(std::get<0>(vertex_pair_buffer_q_r).data(), std::get<0>(vertex_pair_buffer_q_r).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_q_r).data(), std::get<1>(vertex_pair_buffer_q_r).size()) - ); + raft::device_span(std::get<0>(vertex_pair_buffer_q_r).data(), + std::get<0>(vertex_pair_buffer_q_r).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_q_r).data(), + std::get<1>(vertex_pair_buffer_q_r).size())); decrease_triangle_count( handle, cur_graph_view, edge_triangle_counts, - raft::device_span(std::get<1>(vertex_pair_buffer_p_q_edge_q_r).data(), std::get<0>(vertex_pair_buffer_p_q_edge_q_r).size()), - 
raft::device_span(std::get<0>(vertex_pair_buffer_p_q_edge_q_r).data(), std::get<1>(vertex_pair_buffer_p_q_edge_q_r).size()) - ); + raft::device_span(std::get<1>(vertex_pair_buffer_p_q_edge_q_r).data(), + std::get<0>(vertex_pair_buffer_p_q_edge_q_r).size()), + raft::device_span(std::get<0>(vertex_pair_buffer_p_q_edge_q_r).data(), + std::get<1>(vertex_pair_buffer_p_q_edge_q_r).size())); decrease_triangle_count( handle, cur_graph_view, edge_triangle_counts, - raft::device_span(std::get<1>(vertex_pair_buffer_p_r_edge_q_r).data(), std::get<0>(vertex_pair_buffer_p_r_edge_q_r).size()), - raft::device_span(std::get<0>(vertex_pair_buffer_p_r_edge_q_r).data(), std::get<1>(vertex_pair_buffer_p_r_edge_q_r).size()) - ); + raft::device_span(std::get<1>(vertex_pair_buffer_p_r_edge_q_r).data(), + std::get<0>(vertex_pair_buffer_p_r_edge_q_r).size()), + raft::device_span(std::get<0>(vertex_pair_buffer_p_r_edge_q_r).data(), + std::get<1>(vertex_pair_buffer_p_r_edge_q_r).size())); } - + prev_chunk_size += chunk_size; num_remaining_weak_edges -= chunk_size; - - } weak_edgelist_first = - thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); + thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); // Unrolling p, r edges // create pair weak_src, weak_edge_idx (unique) // create a dataframe buffer of size weak_edge_size // FIXME: No need to create a dataframe buffer. We can just zip weak_edgelist_srcs - // with a vector counting from 0 .. - - auto vertex_pair_buffer_p_tag = - allocate_dataframe_buffer>(weak_edgelist_srcs.size(), - handle.get_stream()); + // with a vector counting from 0 .. 
+ + auto vertex_pair_buffer_p_tag = allocate_dataframe_buffer>( + weak_edgelist_srcs.size(), handle.get_stream()); if constexpr (multi_gpu) { std::vector h_num_weak_edges = {vertex_t{weak_edgelist_srcs.size()}}; rmm::device_uvector num_weak_edges(1, handle.get_stream()); - raft::update_device(num_weak_edges.data(), h_num_weak_edges.data(), h_num_weak_edges.size(), handle.get_stream()); - - auto& comm = handle.get_comms(); + raft::update_device(num_weak_edges.data(), + h_num_weak_edges.data(), + h_num_weak_edges.size(), + handle.get_stream()); + + auto& comm = handle.get_comms(); auto const comm_rank = comm.get_rank(); - // Get global weak_edgelist + // Get global weak_edgelist auto global_num_weak_edges = cugraph::detail::device_allgatherv( handle, comm, raft::device_span(num_weak_edges.data(), num_weak_edges.size())); - - rmm::device_uvector prefix_sum_global_num_weak_edges(global_num_weak_edges.size(), handle.get_stream()); + + rmm::device_uvector prefix_sum_global_num_weak_edges(global_num_weak_edges.size(), + handle.get_stream()); thrust::inclusive_scan(handle.get_thrust_policy(), - global_num_weak_edges.begin(), - global_num_weak_edges.end(), - prefix_sum_global_num_weak_edges.begin()); - + global_num_weak_edges.begin(), + global_num_weak_edges.end(), + prefix_sum_global_num_weak_edges.begin()); + thrust::tabulate(handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_tag), - get_dataframe_buffer_end(vertex_pair_buffer_p_tag), - [rank = comm_rank, - num_weak_edges = prefix_sum_global_num_weak_edges.begin(), - p = weak_edgelist_srcs.begin()] __device__(auto idx) { - if (rank != 0) { - auto idx_tag = idx + (num_weak_edges[rank - 1]); - return thrust::make_tuple(p[idx], idx_tag); - } - - return thrust::make_tuple(p[idx], idx); - }); - + get_dataframe_buffer_begin(vertex_pair_buffer_p_tag), + get_dataframe_buffer_end(vertex_pair_buffer_p_tag), + [rank = comm_rank, + num_weak_edges = prefix_sum_global_num_weak_edges.begin(), + p = 
weak_edgelist_srcs.begin()] __device__(auto idx) { + if (rank != 0) { + auto idx_tag = idx + (num_weak_edges[rank - 1]); + return thrust::make_tuple(p[idx], idx_tag); + } + + return thrust::make_tuple(p[idx], idx); + }); + } else { - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_tag), - get_dataframe_buffer_end(vertex_pair_buffer_p_tag), - [ - p = weak_edgelist_srcs.begin() - ] __device__(auto idx) { - return thrust::make_tuple(p[idx], idx); - }); + thrust::tabulate(handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_tag), + get_dataframe_buffer_end(vertex_pair_buffer_p_tag), + [p = weak_edgelist_srcs.begin()] __device__(auto idx) { + return thrust::make_tuple(p[idx], idx); + }); } - + vertex_frontier_t vertex_frontier(handle, 1); rmm::device_uvector tag_cpy(0, handle.get_stream()); if constexpr (multi_gpu) { - tag_cpy.resize( - std::get<1>(vertex_pair_buffer_p_tag).size(), handle.get_stream()); + tag_cpy.resize(std::get<1>(vertex_pair_buffer_p_tag).size(), handle.get_stream()); // Need a copy before shuffling the original tag - thrust::copy( - handle.get_thrust_policy(), - std::get<1>(vertex_pair_buffer_p_tag).begin(), - std::get<1>(vertex_pair_buffer_p_tag).end(), - tag_cpy.begin()); + thrust::copy(handle.get_thrust_policy(), + std::get<1>(vertex_pair_buffer_p_tag).begin(), + std::get<1>(vertex_pair_buffer_p_tag).end(), + tag_cpy.begin()); // Shuffle vertices auto [p_vrtx, p_tag] = - detail::shuffle_int_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( - handle, - std::move(std::get<0>(vertex_pair_buffer_p_tag)), - std::move(std::get<1>(vertex_pair_buffer_p_tag)), - cur_graph_view.vertex_partition_range_lasts()); + detail::shuffle_int_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( + handle, + std::move(std::get<0>(vertex_pair_buffer_p_tag)), + std::move(std::get<1>(vertex_pair_buffer_p_tag)), + cur_graph_view.vertex_partition_range_lasts()); - - 
vertex_frontier.bucket(0).insert( - thrust::make_zip_iterator(p_vrtx.begin(), p_tag.begin()), - thrust::make_zip_iterator(p_vrtx.end(), p_tag.end()) - ); + vertex_frontier.bucket(0).insert(thrust::make_zip_iterator(p_vrtx.begin(), p_tag.begin()), + thrust::make_zip_iterator(p_vrtx.end(), p_tag.end())); } else { vertex_frontier.bucket(0).insert( - thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).begin(), std::get<1>(vertex_pair_buffer_p_tag).begin()), - thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).end(), std::get<1>(vertex_pair_buffer_p_tag).end()) - ); - + thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).begin(), + std::get<1>(vertex_pair_buffer_p_tag).begin()), + thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).end(), + std::get<1>(vertex_pair_buffer_p_tag).end())); } rmm::device_uvector q(0, handle.get_stream()); rmm::device_uvector idx(0, handle.get_stream()); - std::tie(q, idx) = - cugraph::extract_transform_v_frontier_outgoing_e( - handle, - cur_graph_view, - vertex_frontier.bucket(0), - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - extract_q_idx{}, - true); - + std::tie(q, idx) = cugraph::extract_transform_v_frontier_outgoing_e( + handle, + cur_graph_view, + vertex_frontier.bucket(0), + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + extract_q_idx{}, + true); + vertex_frontier.bucket(0).clear(); if constexpr (multi_gpu) { // Shuffle vertices std::tie(q, idx) = - detail::shuffle_int_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( - handle, - std::move(q), - std::move(idx), - cur_graph_view.vertex_partition_range_lasts()); + detail::shuffle_int_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( + handle, std::move(q), std::move(idx), cur_graph_view.vertex_partition_range_lasts()); } - 
vertex_frontier.bucket(0).insert( - thrust::make_zip_iterator(q.begin(), idx.begin()), - thrust::make_zip_iterator(q.end(), idx.end()) - ); + vertex_frontier.bucket(0).insert(thrust::make_zip_iterator(q.begin(), idx.begin()), + thrust::make_zip_iterator(q.end(), idx.end())); auto vertex_pair_buffer_p_r = - allocate_dataframe_buffer>(0, - handle.get_stream()); - + allocate_dataframe_buffer>(0, handle.get_stream()); + auto vertex_pair_buffer_p_q_edge_p_r = - allocate_dataframe_buffer>(0, - handle.get_stream()); - + allocate_dataframe_buffer>(0, handle.get_stream()); + auto vertex_pair_buffer_q_r_edge_p_r = - allocate_dataframe_buffer>(0, - handle.get_stream()); + allocate_dataframe_buffer>(0, handle.get_stream()); // Get chunk global weak edges if constexpr (multi_gpu) { - // Get major weak edges - auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); - auto major_weak_edgelist_srcs = cugraph::detail::device_allgatherv( - handle, major_comm, raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size())); - // FIXME: Perform all-to-all in chunks - auto major_weak_edgelist_dsts = cugraph::detail::device_allgatherv( - handle, major_comm, raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); - - auto major_weak_edgelist_tags = cugraph::detail::device_allgatherv( - handle, major_comm, raft::device_span(tag_cpy.data(), tag_cpy.size())); - - auto major_weak_edgelist_first = - thrust::make_zip_iterator(major_weak_edgelist_srcs.begin(), major_weak_edgelist_dsts.begin()); // FIXME: remove as it is unused - - auto major_weak_edgelist_dsts_tags_first = - thrust::make_zip_iterator(major_weak_edgelist_dsts.begin(), major_weak_edgelist_tags.begin()); - - thrust::sort_by_key(handle.get_thrust_policy(), - major_weak_edgelist_dsts_tags_first, - major_weak_edgelist_dsts_tags_first + major_weak_edgelist_dsts.size(), - major_weak_edgelist_srcs.begin() - ); - - // FIXME: 'idx_closing' no longer needed - remove it - auto 
[q_closing, r_closing, p_closing, idx_closing] = - cugraph::extract_transform_v_frontier_outgoing_e( - handle, - cur_graph_view, - vertex_frontier.bucket(0), - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - extract_q_idx_closing{ - major_weak_edgelist_dsts_tags_first, - major_weak_edgelist_dsts_tags_first + major_weak_edgelist_dsts.size(), - raft::device_span(major_weak_edgelist_srcs.data(), major_weak_edgelist_srcs.size()), - }, - true); - - - resize_dataframe_buffer(vertex_pair_buffer_p_r, - q_closing.size(), - handle.get_stream()); - - thrust::copy( - handle.get_thrust_policy(), - thrust::make_zip_iterator(p_closing.begin(), r_closing.begin()), - thrust::make_zip_iterator(p_closing.end(), r_closing.end()), - thrust::make_zip_iterator( - std::get<0>(vertex_pair_buffer_p_r).begin(), std::get<1>(vertex_pair_buffer_p_r).begin()) - ); - - resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_p_r, - q_closing.size(), - handle.get_stream()); - - thrust::copy( + // Get major weak edges + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto major_weak_edgelist_srcs = cugraph::detail::device_allgatherv( + handle, + major_comm, + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size())); + // FIXME: Perform all-to-all in chunks + auto major_weak_edgelist_dsts = cugraph::detail::device_allgatherv( + handle, + major_comm, + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); + + auto major_weak_edgelist_tags = cugraph::detail::device_allgatherv( + handle, major_comm, raft::device_span(tag_cpy.data(), tag_cpy.size())); + + auto major_weak_edgelist_first = thrust::make_zip_iterator( + major_weak_edgelist_srcs.begin(), + major_weak_edgelist_dsts.begin()); // FIXME: remove as it is unused + + auto major_weak_edgelist_dsts_tags_first = thrust::make_zip_iterator( + major_weak_edgelist_dsts.begin(), 
major_weak_edgelist_tags.begin()); + + thrust::sort_by_key(handle.get_thrust_policy(), + major_weak_edgelist_dsts_tags_first, + major_weak_edgelist_dsts_tags_first + major_weak_edgelist_dsts.size(), + major_weak_edgelist_srcs.begin()); + + // FIXME: 'idx_closing' no longer needed - remove it + auto [q_closing, r_closing, p_closing, idx_closing] = + cugraph::extract_transform_v_frontier_outgoing_e( + handle, + cur_graph_view, + vertex_frontier.bucket(0), + cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + extract_q_idx_closing{ + major_weak_edgelist_dsts_tags_first, + major_weak_edgelist_dsts_tags_first + major_weak_edgelist_dsts.size(), + raft::device_span(major_weak_edgelist_srcs.data(), + major_weak_edgelist_srcs.size()), + }, + true); + + resize_dataframe_buffer(vertex_pair_buffer_p_r, q_closing.size(), handle.get_stream()); + + thrust::copy(handle.get_thrust_policy(), + thrust::make_zip_iterator(p_closing.begin(), r_closing.begin()), + thrust::make_zip_iterator(p_closing.end(), r_closing.end()), + thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_r).begin(), + std::get<1>(vertex_pair_buffer_p_r).begin())); + + resize_dataframe_buffer( + vertex_pair_buffer_p_q_edge_p_r, q_closing.size(), handle.get_stream()); + + thrust::copy( handle.get_thrust_policy(), thrust::make_zip_iterator(p_closing.begin(), q_closing.begin()), thrust::make_zip_iterator(p_closing.end(), q_closing.end()), - thrust::make_zip_iterator( - std::get<0>(vertex_pair_buffer_p_q_edge_p_r).begin(), std::get<1>(vertex_pair_buffer_p_q_edge_p_r).begin()) - ); + thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_q_edge_p_r).begin(), + std::get<1>(vertex_pair_buffer_p_q_edge_p_r).begin())); - - resize_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_r, - q_closing.size(), - handle.get_stream()); + resize_dataframe_buffer( + vertex_pair_buffer_q_r_edge_p_r, q_closing.size(), handle.get_stream()); - 
thrust::copy( + thrust::copy( handle.get_thrust_policy(), thrust::make_zip_iterator(q_closing.begin(), r_closing.begin()), thrust::make_zip_iterator(q_closing.end(), r_closing.end()), - thrust::make_zip_iterator( - std::get<0>(vertex_pair_buffer_q_r_edge_p_r).begin(), std::get<1>(vertex_pair_buffer_q_r_edge_p_r).begin()) - ); + thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_q_r_edge_p_r).begin(), + std::get<1>(vertex_pair_buffer_q_r_edge_p_r).begin())); - auto& comm = handle.get_comms(); // FIXME: Only using global comm for testing purposes - // Get global weak_edgelist - // FIXME: Perform all-to-all in chunks - auto global_weak_edgelist_srcs = cugraph::detail::device_allgatherv( - handle, comm, raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size())); - // FIXME: Perform all-to-all in chunks - auto global_weak_edgelist_dsts = cugraph::detail::device_allgatherv( - handle, comm, raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); + auto& comm = handle.get_comms(); // FIXME: Only using global comm for testing purposes + // Get global weak_edgelist + // FIXME: Perform all-to-all in chunks + auto global_weak_edgelist_srcs = cugraph::detail::device_allgatherv( + handle, + comm, + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size())); + // FIXME: Perform all-to-all in chunks + auto global_weak_edgelist_dsts = cugraph::detail::device_allgatherv( + handle, + comm, + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); + + // Sort the weak edges if they are not already + auto chunk_global_weak_edgelist_first = thrust::make_zip_iterator( + global_weak_edgelist_srcs.begin(), global_weak_edgelist_dsts.begin()); + + thrust::sort(handle.get_thrust_policy(), + chunk_global_weak_edgelist_first, + chunk_global_weak_edgelist_first + global_weak_edgelist_srcs.size()); + + auto num_edges_not_overcomp_p_q = remove_overcompensating_edges< + vertex_t, + edge_t, + 
decltype(get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r)), + false, + multi_gpu, + true // FIXME: Currently using global weak edges for validation purposes + >(handle, + q_closing.size(), + get_dataframe_buffer_begin( + vertex_pair_buffer_p_q_edge_p_r), // cannot be a copy, needs to be the original one + // so overcompensatiing edges can be removed + get_dataframe_buffer_begin( + vertex_pair_buffer_q_r_edge_p_r), // cannot be a copy, needs to be the original one + // so overcompensatiing edges can be removed + raft::device_span(global_weak_edgelist_srcs.data(), + global_weak_edgelist_srcs.size()), + raft::device_span(global_weak_edgelist_dsts.data(), + global_weak_edgelist_dsts.size()), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), + cur_graph_view.number_of_local_edge_partitions(), + cur_graph_view.vertex_partition_range_lasts()); - // Sort the weak edges if they are not already - auto chunk_global_weak_edgelist_first = - thrust::make_zip_iterator(global_weak_edgelist_srcs.begin(), global_weak_edgelist_dsts.begin()); + // FIXME: No need to resize the dataframes buffer now. + resize_dataframe_buffer( + vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); + resize_dataframe_buffer( + vertex_pair_buffer_q_r_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); + + // FIXME: No need to resize the dataframes buffer now. 
+ resize_dataframe_buffer( + vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); + resize_dataframe_buffer( + vertex_pair_buffer_q_r_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); + + auto num_edges_not_overcomp_q_r = remove_overcompensating_edges< + vertex_t, + edge_t, + decltype(get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r)), + false, + multi_gpu, + true // FIXME: Currently using global weak edges for validation purposes + >( + handle, + num_edges_not_overcomp_p_q, + get_dataframe_buffer_begin( + vertex_pair_buffer_q_r_edge_p_r), // FIXME: cannot be a copy, needs to be the original + // one so overcompensatiing edges can be removed + get_dataframe_buffer_begin( + vertex_pair_buffer_p_q_edge_p_r), // FIXME: cannot be a copy, needs to be the original + // one so overcompensatiing edges can be removed + raft::device_span(global_weak_edgelist_srcs.data(), + global_weak_edgelist_srcs.size()), + raft::device_span(global_weak_edgelist_dsts.data(), + global_weak_edgelist_dsts.size()), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), + cur_graph_view.number_of_local_edge_partitions(), + cur_graph_view.vertex_partition_range_lasts()); - thrust::sort(handle.get_thrust_policy(), - chunk_global_weak_edgelist_first, - chunk_global_weak_edgelist_first + global_weak_edgelist_srcs.size()); - - auto num_edges_not_overcomp_p_q = - remove_overcompensating_edges( - handle, - q_closing.size(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r), // cannot be a copy, needs to be the original one so overcompensatiing edges can be removed - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_r), // cannot be a copy, needs to be the original one so overcompensatiing edges can be removed - raft::device_span(global_weak_edgelist_srcs.data(), global_weak_edgelist_srcs.size()), - 
raft::device_span(global_weak_edgelist_dsts.data(), global_weak_edgelist_dsts.size()), - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), - cur_graph_view.number_of_local_edge_partitions(), - cur_graph_view.vertex_partition_range_lasts() - ); - - - - // FIXME: No need to resize the dataframes buffer now. - resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); - resize_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); - - - // FIXME: No need to resize the dataframes buffer now. - resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); - resize_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); - - auto num_edges_not_overcomp_q_r = - remove_overcompensating_edges( - handle, - num_edges_not_overcomp_p_q, - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed - raft::device_span(global_weak_edgelist_srcs.data(), global_weak_edgelist_srcs.size()), - raft::device_span(global_weak_edgelist_dsts.data(), global_weak_edgelist_dsts.size()), - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), - cur_graph_view.number_of_local_edge_partitions(), - cur_graph_view.vertex_partition_range_lasts()); - - resize_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); - resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); - - // 
Reconstruct (p, r) edges that didn't already have their count updated - // FIXME: No need to reconstruct the third array because we can zip all 3 edges of the triangle - resize_dataframe_buffer(vertex_pair_buffer_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r), - get_dataframe_buffer_end(vertex_pair_buffer_p_r), - [ - vertex_pair_buffer_p_q_edge_p_r = get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r), - vertex_pair_buffer_q_r_edge_p_r = get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_r) - ] __device__(auto i) { - return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_edge_p_r[i]), thrust::get<1>(vertex_pair_buffer_q_r_edge_p_r[i])); - }); - - } else { - - // FIXME: refactor SG to use r_closing - - auto weak_edgelist_dsts_tags_first = thrust::make_zip_iterator( - weak_edgelist_dsts.begin(), std::get<1>(vertex_pair_buffer_p_tag).begin() - ); - - thrust::sort_by_key(handle.get_thrust_policy(), - weak_edgelist_dsts_tags_first, - weak_edgelist_dsts_tags_first + weak_edgelist_dsts.size(), - //major_weak_edgelist_srcs.begin() - weak_edgelist_srcs.begin() - ); - - auto [q_closing, r_closing, p_closing, idx_closing] = - cugraph::extract_transform_v_frontier_outgoing_e( - handle, - cur_graph_view, - vertex_frontier.bucket(0), - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - extract_q_idx_closing{ - weak_edgelist_dsts_tags_first, - weak_edgelist_dsts_tags_first + weak_edgelist_dsts.size(), - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - }, - do_expensive_check); - - // FIXME: Move the 3 copies to a function as it is also performed for MG - // extract pair (p, r) - resize_dataframe_buffer(vertex_pair_buffer_p_r, - q_closing.size(), - handle.get_stream()); - thrust::copy( + resize_dataframe_buffer( + 
vertex_pair_buffer_q_r_edge_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + resize_dataframe_buffer( + vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + + // Reconstruct (p, r) edges that didn't already have their count updated + // FIXME: No need to reconstruct the third array because we can zip all 3 edges of the + // triangle + resize_dataframe_buffer( + vertex_pair_buffer_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + thrust::tabulate( handle.get_thrust_policy(), - thrust::make_zip_iterator(p_closing.begin(), r_closing.begin()), - thrust::make_zip_iterator(p_closing.end(), r_closing.end()), - thrust::make_zip_iterator( - std::get<0>(vertex_pair_buffer_p_r).begin(), std::get<1>(vertex_pair_buffer_p_r).begin()) - ); - - resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_p_r, - q_closing.size(), - handle.get_stream()); - - // extract pair (p, q) - thrust::copy( + get_dataframe_buffer_begin(vertex_pair_buffer_p_r), + get_dataframe_buffer_end(vertex_pair_buffer_p_r), + [vertex_pair_buffer_p_q_edge_p_r = + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r), + vertex_pair_buffer_q_r_edge_p_r = + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_r)] __device__(auto i) { + return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_edge_p_r[i]), + thrust::get<1>(vertex_pair_buffer_q_r_edge_p_r[i])); + }); + + } else { + // FIXME: refactor SG to use r_closing + + auto weak_edgelist_dsts_tags_first = thrust::make_zip_iterator( + weak_edgelist_dsts.begin(), std::get<1>(vertex_pair_buffer_p_tag).begin()); + + thrust::sort_by_key(handle.get_thrust_policy(), + weak_edgelist_dsts_tags_first, + weak_edgelist_dsts_tags_first + weak_edgelist_dsts.size(), + // major_weak_edgelist_srcs.begin() + weak_edgelist_srcs.begin()); + + auto [q_closing, r_closing, p_closing, idx_closing] = + cugraph::extract_transform_v_frontier_outgoing_e( + handle, + cur_graph_view, + vertex_frontier.bucket(0), + 
cugraph::edge_src_dummy_property_t{}.view(), + cugraph::edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + extract_q_idx_closing{ + weak_edgelist_dsts_tags_first, + weak_edgelist_dsts_tags_first + weak_edgelist_dsts.size(), + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + }, + do_expensive_check); + + // FIXME: Move the 3 copies to a function as it is also performed for MG + // extract pair (p, r) + resize_dataframe_buffer(vertex_pair_buffer_p_r, q_closing.size(), handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + thrust::make_zip_iterator(p_closing.begin(), r_closing.begin()), + thrust::make_zip_iterator(p_closing.end(), r_closing.end()), + thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_r).begin(), + std::get<1>(vertex_pair_buffer_p_r).begin())); + + resize_dataframe_buffer( + vertex_pair_buffer_p_q_edge_p_r, q_closing.size(), handle.get_stream()); + + // extract pair (p, q) + thrust::copy( handle.get_thrust_policy(), thrust::make_zip_iterator(p_closing.begin(), q_closing.begin()), thrust::make_zip_iterator(p_closing.end(), q_closing.end()), - thrust::make_zip_iterator( - std::get<0>(vertex_pair_buffer_p_q_edge_p_r).begin(), std::get<1>(vertex_pair_buffer_p_q_edge_p_r).begin()) - ); + thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_q_edge_p_r).begin(), + std::get<1>(vertex_pair_buffer_p_q_edge_p_r).begin())); - // extract pair (q, r) - resize_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_r, - q_closing.size(), - handle.get_stream()); + // extract pair (q, r) + resize_dataframe_buffer( + vertex_pair_buffer_q_r_edge_p_r, q_closing.size(), handle.get_stream()); - thrust::copy( + thrust::copy( handle.get_thrust_policy(), thrust::make_zip_iterator(q_closing.begin(), r_closing.begin()), thrust::make_zip_iterator(q_closing.end(), r_closing.end()), - thrust::make_zip_iterator( - std::get<0>(vertex_pair_buffer_q_r_edge_p_r).begin(), 
std::get<1>(vertex_pair_buffer_q_r_edge_p_r).begin()) - ); + thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_q_r_edge_p_r).begin(), + std::get<1>(vertex_pair_buffer_q_r_edge_p_r).begin())); + + // weak_edgelist_first + thrust::sort(handle.get_thrust_policy(), + weak_edgelist_first, + weak_edgelist_first + weak_edgelist_dsts.size()); + + auto num_edges_not_overcomp_p_q = remove_overcompensating_edges< + vertex_t, + edge_t, + decltype(get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r)), + false, + multi_gpu, + true>( + handle, + q_closing.size(), + get_dataframe_buffer_begin( + vertex_pair_buffer_p_q_edge_p_r), // FIXME: cannot be a copy, needs to be the original + // one so overcompensatiing edges can be removed + get_dataframe_buffer_begin( + vertex_pair_buffer_q_r_edge_p_r), // FIXME: cannot be a copy, needs to be the original + // one so overcompensatiing edges can be removed + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), + raft::device_span( + weak_edgelist_srcs.data(), + weak_edgelist_srcs.size()), // FIXME: Only for MG validation purposes + raft::device_span( + weak_edgelist_dsts.data(), + weak_edgelist_dsts.size()), // FIXME: Only for MG validation purposes + cur_graph_view.number_of_local_edge_partitions(), + cur_graph_view.vertex_partition_range_lasts()); - // weak_edgelist_first - thrust::sort(handle.get_thrust_policy(), - weak_edgelist_first, - weak_edgelist_first + weak_edgelist_dsts.size() - ); - - auto num_edges_not_overcomp_p_q = - remove_overcompensating_edges( - handle, - q_closing.size(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed - 
raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), // FIXME: Only for MG validation purposes - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), // FIXME: Only for MG validation purposes - cur_graph_view.number_of_local_edge_partitions(), - cur_graph_view.vertex_partition_range_lasts()); + resize_dataframe_buffer( + vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); + resize_dataframe_buffer( + vertex_pair_buffer_q_r_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); + + auto num_edges_not_overcomp_q_r = remove_overcompensating_edges< + vertex_t, + edge_t, + decltype(get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r)), + false, + multi_gpu, + true>( + handle, + num_edges_not_overcomp_p_q, + get_dataframe_buffer_begin( + vertex_pair_buffer_q_r_edge_p_r), // FIXME: cannot be a copy, needs to be the original + // one so overcompensatiing edges can be removed + get_dataframe_buffer_begin( + vertex_pair_buffer_p_q_edge_p_r), // FIXME: cannot be a copy, needs to be the original + // one so overcompensatiing edges can be removed + raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), + raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), + raft::device_span( + weak_edgelist_srcs.data(), + weak_edgelist_srcs.size()), // FIXME: Only for MG validation purposes + raft::device_span( + weak_edgelist_dsts.data(), + weak_edgelist_dsts.size()), // FIXME: Only for MG validation purposes + cur_graph_view.number_of_local_edge_partitions(), + cur_graph_view.vertex_partition_range_lasts()); - resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); - resize_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); - 
- auto num_edges_not_overcomp_q_r = - remove_overcompensating_edges( - handle, - num_edges_not_overcomp_p_q, - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r), // FIXME: cannot be a copy, needs to be the original one so overcompensatiing edges can be removed - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), // FIXME: Only for MG validation purposes - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), // FIXME: Only for MG validation purposes - cur_graph_view.number_of_local_edge_partitions(), - cur_graph_view.vertex_partition_range_lasts()); - - resize_dataframe_buffer(vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); - resize_dataframe_buffer(vertex_pair_buffer_q_r_edge_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); - - // Reconstruct (p, r) edges that didn't already have their count updated. 
- // FIXME: No need to reconstruct the third array because we can zip all 3 edges of the triangle - resize_dataframe_buffer(vertex_pair_buffer_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r), - get_dataframe_buffer_end(vertex_pair_buffer_p_r), - [ - vertex_pair_buffer_p_q_edge_p_r = get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r), - vertex_pair_buffer_q_r_edge_p_r = get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_r) - ] __device__(auto i) { - return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_edge_p_r[i]), thrust::get<1>(vertex_pair_buffer_q_r_edge_p_r[i])); - }); + resize_dataframe_buffer( + vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + resize_dataframe_buffer( + vertex_pair_buffer_q_r_edge_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); - } - - if constexpr (multi_gpu) { + // Reconstruct (p, r) edges that didn't already have their count updated. 
+ // FIXME: No need to reconstruct the third array because we can zip all 3 edges of the + // triangle + resize_dataframe_buffer( + vertex_pair_buffer_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + get_dataframe_buffer_begin(vertex_pair_buffer_p_r), + get_dataframe_buffer_end(vertex_pair_buffer_p_r), + [vertex_pair_buffer_p_q_edge_p_r = + get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r), + vertex_pair_buffer_q_r_edge_p_r = + get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_r)] __device__(auto i) { + return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_edge_p_r[i]), + thrust::get<1>(vertex_pair_buffer_q_r_edge_p_r[i])); + }); + } + + if constexpr (multi_gpu) { // Shuffle before updating count rmm::device_uvector vertex_pair_buffer_p_r_srcs(0, handle.get_stream()); rmm::device_uvector vertex_pair_buffer_p_r_dsts(0, handle.get_stream()); - std::tie(vertex_pair_buffer_p_r_srcs, vertex_pair_buffer_p_r_dsts, std::ignore, std::ignore, std::ignore, std::ignore) = + std::tie(vertex_pair_buffer_p_r_srcs, + vertex_pair_buffer_p_r_dsts, + std::ignore, + std::ignore, + std::ignore, + std::ignore) = detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + edge_t, + weight_t, + int32_t>( handle, std::move(std::get<0>(vertex_pair_buffer_p_r)), std::move(std::get<1>(vertex_pair_buffer_p_r)), @@ -1814,99 +1937,122 @@ k_truss(raft::handle_t const& handle, handle, cur_graph_view, edge_triangle_counts, - raft::device_span(vertex_pair_buffer_p_r_srcs.data(), vertex_pair_buffer_p_r_srcs.size()), - raft::device_span(vertex_pair_buffer_p_r_dsts.data(), vertex_pair_buffer_p_r_dsts.size()) - ); + raft::device_span(vertex_pair_buffer_p_r_srcs.data(), + vertex_pair_buffer_p_r_srcs.size()), + raft::device_span(vertex_pair_buffer_p_r_dsts.data(), + vertex_pair_buffer_p_r_dsts.size())); // Shuffle before updating count rmm::device_uvector vertex_pair_buffer_p_q_edge_p_r_srcs(0, 
handle.get_stream()); rmm::device_uvector vertex_pair_buffer_p_q_edge_p_r_dsts(0, handle.get_stream()); - - std::tie(vertex_pair_buffer_p_q_edge_p_r_srcs, vertex_pair_buffer_p_q_edge_p_r_dsts, std::ignore, std::ignore, std::ignore, std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<0>(vertex_pair_buffer_p_q_edge_p_r)), // FIXME: rename to vertex_pair_buffer_p_q_edge_p_r for consistency - std::move(std::get<1>(vertex_pair_buffer_p_q_edge_p_r)), // FIXME: rename to vertex_pair_buffer_p_q_edge_p_r for consistency - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); - + + std::tie(vertex_pair_buffer_p_q_edge_p_r_srcs, + vertex_pair_buffer_p_q_edge_p_r_dsts, + std::ignore, + std::ignore, + std::ignore, + std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<0>( + vertex_pair_buffer_p_q_edge_p_r)), // FIXME: rename to + // vertex_pair_buffer_p_q_edge_p_r for consistency + std::move(std::get<1>( + vertex_pair_buffer_p_q_edge_p_r)), // FIXME: rename to + // vertex_pair_buffer_p_q_edge_p_r for consistency + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); + decrease_triangle_count( handle, cur_graph_view, edge_triangle_counts, - raft::device_span(vertex_pair_buffer_p_q_edge_p_r_srcs.data(), vertex_pair_buffer_p_q_edge_p_r_srcs.size()), - raft::device_span(vertex_pair_buffer_p_q_edge_p_r_dsts.data(), vertex_pair_buffer_p_q_edge_p_r_dsts.size()) - ); + raft::device_span(vertex_pair_buffer_p_q_edge_p_r_srcs.data(), + vertex_pair_buffer_p_q_edge_p_r_srcs.size()), + raft::device_span(vertex_pair_buffer_p_q_edge_p_r_dsts.data(), + vertex_pair_buffer_p_q_edge_p_r_dsts.size())); // Shuffle before updating count rmm::device_uvector vertex_pair_buffer_q_r_edge_p_r_srcs(0, handle.get_stream()); rmm::device_uvector 
vertex_pair_buffer_q_r_edge_p_r_dsts(0, handle.get_stream()); - - std::tie(vertex_pair_buffer_q_r_edge_p_r_srcs, vertex_pair_buffer_q_r_edge_p_r_dsts, std::ignore, std::ignore, std::ignore, std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<0>(vertex_pair_buffer_q_r_edge_p_r)), // FIXME: rename to vertex_pair_buffer_p_q_edge_p_r for consistency - std::move(std::get<1>(vertex_pair_buffer_q_r_edge_p_r)), // FIXME: rename to vertex_pair_buffer_p_q_edge_p_r for consistency - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); + + std::tie(vertex_pair_buffer_q_r_edge_p_r_srcs, + vertex_pair_buffer_q_r_edge_p_r_dsts, + std::ignore, + std::ignore, + std::ignore, + std::ignore) = + detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(std::get<0>( + vertex_pair_buffer_q_r_edge_p_r)), // FIXME: rename to + // vertex_pair_buffer_p_q_edge_p_r for consistency + std::move(std::get<1>( + vertex_pair_buffer_q_r_edge_p_r)), // FIXME: rename to + // vertex_pair_buffer_p_q_edge_p_r for consistency + std::nullopt, + std::nullopt, + std::nullopt, + cur_graph_view.vertex_partition_range_lasts()); decrease_triangle_count( handle, cur_graph_view, edge_triangle_counts, - raft::device_span(vertex_pair_buffer_q_r_edge_p_r_srcs.data(), vertex_pair_buffer_q_r_edge_p_r_srcs.size()), - raft::device_span(vertex_pair_buffer_q_r_edge_p_r_dsts.data(), vertex_pair_buffer_q_r_edge_p_r_dsts.size()) - ); - + raft::device_span(vertex_pair_buffer_q_r_edge_p_r_srcs.data(), + vertex_pair_buffer_q_r_edge_p_r_srcs.size()), + raft::device_span(vertex_pair_buffer_q_r_edge_p_r_dsts.data(), + vertex_pair_buffer_q_r_edge_p_r_dsts.size())); + } else { decrease_triangle_count( handle, cur_graph_view, edge_triangle_counts, - raft::device_span(std::get<0>(vertex_pair_buffer_p_r).data(), std::get<0>(vertex_pair_buffer_p_r).size()), - 
raft::device_span(std::get<1>(vertex_pair_buffer_p_r).data(), std::get<1>(vertex_pair_buffer_p_r).size()) - ); - + raft::device_span(std::get<0>(vertex_pair_buffer_p_r).data(), + std::get<0>(vertex_pair_buffer_p_r).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_p_r).data(), + std::get<1>(vertex_pair_buffer_p_r).size())); + decrease_triangle_count( handle, cur_graph_view, edge_triangle_counts, - raft::device_span(std::get<0>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<0>(vertex_pair_buffer_p_q_edge_p_r).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_p_q_edge_p_r).data(), std::get<1>(vertex_pair_buffer_p_q_edge_p_r).size()) - ); - + raft::device_span(std::get<0>(vertex_pair_buffer_p_q_edge_p_r).data(), + std::get<0>(vertex_pair_buffer_p_q_edge_p_r).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_p_q_edge_p_r).data(), + std::get<1>(vertex_pair_buffer_p_q_edge_p_r).size())); + decrease_triangle_count( handle, cur_graph_view, edge_triangle_counts, - raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_r).data(), std::get<0>(vertex_pair_buffer_q_r_edge_p_r).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_q_r_edge_p_r).data(), std::get<1>(vertex_pair_buffer_q_r_edge_p_r).size()) - ); + raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_r).data(), + std::get<0>(vertex_pair_buffer_q_r_edge_p_r).size()), + raft::device_span(std::get<1>(vertex_pair_buffer_q_r_edge_p_r).data(), + std::get<1>(vertex_pair_buffer_q_r_edge_p_r).size())); } // Mask all the edges that have 0 count cugraph::transform_e( - handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - edge_triangle_counts.view(), - [] __device__( - auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { - return count != 0; - }, - edge_mask.mutable_view(), - false); + handle, + cur_graph_view, + cugraph::edge_src_dummy_property_t{}.view(), + 
cugraph::edge_dst_dummy_property_t{}.view(), + edge_triangle_counts.view(), + [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { + return count != 0; + }, + edge_mask.mutable_view(), + false); cur_graph_view.attach_edge_mask(edge_mask.view()); } @@ -1922,9 +2068,9 @@ k_truss(raft::handle_t const& handle, edge_weight_view ? std::make_optional(*edge_weight_view) : std::nullopt, std::optional>{std::nullopt}, std::optional>{std::nullopt}, - std::make_optional(raft::device_span((*renumber_map).data(), (*renumber_map).size())) - ); - + std::make_optional( + raft::device_span((*renumber_map).data(), (*renumber_map).size()))); + std::tie(edgelist_srcs, edgelist_dsts, edgelist_wgts) = symmetrize_edgelist(handle, std::move(edgelist_srcs), @@ -1935,6 +2081,5 @@ k_truss(raft::handle_t const& handle, return std::make_tuple( std::move(edgelist_srcs), std::move(edgelist_dsts), std::move(edgelist_wgts)); } - } -} // namespace cugraph \ No newline at end of file +} // namespace cugraph diff --git a/cpp/src/structure/coarsen_graph_impl.cuh b/cpp/src/structure/coarsen_graph_impl.cuh index 1cb50e51300..0cb3df8a23e 100644 --- a/cpp/src/structure/coarsen_graph_impl.cuh +++ b/cpp/src/structure/coarsen_graph_impl.cuh @@ -349,7 +349,8 @@ coarsen_graph(raft::handle_t const& handle, // 1-2. 
globally shuffle - std::tie(edgelist_majors, edgelist_minors, edgelist_weights, std::ignore, std::ignore, std::ignore) = + std::tie( + edgelist_majors, edgelist_minors, edgelist_weights, std::ignore, std::ignore, std::ignore) = cugraph::detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning< vertex_t, edge_t, diff --git a/cpp/tests/community/k_truss_test.cpp b/cpp/tests/community/k_truss_test.cpp index 95e06d2e2d3..424d52f2067 100644 --- a/cpp/tests/community/k_truss_test.cpp +++ b/cpp/tests/community/k_truss_test.cpp @@ -323,4 +323,4 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values(KTruss_Usecase{12, false, false}), ::testing::Values(cugraph::test::Rmat_Usecase(14, 16, 0.57, 0.19, 0.19, 0, true, false)))); -CUGRAPH_TEST_PROGRAM_MAIN() \ No newline at end of file +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/community/mg_k_truss_test.cpp b/cpp/tests/community/mg_k_truss_test.cpp index 28d018019a9..2bd7219e8ee 100644 --- a/cpp/tests/community/mg_k_truss_test.cpp +++ b/cpp/tests/community/mg_k_truss_test.cpp @@ -62,8 +62,7 @@ class Tests_MGKTruss // Compare the results of running KTruss on multiple GPUs to that of a single-GPU run template - void run_current_test(KTruss_Usecase const& k_truss_usecase, - input_usecase_t const& input_usecase) + void run_current_test(KTruss_Usecase const& k_truss_usecase, input_usecase_t const& input_usecase) { using weight_t = float; @@ -105,21 +104,18 @@ class Tests_MGKTruss hr_timer.start("MG KTruss"); } - auto mg_edge_weight_view = edge_weight ? std::make_optional((*edge_weight).view()) : std::nullopt; + auto mg_edge_weight_view = + edge_weight ? 
std::make_optional((*edge_weight).view()) : std::nullopt; auto [d_cugraph_srcs, d_cugraph_dsts, d_cugraph_wgts] = cugraph::k_truss( - *handle_, - mg_graph_view, - mg_edge_weight_view, - k_truss_usecase.k_, - false); + *handle_, mg_graph_view, mg_edge_weight_view, k_truss_usecase.k_, false); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement handle_->get_comms().barrier(); hr_timer.stop(); hr_timer.display_and_clear(std::cout); - } + } // 3. Compare SG & MG results @@ -132,33 +128,34 @@ class Tests_MGKTruss mg_graph_view.vertex_partition_range_lasts()); cugraph::unrenumber_int_vertices( - *handle_, - d_cugraph_dsts.data(), - d_cugraph_dsts.size(), - (*mg_renumber_map).data(), - mg_graph_view.vertex_partition_range_lasts()); + *handle_, + d_cugraph_dsts.data(), + d_cugraph_dsts.size(), + (*mg_renumber_map).data(), + mg_graph_view.vertex_partition_range_lasts()); auto global_d_cugraph_srcs = cugraph::test::device_gatherv( *handle_, raft::device_span(d_cugraph_srcs.data(), d_cugraph_srcs.size())); - + auto global_d_cugraph_dsts = cugraph::test::device_gatherv( - *handle_, raft::device_span(d_cugraph_dsts.data(), d_cugraph_srcs.size())); - + *handle_, raft::device_span(d_cugraph_dsts.data(), d_cugraph_srcs.size())); + rmm::device_uvector d_sorted_cugraph_srcs{0, handle_->get_stream()}; rmm::device_uvector d_sorted_cugraph_dsts{0, handle_->get_stream()}; rmm::device_uvector d_sorted_cugraph_wgts{0, handle_->get_stream()}; - + if (edge_weight) { auto global_d_cugraph_wgts = cugraph::test::device_gatherv( - *handle_, raft::device_span((*d_cugraph_wgts).data(), (*d_cugraph_wgts).size())); - + *handle_, + raft::device_span((*d_cugraph_wgts).data(), (*d_cugraph_wgts).size())); + std::tie(d_sorted_cugraph_srcs, d_sorted_cugraph_dsts, d_sorted_cugraph_wgts) = - cugraph::test::sort_by_key( - *handle_, global_d_cugraph_srcs, global_d_cugraph_dsts, global_d_cugraph_wgts); + cugraph::test::sort_by_key( + *handle_, 
global_d_cugraph_srcs, global_d_cugraph_dsts, global_d_cugraph_wgts); } else { std::tie(d_sorted_cugraph_srcs, d_sorted_cugraph_dsts) = - cugraph::test::sort(*handle_, global_d_cugraph_srcs, global_d_cugraph_dsts); + cugraph::test::sort(*handle_, global_d_cugraph_srcs, global_d_cugraph_dsts); } // 3-1. Convert to SG graph @@ -171,9 +168,9 @@ class Tests_MGKTruss std::make_optional>((*mg_renumber_map).data(), (*mg_renumber_map).size()), false); - - auto sg_edge_weight_view = sg_edge_weights ? std::make_optional((*sg_edge_weights).view()) : std::nullopt; + auto sg_edge_weight_view = + sg_edge_weights ? std::make_optional((*sg_edge_weights).view()) : std::nullopt; if (handle_->get_comms().get_rank() == int{0}) { auto sg_graph_view = sg_graph.view(); @@ -181,58 +178,45 @@ class Tests_MGKTruss // 3-2. Run SG KTruss auto [ref_d_cugraph_srcs, ref_d_cugraph_dsts, ref_d_cugraph_wgts] = cugraph::k_truss( - *handle_, - sg_graph_view, - sg_edge_weight_view, - k_truss_usecase.k_, - false); + *handle_, sg_graph_view, sg_edge_weight_view, k_truss_usecase.k_, false); rmm::device_uvector d_sorted_ref_cugraph_srcs{0, handle_->get_stream()}; rmm::device_uvector d_sorted_ref_cugraph_dsts{0, handle_->get_stream()}; rmm::device_uvector d_sorted_ref_cugraph_wgts{0, handle_->get_stream()}; - - if (edge_weight) { - std::tie(d_sorted_ref_cugraph_srcs, d_sorted_ref_cugraph_dsts, d_sorted_ref_cugraph_wgts) = - cugraph::test::sort_by_key( - *handle_, ref_d_cugraph_srcs, ref_d_cugraph_dsts, *ref_d_cugraph_wgts); + + if (edge_weight) { + std::tie( + d_sorted_ref_cugraph_srcs, d_sorted_ref_cugraph_dsts, d_sorted_ref_cugraph_wgts) = + cugraph::test::sort_by_key( + *handle_, ref_d_cugraph_srcs, ref_d_cugraph_dsts, *ref_d_cugraph_wgts); } else { std::tie(d_sorted_ref_cugraph_srcs, d_sorted_ref_cugraph_dsts) = - cugraph::test::sort( - *handle_, ref_d_cugraph_srcs, ref_d_cugraph_dsts); + cugraph::test::sort(*handle_, ref_d_cugraph_srcs, ref_d_cugraph_dsts); } // 3-3. 
Compare - auto h_cugraph_srcs = cugraph::test::to_host(*handle_, d_sorted_cugraph_srcs); - auto h_cugraph_dsts = cugraph::test::to_host(*handle_, d_sorted_cugraph_dsts); - auto ref_h_cugraph_srcs = - cugraph::test::to_host(*handle_, d_sorted_ref_cugraph_srcs); - auto ref_h_cugraph_dsts = - cugraph::test::to_host(*handle_, d_sorted_ref_cugraph_dsts); - - ASSERT_TRUE(std::equal(h_cugraph_srcs.begin(), - h_cugraph_srcs.end(), - ref_h_cugraph_srcs.begin())); - - ASSERT_TRUE(std::equal(h_cugraph_dsts.begin(), - h_cugraph_dsts.end(), - ref_h_cugraph_dsts.begin())); - - if (edge_weight) { - auto ref_h_cugraph_wgts = - cugraph::test::to_host(*handle_, d_sorted_ref_cugraph_wgts); - - auto h_cugraph_wgts = - cugraph::test::to_host(*handle_, d_sorted_cugraph_wgts); - - ASSERT_TRUE(std::equal(h_cugraph_wgts.begin(), - h_cugraph_wgts.end(), - ref_h_cugraph_wgts.begin())); + auto h_cugraph_srcs = cugraph::test::to_host(*handle_, d_sorted_cugraph_srcs); + auto h_cugraph_dsts = cugraph::test::to_host(*handle_, d_sorted_cugraph_dsts); + auto ref_h_cugraph_srcs = cugraph::test::to_host(*handle_, d_sorted_ref_cugraph_srcs); + auto ref_h_cugraph_dsts = cugraph::test::to_host(*handle_, d_sorted_ref_cugraph_dsts); + + ASSERT_TRUE( + std::equal(h_cugraph_srcs.begin(), h_cugraph_srcs.end(), ref_h_cugraph_srcs.begin())); + + ASSERT_TRUE( + std::equal(h_cugraph_dsts.begin(), h_cugraph_dsts.end(), ref_h_cugraph_dsts.begin())); + + if (edge_weight) { + auto ref_h_cugraph_wgts = cugraph::test::to_host(*handle_, d_sorted_ref_cugraph_wgts); + + auto h_cugraph_wgts = cugraph::test::to_host(*handle_, d_sorted_cugraph_wgts); + + ASSERT_TRUE( + std::equal(h_cugraph_wgts.begin(), h_cugraph_wgts.end(), ref_h_cugraph_wgts.begin())); } - } } - } private: @@ -243,7 +227,7 @@ template std::unique_ptr Tests_MGKTruss::handle_ = nullptr; using Tests_MGKTruss_File = Tests_MGKTruss; -//using Tests_MGKTruss_Rmat = Tests_MGKTruss; +using Tests_MGKTruss_Rmat = Tests_MGKTruss; TEST_P(Tests_MGKTruss_File, 
CheckInt32Int32) { @@ -277,12 +261,9 @@ INSTANTIATE_TEST_SUITE_P( Tests_MGKTruss_File, ::testing::Combine( // enable correctness checks - ::testing::Values(KTruss_Usecase{4, false, false, true}, - KTruss_Usecase{5, true, false, true} - ), + ::testing::Values(KTruss_Usecase{4, false, false, true}, KTruss_Usecase{5, true, false, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), - cugraph::test::File_Usecase("test/datasets/dolphins.mtx") - ))); + cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); INSTANTIATE_TEST_SUITE_P( rmat_small_tests, @@ -291,7 +272,6 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values(KTruss_Usecase{8, false, false, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 16, 0.57, 0.19, 0.19, 0, true, false)))); - INSTANTIATE_TEST_SUITE_P( rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with --gtest_filter to select only the rmat_benchmark_test with a specific From a34cef3b9560c14554db201e5f21f1908fe706e4 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Mon, 22 Jul 2024 13:24:30 -0700 Subject: [PATCH 78/93] enable edge masking for k-core and k-truss and add tests --- cpp/src/community/k_truss_impl.cuh | 12 +++++------- cpp/src/cores/k_core_impl.cuh | 1 - cpp/tests/community/mg_k_truss_test.cpp | 2 +- 3 files changed, 6 insertions(+), 9 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index dabc807e056..9549efd2354 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -575,14 +575,13 @@ k_truss(raft::handle_t const& handle, bool do_expensive_check) { // 1. Check input arguments. 
- - CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); - + CUGRAPH_EXPECTS(graph_view.is_symmetric(), "Invalid input arguments: K-truss currently supports undirected graphs only."); CUGRAPH_EXPECTS(!graph_view.is_multigraph(), "Invalid input arguments: K-truss currently does not support multi-graphs."); + if (do_expensive_check) { // nothing to do } @@ -651,16 +650,15 @@ k_truss(raft::handle_t const& handle, std::make_optional(core_number_span)); if constexpr (multi_gpu) { - std::tie(srcs, dsts, std::ignore, std::ignore, std::ignore, std::ignore) = + std::tie(srcs, dsts, wgts, std::ignore, std::ignore, std::ignore) = detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, std::move(srcs), std::move(dsts), std::nullopt, std::nullopt, std::nullopt); + handle, std::move(srcs), std::move(dsts), std::move(wgts), std::nullopt, std::nullopt); } std::optional> tmp_renumber_map{std::nullopt}; - std::tie(*modified_graph, edge_weight, std::ignore, std::ignore, tmp_renumber_map) = create_graph_from_edgelist( handle, @@ -777,7 +775,7 @@ k_truss(raft::handle_t const& handle, edge_triangle_count(handle, cur_graph_view); cugraph::edge_property_t edge_mask(handle, cur_graph_view); - cugraph::fill_edge_property(handle, cur_graph_view, true, edge_mask); + cugraph::fill_edge_property(handle, cur_graph_view, edge_mask.mutable_view(), bool{true}); while (true) { // extract the edges that have counts less than k - 2. 
Those edges will be unrolled diff --git a/cpp/src/cores/k_core_impl.cuh b/cpp/src/cores/k_core_impl.cuh index 06402cc3382..b3c832d1539 100644 --- a/cpp/src/cores/k_core_impl.cuh +++ b/cpp/src/cores/k_core_impl.cuh @@ -37,7 +37,6 @@ k_core(raft::handle_t const& handle, std::optional> core_numbers, bool do_expensive_check) { - CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); rmm::device_uvector computed_core_numbers(0, handle.get_stream()); diff --git a/cpp/tests/community/mg_k_truss_test.cpp b/cpp/tests/community/mg_k_truss_test.cpp index 2bd7219e8ee..4e3c3b82465 100644 --- a/cpp/tests/community/mg_k_truss_test.cpp +++ b/cpp/tests/community/mg_k_truss_test.cpp @@ -261,7 +261,7 @@ INSTANTIATE_TEST_SUITE_P( Tests_MGKTruss_File, ::testing::Combine( // enable correctness checks - ::testing::Values(KTruss_Usecase{4, false, false, true}, KTruss_Usecase{5, true, false, true}), + ::testing::Values(KTruss_Usecase{4, false, true, true}, KTruss_Usecase{5, true, true, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); From b1aeab479ccacbcb6b65570dfb5e1ac83b436097 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Mon, 22 Jul 2024 14:43:49 -0700 Subject: [PATCH 79/93] enable int64 type for 'd_values' in 'shuffle_int_vertex_value_pairs_to_local_gpu_by_vertex_partitioning' --- cpp/src/utilities/shuffle_vertices_mg_v32_integral.cu | 7 +++++++ cpp/src/utilities/shuffle_vertices_mg_v64_integral.cu | 7 +++++++ 2 files changed, 14 insertions(+) diff --git a/cpp/src/utilities/shuffle_vertices_mg_v32_integral.cu b/cpp/src/utilities/shuffle_vertices_mg_v32_integral.cu index 0c91eb546d6..db7be5a3031 100644 --- a/cpp/src/utilities/shuffle_vertices_mg_v32_integral.cu +++ b/cpp/src/utilities/shuffle_vertices_mg_v32_integral.cu @@ -40,6 +40,13 @@ shuffle_int_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( rmm::device_uvector&& d_values, std::vector const& 
vertex_partition_range_lasts); +template std::tuple, rmm::device_uvector> +shuffle_int_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( + raft::handle_t const& handle, + rmm::device_uvector&& d_vertices, + rmm::device_uvector&& d_values, + std::vector const& vertex_partition_range_lasts); + template rmm::device_uvector shuffle_ext_vertices_to_local_gpu_by_vertex_partitioning( raft::handle_t const& handle, rmm::device_uvector&& d_vertices); diff --git a/cpp/src/utilities/shuffle_vertices_mg_v64_integral.cu b/cpp/src/utilities/shuffle_vertices_mg_v64_integral.cu index 5abce7c0783..7d968006bc7 100644 --- a/cpp/src/utilities/shuffle_vertices_mg_v64_integral.cu +++ b/cpp/src/utilities/shuffle_vertices_mg_v64_integral.cu @@ -35,6 +35,13 @@ shuffle_int_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( rmm::device_uvector&& d_values, std::vector const& vertex_partition_range_lasts); +template std::tuple, rmm::device_uvector> +shuffle_int_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( + raft::handle_t const& handle, + rmm::device_uvector&& d_vertices, + rmm::device_uvector&& d_values, + std::vector const& vertex_partition_range_lasts); + template std::tuple, rmm::device_uvector> shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( raft::handle_t const& handle, From 533f374b6d6481b4b675543d608ae337e1ea05c8 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Mon, 22 Jul 2024 14:44:45 -0700 Subject: [PATCH 80/93] fix type bug --- cpp/src/community/k_truss_impl.cuh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 9549efd2354..a00a2b617f4 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -1627,7 +1627,7 @@ k_truss(raft::handle_t const& handle, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), cugraph::edge_dummy_property_t{}.view(), - extract_q_idx_closing{ + 
extract_q_idx_closing{ major_weak_edgelist_dsts_tags_first, major_weak_edgelist_dsts_tags_first + major_weak_edgelist_dsts.size(), raft::device_span(major_weak_edgelist_srcs.data(), @@ -1787,7 +1787,7 @@ k_truss(raft::handle_t const& handle, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), cugraph::edge_dummy_property_t{}.view(), - extract_q_idx_closing{ + extract_q_idx_closing{ weak_edgelist_dsts_tags_first, weak_edgelist_dsts_tags_first + weak_edgelist_dsts.size(), raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), From ae17245878cb5b848ce4e895f251a873c9864e6e Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 24 Jul 2024 10:29:38 -0700 Subject: [PATCH 81/93] update benchmark tests and simplify initial k-truss implementation --- .../community/edge_triangle_count_impl.cuh | 2 +- cpp/src/community/k_truss_impl.cuh | 1312 +---------------- cpp/tests/community/mg_k_truss_test.cpp | 44 +- 3 files changed, 47 insertions(+), 1311 deletions(-) diff --git a/cpp/src/community/edge_triangle_count_impl.cuh b/cpp/src/community/edge_triangle_count_impl.cuh index 225687c4cf0..af2747843bc 100644 --- a/cpp/src/community/edge_triangle_count_impl.cuh +++ b/cpp/src/community/edge_triangle_count_impl.cuh @@ -136,7 +136,7 @@ edge_property_t, edge_t> edge_t auto edge_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()); size_t edges_to_intersect_per_iteration = - static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); + static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 13); auto num_chunks = raft::div_rounding_up_safe(edgelist_srcs.size(), edges_to_intersect_per_iteration); diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index a00a2b617f4..4f4ed868357 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -40,6 +40,9 @@ #include #include #include +#include +using namespace 
std::chrono; + namespace cugraph { @@ -626,7 +629,7 @@ k_truss(raft::handle_t const& handle, } // 2. Find (k-1)-core and exclude edges that do not belong to (k-1)-core - + { auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; @@ -687,6 +690,7 @@ k_truss(raft::handle_t const& handle, // 3. Keep only the edges from a low-degree vertex to a high-degree vertex. { + auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; auto vertex_partition_range_lasts = @@ -760,6 +764,7 @@ k_truss(raft::handle_t const& handle, *vertex_partition_range_lasts); } renumber_map = std::move(tmp_renumber_map); + } // 4. Compute triangle count using nbr_intersection and unroll weak edges @@ -771,1292 +776,61 @@ k_truss(raft::handle_t const& handle, edge_weight ? std::make_optional((*edge_weight).view()) : std::optional>{std::nullopt}; - auto edge_triangle_counts = - edge_triangle_count(handle, cur_graph_view); - cugraph::edge_property_t edge_mask(handle, cur_graph_view); cugraph::fill_edge_property(handle, cur_graph_view, edge_mask.mutable_view(), bool{true}); + std::chrono::seconds s (0); // 1 second + std::chrono::duration triangle_count_ms = duration_cast (s); + std::chrono::duration ext_w_edges_ms = duration_cast (s); + std::chrono::duration edge_mask_ms = duration_cast (s); + while (true) { - // extract the edges that have counts less than k - 2. 
Those edges will be unrolled - auto [weak_edgelist_srcs, weak_edgelist_dsts] = - extract_transform_e(handle, - cur_graph_view, - edge_src_dummy_property_t{}.view(), - edge_dst_dummy_property_t{}.view(), - edge_triangle_counts.view(), - // FIXME: Replace by lambda function - extract_weak_edges{k}); - - auto num_weak_edges = weak_edgelist_srcs.size(); - if constexpr (multi_gpu) { - num_weak_edges = host_scalar_allreduce( - handle.get_comms(), num_weak_edges, raft::comms::op_t::SUM, handle.get_stream()); - } - if (num_weak_edges == 0) { break; } - auto weak_edgelist_first = - thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); - thrust::sort(handle.get_thrust_policy(), - weak_edgelist_first, - weak_edgelist_first + weak_edgelist_srcs.size()); - - // Find intersection edges - size_t prev_chunk_size = 0; - size_t num_remaining_weak_edges = weak_edgelist_srcs.size(); - size_t edges_to_intersect_per_iteration = - static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); - - auto num_chunks = - raft::div_rounding_up_safe(weak_edgelist_srcs.size(), edges_to_intersect_per_iteration); - - if constexpr (multi_gpu) { - num_chunks = host_scalar_allreduce( - handle.get_comms(), num_chunks, raft::comms::op_t::SUM, handle.get_stream()); - } - - for (size_t i = 0; i < num_chunks; ++i) { - auto chunk_size = std::min(edges_to_intersect_per_iteration, num_remaining_weak_edges); - auto [vertex_pair_buffer_p_q, - vertex_pair_buffer_p_r_edge_p_q, - vertex_pair_buffer_q_r_edge_p_q] = - accumulate_triangles_p_q_or_q_r< - vertex_t, - edge_t, - weight_t, - decltype(allocate_dataframe_buffer>( - size_t{0}, handle.get_stream())), - multi_gpu>( - handle, - cur_graph_view, - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), - prev_chunk_size, - chunk_size, - do_expensive_check); - - rmm::device_uvector vertex_pair_buffer_p_r_edge_p_q_srcs(0, 
handle.get_stream()); - rmm::device_uvector vertex_pair_buffer_p_r_edge_p_q_dsts(0, handle.get_stream()); - rmm::device_uvector vertex_pair_buffer_q_r_edge_p_q_srcs(0, handle.get_stream()); - rmm::device_uvector vertex_pair_buffer_q_r_edge_p_q_dsts(0, handle.get_stream()); - - // Shuffle edges - if constexpr (multi_gpu) { - // FIXME: Check whether we need to shuffle (p, q) edges - std::tie(vertex_pair_buffer_p_r_edge_p_q_srcs, - vertex_pair_buffer_p_r_edge_p_q_dsts, - std::ignore, - std::ignore, - std::ignore, - std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<0>(vertex_pair_buffer_p_r_edge_p_q)), - std::move(std::get<1>(vertex_pair_buffer_p_r_edge_p_q)), - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); - - std::tie(vertex_pair_buffer_q_r_edge_p_q_srcs, - vertex_pair_buffer_q_r_edge_p_q_dsts, - std::ignore, - std::ignore, - std::ignore, - std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<0>(vertex_pair_buffer_q_r_edge_p_q)), - std::move(std::get<1>(vertex_pair_buffer_q_r_edge_p_q)), - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); - } - - decrease_triangle_count( - handle, - cur_graph_view, - edge_triangle_counts, - raft::device_span(std::get<0>(vertex_pair_buffer_p_q).data(), - std::get<0>(vertex_pair_buffer_p_q).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_p_q).data(), - std::get<1>(vertex_pair_buffer_p_q).size())); - - decrease_triangle_count( - handle, - cur_graph_view, - edge_triangle_counts, - multi_gpu ? 
raft::device_span(vertex_pair_buffer_p_r_edge_p_q_srcs.data(), - vertex_pair_buffer_p_r_edge_p_q_srcs.size()) - : raft::device_span( - std::get<0>(vertex_pair_buffer_p_r_edge_p_q).data(), - std::get<0>(vertex_pair_buffer_p_r_edge_p_q) - .size()), // FIXME: Make sure multi_gpu is properly handles - multi_gpu ? raft::device_span(vertex_pair_buffer_p_r_edge_p_q_dsts.data(), - vertex_pair_buffer_p_r_edge_p_q_dsts.size()) - : raft::device_span( - std::get<1>(vertex_pair_buffer_p_r_edge_p_q).data(), - std::get<1>(vertex_pair_buffer_p_r_edge_p_q) - .size()) // FIXME: Make sure multi_gpu is properly handles - ); - - decrease_triangle_count( - handle, - cur_graph_view, - edge_triangle_counts, - multi_gpu - ? raft::device_span(vertex_pair_buffer_q_r_edge_p_q_srcs.data(), - vertex_pair_buffer_q_r_edge_p_q_srcs.size()) - : raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_q).data(), - std::get<0>(vertex_pair_buffer_q_r_edge_p_q).size()), - multi_gpu - ? raft::device_span(vertex_pair_buffer_q_r_edge_p_q_dsts.data(), - vertex_pair_buffer_q_r_edge_p_q_dsts.size()) - : raft::device_span(std::get<1>(vertex_pair_buffer_q_r_edge_p_q).data(), - std::get<1>(vertex_pair_buffer_q_r_edge_p_q).size())); - - prev_chunk_size += chunk_size; - num_remaining_weak_edges -= chunk_size; - } - - // Iterate over unique weak edges' endpoints that appear as either q or r - rmm::device_uvector unique_weak_edgelist_srcs(weak_edgelist_srcs.size(), - handle.get_stream()); - rmm::device_uvector unique_weak_edgelist_dsts(weak_edgelist_dsts.size(), - handle.get_stream()); - - // Get unique srcs and dsts - thrust::copy(handle.get_thrust_policy(), - weak_edgelist_srcs.begin(), - weak_edgelist_srcs.end(), - unique_weak_edgelist_srcs.begin()); - - thrust::copy(handle.get_thrust_policy(), - weak_edgelist_dsts.begin(), - weak_edgelist_dsts.end(), - unique_weak_edgelist_dsts.begin()); - - thrust::sort(handle.get_thrust_policy(), - unique_weak_edgelist_srcs.begin(), - unique_weak_edgelist_srcs - .end()); 
// No need to sort the 'dst' since they are already sorted - - thrust::sort(handle.get_thrust_policy(), - unique_weak_edgelist_dsts.begin(), - unique_weak_edgelist_dsts.end()); - - auto unique_srcs_end = thrust::unique(handle.get_thrust_policy(), - unique_weak_edgelist_srcs.begin(), - unique_weak_edgelist_srcs.end()); - - auto unique_dsts_end = thrust::unique(handle.get_thrust_policy(), - unique_weak_edgelist_dsts.begin(), - unique_weak_edgelist_dsts.end()); - - auto num_unique_weak_edgelist_srcs = - thrust::distance(unique_weak_edgelist_srcs.begin(), unique_srcs_end); - auto num_unique_weak_edgelist_dsts = - thrust::distance(unique_weak_edgelist_dsts.begin(), unique_dsts_end); - unique_weak_edgelist_srcs.resize(num_unique_weak_edgelist_srcs, handle.get_stream()); - unique_weak_edgelist_dsts.resize(num_unique_weak_edgelist_dsts, handle.get_stream()); - - // Create a vertex set composed of edge endpoints that are either in the q or r set - rmm::device_uvector vertex_q_r_set( - num_unique_weak_edgelist_srcs + num_unique_weak_edgelist_dsts, handle.get_stream()); - - auto vertex_q_r_end = thrust::set_union(handle.get_thrust_policy(), - unique_weak_edgelist_srcs.begin(), - unique_weak_edgelist_srcs.end(), - unique_weak_edgelist_dsts.begin(), - unique_weak_edgelist_dsts.end(), - vertex_q_r_set.begin()); - - vertex_q_r_set.resize(thrust::distance(vertex_q_r_set.begin(), vertex_q_r_end), - handle.get_stream()); - - thrust::sort(handle.get_thrust_policy(), vertex_q_r_set.begin(), vertex_q_r_set.end()); - - auto weak_unique_v_end = - thrust::unique(handle.get_thrust_policy(), vertex_q_r_set.begin(), vertex_q_r_set.end()); - - vertex_q_r_set.resize(thrust::distance(vertex_q_r_set.begin(), weak_unique_v_end), - handle.get_stream()); - - if constexpr (multi_gpu) { - auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); - // Perform all-to-all in chunks across minor comm - auto major_vertex_q_r_set = cugraph::detail::device_allgatherv( - handle, - 
handle.get_comms(), - raft::device_span(vertex_q_r_set.data(), vertex_q_r_set.size())); - - thrust::sort( - handle.get_thrust_policy(), major_vertex_q_r_set.begin(), major_vertex_q_r_set.end()); - - weak_unique_v_end = thrust::unique( - handle.get_thrust_policy(), major_vertex_q_r_set.begin(), major_vertex_q_r_set.end()); - - major_vertex_q_r_set.resize( - thrust::distance(major_vertex_q_r_set.begin(), weak_unique_v_end), handle.get_stream()); - - vertex_q_r_set.resize(major_vertex_q_r_set.size(), handle.get_stream()); - - thrust::copy(handle.get_thrust_policy(), - major_vertex_q_r_set.begin(), - major_vertex_q_r_set.end(), - vertex_q_r_set.begin()); - } - - weak_edgelist_first = thrust::make_zip_iterator( - weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); // FIXME: is this necessary ? - - auto [srcs_in_q_r_set, dsts_in_q_r_set] = - extract_transform_e(handle, - cur_graph_view, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - // FIXME: Lambda function instead of functor - extract_edges_to_q_r{raft::device_span( - vertex_q_r_set.data(), vertex_q_r_set.size())}); - - if constexpr (multi_gpu) { - std::tie( - dsts_in_q_r_set, srcs_in_q_r_set, std::ignore, std::ignore, std::ignore, std::ignore) = - detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(dsts_in_q_r_set), - std::move(srcs_in_q_r_set), - std::nullopt, - std::nullopt, - std::nullopt); - } - - std::optional> graph_q_r{std::nullopt}; - std::optional> renumber_map_q_r{std::nullopt}; - std::tie(*graph_q_r, std::ignore, std::ignore, std::ignore, renumber_map_q_r) = - create_graph_from_edgelist( - handle, - std::nullopt, - std::move(dsts_in_q_r_set), - std::move(srcs_in_q_r_set), - std::nullopt, - std::nullopt, - std::nullopt, - cugraph::graph_properties_t{true, graph_view.is_multigraph()}, - true); - - auto csc_q_r_graph_view = (*graph_q_r).view(); - - 
rmm::device_uvector renumbered_weak_edgelist_srcs(weak_edgelist_srcs.size(), - handle.get_stream()); - rmm::device_uvector renumbered_weak_edgelist_dsts(weak_edgelist_srcs.size(), - handle.get_stream()); - - thrust::copy( - handle.get_thrust_policy(), - thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()), - thrust::make_zip_iterator(weak_edgelist_srcs.end(), weak_edgelist_dsts.end()), - thrust::make_zip_iterator(renumbered_weak_edgelist_srcs.begin(), - renumbered_weak_edgelist_dsts.begin())); - - if constexpr (multi_gpu) { - std::tie(renumbered_weak_edgelist_srcs, - renumbered_weak_edgelist_dsts, - std::ignore, - std::ignore, - std::ignore, - std::ignore) = - detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(renumbered_weak_edgelist_srcs), - std::move(renumbered_weak_edgelist_dsts), - std::nullopt, - std::nullopt, - std::nullopt); - } - - renumber_ext_vertices( - handle, - renumbered_weak_edgelist_srcs.data(), - renumbered_weak_edgelist_srcs.size(), - (*renumber_map_q_r).data(), - csc_q_r_graph_view.local_vertex_partition_range_first(), - csc_q_r_graph_view.local_vertex_partition_range_last(), - true); - - renumber_ext_vertices( - handle, - renumbered_weak_edgelist_dsts.data(), - renumbered_weak_edgelist_dsts.size(), - (*renumber_map_q_r).data(), - csc_q_r_graph_view.local_vertex_partition_range_first(), - csc_q_r_graph_view.local_vertex_partition_range_last(), - true); - - auto weak_edgelist_size = renumbered_weak_edgelist_srcs.size(); - weak_edgelist_first = thrust::make_zip_iterator(renumbered_weak_edgelist_srcs.begin(), - renumbered_weak_edgelist_dsts.begin()); - thrust::sort(handle.get_thrust_policy(), - weak_edgelist_first, - weak_edgelist_first + renumbered_weak_edgelist_srcs.size()); - - prev_chunk_size = 0; - num_remaining_weak_edges = weak_edgelist_size; - - if constexpr (multi_gpu) { - num_chunks = host_scalar_allreduce( - handle.get_comms(), num_chunks, 
raft::comms::op_t::SUM, handle.get_stream()); - } - - for (size_t i = 0; i < num_chunks; ++i) { - auto chunk_size = std::min(edges_to_intersect_per_iteration, num_remaining_weak_edges); - // Find intersection of weak edges - auto [vertex_pair_buffer_q_r, - vertex_pair_buffer_p_q_edge_q_r, - vertex_pair_buffer_p_r_edge_q_r] = - accumulate_triangles_p_q_or_q_r< - vertex_t, - edge_t, - weight_t, - decltype(allocate_dataframe_buffer>( - size_t{0}, handle.get_stream())), - multi_gpu>(handle, - csc_q_r_graph_view, - raft::device_span(renumbered_weak_edgelist_srcs.data(), - renumbered_weak_edgelist_srcs.size()), - raft::device_span(renumbered_weak_edgelist_dsts.data(), - renumbered_weak_edgelist_dsts.size()), - prev_chunk_size, - chunk_size, - do_expensive_check); - - // Unrenumber - auto vertex_partition_range_lasts = std::make_optional>( - csc_q_r_graph_view.vertex_partition_range_lasts()); - - unrenumber_int_vertices( - handle, - std::get<0>(vertex_pair_buffer_p_q_edge_q_r).data(), - std::get<0>(vertex_pair_buffer_p_q_edge_q_r).size(), - (*renumber_map_q_r).data(), - *vertex_partition_range_lasts, - true); - - unrenumber_int_vertices( - handle, - std::get<1>(vertex_pair_buffer_p_q_edge_q_r).data(), - std::get<1>(vertex_pair_buffer_p_q_edge_q_r).size(), - (*renumber_map_q_r).data(), - *vertex_partition_range_lasts, - true); - - unrenumber_int_vertices( - handle, - std::get<0>(vertex_pair_buffer_p_r_edge_q_r).data(), - std::get<0>(vertex_pair_buffer_p_r_edge_q_r).size(), - (*renumber_map_q_r).data(), - *vertex_partition_range_lasts, - true); - - unrenumber_int_vertices( - handle, - std::get<1>(vertex_pair_buffer_p_r_edge_q_r).data(), - std::get<1>(vertex_pair_buffer_p_r_edge_q_r).size(), - (*renumber_map_q_r).data(), - *vertex_partition_range_lasts, - true); - - unrenumber_int_vertices(handle, - std::get<0>(vertex_pair_buffer_q_r).data(), - std::get<0>(vertex_pair_buffer_q_r).size(), - (*renumber_map_q_r).data(), - *vertex_partition_range_lasts, - true); - - 
unrenumber_int_vertices(handle, - std::get<1>(vertex_pair_buffer_q_r).data(), - std::get<1>(vertex_pair_buffer_q_r).size(), - (*renumber_map_q_r).data(), - *vertex_partition_range_lasts, - true); - - if constexpr (multi_gpu) { - // Get global weak edges - auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); // FIXME: for debugging - - // Get global weak_edgelist - // FIXME: This operation is too expensive (memory) hence shuffle the weak edges instead to - // the appropriate GPU, check for existance as being part of the weak edge list and - // shuffle the result back. The operation below is only meant for validation purposes and - // should be remove once the statement is validated. - auto global_weak_edgelist_srcs = cugraph::detail::device_allgatherv( - handle, - comm, - raft::device_span(weak_edgelist_srcs.data(), - weak_edgelist_srcs.size())); - - auto global_weak_edgelist_dsts = cugraph::detail::device_allgatherv( - handle, - comm, - raft::device_span(weak_edgelist_dsts.data(), - weak_edgelist_dsts.size())); - - // Sort the weak edges if they are not already - auto chunk_global_weak_edgelist_first = thrust::make_zip_iterator( - global_weak_edgelist_srcs.begin(), global_weak_edgelist_dsts.begin()); - thrust::sort(handle.get_thrust_policy(), - chunk_global_weak_edgelist_first, - chunk_global_weak_edgelist_first + global_weak_edgelist_srcs.size()); - - auto num_edges_not_overcomp = remove_overcompensating_edges< - vertex_t, - edge_t, - decltype(get_dataframe_buffer_begin(vertex_pair_buffer_q_r)), - true, - multi_gpu, - true // FIXME: Currently using global weak edges for validation purposes - >( - handle, - size_dataframe_buffer(vertex_pair_buffer_p_q_edge_q_r), - get_dataframe_buffer_begin( - vertex_pair_buffer_p_q_edge_q_r), // FIXME: cannot be a copy, needs to be the - // original one so overcompensatiing edges can be - // removed - get_dataframe_buffer_begin( - vertex_pair_buffer_p_r_edge_q_r), // FIXME: cannot be a copy, needs to be the - 
// original one so overcompensatiing edges can be - // removed - raft::device_span(global_weak_edgelist_srcs.data(), - global_weak_edgelist_srcs.size()), - raft::device_span(global_weak_edgelist_dsts.data(), - global_weak_edgelist_dsts.size()), - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), - cur_graph_view.number_of_local_edge_partitions(), - cur_graph_view.vertex_partition_range_lasts()); - - resize_dataframe_buffer( - vertex_pair_buffer_p_q_edge_q_r, num_edges_not_overcomp, handle.get_stream()); - resize_dataframe_buffer( - vertex_pair_buffer_p_r_edge_q_r, num_edges_not_overcomp, handle.get_stream()); - - // Resize initial (q, r) edges - // Note: Once chunking is implemented, reconstruct the (q, r) edges only outside - // FIXME: No need to reconstruct the third array because we can zip all 3 edges of the - // triangle of the chunk's 'for loop' - resize_dataframe_buffer( - vertex_pair_buffer_q_r, num_edges_not_overcomp, handle.get_stream()); - - // FIXME: No need to reconstruct the third array because we can zip all 3 edges of the - // triangle Reconstruct (q, r) edges that didn't already have their count updated - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin( - vertex_pair_buffer_q_r), // FIXME: Properly reconstruct (p, r) even when there is no - // overcompensation ************************************ - get_dataframe_buffer_end(vertex_pair_buffer_q_r), - [vertex_pair_buffer_p_q_edge_q_r = - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_q_r), - vertex_pair_buffer_p_r_edge_q_r = - get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_q_r)] __device__(auto i) { - return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_edge_q_r[i]), - thrust::get<0>(vertex_pair_buffer_p_r_edge_q_r[i])); - }); - - } else { - auto num_edges_not_overcomp = remove_overcompensating_edges< - vertex_t, - edge_t, - 
decltype(get_dataframe_buffer_begin(vertex_pair_buffer_q_r)), - true, - multi_gpu, - true>( - handle, - size_dataframe_buffer(vertex_pair_buffer_p_q_edge_q_r), - get_dataframe_buffer_begin( - vertex_pair_buffer_p_q_edge_q_r), // FIXME: cannot be a copy, needs to be the - // original one so overcompensatiing edges can be - // removed - get_dataframe_buffer_begin( - vertex_pair_buffer_p_r_edge_q_r), // FIXME: cannot be a copy, needs to be the - // original one so overcompensatiing edges can be - // removed - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), - raft::device_span( - weak_edgelist_srcs.data(), - weak_edgelist_srcs.size()), // FIXME: Only for MG validation purposes - raft::device_span( - weak_edgelist_dsts.data(), - weak_edgelist_dsts.size()), // FIXME: Only for MG validation purposes - cur_graph_view.number_of_local_edge_partitions(), - cur_graph_view.vertex_partition_range_lasts() // Not needed for SG - ); - - resize_dataframe_buffer( - vertex_pair_buffer_p_q_edge_q_r, num_edges_not_overcomp, handle.get_stream()); - resize_dataframe_buffer( - vertex_pair_buffer_p_r_edge_q_r, num_edges_not_overcomp, handle.get_stream()); - - // resize initial (q, r) edges - resize_dataframe_buffer( - vertex_pair_buffer_q_r, num_edges_not_overcomp, handle.get_stream()); - - // Reconstruct (q, r) edges that didn't already have their count updated - // FIXME: No need to reconstruct the third array because we can zip all 3 edges of the - // triangle - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin( - vertex_pair_buffer_q_r), // FIXME: Properly reconstruct (p, r) even when there is no - // overcompensation ************************************ - get_dataframe_buffer_end(vertex_pair_buffer_q_r), - [vertex_pair_buffer_p_q_edge_q_r = - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_q_r), - vertex_pair_buffer_p_r_edge_q_r = - 
get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_q_r)] __device__(auto i) { - return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_edge_q_r[i]), - thrust::get<0>(vertex_pair_buffer_p_r_edge_q_r[i])); - }); - } - - rmm::device_uvector vertex_pair_buffer_p_q_edge_q_r_srcs(0, handle.get_stream()); - rmm::device_uvector vertex_pair_buffer_p_q_edge_q_r_dsts(0, handle.get_stream()); - rmm::device_uvector vertex_pair_buffer_p_r_edge_q_r_srcs(0, handle.get_stream()); - rmm::device_uvector vertex_pair_buffer_p_r_edge_q_r_dsts(0, handle.get_stream()); - - if constexpr (multi_gpu) { - // Shuffle before updating count - rmm::device_uvector vertex_pair_buffer_q_r_srcs(0, handle.get_stream()); - rmm::device_uvector vertex_pair_buffer_q_r_dsts(0, handle.get_stream()); - - std::tie(vertex_pair_buffer_q_r_srcs, - vertex_pair_buffer_q_r_dsts, - std::ignore, - std::ignore, - std::ignore, - std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<0>(vertex_pair_buffer_q_r)), - std::move(std::get<1>(vertex_pair_buffer_q_r)), - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); - - decrease_triangle_count( - handle, - cur_graph_view, - edge_triangle_counts, - raft::device_span(vertex_pair_buffer_q_r_srcs.data(), - vertex_pair_buffer_q_r_srcs.size()), - raft::device_span(vertex_pair_buffer_q_r_dsts.data(), - vertex_pair_buffer_q_r_dsts.size())); - - // Shuffle before updating count - rmm::device_uvector vertex_pair_buffer_p_q_edge_q_r_srcs(0, - handle.get_stream()); - rmm::device_uvector vertex_pair_buffer_p_q_edge_q_r_dsts(0, - handle.get_stream()); - - std::tie(vertex_pair_buffer_p_q_edge_q_r_dsts, - vertex_pair_buffer_p_q_edge_q_r_srcs, - std::ignore, - std::ignore, - std::ignore, - std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<1>(vertex_pair_buffer_p_q_edge_q_r)), - 
std::move(std::get<0>(vertex_pair_buffer_p_q_edge_q_r)), - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); - - decrease_triangle_count( - handle, - cur_graph_view, - edge_triangle_counts, - raft::device_span(vertex_pair_buffer_p_q_edge_q_r_dsts.data(), - vertex_pair_buffer_p_q_edge_q_r_dsts.size()), - raft::device_span(vertex_pair_buffer_p_q_edge_q_r_srcs.data(), - vertex_pair_buffer_p_q_edge_q_r_srcs.size())); - - // Shuffle before updating count - rmm::device_uvector vertex_pair_buffer_p_r_edge_q_r_srcs(0, - handle.get_stream()); - rmm::device_uvector vertex_pair_buffer_p_r_edge_q_r_dsts(0, - handle.get_stream()); - - std::tie(vertex_pair_buffer_p_r_edge_q_r_dsts, - vertex_pair_buffer_p_r_edge_q_r_srcs, - std::ignore, - std::ignore, - std::ignore, - std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<1>(vertex_pair_buffer_p_r_edge_q_r)), - std::move(std::get<0>(vertex_pair_buffer_p_r_edge_q_r)), - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); - - decrease_triangle_count( - handle, - cur_graph_view, - edge_triangle_counts, - raft::device_span(vertex_pair_buffer_p_r_edge_q_r_dsts.data(), - vertex_pair_buffer_p_r_edge_q_r_dsts.size()), - raft::device_span(vertex_pair_buffer_p_r_edge_q_r_srcs.data(), - vertex_pair_buffer_p_r_edge_q_r_srcs.size())); - - } else { - decrease_triangle_count( - handle, - cur_graph_view, - edge_triangle_counts, - raft::device_span(std::get<0>(vertex_pair_buffer_q_r).data(), - std::get<0>(vertex_pair_buffer_q_r).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_q_r).data(), - std::get<1>(vertex_pair_buffer_q_r).size())); - decrease_triangle_count( - handle, - cur_graph_view, - edge_triangle_counts, - raft::device_span(std::get<1>(vertex_pair_buffer_p_q_edge_q_r).data(), - std::get<0>(vertex_pair_buffer_p_q_edge_q_r).size()), - 
raft::device_span(std::get<0>(vertex_pair_buffer_p_q_edge_q_r).data(), - std::get<1>(vertex_pair_buffer_p_q_edge_q_r).size())); - decrease_triangle_count( - handle, - cur_graph_view, - edge_triangle_counts, - raft::device_span(std::get<1>(vertex_pair_buffer_p_r_edge_q_r).data(), - std::get<0>(vertex_pair_buffer_p_r_edge_q_r).size()), - raft::device_span(std::get<0>(vertex_pair_buffer_p_r_edge_q_r).data(), - std::get<1>(vertex_pair_buffer_p_r_edge_q_r).size())); - } - - prev_chunk_size += chunk_size; - num_remaining_weak_edges -= chunk_size; - } - - weak_edgelist_first = - thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); - - // Unrolling p, r edges - // create pair weak_src, weak_edge_idx (unique) - // create a dataframe buffer of size weak_edge_size - // FIXME: No need to create a dataframe buffer. We can just zip weak_edgelist_srcs - // with a vector counting from 0 .. - - auto vertex_pair_buffer_p_tag = allocate_dataframe_buffer>( - weak_edgelist_srcs.size(), handle.get_stream()); - - if constexpr (multi_gpu) { - std::vector h_num_weak_edges = {vertex_t{weak_edgelist_srcs.size()}}; - rmm::device_uvector num_weak_edges(1, handle.get_stream()); - - raft::update_device(num_weak_edges.data(), - h_num_weak_edges.data(), - h_num_weak_edges.size(), - handle.get_stream()); - - auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); - // Get global weak_edgelist - auto global_num_weak_edges = cugraph::detail::device_allgatherv( - handle, - comm, - raft::device_span(num_weak_edges.data(), num_weak_edges.size())); - - rmm::device_uvector prefix_sum_global_num_weak_edges(global_num_weak_edges.size(), - handle.get_stream()); - thrust::inclusive_scan(handle.get_thrust_policy(), - global_num_weak_edges.begin(), - global_num_weak_edges.end(), - prefix_sum_global_num_weak_edges.begin()); - - thrust::tabulate(handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_tag), - 
get_dataframe_buffer_end(vertex_pair_buffer_p_tag), - [rank = comm_rank, - num_weak_edges = prefix_sum_global_num_weak_edges.begin(), - p = weak_edgelist_srcs.begin()] __device__(auto idx) { - if (rank != 0) { - auto idx_tag = idx + (num_weak_edges[rank - 1]); - return thrust::make_tuple(p[idx], idx_tag); - } - - return thrust::make_tuple(p[idx], idx); - }); - - } else { - thrust::tabulate(handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_tag), - get_dataframe_buffer_end(vertex_pair_buffer_p_tag), - [p = weak_edgelist_srcs.begin()] __device__(auto idx) { - return thrust::make_tuple(p[idx], idx); - }); - } - - vertex_frontier_t vertex_frontier(handle, 1); - rmm::device_uvector tag_cpy(0, handle.get_stream()); - - if constexpr (multi_gpu) { - tag_cpy.resize(std::get<1>(vertex_pair_buffer_p_tag).size(), handle.get_stream()); - // Need a copy before shuffling the original tag - thrust::copy(handle.get_thrust_policy(), - std::get<1>(vertex_pair_buffer_p_tag).begin(), - std::get<1>(vertex_pair_buffer_p_tag).end(), - tag_cpy.begin()); - // Shuffle vertices - auto [p_vrtx, p_tag] = - detail::shuffle_int_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( - handle, - std::move(std::get<0>(vertex_pair_buffer_p_tag)), - std::move(std::get<1>(vertex_pair_buffer_p_tag)), - cur_graph_view.vertex_partition_range_lasts()); - - vertex_frontier.bucket(0).insert(thrust::make_zip_iterator(p_vrtx.begin(), p_tag.begin()), - thrust::make_zip_iterator(p_vrtx.end(), p_tag.end())); - } else { - vertex_frontier.bucket(0).insert( - thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).begin(), - std::get<1>(vertex_pair_buffer_p_tag).begin()), - thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_tag).end(), - std::get<1>(vertex_pair_buffer_p_tag).end())); - } - - rmm::device_uvector q(0, handle.get_stream()); - rmm::device_uvector idx(0, handle.get_stream()); - - std::tie(q, idx) = cugraph::extract_transform_v_frontier_outgoing_e( - handle, - 
cur_graph_view, - vertex_frontier.bucket(0), - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - extract_q_idx{}, - true); - - vertex_frontier.bucket(0).clear(); - - if constexpr (multi_gpu) { - // Shuffle vertices - std::tie(q, idx) = - detail::shuffle_int_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( - handle, std::move(q), std::move(idx), cur_graph_view.vertex_partition_range_lasts()); - } - - vertex_frontier.bucket(0).insert(thrust::make_zip_iterator(q.begin(), idx.begin()), - thrust::make_zip_iterator(q.end(), idx.end())); - - auto vertex_pair_buffer_p_r = - allocate_dataframe_buffer>(0, handle.get_stream()); - - auto vertex_pair_buffer_p_q_edge_p_r = - allocate_dataframe_buffer>(0, handle.get_stream()); - - auto vertex_pair_buffer_q_r_edge_p_r = - allocate_dataframe_buffer>(0, handle.get_stream()); - - // Get chunk global weak edges - if constexpr (multi_gpu) { - // Get major weak edges - auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); - auto major_weak_edgelist_srcs = cugraph::detail::device_allgatherv( - handle, - major_comm, - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size())); - // FIXME: Perform all-to-all in chunks - auto major_weak_edgelist_dsts = cugraph::detail::device_allgatherv( - handle, - major_comm, - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); - - auto major_weak_edgelist_tags = cugraph::detail::device_allgatherv( - handle, major_comm, raft::device_span(tag_cpy.data(), tag_cpy.size())); - - auto major_weak_edgelist_first = thrust::make_zip_iterator( - major_weak_edgelist_srcs.begin(), - major_weak_edgelist_dsts.begin()); // FIXME: remove as it is unused - - auto major_weak_edgelist_dsts_tags_first = thrust::make_zip_iterator( - major_weak_edgelist_dsts.begin(), major_weak_edgelist_tags.begin()); - - thrust::sort_by_key(handle.get_thrust_policy(), - 
major_weak_edgelist_dsts_tags_first, - major_weak_edgelist_dsts_tags_first + major_weak_edgelist_dsts.size(), - major_weak_edgelist_srcs.begin()); - - // FIXME: 'idx_closing' no longer needed - remove it - auto [q_closing, r_closing, p_closing, idx_closing] = - cugraph::extract_transform_v_frontier_outgoing_e( - handle, - cur_graph_view, - vertex_frontier.bucket(0), - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - extract_q_idx_closing{ - major_weak_edgelist_dsts_tags_first, - major_weak_edgelist_dsts_tags_first + major_weak_edgelist_dsts.size(), - raft::device_span(major_weak_edgelist_srcs.data(), - major_weak_edgelist_srcs.size()), - }, - true); - - resize_dataframe_buffer(vertex_pair_buffer_p_r, q_closing.size(), handle.get_stream()); - - thrust::copy(handle.get_thrust_policy(), - thrust::make_zip_iterator(p_closing.begin(), r_closing.begin()), - thrust::make_zip_iterator(p_closing.end(), r_closing.end()), - thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_r).begin(), - std::get<1>(vertex_pair_buffer_p_r).begin())); - - resize_dataframe_buffer( - vertex_pair_buffer_p_q_edge_p_r, q_closing.size(), handle.get_stream()); - - thrust::copy( - handle.get_thrust_policy(), - thrust::make_zip_iterator(p_closing.begin(), q_closing.begin()), - thrust::make_zip_iterator(p_closing.end(), q_closing.end()), - thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_q_edge_p_r).begin(), - std::get<1>(vertex_pair_buffer_p_q_edge_p_r).begin())); - - resize_dataframe_buffer( - vertex_pair_buffer_q_r_edge_p_r, q_closing.size(), handle.get_stream()); - - thrust::copy( - handle.get_thrust_policy(), - thrust::make_zip_iterator(q_closing.begin(), r_closing.begin()), - thrust::make_zip_iterator(q_closing.end(), r_closing.end()), - thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_q_r_edge_p_r).begin(), - std::get<1>(vertex_pair_buffer_q_r_edge_p_r).begin())); - - auto& comm = 
handle.get_comms(); // FIXME: Only using global comm for testing purposes - // Get global weak_edgelist - // FIXME: Perform all-to-all in chunks - auto global_weak_edgelist_srcs = cugraph::detail::device_allgatherv( - handle, - comm, - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size())); - // FIXME: Perform all-to-all in chunks - auto global_weak_edgelist_dsts = cugraph::detail::device_allgatherv( - handle, - comm, - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size())); - - // Sort the weak edges if they are not already - auto chunk_global_weak_edgelist_first = thrust::make_zip_iterator( - global_weak_edgelist_srcs.begin(), global_weak_edgelist_dsts.begin()); - - thrust::sort(handle.get_thrust_policy(), - chunk_global_weak_edgelist_first, - chunk_global_weak_edgelist_first + global_weak_edgelist_srcs.size()); - - auto num_edges_not_overcomp_p_q = remove_overcompensating_edges< - vertex_t, - edge_t, - decltype(get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r)), - false, - multi_gpu, - true // FIXME: Currently using global weak edges for validation purposes - >(handle, - q_closing.size(), - get_dataframe_buffer_begin( - vertex_pair_buffer_p_q_edge_p_r), // cannot be a copy, needs to be the original one - // so overcompensatiing edges can be removed - get_dataframe_buffer_begin( - vertex_pair_buffer_q_r_edge_p_r), // cannot be a copy, needs to be the original one - // so overcompensatiing edges can be removed - raft::device_span(global_weak_edgelist_srcs.data(), - global_weak_edgelist_srcs.size()), - raft::device_span(global_weak_edgelist_dsts.data(), - global_weak_edgelist_dsts.size()), - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), - cur_graph_view.number_of_local_edge_partitions(), - cur_graph_view.vertex_partition_range_lasts()); - - // FIXME: No need to resize the dataframes buffer now. 
- resize_dataframe_buffer( - vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); - resize_dataframe_buffer( - vertex_pair_buffer_q_r_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); - - // FIXME: No need to resize the dataframes buffer now. - resize_dataframe_buffer( - vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); - resize_dataframe_buffer( - vertex_pair_buffer_q_r_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); - - auto num_edges_not_overcomp_q_r = remove_overcompensating_edges< - vertex_t, - edge_t, - decltype(get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r)), - false, - multi_gpu, - true // FIXME: Currently using global weak edges for validation purposes - >( - handle, - num_edges_not_overcomp_p_q, - get_dataframe_buffer_begin( - vertex_pair_buffer_q_r_edge_p_r), // FIXME: cannot be a copy, needs to be the original - // one so overcompensatiing edges can be removed - get_dataframe_buffer_begin( - vertex_pair_buffer_p_q_edge_p_r), // FIXME: cannot be a copy, needs to be the original - // one so overcompensatiing edges can be removed - raft::device_span(global_weak_edgelist_srcs.data(), - global_weak_edgelist_srcs.size()), - raft::device_span(global_weak_edgelist_dsts.data(), - global_weak_edgelist_dsts.size()), - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), - cur_graph_view.number_of_local_edge_partitions(), - cur_graph_view.vertex_partition_range_lasts()); - - resize_dataframe_buffer( - vertex_pair_buffer_q_r_edge_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); - resize_dataframe_buffer( - vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); - - // Reconstruct (p, r) edges that didn't already have their count updated - // FIXME: No need to reconstruct the third array because we can zip all 3 edges of the - // triangle - 
resize_dataframe_buffer( - vertex_pair_buffer_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r), - get_dataframe_buffer_end(vertex_pair_buffer_p_r), - [vertex_pair_buffer_p_q_edge_p_r = - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r), - vertex_pair_buffer_q_r_edge_p_r = - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_r)] __device__(auto i) { - return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_edge_p_r[i]), - thrust::get<1>(vertex_pair_buffer_q_r_edge_p_r[i])); - }); - - } else { - // FIXME: refactor SG to use r_closing - - auto weak_edgelist_dsts_tags_first = thrust::make_zip_iterator( - weak_edgelist_dsts.begin(), std::get<1>(vertex_pair_buffer_p_tag).begin()); - - thrust::sort_by_key(handle.get_thrust_policy(), - weak_edgelist_dsts_tags_first, - weak_edgelist_dsts_tags_first + weak_edgelist_dsts.size(), - // major_weak_edgelist_srcs.begin() - weak_edgelist_srcs.begin()); - - auto [q_closing, r_closing, p_closing, idx_closing] = - cugraph::extract_transform_v_frontier_outgoing_e( - handle, - cur_graph_view, - vertex_frontier.bucket(0), - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - cugraph::edge_dummy_property_t{}.view(), - extract_q_idx_closing{ - weak_edgelist_dsts_tags_first, - weak_edgelist_dsts_tags_first + weak_edgelist_dsts.size(), - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - }, - do_expensive_check); - - // FIXME: Move the 3 copies to a function as it is also performed for MG - // extract pair (p, r) - resize_dataframe_buffer(vertex_pair_buffer_p_r, q_closing.size(), handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - thrust::make_zip_iterator(p_closing.begin(), r_closing.begin()), - thrust::make_zip_iterator(p_closing.end(), r_closing.end()), - thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_r).begin(), - 
std::get<1>(vertex_pair_buffer_p_r).begin())); - - resize_dataframe_buffer( - vertex_pair_buffer_p_q_edge_p_r, q_closing.size(), handle.get_stream()); - - // extract pair (p, q) - thrust::copy( - handle.get_thrust_policy(), - thrust::make_zip_iterator(p_closing.begin(), q_closing.begin()), - thrust::make_zip_iterator(p_closing.end(), q_closing.end()), - thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_p_q_edge_p_r).begin(), - std::get<1>(vertex_pair_buffer_p_q_edge_p_r).begin())); - - // extract pair (q, r) - resize_dataframe_buffer( - vertex_pair_buffer_q_r_edge_p_r, q_closing.size(), handle.get_stream()); - - thrust::copy( - handle.get_thrust_policy(), - thrust::make_zip_iterator(q_closing.begin(), r_closing.begin()), - thrust::make_zip_iterator(q_closing.end(), r_closing.end()), - thrust::make_zip_iterator(std::get<0>(vertex_pair_buffer_q_r_edge_p_r).begin(), - std::get<1>(vertex_pair_buffer_q_r_edge_p_r).begin())); - - // weak_edgelist_first - thrust::sort(handle.get_thrust_policy(), - weak_edgelist_first, - weak_edgelist_first + weak_edgelist_dsts.size()); - - auto num_edges_not_overcomp_p_q = remove_overcompensating_edges< - vertex_t, - edge_t, - decltype(get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r)), - false, - multi_gpu, - true>( - handle, - q_closing.size(), - get_dataframe_buffer_begin( - vertex_pair_buffer_p_q_edge_p_r), // FIXME: cannot be a copy, needs to be the original - // one so overcompensatiing edges can be removed - get_dataframe_buffer_begin( - vertex_pair_buffer_q_r_edge_p_r), // FIXME: cannot be a copy, needs to be the original - // one so overcompensatiing edges can be removed - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), - raft::device_span( - weak_edgelist_srcs.data(), - weak_edgelist_srcs.size()), // FIXME: Only for MG validation purposes - raft::device_span( - weak_edgelist_dsts.data(), - 
weak_edgelist_dsts.size()), // FIXME: Only for MG validation purposes - cur_graph_view.number_of_local_edge_partitions(), - cur_graph_view.vertex_partition_range_lasts()); - - resize_dataframe_buffer( - vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); - resize_dataframe_buffer( - vertex_pair_buffer_q_r_edge_p_r, num_edges_not_overcomp_p_q, handle.get_stream()); - - auto num_edges_not_overcomp_q_r = remove_overcompensating_edges< - vertex_t, - edge_t, - decltype(get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r)), - false, - multi_gpu, - true>( - handle, - num_edges_not_overcomp_p_q, - get_dataframe_buffer_begin( - vertex_pair_buffer_q_r_edge_p_r), // FIXME: cannot be a copy, needs to be the original - // one so overcompensatiing edges can be removed - get_dataframe_buffer_begin( - vertex_pair_buffer_p_q_edge_p_r), // FIXME: cannot be a copy, needs to be the original - // one so overcompensatiing edges can be removed - raft::device_span(weak_edgelist_srcs.data(), weak_edgelist_srcs.size()), - raft::device_span(weak_edgelist_dsts.data(), weak_edgelist_dsts.size()), - raft::device_span( - weak_edgelist_srcs.data(), - weak_edgelist_srcs.size()), // FIXME: Only for MG validation purposes - raft::device_span( - weak_edgelist_dsts.data(), - weak_edgelist_dsts.size()), // FIXME: Only for MG validation purposes - cur_graph_view.number_of_local_edge_partitions(), - cur_graph_view.vertex_partition_range_lasts()); - - resize_dataframe_buffer( - vertex_pair_buffer_p_q_edge_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); - resize_dataframe_buffer( - vertex_pair_buffer_q_r_edge_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); - - // Reconstruct (p, r) edges that didn't already have their count updated. 
- // FIXME: No need to reconstruct the third array because we can zip all 3 edges of the - // triangle - resize_dataframe_buffer( - vertex_pair_buffer_p_r, num_edges_not_overcomp_q_r, handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r), - get_dataframe_buffer_end(vertex_pair_buffer_p_r), - [vertex_pair_buffer_p_q_edge_p_r = - get_dataframe_buffer_begin(vertex_pair_buffer_p_q_edge_p_r), - vertex_pair_buffer_q_r_edge_p_r = - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_r)] __device__(auto i) { - return thrust::make_tuple(thrust::get<0>(vertex_pair_buffer_p_q_edge_p_r[i]), - thrust::get<1>(vertex_pair_buffer_q_r_edge_p_r[i])); - }); - } - - if constexpr (multi_gpu) { - // Shuffle before updating count - rmm::device_uvector vertex_pair_buffer_p_r_srcs(0, handle.get_stream()); - rmm::device_uvector vertex_pair_buffer_p_r_dsts(0, handle.get_stream()); - - std::tie(vertex_pair_buffer_p_r_srcs, - vertex_pair_buffer_p_r_dsts, - std::ignore, - std::ignore, - std::ignore, - std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<0>(vertex_pair_buffer_p_r)), - std::move(std::get<1>(vertex_pair_buffer_p_r)), - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); - - decrease_triangle_count( - handle, - cur_graph_view, - edge_triangle_counts, - raft::device_span(vertex_pair_buffer_p_r_srcs.data(), - vertex_pair_buffer_p_r_srcs.size()), - raft::device_span(vertex_pair_buffer_p_r_dsts.data(), - vertex_pair_buffer_p_r_dsts.size())); - - // Shuffle before updating count - rmm::device_uvector vertex_pair_buffer_p_q_edge_p_r_srcs(0, handle.get_stream()); - rmm::device_uvector vertex_pair_buffer_p_q_edge_p_r_dsts(0, handle.get_stream()); - - std::tie(vertex_pair_buffer_p_q_edge_p_r_srcs, - vertex_pair_buffer_p_q_edge_p_r_dsts, - std::ignore, - std::ignore, - std::ignore, - std::ignore) = - 
detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<0>( - vertex_pair_buffer_p_q_edge_p_r)), // FIXME: rename to - // vertex_pair_buffer_p_q_edge_p_r for consistency - std::move(std::get<1>( - vertex_pair_buffer_p_q_edge_p_r)), // FIXME: rename to - // vertex_pair_buffer_p_q_edge_p_r for consistency - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); - - decrease_triangle_count( - handle, - cur_graph_view, - edge_triangle_counts, - raft::device_span(vertex_pair_buffer_p_q_edge_p_r_srcs.data(), - vertex_pair_buffer_p_q_edge_p_r_srcs.size()), - raft::device_span(vertex_pair_buffer_p_q_edge_p_r_dsts.data(), - vertex_pair_buffer_p_q_edge_p_r_dsts.size())); - - // Shuffle before updating count - rmm::device_uvector vertex_pair_buffer_q_r_edge_p_r_srcs(0, handle.get_stream()); - rmm::device_uvector vertex_pair_buffer_q_r_edge_p_r_dsts(0, handle.get_stream()); - - std::tie(vertex_pair_buffer_q_r_edge_p_r_srcs, - vertex_pair_buffer_q_r_edge_p_r_dsts, - std::ignore, - std::ignore, - std::ignore, - std::ignore) = - detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( - handle, - std::move(std::get<0>( - vertex_pair_buffer_q_r_edge_p_r)), // FIXME: rename to - // vertex_pair_buffer_p_q_edge_p_r for consistency - std::move(std::get<1>( - vertex_pair_buffer_q_r_edge_p_r)), // FIXME: rename to - // vertex_pair_buffer_p_q_edge_p_r for consistency - std::nullopt, - std::nullopt, - std::nullopt, - cur_graph_view.vertex_partition_range_lasts()); - - decrease_triangle_count( - handle, - cur_graph_view, - edge_triangle_counts, - raft::device_span(vertex_pair_buffer_q_r_edge_p_r_srcs.data(), - vertex_pair_buffer_q_r_edge_p_r_srcs.size()), - raft::device_span(vertex_pair_buffer_q_r_edge_p_r_dsts.data(), - vertex_pair_buffer_q_r_edge_p_r_dsts.size())); - - } else { - decrease_triangle_count( - handle, - cur_graph_view, - edge_triangle_counts, - 
raft::device_span(std::get<0>(vertex_pair_buffer_p_r).data(), - std::get<0>(vertex_pair_buffer_p_r).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_p_r).data(), - std::get<1>(vertex_pair_buffer_p_r).size())); - - decrease_triangle_count( - handle, - cur_graph_view, - edge_triangle_counts, - raft::device_span(std::get<0>(vertex_pair_buffer_p_q_edge_p_r).data(), - std::get<0>(vertex_pair_buffer_p_q_edge_p_r).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_p_q_edge_p_r).data(), - std::get<1>(vertex_pair_buffer_p_q_edge_p_r).size())); - - decrease_triangle_count( - handle, - cur_graph_view, - edge_triangle_counts, - raft::device_span(std::get<0>(vertex_pair_buffer_q_r_edge_p_r).data(), - std::get<0>(vertex_pair_buffer_q_r_edge_p_r).size()), - raft::device_span(std::get<1>(vertex_pair_buffer_q_r_edge_p_r).data(), - std::get<1>(vertex_pair_buffer_q_r_edge_p_r).size())); - } + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + auto start = high_resolution_clock::now(); + auto edge_triangle_counts = + edge_triangle_count(handle, cur_graph_view); + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + auto stop = high_resolution_clock::now(); + + triangle_count_ms += duration_cast(stop - start); // Mask all the edges that have 0 count + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + start = high_resolution_clock::now(); + + auto prev_number_of_edges = cur_graph_view.compute_number_of_edges(handle); + cugraph::transform_e( handle, cur_graph_view, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), edge_triangle_counts.view(), - [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { - return count != 0; + [k] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) { + return count >= k - 2; }, edge_mask.mutable_view(), false); 
cur_graph_view.attach_edge_mask(edge_mask.view()); - } + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + stop = high_resolution_clock::now(); + + edge_mask_ms += duration_cast(stop - start); + + if (prev_number_of_edges == cur_graph_view.compute_number_of_edges(handle)) { + break; + } + + } + + std::cout << "edge triangle count took " << triangle_count_ms.count() << " milliseconds" << std::endl; + std::cout << "weak edge extraction took " << ext_w_edges_ms.count() << " milliseconds" << std::endl; + std::cout << "edge masking took " << edge_mask_ms.count() << " milliseconds" << std::endl; + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); rmm::device_uvector edgelist_dsts(0, handle.get_stream()); std::optional> edgelist_wgts{std::nullopt}; diff --git a/cpp/tests/community/mg_k_truss_test.cpp b/cpp/tests/community/mg_k_truss_test.cpp index 4e3c3b82465..ae31b09303f 100644 --- a/cpp/tests/community/mg_k_truss_test.cpp +++ b/cpp/tests/community/mg_k_truss_test.cpp @@ -78,7 +78,7 @@ class Tests_MGKTruss auto [mg_graph, edge_weight, mg_renumber_map] = cugraph::test::construct_graph( - *handle_, input_usecase, k_truss_usecase.test_weighted_, true, false, true); + *handle_, input_usecase, k_truss_usecase.test_weighted_, true, true, true); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -226,15 +226,8 @@ class Tests_MGKTruss template std::unique_ptr Tests_MGKTruss::handle_ = nullptr; -using Tests_MGKTruss_File = Tests_MGKTruss; using Tests_MGKTruss_Rmat = Tests_MGKTruss; -TEST_P(Tests_MGKTruss_File, CheckInt32Int32) -{ - auto param = GetParam(); - run_current_test(std::get<0>(param), std::get<1>(param)); -} - TEST_P(Tests_MGKTruss_Rmat, CheckInt32Int32) { auto param = GetParam(); @@ -242,36 +235,6 @@ TEST_P(Tests_MGKTruss_Rmat, CheckInt32Int32) std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } -TEST_P(Tests_MGKTruss_Rmat, 
CheckInt32Int64) -{ - auto param = GetParam(); - run_current_test( - std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); -} - -TEST_P(Tests_MGKTruss_Rmat, CheckInt64Int64) -{ - auto param = GetParam(); - run_current_test( - std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); -} - -INSTANTIATE_TEST_SUITE_P( - file_tests, - Tests_MGKTruss_File, - ::testing::Combine( - // enable correctness checks - ::testing::Values(KTruss_Usecase{4, false, true, true}, KTruss_Usecase{5, true, true, true}), - ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), - cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); - -INSTANTIATE_TEST_SUITE_P( - rmat_small_tests, - Tests_MGKTruss_Rmat, - ::testing::Combine( - ::testing::Values(KTruss_Usecase{8, false, false, false}), - ::testing::Values(cugraph::test::Rmat_Usecase(20, 16, 0.57, 0.19, 0.19, 0, true, false)))); - INSTANTIATE_TEST_SUITE_P( rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with --gtest_filter to select only the rmat_benchmark_test with a specific @@ -280,8 +243,7 @@ INSTANTIATE_TEST_SUITE_P( factor (to avoid running same benchmarks more than once) */ Tests_MGKTruss_Rmat, ::testing::Combine( - ::testing::Values(KTruss_Usecase{4, false, false, false}, - KTruss_Usecase{5, false, false, false}), - ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, true, false)))); + ::testing::Values(KTruss_Usecase{3, false, false, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() From 2733cec0aace1e0d1febf6296e920c4c389c5076 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 25 Jul 2024 07:50:38 -0700 Subject: [PATCH 82/93] reset chunk parameter --- cpp/src/community/edge_triangle_count_impl.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/cpp/src/community/edge_triangle_count_impl.cuh b/cpp/src/community/edge_triangle_count_impl.cuh index af2747843bc..225687c4cf0 100644 --- a/cpp/src/community/edge_triangle_count_impl.cuh +++ b/cpp/src/community/edge_triangle_count_impl.cuh @@ -136,7 +136,7 @@ edge_property_t, edge_t> edge_t auto edge_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()); size_t edges_to_intersect_per_iteration = - static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 13); + static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 17); auto num_chunks = raft::div_rounding_up_safe(edgelist_srcs.size(), edges_to_intersect_per_iteration); From 6de56d51a02749b21d54e4e1e9ec9b34b65264fd Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 25 Jul 2024 07:51:51 -0700 Subject: [PATCH 83/93] remove debug statement --- cpp/src/community/k_truss_impl.cuh | 500 +----------------------- cpp/tests/community/mg_k_truss_test.cpp | 44 ++- 2 files changed, 43 insertions(+), 501 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 4f4ed868357..2fe9e1c9285 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -40,228 +40,10 @@ #include #include #include -#include -using namespace std::chrono; namespace cugraph { -template -// Remname set difference -// break 'remove_overcompensating_edges' to only take set_q_querry edges to find the difference -edge_t remove_overcompensating_edges(raft::handle_t const& handle, - size_t buffer_size, - EdgeIterator set_a_query_edges, - EdgeIterator set_b_query_edges, - raft::device_span global_set_c_weak_edges_srcs, - raft::device_span global_set_c_weak_edges_dsts, - raft::device_span set_c_weak_edges_srcs, - raft::device_span set_c_weak_edges_dsts, - vertex_t number_of_local_edge_partitions, - std::vector vertex_partition_range_lasts) -{ - // To avoid over-compensating, check whether the 'potential_closing_edges' - // are within the weak edges. If yes, those edges were already unrolled - - if constexpr (global_weak) { - // FIXME: can use thrust::set_difference for SG - auto edges_not_overcomp = thrust::remove_if( - handle.get_thrust_policy(), - thrust::make_zip_iterator(set_a_query_edges, set_b_query_edges), - thrust::make_zip_iterator(set_a_query_edges + buffer_size, set_b_query_edges + buffer_size), - [set_c_weak_edges_first = thrust::make_zip_iterator(global_set_c_weak_edges_srcs.begin(), - global_set_c_weak_edges_dsts.begin()), - set_c_weak_edges_last = - thrust::make_zip_iterator(global_set_c_weak_edges_srcs.end(), - global_set_c_weak_edges_dsts.end())] __device__(auto e) { - auto set_a_query_edge = thrust::get<0>(e); - if constexpr (is_q_r_edge) { - set_a_query_edge = - thrust::make_tuple(thrust::get<1>(set_a_query_edge), thrust::get<0>(set_a_query_edge)); - }; - - return thrust::binary_search( - thrust::seq, set_c_weak_edges_first, set_c_weak_edges_last, set_a_query_edge); - }); - - auto dist = thrust::distance(thrust::make_zip_iterator(set_a_query_edges, set_b_query_edges), - edges_not_overcomp); - return dist; - } else { - auto& comm = handle.get_comms(); - auto const comm_rank = comm.get_rank(); - - rmm::device_uvector 
set_a_query_edges_srcs(buffer_size, handle.get_stream()); - rmm::device_uvector set_a_query_edges_dsts(buffer_size, handle.get_stream()); - - thrust::copy( - handle.get_thrust_policy(), - set_a_query_edges, - set_a_query_edges + buffer_size, - thrust::make_zip_iterator(set_a_query_edges_srcs.begin(), set_a_query_edges_dsts.begin())); - - // auto& comm = handle.get_comms(); - auto const comm_size = comm.get_size(); - auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); - auto const major_comm_size = major_comm.get_size(); - auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); - auto const minor_comm_size = minor_comm.get_size(); - - rmm::device_uvector d_vertex_partition_range_lasts( - vertex_partition_range_lasts.size(), handle.get_stream()); - - raft::update_device(d_vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.size(), - handle.get_stream()); - - auto func = cugraph::detail::compute_gpu_id_from_int_edge_endpoints_t{ - raft::device_span(d_vertex_partition_range_lasts.data(), - d_vertex_partition_range_lasts.size()), - comm_size, - major_comm_size, - minor_comm_size}; - - auto d_tx_counts = cugraph::groupby_and_count( - thrust::make_zip_iterator(set_a_query_edges_srcs.begin(), set_a_query_edges_dsts.begin()), - thrust::make_zip_iterator(set_a_query_edges_srcs.end(), set_a_query_edges_dsts.end()), - [func, major_comm_size] __device__(auto val) { - return func(val); //% major_comm_size; - }, - comm_size, - // major_comm_size, - std::numeric_limits::max(), - handle.get_stream()); - - std::vector h_tx_counts{d_tx_counts.size()}; - std::vector h_rx_counts{}; - - raft::update_host( - h_tx_counts.data(), d_tx_counts.data(), d_tx_counts.size(), handle.get_stream()); - - std::tie(set_a_query_edges_srcs, h_rx_counts) = shuffle_values( - handle.get_comms(), set_a_query_edges_srcs.begin(), h_tx_counts, handle.get_stream()); - - 
std::tie(set_a_query_edges_dsts, std::ignore) = shuffle_values( - handle.get_comms(), set_a_query_edges_dsts.begin(), h_tx_counts, handle.get_stream()); - - rmm::device_uvector has_edge(set_a_query_edges_srcs.size(), - handle.get_stream()); // type should be size_t - - auto set_c_weak_edges_first = thrust::make_zip_iterator( - set_c_weak_edges_srcs.begin(), set_c_weak_edges_dsts.begin()); // setBedges - auto set_c_weak_edges_last = - thrust::make_zip_iterator(set_c_weak_edges_srcs.end(), set_c_weak_edges_dsts.end()); - auto set_a_query_edges_first = - thrust::make_zip_iterator(set_a_query_edges_srcs.begin(), set_a_query_edges_dsts.begin()); - - // FIXME: Use thrust::transform instead - thrust::tabulate( - handle.get_thrust_policy(), - has_edge.begin(), - has_edge.end(), - [set_c_weak_edges_first, set_c_weak_edges_last, set_a_query_edges_first] __device__(auto i) { - return thrust::binary_search( - thrust::seq, set_c_weak_edges_first, set_c_weak_edges_last, set_a_query_edges_first[i]); - }); - - std::tie(has_edge, std::ignore) = - shuffle_values(handle.get_comms(), has_edge.begin(), h_rx_counts, handle.get_stream()); - - auto set_a_and_b_query_edges_first = - thrust::make_zip_iterator(set_a_query_edges, set_b_query_edges); - auto set_a_and_b_query_edges_last = - thrust::make_zip_iterator(set_a_query_edges + buffer_size, set_b_query_edges + buffer_size); - - thrust::sort_by_key(handle.get_thrust_policy(), - set_a_query_edges, - set_a_query_edges + buffer_size, - thrust::make_zip_iterator(set_b_query_edges, has_edge.begin())); - - auto edges_not_overcomp = - thrust::remove_if(handle.get_thrust_policy(), - set_a_and_b_query_edges_first, - set_a_and_b_query_edges_last, - [set_a_query_edges, - buffer_size, - has_edge = raft::device_span( - has_edge.data(), has_edge.size())] __device__(auto pair_set) { - // auto set_a_query_edge = thrust::get<0>(pair_set) - auto itr = thrust::lower_bound(thrust::seq, - set_a_query_edges, - set_a_query_edges + buffer_size, - 
thrust::get<0>(pair_set)); - - auto idx = thrust::distance(set_a_query_edges, itr); - return has_edge[idx]; - }); - - auto dist = thrust::distance(set_a_and_b_query_edges_first, edges_not_overcomp); - return dist; - } -} - -template -struct extract_weak_edges { - edge_t k{}; - __device__ thrust::optional> operator()( - vertex_t src, vertex_t dst, thrust::nullopt_t, thrust::nullopt_t, edge_t count) const - { - return count < k - 2 - ? thrust::optional>{thrust::make_tuple(src, dst)} - : thrust::nullopt; - } -}; - -template -struct extract_edges { // FIXME: ******************************Remove this functor. For testing - // purposes only******************* - __device__ thrust::optional> operator()( - - auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) const - { - return thrust::make_tuple(src, dst, count); - } -}; - -template -struct extract_edges_and_triangle_counts { - __device__ thrust::optional> operator()( - - auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto count) const - { - return thrust::make_tuple(src, dst, count); - } -}; - -template -struct extract_edges_to_q_r { - raft::device_span vertex_q_r_set{}; - __device__ thrust::optional> operator()( - - auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) const - { - auto has_src = - thrust::binary_search(thrust::seq, vertex_q_r_set.begin(), vertex_q_r_set.end(), src); - - auto has_dst = - thrust::binary_search(thrust::seq, vertex_q_r_set.begin(), vertex_q_r_set.end(), dst); - - if (has_src) { - return thrust::optional>{thrust::make_tuple(src, dst)}; - } else if (has_dst) { - return thrust::optional>{thrust::make_tuple(src, dst)}; - } else { - return thrust::nullopt; - } - } -}; - namespace { template @@ -310,261 +92,6 @@ struct extract_low_to_high_degree_edges_t { } }; -template -struct generate_p_r_or_q_r_from_p_q { - size_t chunk_start{}; - raft::device_span intersection_offsets{}; - raft::device_span intersection_indices{}; - raft::device_span 
weak_srcs{}; - raft::device_span weak_dsts{}; - - __device__ thrust::tuple operator()(edge_t i) const - { - auto itr = thrust::upper_bound( - thrust::seq, intersection_offsets.begin() + 1, intersection_offsets.end(), i); - auto idx = thrust::distance(intersection_offsets.begin() + 1, itr); - - if constexpr (generate_p_r) { - return thrust::make_tuple(weak_srcs[chunk_start + idx], intersection_indices[i]); - - } else { - return thrust::make_tuple(weak_dsts[chunk_start + idx], intersection_indices[i]); - } - } -}; - -template -struct extract_q_idx { - using return_type = thrust::optional>; - - return_type __device__ operator()(thrust::tuple tagged_src, - vertex_t dst, - thrust::nullopt_t, - thrust::nullopt_t, - thrust::nullopt_t) const - { - return thrust::make_optional(thrust::make_tuple(dst, thrust::get<1>(tagged_src))); - } -}; - -// FIXME: Remove multi_gpu as it is not used. -template -struct extract_q_idx_closing { - using return_type = thrust::optional>; - EdgeIterator major_weak_edgelist_dsts_tag_first{}; - EdgeIterator major_weak_edgelist_dsts_tag_last{}; - raft::device_span major_weak_edgelist_srcs{}; - - return_type __device__ operator()(thrust::tuple tagged_src, - vertex_t dst, - thrust::nullopt_t, - thrust::nullopt_t, - thrust::nullopt_t) const - { - auto itr = thrust::lower_bound(thrust::seq, - major_weak_edgelist_dsts_tag_first, - major_weak_edgelist_dsts_tag_last, - thrust::make_tuple(dst, thrust::get<1>(tagged_src))); - - auto idx = thrust::distance(major_weak_edgelist_dsts_tag_first, itr); - - return (itr != major_weak_edgelist_dsts_tag_last && - *itr == thrust::make_tuple(dst, thrust::get<1>(tagged_src))) - ? 
thrust::make_optional(thrust::make_tuple(thrust::get<0>(tagged_src), - dst, - major_weak_edgelist_srcs[idx], - thrust::get<1>(tagged_src))) - : thrust::nullopt; - } -}; - -template -struct generate_p_q { - size_t chunk_start{}; - raft::device_span intersection_offsets{}; - raft::device_span intersection_indices{}; - raft::device_span weak_srcs{}; - raft::device_span weak_dsts{}; - - __device__ thrust::tuple operator()(edge_t i) const - { - auto itr = thrust::upper_bound( - thrust::seq, intersection_offsets.begin() + 1, intersection_offsets.end(), i); - auto idx = thrust::distance(intersection_offsets.begin() + 1, itr); - - return thrust::make_tuple(weak_srcs[chunk_start + idx], weak_dsts[chunk_start + idx]); - } -}; - -template -struct generate_p_r { - EdgeIterator weak_edge_first{}; - EdgeIterator weak_edge_dst_tag_first{}; - EdgeIterator weak_edge_dst_tag_last{}; - EdgeIterator closing_r_tag{}; - - raft::device_span weak_edge_idx{}; - raft::device_span chunk_global_weak_edgelist_tags{}; - - __device__ thrust::tuple operator()(edge_t i) const - { - auto itr = thrust::lower_bound( - thrust::seq, weak_edge_dst_tag_first, weak_edge_dst_tag_last, closing_r_tag[i]); - - auto idx = thrust::distance(weak_edge_dst_tag_first, itr); - return *(weak_edge_first + idx); - } -}; - -template -struct generate_p_q_q_r { - EdgeIterator weak_edge{}; - raft::device_span q_closing{}; - raft::device_span weak_edge_idx{}; - raft::device_span chunk_global_weak_edgelist_tags{}; - - __device__ thrust::tuple operator()(edge_t i) const - { - if constexpr (generate_p_q) { - return thrust::make_tuple(thrust::get<0>(*(weak_edge + weak_edge_idx[i])), q_closing[i]); - } else { - return thrust::make_tuple(q_closing[i], thrust::get<1>(*(weak_edge + weak_edge_idx[i]))); - } - } -}; - -template -void decrease_triangle_count( - raft::handle_t const& handle, - graph_view_t& cur_graph_view, - edge_property_t, edge_t>& edge_triangle_counts, - raft::device_span edge_srcs, - raft::device_span edge_dsts) -{ 
- // Before updating the count, we need to clear the mask - auto edge_buffer_first = thrust::make_zip_iterator(edge_srcs.begin(), edge_dsts.begin()); - - thrust::sort(handle.get_thrust_policy(), edge_buffer_first, edge_buffer_first + edge_srcs.size()); - - auto unique_pair_count = thrust::unique_count( - handle.get_thrust_policy(), edge_buffer_first, edge_buffer_first + edge_srcs.size()); - - rmm::device_uvector decrease_count(unique_pair_count, handle.get_stream()); - - auto vertex_pair_buffer_unique = allocate_dataframe_buffer>( - unique_pair_count, handle.get_stream()); - - thrust::reduce_by_key(handle.get_thrust_policy(), - edge_buffer_first, - edge_buffer_first + edge_srcs.size(), - thrust::make_constant_iterator(size_t{1}), - get_dataframe_buffer_begin(vertex_pair_buffer_unique), - decrease_count.begin(), - thrust::equal_to>{}); - - cugraph::edge_bucket_t edges_to_decrement_count(handle); - edges_to_decrement_count.insert(std::get<0>(vertex_pair_buffer_unique).begin(), - std::get<0>(vertex_pair_buffer_unique).end(), - std::get<1>(vertex_pair_buffer_unique).begin()); - - cugraph::transform_e( - handle, - cur_graph_view, - edges_to_decrement_count, - cugraph::edge_src_dummy_property_t{}.view(), - cugraph::edge_dst_dummy_property_t{}.view(), - edge_triangle_counts.view(), - [edge_buffer_first = get_dataframe_buffer_begin(vertex_pair_buffer_unique), - edge_buffer_last = get_dataframe_buffer_end(vertex_pair_buffer_unique), - decrease_count = decrease_count.data()] __device__(auto src, - auto dst, - thrust::nullopt_t, - thrust::nullopt_t, - edge_t count) { - auto e = thrust::make_tuple(src, dst); - auto itr_pair = thrust::lower_bound(thrust::seq, edge_buffer_first, edge_buffer_last, e); - - auto idx_pair = thrust::distance(edge_buffer_first, itr_pair); - return count - decrease_count[idx_pair]; - }, - edge_triangle_counts.mutable_view(), - true); // FIXME: set expensive check to False -}; - -template -std::tuple accumulate_triangles_p_q_or_q_r( - raft::handle_t 
const& handle, - graph_view_t& graph_view, - raft::device_span weak_edgelist_srcs, - raft::device_span weak_edgelist_dsts, - size_t prev_chunk_size, - size_t chunk_size, - bool do_expensive_check) -{ - auto weak_edgelist_first = - thrust::make_zip_iterator(weak_edgelist_srcs.begin(), weak_edgelist_dsts.begin()); - - auto [intersection_offsets, intersection_indices] = - detail::nbr_intersection(handle, - graph_view, - cugraph::edge_dummy_property_t{}.view(), - weak_edgelist_first + prev_chunk_size, - weak_edgelist_first + prev_chunk_size + chunk_size, - std::array{true, true}, - // do_expensive_check : FIXME - true); - - auto vertex_pair_buffer_p_q = allocate_dataframe_buffer>( - intersection_indices.size(), handle.get_stream()); - - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_q), - get_dataframe_buffer_end(vertex_pair_buffer_p_q), - generate_p_q{ - prev_chunk_size, - raft::device_span(intersection_offsets.data(), intersection_offsets.size()), - raft::device_span(intersection_indices.data(), intersection_indices.size()), - weak_edgelist_srcs, - weak_edgelist_dsts}); - - auto vertex_pair_buffer_p_r_edge_p_q = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_p_r_edge_p_q), - get_dataframe_buffer_end(vertex_pair_buffer_p_r_edge_p_q), - generate_p_r_or_q_r_from_p_q{ - prev_chunk_size, - raft::device_span(intersection_offsets.data(), intersection_offsets.size()), - raft::device_span(intersection_indices.data(), intersection_indices.size()), - weak_edgelist_srcs, - weak_edgelist_dsts}); - - auto vertex_pair_buffer_q_r_edge_p_q = - allocate_dataframe_buffer>(intersection_indices.size(), - handle.get_stream()); - thrust::tabulate( - handle.get_thrust_policy(), - get_dataframe_buffer_begin(vertex_pair_buffer_q_r_edge_p_q), - get_dataframe_buffer_end(vertex_pair_buffer_q_r_edge_p_q), - 
generate_p_r_or_q_r_from_p_q{ - prev_chunk_size, - raft::device_span(intersection_offsets.data(), intersection_offsets.size()), - raft::device_span(intersection_indices.data(), intersection_indices.size()), - weak_edgelist_srcs, - weak_edgelist_dsts}); - - return std::make_tuple(std::move(vertex_pair_buffer_p_q), - std::move(vertex_pair_buffer_p_r_edge_p_q), - std::move(vertex_pair_buffer_q_r_edge_p_q)); -} - } // namespace template @@ -778,27 +305,13 @@ k_truss(raft::handle_t const& handle, cugraph::edge_property_t edge_mask(handle, cur_graph_view); cugraph::fill_edge_property(handle, cur_graph_view, edge_mask.mutable_view(), bool{true}); - - std::chrono::seconds s (0); // 1 second - std::chrono::duration triangle_count_ms = duration_cast (s); - std::chrono::duration ext_w_edges_ms = duration_cast (s); - std::chrono::duration edge_mask_ms = duration_cast (s); while (true) { - RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - auto start = high_resolution_clock::now(); auto edge_triangle_counts = edge_triangle_count(handle, cur_graph_view); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - auto stop = high_resolution_clock::now(); - - triangle_count_ms += duration_cast(stop - start); - // Mask all the edges that have 0 count - RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - start = high_resolution_clock::now(); + // Mask all the edges that have k - 2 count auto prev_number_of_edges = cur_graph_view.compute_number_of_edges(handle); @@ -816,21 +329,12 @@ k_truss(raft::handle_t const& handle, cur_graph_view.attach_edge_mask(edge_mask.view()); - RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - stop = high_resolution_clock::now(); - - edge_mask_ms += duration_cast(stop - start); - if (prev_number_of_edges == cur_graph_view.compute_number_of_edges(handle)) { break; } } - std::cout << "edge triangle count took " << 
triangle_count_ms.count() << " milliseconds" << std::endl; - std::cout << "weak edge extraction took " << ext_w_edges_ms.count() << " milliseconds" << std::endl; - std::cout << "edge masking took " << edge_mask_ms.count() << " milliseconds" << std::endl; - rmm::device_uvector edgelist_srcs(0, handle.get_stream()); rmm::device_uvector edgelist_dsts(0, handle.get_stream()); std::optional> edgelist_wgts{std::nullopt}; diff --git a/cpp/tests/community/mg_k_truss_test.cpp b/cpp/tests/community/mg_k_truss_test.cpp index ae31b09303f..86d4ece3be7 100644 --- a/cpp/tests/community/mg_k_truss_test.cpp +++ b/cpp/tests/community/mg_k_truss_test.cpp @@ -78,7 +78,7 @@ class Tests_MGKTruss auto [mg_graph, edge_weight, mg_renumber_map] = cugraph::test::construct_graph( - *handle_, input_usecase, k_truss_usecase.test_weighted_, true, true, true); + *handle_, input_usecase, k_truss_usecase.test_weighted_, true, false, true); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -226,8 +226,15 @@ class Tests_MGKTruss template std::unique_ptr Tests_MGKTruss::handle_ = nullptr; +using Tests_MGKTruss_File = Tests_MGKTruss; using Tests_MGKTruss_Rmat = Tests_MGKTruss; +TEST_P(Tests_MGKTruss_File, CheckInt32Int32) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + TEST_P(Tests_MGKTruss_Rmat, CheckInt32Int32) { auto param = GetParam(); @@ -235,6 +242,36 @@ TEST_P(Tests_MGKTruss_Rmat, CheckInt32Int32) std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } +TEST_P(Tests_MGKTruss_Rmat, CheckInt32Int64) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGKTruss_Rmat, CheckInt64Int64) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +INSTANTIATE_TEST_SUITE_P( 
+ file_tests, + Tests_MGKTruss_File, + ::testing::Combine( + // enable correctness checks + ::testing::Values(KTruss_Usecase{4, false, true, true}, KTruss_Usecase{5, true, true, true}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_tests, + Tests_MGKTruss_Rmat, + ::testing::Combine( + ::testing::Values(KTruss_Usecase{4, false, false, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(20, 16, 0.57, 0.19, 0.19, 0, true, false)))); + INSTANTIATE_TEST_SUITE_P( rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with --gtest_filter to select only the rmat_benchmark_test with a specific @@ -243,7 +280,8 @@ INSTANTIATE_TEST_SUITE_P( factor (to avoid running same benchmarks more than once) */ Tests_MGKTruss_Rmat, ::testing::Combine( - ::testing::Values(KTruss_Usecase{3, false, false, false}), - ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); + ::testing::Values(KTruss_Usecase{4, false, false, false}, + KTruss_Usecase{5, false, false, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, true, false)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() From 5892e449ecc8940474af0b61538bda8df80554b3 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 25 Jul 2024 08:13:35 -0700 Subject: [PATCH 84/93] update CMake file --- cpp/CMakeLists.txt | 2 +- cpp/tests/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index fac43efea9d..d6695786bdd 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -631,7 +631,7 @@ add_library(cugraph_c src/c_api/eigenvector_centrality.cpp src/c_api/betweenness_centrality.cpp src/c_api/core_number.cpp - #src/c_api/k_truss.cpp + src/c_api/k_truss.cpp src/c_api/core_result.cpp src/c_api/extract_ego.cpp src/c_api/ecg.cpp diff --git 
a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 0666fe71831..c5781127321 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -830,7 +830,7 @@ ConfigureCTest(CAPI_DEGREES c_api/degrees_test.c) ConfigureCTest(CAPI_COUNT_MULTI_EDGES c_api/count_multi_edges_test.c) ConfigureCTest(CAPI_EGONET_TEST c_api/egonet_test.c) ConfigureCTest(CAPI_TWO_HOP_NEIGHBORS_TEST c_api/two_hop_neighbors_test.c) -#ConfigureCTest(CAPI_K_TRUSS_TEST c_api/k_truss_test.c) +ConfigureCTest(CAPI_K_TRUSS_TEST c_api/k_truss_test.c) if (BUILD_CUGRAPH_MTMG_TESTS) ################################################################################################### From c639b762be7cd4a7c876e8b0217e2462a113703d Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 25 Jul 2024 12:27:05 -0700 Subject: [PATCH 85/93] describe rx_count in documentation --- cpp/include/cugraph/detail/shuffle_wrappers.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cugraph/detail/shuffle_wrappers.hpp b/cpp/include/cugraph/detail/shuffle_wrappers.hpp index 37130bf3c64..7dffcce298a 100644 --- a/cpp/include/cugraph/detail/shuffle_wrappers.hpp +++ b/cpp/include/cugraph/detail/shuffle_wrappers.hpp @@ -87,7 +87,7 @@ shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( * (exclusive) vertex ID. 
* * @return Tuple of vectors storing shuffled major vertices, minor vertices and optional weights, - * edge ids and edge types + * edge ids and edge types and rx counts */ template std::tuple, From 48078e9cdb0163fd0b20370d208cbb23e820f84f Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 25 Jul 2024 12:29:25 -0700 Subject: [PATCH 86/93] update copyright --- cpp/src/community/k_truss_mg_v32_e32.cu | 2 +- cpp/src/community/k_truss_mg_v32_e64.cu | 2 +- cpp/src/community/k_truss_mg_v64_e64.cu | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/community/k_truss_mg_v32_e32.cu b/cpp/src/community/k_truss_mg_v32_e32.cu index 69690e3c3e6..4feb69f6098 100644 --- a/cpp/src/community/k_truss_mg_v32_e32.cu +++ b/cpp/src/community/k_truss_mg_v32_e32.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/community/k_truss_mg_v32_e64.cu b/cpp/src/community/k_truss_mg_v32_e64.cu index 639269efc97..b07f9382612 100644 --- a/cpp/src/community/k_truss_mg_v32_e64.cu +++ b/cpp/src/community/k_truss_mg_v32_e64.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/community/k_truss_mg_v64_e64.cu b/cpp/src/community/k_truss_mg_v64_e64.cu index 3fda694f342..1c730fe272d 100644 --- a/cpp/src/community/k_truss_mg_v64_e64.cu +++ b/cpp/src/community/k_truss_mg_v64_e64.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
From f88efb90ad3483c3b3dd74fa1819decad05b85de Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 25 Jul 2024 12:33:11 -0700 Subject: [PATCH 87/93] fix copyright --- cpp/src/community/k_truss_impl.cuh | 20 ++++++-------------- cpp/src/cores/k_core_impl.cuh | 3 +-- 2 files changed, 7 insertions(+), 16 deletions(-) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 2fe9e1c9285..007ff0f980b 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -41,7 +41,6 @@ #include #include - namespace cugraph { namespace { @@ -105,13 +104,12 @@ k_truss(raft::handle_t const& handle, bool do_expensive_check) { // 1. Check input arguments. - + CUGRAPH_EXPECTS(graph_view.is_symmetric(), "Invalid input arguments: K-truss currently supports undirected graphs only."); CUGRAPH_EXPECTS(!graph_view.is_multigraph(), "Invalid input arguments: K-truss currently does not support multi-graphs."); - if (do_expensive_check) { // nothing to do } @@ -156,7 +154,7 @@ k_truss(raft::handle_t const& handle, } // 2. Find (k-1)-core and exclude edges that do not belong to (k-1)-core - + { auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; @@ -217,7 +215,6 @@ k_truss(raft::handle_t const& handle, // 3. Keep only the edges from a low-degree vertex to a high-degree vertex. { - auto cur_graph_view = modified_graph_view ? *modified_graph_view : graph_view; auto vertex_partition_range_lasts = @@ -291,7 +288,6 @@ k_truss(raft::handle_t const& handle, *vertex_partition_range_lasts); } renumber_map = std::move(tmp_renumber_map); - } // 4. 
Compute triangle count using nbr_intersection and unroll weak edges @@ -305,16 +301,15 @@ k_truss(raft::handle_t const& handle, cugraph::edge_property_t edge_mask(handle, cur_graph_view); cugraph::fill_edge_property(handle, cur_graph_view, edge_mask.mutable_view(), bool{true}); - + while (true) { - auto edge_triangle_counts = edge_triangle_count(handle, cur_graph_view); // Mask all the edges that have k - 2 count auto prev_number_of_edges = cur_graph_view.compute_number_of_edges(handle); - + cugraph::transform_e( handle, cur_graph_view, @@ -329,12 +324,9 @@ k_truss(raft::handle_t const& handle, cur_graph_view.attach_edge_mask(edge_mask.view()); - if (prev_number_of_edges == cur_graph_view.compute_number_of_edges(handle)) { - break; - } - + if (prev_number_of_edges == cur_graph_view.compute_number_of_edges(handle)) { break; } } - + rmm::device_uvector edgelist_srcs(0, handle.get_stream()); rmm::device_uvector edgelist_dsts(0, handle.get_stream()); std::optional> edgelist_wgts{std::nullopt}; diff --git a/cpp/src/cores/k_core_impl.cuh b/cpp/src/cores/k_core_impl.cuh index b3c832d1539..2c5bf987a47 100644 --- a/cpp/src/cores/k_core_impl.cuh +++ b/cpp/src/cores/k_core_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -37,7 +37,6 @@ k_core(raft::handle_t const& handle, std::optional> core_numbers, bool do_expensive_check) { - rmm::device_uvector computed_core_numbers(0, handle.get_stream()); if (!core_numbers) { From f743122fd1cd0b6b33574378ac8f12d5f214633c Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 25 Jul 2024 12:55:31 -0700 Subject: [PATCH 88/93] remove outdated fixme --- cpp/tests/community/mg_k_truss_test.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/tests/community/mg_k_truss_test.cpp b/cpp/tests/community/mg_k_truss_test.cpp index 86d4ece3be7..a1624949007 100644 --- a/cpp/tests/community/mg_k_truss_test.cpp +++ b/cpp/tests/community/mg_k_truss_test.cpp @@ -42,7 +42,6 @@ struct KTruss_Usecase { int32_t k_{3}; bool test_weighted_{false}; - // FIXME: test edge mask bool edge_masking_{false}; bool check_correctness_{true}; }; From 0c2f042729b24c6b4e1222c8a2b8790acede1862 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Fri, 26 Jul 2024 09:42:39 -0700 Subject: [PATCH 89/93] update docs --- cpp/tests/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index c5781127321..c35811985dc 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -499,7 +499,7 @@ ConfigureTest(CORE_NUMBER_TEST cores/core_number_test.cpp) # - Core Number tests ----------------------------------------------------------------------------- ConfigureTest(K_CORE_TEST cores/k_core_test.cpp) -############################################################################################### +################################################################################################### # - K-truss tests -------------------------------------------------------------------------- ConfigureTest(K_TRUSS_TEST community/k_truss_test.cpp) From 94a4c89de2a892162bf10309799b9ba3f321fcc9 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Fri, 26 Jul 2024 09:49:28 -0700 Subject: [PATCH 90/93] add fixme --- 
cpp/src/community/k_truss_impl.cuh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cpp/src/community/k_truss_impl.cuh b/cpp/src/community/k_truss_impl.cuh index 007ff0f980b..e052a892917 100644 --- a/cpp/src/community/k_truss_impl.cuh +++ b/cpp/src/community/k_truss_impl.cuh @@ -303,6 +303,8 @@ k_truss(raft::handle_t const& handle, cugraph::fill_edge_property(handle, cur_graph_view, edge_mask.mutable_view(), bool{true}); while (true) { + // FIXME: This approach is very expensive when invalidating only a few edges per iteration + // and should be addressed. auto edge_triangle_counts = edge_triangle_count(handle, cur_graph_view); From 4476d8d0295f48fbc7301a1667fed84c35a8fb1d Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sun, 28 Jul 2024 21:56:08 -0700 Subject: [PATCH 91/93] enable MG CAPI k_truss --- cpp/src/c_api/k_truss.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/cpp/src/c_api/k_truss.cpp b/cpp/src/c_api/k_truss.cpp index 18e256b022a..37a0672676e 100644 --- a/cpp/src/c_api/k_truss.cpp +++ b/cpp/src/c_api/k_truss.cpp @@ -60,10 +60,7 @@ struct k_truss_functor : public cugraph::c_api::abstract_functor { { if constexpr (!cugraph::is_candidate::value) { unsupported(); - } else if constexpr (multi_gpu) { - unsupported(); } else { - // k_truss expects store_transposed == false if constexpr (store_transposed) { error_code_ = cugraph::c_api:: transpose_storage( From ed78d3222a0f9639ff951b9ba86ff7bc568114a2 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sun, 28 Jul 2024 21:56:34 -0700 Subject: [PATCH 92/93] add CAPI tests for MG k_truss --- cpp/tests/CMakeLists.txt | 1 + cpp/tests/c_api/mg_k_truss_test.c | 159 ++++++++++++++++++++++++++++++ 2 files changed, 160 insertions(+) create mode 100644 cpp/tests/c_api/mg_k_truss_test.c diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index c35811985dc..6c10bd34d8a 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -786,6 +786,7 @@ if(BUILD_CUGRAPH_MG_TESTS) 
ConfigureCTestMG(MG_CAPI_COUNT_MULTI_EDGES c_api/mg_count_multi_edges_test.c) ConfigureCTestMG(MG_CAPI_EGONET_TEST c_api/mg_egonet_test.c) ConfigureCTestMG(MG_CAPI_TWO_HOP_NEIGHBORS_TEST c_api/mg_two_hop_neighbors_test.c) + ConfigureCTestMG(MG_CAPI_K_TRUSS c_api/mg_k_truss_test.c) rapids_test_install_relocatable(INSTALL_COMPONENT_SET testing_mg DESTINATION bin/gtests/libcugraph_mg) diff --git a/cpp/tests/c_api/mg_k_truss_test.c b/cpp/tests/c_api/mg_k_truss_test.c new file mode 100644 index 00000000000..2ad5f0fc11b --- /dev/null +++ b/cpp/tests/c_api/mg_k_truss_test.c @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2022, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mg_test_utils.h" /* RUN_TEST */ + +#include +#include + +#include + +typedef int32_t vertex_t; +typedef int32_t edge_t; +typedef float weight_t; + +/* + * Simple check of creating a graph from a COO on device memory. 
+ */ +int generic_k_truss_test(const cugraph_resource_handle_t* handle, + vertex_t* h_src, + vertex_t* h_dst, + weight_t* h_wgt, + size_t num_edges, + size_t num_results, + size_t k, + bool_t store_transposed, + vertex_t* h_result_src, + vertex_t* h_result_dst, + weight_t* h_result_wgt + ) +{ + int test_ret_value = 0; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + + cugraph_graph_t* graph = NULL; + + cugraph_induced_subgraph_result_t* result = NULL; + + data_type_id_t vertex_tid = INT32; + data_type_id_t size_t_tid = SIZE_T; + + ret_code = create_mg_test_graph( + handle, h_src, h_dst, h_wgt, num_edges, store_transposed, TRUE, &graph, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + + ret_code = cugraph_k_truss_subgraph(handle, + graph, + k, + FALSE, + &result, + &ret_error); +TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + TEST_ASSERT( + test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_k_truss failed."); + + cugraph_type_erased_device_array_view_t* k_truss_src; + cugraph_type_erased_device_array_view_t* k_truss_dst; + cugraph_type_erased_device_array_view_t* k_truss_wgt; + + k_truss_src = cugraph_induced_subgraph_get_sources(result); + k_truss_dst = cugraph_induced_subgraph_get_destinations(result); + k_truss_wgt = cugraph_induced_subgraph_get_edge_weights(result); + + size_t k_truss_size = cugraph_type_erased_device_array_view_size(k_truss_src); + + vertex_t h_k_truss_src[k_truss_size]; + vertex_t h_k_truss_dst[k_truss_size]; + weight_t h_k_truss_wgt[k_truss_size]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_k_truss_src, k_truss_src, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + 
handle, (byte_t*)h_k_truss_dst, k_truss_dst, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_k_truss_wgt, k_truss_wgt, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + for (size_t i = 0; (i < k_truss_size) && (test_ret_value == 0); ++i) { + bool_t found = FALSE; + for (size_t j = 0; (j < num_results) && !found; ++j) { + if ((h_k_truss_src[i] == h_result_src[j]) && (h_k_truss_dst[i] == h_result_dst[j]) && + (h_k_truss_wgt[i] == h_result_wgt[j])) + found = TRUE; + } + TEST_ASSERT(test_ret_value, found, "k_truss subgraph has an edge that doesn't match"); + } + + + cugraph_induced_subgraph_result_free(result); + cugraph_mg_graph_free(graph); + cugraph_error_free(ret_error); + return test_ret_value; +} + +int test_k_truss_subgraph(const cugraph_resource_handle_t* handle) +{ + size_t num_edges = 14; + size_t num_vertices = 7; + size_t num_results = 6; + size_t k = 3; + + vertex_t h_src[] = {0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 3, 4, 5, 6}; + vertex_t h_dst[] = {1, 2, 5, 0, 2, 3, 4, 6, 0, 1, 1, 1, 0, 1}; + weight_t h_wgt[] = {1.2f, 1.3f, 1.6f, 1.2f, 2.3f, 2.4f, 2.5f, 2.7f, 1.3f, 2.3f, 2.4f, 2.5f, 1.6f, 2.7f}; + + vertex_t h_result_src[] = {0, 2, 2, 1, 1, 0}; + vertex_t h_result_dst[] = {1, 1, 0, 0, 2, 2}; + weight_t h_result_wgt[] = {1.2f, 2.3f, 1.3f, 1.2f, 2.3f, 1.3f}; + + return generic_k_truss_test(handle, + h_src, + h_dst, + h_wgt, + num_edges, + num_results, + k, + FALSE, + h_result_src, + h_result_dst, + h_result_wgt + ); +} + +/******************************************************************************/ + +int main(int argc, char** argv) +{ + void* raft_handle = create_mg_raft_handle(argc, argv); + cugraph_resource_handle_t* handle = cugraph_create_resource_handle(raft_handle); + + int result = 0; + result |= RUN_MG_TEST(test_k_truss_subgraph, handle); + + 
cugraph_free_resource_handle(handle); + free_mg_raft_handle(raft_handle); + + return result; +} From 6de53043e394ae8ea8d251b38c7bc8886aad0371 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sun, 28 Jul 2024 21:58:31 -0700 Subject: [PATCH 93/93] fix style --- cpp/tests/c_api/mg_k_truss_test.c | 71 ++++++++++++++----------------- 1 file changed, 31 insertions(+), 40 deletions(-) diff --git a/cpp/tests/c_api/mg_k_truss_test.c b/cpp/tests/c_api/mg_k_truss_test.c index 2ad5f0fc11b..e406eb330a7 100644 --- a/cpp/tests/c_api/mg_k_truss_test.c +++ b/cpp/tests/c_api/mg_k_truss_test.c @@ -29,29 +29,28 @@ typedef float weight_t; * Simple check of creating a graph from a COO on device memory. */ int generic_k_truss_test(const cugraph_resource_handle_t* handle, - vertex_t* h_src, - vertex_t* h_dst, - weight_t* h_wgt, - size_t num_edges, - size_t num_results, - size_t k, - bool_t store_transposed, - vertex_t* h_result_src, - vertex_t* h_result_dst, - weight_t* h_result_wgt - ) + vertex_t* h_src, + vertex_t* h_dst, + weight_t* h_wgt, + size_t num_edges, + size_t num_results, + size_t k, + bool_t store_transposed, + vertex_t* h_result_src, + vertex_t* h_result_dst, + weight_t* h_result_wgt) { int test_ret_value = 0; cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; cugraph_error_t* ret_error; - cugraph_graph_t* graph = NULL; + cugraph_graph_t* graph = NULL; cugraph_induced_subgraph_result_t* result = NULL; - data_type_id_t vertex_tid = INT32; - data_type_id_t size_t_tid = SIZE_T; + data_type_id_t vertex_tid = INT32; + data_type_id_t size_t_tid = SIZE_T; ret_code = create_mg_test_graph( handle, h_src, h_dst, h_wgt, num_edges, store_transposed, TRUE, &graph, &ret_error); @@ -59,24 +58,17 @@ int generic_k_truss_test(const cugraph_resource_handle_t* handle, TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed."); TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); - - ret_code = cugraph_k_truss_subgraph(handle, - graph, - k, - 
FALSE, - &result, - &ret_error); -TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); - TEST_ASSERT( - test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_k_truss failed."); + ret_code = cugraph_k_truss_subgraph(handle, graph, k, FALSE, &result, &ret_error); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_k_truss failed."); cugraph_type_erased_device_array_view_t* k_truss_src; cugraph_type_erased_device_array_view_t* k_truss_dst; cugraph_type_erased_device_array_view_t* k_truss_wgt; - k_truss_src = cugraph_induced_subgraph_get_sources(result); - k_truss_dst = cugraph_induced_subgraph_get_destinations(result); - k_truss_wgt = cugraph_induced_subgraph_get_edge_weights(result); + k_truss_src = cugraph_induced_subgraph_get_sources(result); + k_truss_dst = cugraph_induced_subgraph_get_destinations(result); + k_truss_wgt = cugraph_induced_subgraph_get_edge_weights(result); size_t k_truss_size = cugraph_type_erased_device_array_view_size(k_truss_src); @@ -106,7 +98,6 @@ TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error) TEST_ASSERT(test_ret_value, found, "k_truss subgraph has an edge that doesn't match"); } - cugraph_induced_subgraph_result_free(result); cugraph_mg_graph_free(graph); cugraph_error_free(ret_error); @@ -115,18 +106,19 @@ TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error) int test_k_truss_subgraph(const cugraph_resource_handle_t* handle) { - size_t num_edges = 14; - size_t num_vertices = 7; - size_t num_results = 6; - size_t k = 3; + size_t num_edges = 14; + size_t num_vertices = 7; + size_t num_results = 6; + size_t k = 3; - vertex_t h_src[] = {0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 3, 4, 5, 6}; - vertex_t h_dst[] = {1, 2, 5, 0, 2, 3, 4, 6, 0, 1, 1, 1, 0, 1}; - weight_t h_wgt[] = {1.2f, 1.3f, 1.6f, 1.2f, 2.3f, 2.4f, 2.5f, 2.7f, 1.3f, 2.3f, 2.4f, 2.5f, 1.6f, 2.7f}; + 
vertex_t h_src[] = {0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 3, 4, 5, 6}; + vertex_t h_dst[] = {1, 2, 5, 0, 2, 3, 4, 6, 0, 1, 1, 1, 0, 1}; + weight_t h_wgt[] = { + 1.2f, 1.3f, 1.6f, 1.2f, 2.3f, 2.4f, 2.5f, 2.7f, 1.3f, 2.3f, 2.4f, 2.5f, 1.6f, 2.7f}; - vertex_t h_result_src[] = {0, 2, 2, 1, 1, 0}; - vertex_t h_result_dst[] = {1, 1, 0, 0, 2, 2}; - weight_t h_result_wgt[] = {1.2f, 2.3f, 1.3f, 1.2f, 2.3f, 1.3f}; + vertex_t h_result_src[] = {0, 2, 2, 1, 1, 0}; + vertex_t h_result_dst[] = {1, 1, 0, 0, 2, 2}; + weight_t h_result_wgt[] = {1.2f, 2.3f, 1.3f, 1.2f, 2.3f, 1.3f}; return generic_k_truss_test(handle, h_src, @@ -138,8 +130,7 @@ int test_k_truss_subgraph(const cugraph_resource_handle_t* handle) FALSE, h_result_src, h_result_dst, - h_result_wgt - ); + h_result_wgt); } /******************************************************************************/