diff --git a/core/src/CMakeLists.txt b/core/src/CMakeLists.txt index 66c656893..e40f66cb7 100644 --- a/core/src/CMakeLists.txt +++ b/core/src/CMakeLists.txt @@ -32,6 +32,12 @@ if(Cabana_ENABLE_MPI) ) endif() +if(Cabana_ENABLE_CAJITA) + list(APPEND HEADERS_PUBLIC + Cabana_ParticleGridCommunication.hpp + ) +endif() + set(HEADERS_IMPL impl/Cabana_CartesianGrid.hpp impl/Cabana_Index.hpp diff --git a/core/src/Cabana_Core.hpp b/core/src/Cabana_Core.hpp index 3f72e795d..67737f3a3 100644 --- a/core/src/Cabana_Core.hpp +++ b/core/src/Cabana_Core.hpp @@ -33,6 +33,10 @@ #include #endif +#ifdef Cabana_ENABLE_CAJITA +#include +#endif + #ifdef Cabana_ENABLE_ARBORX #include #endif diff --git a/core/src/Cabana_Halo.hpp b/core/src/Cabana_Halo.hpp index 45a0ed6b8..fce7d6ace 100644 --- a/core/src/Cabana_Halo.hpp +++ b/core/src/Cabana_Halo.hpp @@ -199,60 +199,61 @@ struct is_halo : public is_halo_impl::type>::type { }; -//---------------------------------------------------------------------------// -/*! - \brief Synchronously gather data from the local decomposition to the ghosts - using the halo forward communication plan. AoSoA version. This is a - uniquely-owned to multiply-owned communication. +namespace Impl +{ - A gather sends data from a locally owned elements to one or many ranks on - which they exist as ghosts. A locally owned element may be sent to as many - ranks as desired to be used as a ghost on those ranks. The value of the - element in the locally owned decomposition will be the value assigned to the - element in the ghosted decomposition. +template +void checkSize( const Halo_t &halo, Container_t &container ) +{ + // Check that the AoSoA/slice is the right size. + if ( container.size() != halo.numLocal() + halo.numGhost() ) + throw std::runtime_error( "AoSoA/slice is the wrong size for gather!" ); +} - \tparam Halo_t Halo type - must be a Halo. +template +void sendBuffer( const Halo_t &halo, AoSoA_t &aosoa, View_t &send_buffer ) +{ - \tparam AoSoA_t AoSoA type - must be an AoSoA. + // Get the steering vector for the sends. + auto steering = halo.getExportSteering(); - \param halo The halo to use for the gather. + // Gather from the local data into a tuple-contiguous send buffer. + // Pass send buffer to user modification functor class to add shifts. + auto gather_send_buffer_func = KOKKOS_LAMBDA( const std::size_t i ) + { + send_buffer( i ) = aosoa.getTuple( steering( i ) ); + }; + Kokkos::RangePolicy + gather_send_buffer_policy( 0, halo.totalNumExport() ); + Kokkos::parallel_for( "Cabana::gather::gather_send_buffer", + gather_send_buffer_policy, gather_send_buffer_func ); + Kokkos::fence(); +} - \param aosoa The AoSoA on which to perform the gather. The AoSoA should have - a size equivalent to halo.numGhost() + halo.numLocal(). The locally owned - elements are expected to appear first (i.e. in the first halo.numLocal() - elements) and the ghosted elements are expected to appear second (i.e. in - the next halo.numGhost() elements()). -*/ -template -void gather( const Halo_t &halo, AoSoA_t &aosoa, - typename std::enable_if<( is_halo::value && - is_aosoa::value ), - int>::type * = 0 ) +template +void sendBuffer( const Halo_t &halo, AoSoA_t &aosoa, View_t &send_buffer, + const Periodic_t &periodic ) { - // Check that the AoSoA is the right size. - if ( aosoa.size() != halo.numLocal() + halo.numGhost() ) - throw std::runtime_error( "AoSoA is the wrong size for gather!" ); - - // Allocate a send buffer. 
- Kokkos::View - send_buffer( - Kokkos::ViewAllocateWithoutInitializing( "halo_send_buffer" ), - halo.totalNumExport() ); - // Get the steering vector for the sends. auto steering = halo.getExportSteering(); // Gather from the local data into a tuple-contiguous send buffer. + // Pass send buffer to user modification functor class to add shifts. auto gather_send_buffer_func = KOKKOS_LAMBDA( const std::size_t i ) { send_buffer( i ) = aosoa.getTuple( steering( i ) ); + periodic.modify_buffer( send_buffer, i ); }; Kokkos::RangePolicy gather_send_buffer_policy( 0, halo.totalNumExport() ); Kokkos::parallel_for( "Cabana::gather::gather_send_buffer", gather_send_buffer_policy, gather_send_buffer_func ); Kokkos::fence(); +} +template +void recvBuffer( const Halo_t &halo, AoSoA_t &aosoa, const View_t &send_buffer ) +{ // Allocate a receive buffer. Kokkos::View recv_buffer( @@ -320,6 +321,68 @@ void gather( const Halo_t &halo, AoSoA_t &aosoa, MPI_Barrier( halo.comm() ); } +} // namespace Impl + +//---------------------------------------------------------------------------// +/*! + \brief Synchronously gather data from the local decomposition to the ghosts + using the halo forward communication plan. AoSoA version. This is a + uniquely-owned to multiply-owned communication. + + A gather sends data from a locally owned elements to one or many ranks on + which they exist as ghosts. A locally owned element may be sent to as many + ranks as desired to be used as a ghost on those ranks. The value of the + element in the locally owned decomposition will be the value assigned to the + element in the ghosted decomposition. + + \tparam Halo_t Halo type - must be a Halo. + + \tparam AoSoA_t AoSoA type - must be an AoSoA. + + \param halo The halo to use for the gather. + + \param aosoa The AoSoA on which to perform the gather. The AoSoA should have + a size equivalent to halo.numGhost() + halo.numLocal(). The locally owned + elements are expected to appear first (i.e. in the first halo.numLocal() + elements) and the ghosted elements are expected to appear second (i.e. in + the next halo.numGhost() elements()). +*/ +template +void gather( const Halo_t &halo, AoSoA_t &aosoa, const Periodic_t &periodic, + typename std::enable_if<( is_halo::value && + is_aosoa::value ), + int>::type * = 0 ) +{ + Impl::checkSize( halo, aosoa ); + + // Allocate a send buffer. + Kokkos::View + send_buffer( + Kokkos::ViewAllocateWithoutInitializing( "halo_send_buffer" ), + halo.totalNumExport() ); + + Impl::sendBuffer( halo, aosoa, send_buffer, periodic ); + Impl::recvBuffer( halo, aosoa, send_buffer ); +} + +template +void gather( const Halo_t &halo, AoSoA_t &aosoa, + typename std::enable_if<( is_halo::value && + is_aosoa::value ), + int>::type * = 0 ) +{ + Impl::checkSize( halo, aosoa ); + + // Allocate a send buffer. + Kokkos::View + send_buffer( + Kokkos::ViewAllocateWithoutInitializing( "halo_send_buffer" ), + halo.totalNumExport() ); + + Impl::sendBuffer( halo, aosoa, send_buffer ); + Impl::recvBuffer( halo, aosoa, send_buffer ); +} + //---------------------------------------------------------------------------// /*! \brief Synchronously gather data from the local decomposition to the ghosts @@ -351,8 +414,7 @@ void gather( const Halo_t &halo, Slice_t &slice, int>::type * = 0 ) { // Check that the Slice is the right size. - if ( slice.size() != halo.numLocal() + halo.numGhost() ) - throw std::runtime_error( "Slice is the wrong size for gather!" ); + Impl::checkSize( halo, slice ); // Get the number of components in the slice. 
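//---------------------------------------------------------------------------//
// Illustrative usage sketch for the AoSoA gather() overloads above. Names
// such as `halo`, `particles`, and `periodic_halo` are hypothetical; this is
// a minimal sketch, not a prescribed workflow. The container must already be
// sized to numLocal() + numGhost(), owned elements first. The three-argument
// overload passes each packed tuple through the functor's modify_buffer()
// before communication, e.g. to apply periodic coordinate shifts.
//
//   particles.resize( halo.numLocal() + halo.numGhost() );
//   Cabana::gather( halo, particles );                 // plain forward gather
//   Cabana::gather( halo, particles, periodic_halo );  // gather with shifts
//---------------------------------------------------------------------------//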
std::size_t num_comp = 1; diff --git a/core/src/Cabana_ParticleGridCommunication.hpp b/core/src/Cabana_ParticleGridCommunication.hpp new file mode 100644 index 000000000..8656925b5 --- /dev/null +++ b/core/src/Cabana_ParticleGridCommunication.hpp @@ -0,0 +1,712 @@ +/**************************************************************************** + * Copyright (c) 2018-2020 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#ifndef CABANA_PERIODICCOMM_HPP +#define CABANA_PERIODICCOMM_HPP + +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include + +namespace Cabana +{ + +namespace Impl +{ +//---------------------------------------------------------------------------// +// Of the 27 potential local grids figure out which are in our topology. +// Some of the ranks in this list may be invalid. This needs to be updated +// after computing destination ranks to only contain valid ranks. +template +std::vector getTopology( const LocalGridType &local_grid ) +{ + std::vector topology( 27, -1 ); + int nr = 0; + for ( int k = -1; k < 2; ++k ) + for ( int j = -1; j < 2; ++j ) + for ( int i = -1; i < 2; ++i, ++nr ) + topology[nr] = local_grid.neighborRank( i, j, k ); + return topology; +} + +//---------------------------------------------------------------------------// +// Make the topology a list of unique and valid ranks. They must all be valid, +// but not necessarily unique (enforced within CommunicationPlan). +std::vector getValidTopology( std::vector topology ) +{ + auto remove_end = std::remove( topology.begin(), topology.end(), -1 ); + std::sort( topology.begin(), remove_end ); + auto unique_end = std::unique( topology.begin(), remove_end ); + topology.resize( std::distance( topology.begin(), unique_end ) ); + return topology; +} + +} // namespace Impl + +//---------------------------------------------------------------------------// +// Grid Distributor/migrate +//---------------------------------------------------------------------------// + +//---------------------------------------------------------------------------// +/*! +\brief Wrap particles through periodic bounds according to Cajita grid global +bounds. + +\tparam LocalGridType Cajita LocalGrid type. + +\tparam PositionSliceType Particle position type. + +\param local_grid The local grid containing periodicity and system bound +information. + +\param positions The particle position container, either Slice or View. 
+*/ +template +KOKKOS_INLINE_FUNCTION void periodicWrap( const LocalGridType &local_grid, + PositionSliceType &positions ) +{ + using execution_space = typename PositionSliceType::execution_space; + + const auto &global_grid = local_grid.globalGrid(); + const auto &global_mesh = global_grid.globalMesh(); + const Kokkos::Array periodic = { + global_grid.isPeriodic( Cajita::Dim::I ), + global_grid.isPeriodic( Cajita::Dim::J ), + global_grid.isPeriodic( Cajita::Dim::K )}; + const Kokkos::Array global_low = { + global_mesh.lowCorner( Cajita::Dim::I ), + global_mesh.lowCorner( Cajita::Dim::J ), + global_mesh.lowCorner( Cajita::Dim::K )}; + const Kokkos::Array global_high = { + global_mesh.highCorner( Cajita::Dim::I ), + global_mesh.highCorner( Cajita::Dim::J ), + global_mesh.highCorner( Cajita::Dim::K )}; + const Kokkos::Array global_extent = { + global_mesh.extent( Cajita::Dim::I ), + global_mesh.extent( Cajita::Dim::J ), + global_mesh.extent( Cajita::Dim::K )}; + Kokkos::parallel_for( + "periodic_wrap", + Kokkos::RangePolicy( 0, positions.size() ), + KOKKOS_LAMBDA( const int p ) { + for ( int d = 0; d < 3; ++d ) + { + if ( periodic[d] ) + { + if ( positions( p, d ) > global_high[d] ) + positions( p, d ) -= global_extent[d]; + else if ( positions( p, d ) < global_low[d] ) + positions( p, d ) += global_extent[d]; + } + } + } ); +} + +//---------------------------------------------------------------------------// +/*! +\brief Check for the number of particles that must be communicated + +\tparam LocalGridType Cajita LocalGrid type. + +\tparam PositionSliceType Particle position type. + +\param local_grid The local grid containing periodicity and system bound +information. + +\param positions The particle position container, either Slice or View. + +\param minimum_halo_width Number of halo mesh widths to include for ghosting. +*/ +template +int migrateCount( const LocalGridType &local_grid, + const PositionSliceType &positions, + const int minimum_halo_width ) +{ + using execution_space = typename PositionSliceType::execution_space; + + // Check within the halo width, within the ghosted domain. 
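    // In other words, the bounds computed below are the ghosted block shrunk
    // inward by minimum_halo_width cells on each face:
    //   interior_low[d]  = ghost_low[d]  + minimum_halo_width * cell_size[d]
    //   interior_high[d] = ghost_high[d] - minimum_halo_width * cell_size[d]
    // Any particle outside this shrunken box is close enough to (or beyond)
    // the local boundary that a migration pass is warranted.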
+ auto local_mesh = Cajita::createLocalMesh( local_grid ); + auto dx = local_grid.globalGrid().globalMesh().cellSize( Cajita::Dim::I ); + auto dy = local_grid.globalGrid().globalMesh().cellSize( Cajita::Dim::J ); + auto dz = local_grid.globalGrid().globalMesh().cellSize( Cajita::Dim::K ); + const Kokkos::Array local_low = { + local_mesh.lowCorner( Cajita::Ghost(), Cajita::Dim::I ) + + minimum_halo_width * dx, + local_mesh.lowCorner( Cajita::Ghost(), Cajita::Dim::J ) + + minimum_halo_width * dy, + local_mesh.lowCorner( Cajita::Ghost(), Cajita::Dim::K ) + + minimum_halo_width * dz}; + const Kokkos::Array local_high = { + local_mesh.highCorner( Cajita::Ghost(), Cajita::Dim::I ) - + minimum_halo_width * dx, + local_mesh.highCorner( Cajita::Ghost(), Cajita::Dim::J ) - + minimum_halo_width * dy, + local_mesh.highCorner( Cajita::Ghost(), Cajita::Dim::K ) - + minimum_halo_width * dz}; + int comm_count = 0; + Kokkos::parallel_reduce( + "redistribute_count", + Kokkos::RangePolicy( 0, positions.size() ), + KOKKOS_LAMBDA( const int p, int &result ) { + if ( positions( p, Cajita::Dim::I ) < local_low[Cajita::Dim::I] || + positions( p, Cajita::Dim::I ) > local_high[Cajita::Dim::I] || + positions( p, Cajita::Dim::J ) < local_low[Cajita::Dim::J] || + positions( p, Cajita::Dim::J ) > local_high[Cajita::Dim::J] || + positions( p, Cajita::Dim::K ) < local_low[Cajita::Dim::K] || + positions( p, Cajita::Dim::K ) > local_high[Cajita::Dim::K] ) + result += 1; + }, + comm_count ); + + MPI_Allreduce( MPI_IN_PLACE, &comm_count, 1, MPI_INT, MPI_SUM, + local_grid.globalGrid().comm() ); + + return comm_count; +} + +namespace Impl +{ +//---------------------------------------------------------------------------// +// Locate the particles in the local grid and get their destination rank. +// Particles are assumed to only migrate to a location in the 26 neighbor halo +// or stay on this rank. If the particle crosses a global periodic boundary, +// wrap it's coordinates back into the domain. +template +void getMigrateDestinations( const LocalGridType &local_grid, + const NeighborRankView &neighbor_ranks, + DestinationRankView &destinations, + const PositionSliceType &positions ) +{ + using execution_space = typename PositionSliceType::execution_space; + + auto local_mesh = Cajita::createLocalMesh( local_grid ); + + // Check within the local domain. + const Kokkos::Array local_low = { + local_mesh.lowCorner( Cajita::Own(), Cajita::Dim::I ), + local_mesh.lowCorner( Cajita::Own(), Cajita::Dim::J ), + local_mesh.lowCorner( Cajita::Own(), Cajita::Dim::K )}; + const Kokkos::Array local_high = { + local_mesh.highCorner( Cajita::Own(), Cajita::Dim::I ), + local_mesh.highCorner( Cajita::Own(), Cajita::Dim::J ), + local_mesh.highCorner( Cajita::Own(), Cajita::Dim::K )}; + + Kokkos::parallel_for( + "get_migrate_destinations", + Kokkos::RangePolicy( 0, positions.size() ), + KOKKOS_LAMBDA( const int p ) { + // Compute the logical index of the neighbor we are sending to. + int nid[3] = {1, 1, 1}; + for ( int d = 0; d < 3; ++d ) + { + if ( positions( p, d ) < local_low[d] ) + nid[d] = 0; + else if ( positions( p, d ) > local_high[d] ) + nid[d] = 2; + } + + // Compute the destination MPI rank. + destinations( p ) = neighbor_ranks( + nid[Cajita::Dim::I] + + 3 * ( nid[Cajita::Dim::J] + 3 * nid[Cajita::Dim::K] ) ); + } ); + // TODO: fuse kernels + periodicWrap( local_grid, positions ); +} + +} // namespace Impl + +//---------------------------------------------------------------------------// +/*! 
+ \brief gridDistributor determines which data should be migrated from one + uniquely-owned decomposition to another uniquely-owned decomposition, using + bounds of a Cajita grid and taking periodic boundaries into account. + + \tparam LocalGridType Cajita LocalGrid type. + + \tparam PositionContainer AoSoA type. + + \param local_grid The local grid containing periodicity and system bound + information. + + \param positions The particle positions. + + \return Distributor for later migration. +*/ +template +Distributor +gridDistributor( const LocalGridType &local_grid, PositionSliceType &positions ) +{ + using device_type = typename PositionSliceType::device_type; + + // Get all 26 neighbor ranks. + auto topology = Impl::getTopology( local_grid ); + + Kokkos::View + neighbor_ranks( topology.data(), topology.size() ); + auto nr_mirror = + Kokkos::create_mirror_view_and_copy( device_type(), neighbor_ranks ); + Kokkos::View destinations( + Kokkos::ViewAllocateWithoutInitializing( "destinations" ), + positions.size() ); + + // Determine destination ranks for all particles and wrap positions across + // periodic boundaries. + Impl::getMigrateDestinations( local_grid, nr_mirror, destinations, + positions ); + + // Ensure neighbor ranks are valid and unique. + auto valid_topology = Impl::getValidTopology( topology ); + + // Create the Cabana distributor. + Distributor distributor( local_grid.globalGrid().comm(), + destinations, valid_topology ); + return distributor; +} + +//---------------------------------------------------------------------------// +/*! + \brief gridMigrate migrates data from one uniquely-owned decomposition to + another uniquely-owned decomposition, using the bounds and periodic boundaries + of a Cajita grid to determine which particles should be moved. In-place + variant. + + \tparam LocalGridType Cajita LocalGrid type. + + \tparam PositionContainer AoSoA type. + + \tparam PositionIndex Particle position index within the AoSoA. + + \param local_grid The local grid containing periodicity and system bound + information. + + \param particles The particle AoSoA, containing positions. + + \param PositionIndex Particle position index within the AoSoA. + + \param min_halo_width Number of halo mesh widths to allow particles before + migrating. + + \param force_migrate Migrate particles outside the local domain regardless of + ghosted halo. +*/ +template +void gridMigrate( const LocalGridType &local_grid, ParticleContainer &particles, + std::integral_constant, + const int min_halo_width, const bool force_migrate = false ) +{ + // Get the positions. + auto positions = slice( particles ); + + // When false, this option checks that any particles are nearly outside the + // ghosted halo region (outside the min_halo_width) before initiating + // migration. Otherwise, anything outside the local domain is migrated + // regardless of position in the halo. + if ( !force_migrate ) + { + // Check to see if we need to communicate. + auto comm_count = migrateCount( local_grid, positions, min_halo_width ); + + // If we have no particles near the ghosted boundary, then exit. + if ( 0 == comm_count ) + return; + } + + auto distributor = gridDistributor( local_grid, positions ); + + // Redistribute the particles. + migrate( distributor, particles ); +} + +//---------------------------------------------------------------------------// +/*! 
+ \brief gridMigrate migrates data from one uniquely-owned decomposition to + another uniquely-owned decomposition, using the bounds and periodic boundaries + of a Cajita grid to determine which particles should be moved. Separate AoSoA + variant. + + \tparam LocalGridType Cajita LocalGrid type. + + \tparam ParticleContainer AoSoA type. + + \tparam PositionIndex Particle position index within the AoSoA. + + \param local_grid The local grid containing periodicity and system bound + information. + + \param src_particles The source particle AoSoA, containing positions. + + \param PositionIndex Particle position index within the AoSoA. + + \param src_particles The destination particle AoSoA, containing positions. + + \param min_halo_width Number of halo mesh widths to allow particles before + migrating. + + \param force_migrate Migrate particles outside the local domain regardless of + ghosted halo. +*/ +template +void gridMigrate( const LocalGridType &local_grid, + ParticleContainer &src_particles, + std::integral_constant, + ParticleContainer &dst_particles, const int min_halo_width, + const bool force_migrate = false ) +{ + // Get the positions. + auto positions = slice( src_particles ); + + // When false, this option checks that any particles are nearly outside the + // ghosted halo region (outside the min_halo_width) before initiating + // migration. Otherwise, anything outside the local domain is migrated + // regardless of position in the halo. + if ( !force_migrate ) + { + // Check to see if we need to communicate. + auto comm_count = migrateCount( local_grid, positions, min_halo_width ); + + // If we have no particles near the ghosted boundary, copy, then exit. + if ( 0 == comm_count ) + { + Cabana::deep_copy( dst_particles, src_particles ); + return; + } + } + + auto distributor = gridDistributor( local_grid, positions ); + + // Resize as needed. + dst_particles.resize( distributor.totalNumImport() ); + + // Redistribute the particles. + migrate( distributor, src_particles, dst_particles ); +} + +//---------------------------------------------------------------------------// +// Grid Halo/gather +//---------------------------------------------------------------------------// + +namespace Impl +{ +//---------------------------------------------------------------------------// +// Locate particles within the local grid and determine if any from this rank +// need to be ghosted to one (or more) of the 26 neighbor ranks, keeping track +// of destination rank, index in the container, and periodic shift needed (but +// not yet applied). +template +void getHaloIds( const LocalGridType &local_grid, CountView &send_count, + DestinationRankView &destinations, DestinationRankView &ids, + ShiftRankView &shifts, const PositionSliceType &positions, + const int minimum_halo_width ) +{ + using execution_space = typename PositionSliceType::execution_space; + + // Check within the halo width, within the local domain. 
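//---------------------------------------------------------------------------//
// Illustrative usage sketch for the gridMigrate()/gridDistributor() interface
// documented above. The names, the position member index (1), and the sizes
// are hypothetical; a minimal sketch under those assumptions.
//
//   // In-place AoSoA migration; the container is resized as needed.
//   Cabana::gridMigrate( *local_grid, particles,
//                        std::integral_constant<std::size_t, 1>(),
//                        min_halo_width );
//
//   // Slices cannot be resized, so build the distributor directly and
//   // migrate into a pre-sized destination.
//   auto positions   = Cabana::slice<1>( particles );
//   auto distributor = Cabana::gridDistributor( *local_grid, positions );
//   dst_particles.resize( distributor.totalNumImport() );
//   auto dst_positions = Cabana::slice<1>( dst_particles );
//   Cabana::migrate( distributor, positions, dst_positions );
//---------------------------------------------------------------------------//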
+ const auto &global_grid = local_grid.globalGrid(); + const Kokkos::Array periodic = { + global_grid.isPeriodic( Cajita::Dim::I ), + global_grid.isPeriodic( Cajita::Dim::J ), + global_grid.isPeriodic( Cajita::Dim::K )}; + const Kokkos::Array low_edge = { + global_grid.dimNumBlock( Cajita::Dim::I ) == 0, + global_grid.dimNumBlock( Cajita::Dim::J ) == 0, + global_grid.dimNumBlock( Cajita::Dim::K ) == 0}; + const Kokkos::Array high_edge = { + global_grid.dimNumBlock( Cajita::Dim::I ) - 1 == + global_grid.dimBlockId( Cajita::Dim::I ), + global_grid.dimNumBlock( Cajita::Dim::J ) - 1 == + global_grid.dimBlockId( Cajita::Dim::J ), + global_grid.dimNumBlock( Cajita::Dim::K ) - 1 == + global_grid.dimBlockId( Cajita::Dim::K )}; + const auto &global_mesh = global_grid.globalMesh(); + const Kokkos::Array global_extent = { + global_mesh.extent( Cajita::Dim::I ), + global_mesh.extent( Cajita::Dim::J ), + global_mesh.extent( Cajita::Dim::K )}; + auto local_mesh = Cajita::createLocalMesh( local_grid ); + + // TODO: rework with sharedIndexSpace + auto halo_ids_func = KOKKOS_LAMBDA( const int p ) + { + Kokkos::Array pos = {positions( p, Cajita::Dim::I ), + positions( p, Cajita::Dim::J ), + positions( p, Cajita::Dim::K )}; + + // Add a ghost if this particle is near the local boundary, potentially + // for each of the 26 neighbors cells. + int nr = 0; + for ( int k = -1; k < 2; ++k ) + for ( int j = -1; j < 2; ++j ) + for ( int i = -1; i < 2; ++i, ++nr ) + { + // Check the if particle is both in the owned space and the + // ghosted space of this neighbor. + auto sis = local_grid.sharedIndexSpace( + Cajita::Own(), Cajita::Node(), i, j, k, + minimum_halo_width ); + Kokkos::Array min_coord; + Kokkos::Array max_coord; + const int min_ind_i = sis.min( Cajita::Dim::I ); + const int min_ind_j = sis.min( Cajita::Dim::J ); + const int min_ind_k = sis.min( Cajita::Dim::K ); + Kokkos::Array min_ind = {min_ind_i, min_ind_j, + min_ind_k}; + const int max_ind_i = sis.max( Cajita::Dim::I ); + const int max_ind_j = sis.max( Cajita::Dim::J ); + const int max_ind_k = sis.max( Cajita::Dim::K ); + Kokkos::Array max_ind = {max_ind_i, max_ind_j, + max_ind_k}; + local_mesh.coordinates( Cajita::Cell(), min_ind.data(), + min_coord.data() ); + local_mesh.coordinates( Cajita::Cell(), max_ind.data(), + max_coord.data() ); + if ( ( pos[Cajita::Dim::I] > min_coord[Cajita::Dim::I] && + pos[Cajita::Dim::I] < max_coord[Cajita::Dim::I] ) && + ( pos[Cajita::Dim::J] > min_coord[Cajita::Dim::J] && + pos[Cajita::Dim::J] < max_coord[Cajita::Dim::J] ) && + ( pos[Cajita::Dim::K] > min_coord[Cajita::Dim::K] && + pos[Cajita::Dim::K] < max_coord[Cajita::Dim::K] ) ) + { + const std::size_t sc = send_count()++; + // If the size of the arrays is exceeded, keep + // counting to resize and fill next. + if ( sc < destinations.extent( 0 ) ) + { + // Keep the destination MPI rank. + destinations( sc ) = + local_grid.neighborRank( i, j, k ); + // Keep the particle ID. + ids( sc ) = p; + // Determine if this ghost particle needs to be + // shifted through the periodic boundary. + for ( int d = 0; d < 3; ++d ) + { + if ( periodic[d] ) + { + if ( high_edge[d] ) + shifts( sc, d ) -= global_extent[d]; + else if ( low_edge[d] ) + shifts( sc, d ) += global_extent[d]; + } + } + } + } + } + }; + + auto policy = Kokkos::RangePolicy( 0, positions.size() ); + Kokkos::parallel_for( "get_halo_ids", policy, halo_ids_func ); + + // Shift periodic coordinates in send buffers. 
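    // Note that the shifts recorded above are not applied to the positions
    // here; they are stored per send entry and added to each packed tuple
    // later by PeriodicHalo::modify_buffer() while the gather send buffer is
    // filled. Sign convention: a particle ghosted from the global high edge
    // of a periodic dimension carries -global_extent[d], one ghosted from
    // the low edge carries +global_extent[d], so its ghost image lands on
    // the correct side of the receiving block.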
+} + +} // namespace Impl + +//---------------------------------------------------------------------------// +/*! + \brief gridHalo determines which data should be ghosted on another + decomposition, using bounds of a Cajita grid and taking periodic boundaries + into account. + + \tparam LocalGridType Cajita LocalGrid type. + + \tparam PositionSliceType Slice/View type. + + \param local_grid The local grid containing periodicity and system bound + information. + + \param positions The particle positions. + + \param min_halo_width Number of halo mesh widths to include for ghosting. + + \param max_export_guess The allocation size for halo export ranks, IDs, and + periodic shifts + + \return pair A std::pair containing the Halo and periodic shift array for + a later gather. +*/ +template +auto gridHalo( const LocalGridType &local_grid, PositionSliceType &positions, + const int min_halo_width, const int max_export_guess = 0 ) +{ + using device_type = typename PositionSliceType::device_type; + using pos_value = typename PositionSliceType::value_type; + + // Get all 26 neighbor ranks. + auto topology = Impl::getTopology( local_grid ); + + Kokkos::View destinations( + Kokkos::ViewAllocateWithoutInitializing( "destinations" ), + max_export_guess ); + Kokkos::View ids( + Kokkos::ViewAllocateWithoutInitializing( "ids" ), max_export_guess ); + Kokkos::View shifts( + Kokkos::ViewAllocateWithoutInitializing( "shifts" ), max_export_guess, + 3 ); + Kokkos::View> + send_count( "halo_send_count" ); + + // Determine which particles need to be ghosted to neighbors. + Impl::getHaloIds( local_grid, send_count, destinations, ids, shifts, + positions, min_halo_width ); + + // Resize views to actual send sizes. + int dest_size = destinations.extent( 0 ); + int dest_count = 0; + Kokkos::deep_copy( dest_count, send_count ); + if ( dest_count != dest_size ) + { + Kokkos::resize( destinations, dest_count ); + Kokkos::resize( ids, dest_count ); + Kokkos::resize( shifts, dest_count, 3 ); + } + // If original view sizes were exceeded, only counting was done so we + // need to rerun. + if ( dest_count > dest_size ) + { + Kokkos::deep_copy( send_count, 0 ); + Impl::getHaloIds( local_grid, send_count, destinations, ids, shifts, + positions, min_halo_width ); + } + + // Ensure neighbor ranks are valid and unique. + auto valid_topology = Impl::getValidTopology( topology ); + + // Create the Cabana Halo. + Halo halo( local_grid.globalGrid().comm(), positions.size(), + ids, destinations, valid_topology ); + return std::make_pair( halo, shifts ); +} + +//---------------------------------------------------------------------------// +/*! + \class PeriodicHalo + + \brief Store information for periodic halo communication. +*/ +template +struct PeriodicHalo +{ + using TupleType = typename ParticleContainer::tuple_type; + + std::shared_ptr _halo; + ShiftViewType _shifts; + + /*! + \brief Constructor. + + \param pair Pair of inputs containing Halo and periodic shift View. + This pair is returned by the gridHalo function. + + \param PositionIndex Particle position index within the AoSoA. 
+ */ + PeriodicHalo( std::pair pair, + std::integral_constant ) + : _halo( std::make_shared( pair.first ) ) + , _shifts( pair.second ) + { + } + ~PeriodicHalo(){}; + + auto getHalo() const { return _halo; } + + template + KOKKOS_INLINE_FUNCTION void modify_buffer( ViewType &send_buffer, + const int i ) const + { + for ( int d = 0; d < 3; ++d ) + get( send_buffer( i ), d ) += _shifts( i, d ); + } +}; + +//---------------------------------------------------------------------------// +/*! + \brief Create a periodic halo. + + \param local_grid The local grid for creating halo and periodicity. + + \param particles The particle AoSoA, containing positions. + + \param PositionIndex Particle position index within the AoSoA. + + \param min_halo_width Number of halo mesh widths to include for ghosting. + + \param max_export_guess The allocation size for halo export ranks, IDs, and + periodic shifts. + + \return pair A std::pair containing the Halo and periodic shift array. +*/ +template +auto createPeriodicHalo( const LocalGridType &local_grid, + const ParticleContainer &particles, + std::integral_constant, + const int min_halo_width, + const int max_export_guess = 0 ) +{ + using view_type = + Kokkos::View; + using halo_type = Halo; + + auto positions = slice( particles ); + std::pair pair = + gridHalo( local_grid, positions, min_halo_width, max_export_guess ); + + using phalo_type = + PeriodicHalo; + return std::make_shared( phalo_type( + pair, std::integral_constant() ) ); +} + +//---------------------------------------------------------------------------// +/*! + \brief gridGather gathers data from one decomposition and ghosts on + another decomposition, using the bounds and periodicity of a Cajita grid + to determine which particles should be copied. AoSoA variant. + + \tparam PeriodicHaloType Periodic halo type. + + \tparam ParticleContainer AoSoA type. + + \param periodic_halo The halo and periodicity shift details. + + \param particles The particle AoSoA, containing positions. 
+*/ +template +void gridGather( const PeriodicHaloType &periodic_halo, + ParticleContainer &particles ) +{ + auto halo = periodic_halo.getHalo(); + particles.resize( halo->numLocal() + halo->numGhost() ); + + gather( *halo, particles, periodic_halo ); +} + +// TODO: slice version + +} // end namespace Cabana + +#endif // end CABANA_PERIODICCOMM_HPP diff --git a/core/unit_test/CMakeLists.txt b/core/unit_test/CMakeLists.txt index 1fc043d05..fe2c9a711 100644 --- a/core/unit_test/CMakeLists.txt +++ b/core/unit_test/CMakeLists.txt @@ -59,6 +59,9 @@ macro(Cabana_add_tests) target_include_directories(${_target} PRIVATE ${_dir} ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}) target_link_libraries(${_target} cabanacore cabana_core_gtest) + if(_test STREQUAL ParticleGridCommunication) + target_link_libraries(${_target} Cajita) + endif() if(CABANA_UNIT_TEST_MPI) foreach(_np ${CABANA_UNIT_TEST_MPIEXEC_NUMPROCS}) # NOTE: When moving to CMake 3.10+ make sure to use MPIEXEC_EXECUTABLE instead @@ -86,4 +89,7 @@ if(Cabana_ENABLE_ARBORX) endif() if(Cabana_ENABLE_MPI) Cabana_add_tests(MPI NAMES CommunicationPlan Distributor Halo) + if(Cabana_ENABLE_CAJITA) + Cabana_add_tests(MPI NAMES ParticleGridCommunication) + endif() endif() diff --git a/core/unit_test/tstParticleGridCommunication.hpp b/core/unit_test/tstParticleGridCommunication.hpp new file mode 100644 index 000000000..02c58aac0 --- /dev/null +++ b/core/unit_test/tstParticleGridCommunication.hpp @@ -0,0 +1,387 @@ +/**************************************************************************** + * Copyright (c) 2018-2020 by the Cabana authors * + * All rights reserved. * + * * + * This file is part of the Cabana library. Cabana is distributed under a * + * BSD 3-clause license. For the licensing terms see the LICENSE file in * + * the top-level directory. * + * * + * SPDX-License-Identifier: BSD-3-Clause * + ****************************************************************************/ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include + +#include + +#include +#include +#include + +namespace Test +{ + +//---------------------------------------------------------------------------// +void testMigrate( const int halo_width, const int test_halo_width, + const bool force_comm, const int test_type ) +{ + // Create the MPI partitions. + Cajita::UniformDimPartitioner partitioner; + + // Create the global MPI decomposition mesh. + std::array lo_corner = {-3.2, -0.5, 2.2}; + std::array hi_corner = {-0.2, 5.9, 4.2}; + std::array num_cell = {10, 10, 10}; + auto global_mesh = + Cajita::createUniformGlobalMesh( lo_corner, hi_corner, num_cell ); + + // Create the global grid. + std::array is_periodic = {true, true, true}; + auto global_grid = Cajita::createGlobalGrid( MPI_COMM_WORLD, global_mesh, + is_periodic, partitioner ); + + // Create a grid local_grid with large enough halo for test loops below. + auto local_grid = Cajita::createLocalGrid( global_grid, 4 ); + auto local_mesh = Cajita::createLocalMesh( *local_grid ); + + // Make some data to migrate, one for each neighbor (and one in the + // center). + int num_data = 27; + const int pos_index = 1; + using DataTypes = Cabana::MemberTypes; + Cabana::AoSoA data_host( "host", num_data ); + auto id = Cabana::slice<0>( data_host ); + auto pos = Cabana::slice( data_host ); + + // Get mesh info. 
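//---------------------------------------------------------------------------//
// Illustrative usage sketch for the ghost exchange interface defined in
// Cabana_ParticleGridCommunication.hpp above. Names and the position member
// index (1) are hypothetical; a minimal sketch under those assumptions.
// createPeriodicHalo() wraps gridHalo(), bundling the Halo with the per-send
// periodic shifts, and gridGather() resizes the AoSoA before forwarding to
// the periodic-aware gather().
//
//   auto periodic_halo = Cabana::createPeriodicHalo(
//       *local_grid, particles,
//       std::integral_constant<std::size_t, 1>(), min_halo_width );
//   Cabana::gridGather( *periodic_halo, particles );
//---------------------------------------------------------------------------//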
+ auto dx = local_grid->globalGrid().globalMesh().cellSize( Cajita::Dim::I ); + auto dy = local_grid->globalGrid().globalMesh().cellSize( Cajita::Dim::J ); + auto dz = local_grid->globalGrid().globalMesh().cellSize( Cajita::Dim::K ); + auto lo_x = local_mesh.lowCorner( Cajita::Ghost(), Cajita::Dim::I ); + auto lo_y = local_mesh.lowCorner( Cajita::Ghost(), Cajita::Dim::J ); + auto lo_z = local_mesh.lowCorner( Cajita::Ghost(), Cajita::Dim::K ); + auto hi_x = local_mesh.highCorner( Cajita::Ghost(), Cajita::Dim::I ); + auto hi_y = local_mesh.highCorner( Cajita::Ghost(), Cajita::Dim::J ); + auto hi_z = local_mesh.highCorner( Cajita::Ghost(), Cajita::Dim::K ); + auto width_x = local_mesh.extent( Cajita::Ghost(), Cajita::Dim::I ) / 2.0; + auto width_y = local_mesh.extent( Cajita::Ghost(), Cajita::Dim::J ) / 2.0; + auto width_z = local_mesh.extent( Cajita::Ghost(), Cajita::Dim::K ) / 2.0; + auto center_x = width_x + lo_x; + auto center_y = width_y + lo_y; + auto center_z = width_z + lo_z; + auto shift_x = width_x - ( halo_width - 0.1 ) * dx; + auto shift_y = width_y - ( halo_width - 0.1 ) * dy; + auto shift_z = width_z - ( halo_width - 0.1 ) * dz; + + // Fill the data. Add particles outside the local domain in each direction + // and one in the center (that should never move). + int nr = 0; + for ( int k = -1; k < 2; ++k ) + for ( int j = -1; j < 2; ++j ) + for ( int i = -1; i < 2; ++i, ++nr ) + { + id( nr ) = nr; + pos( nr, 0 ) = center_x + i * shift_x; + pos( nr, 1 ) = center_y + j * shift_y; + pos( nr, 2 ) = center_z + k * shift_z; + } + + // Add a particle on rank zero to force some resizing for sends. + int my_rank = -1; + MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); + if ( my_rank == 0 ) + { + num_data++; + data_host.resize( num_data ); + auto pos = Cabana::slice( data_host ); + pos( num_data - 1, 0 ) = center_x + shift_x; + pos( num_data - 1, 1 ) = center_y + shift_y; + pos( num_data - 1, 2 ) = center_z + shift_z; + } + + Cabana::AoSoA initial( "initial", num_data ); + Cabana::deep_copy( initial, data_host ); + auto pos_initial = Cabana::slice( initial ); + + // Copy to the device. + Cabana::AoSoA data_src( "data_src", num_data ); + Cabana::deep_copy( data_src, data_host ); + + // Do the migration in-place. + if ( test_type == 0 ) + { + Cabana::gridMigrate( *local_grid, data_src, + std::integral_constant(), + test_halo_width, force_comm ); + + data_host.resize( data_src.size() ); + Cabana::deep_copy( data_host, data_src ); + } + // Do the migration with a separate destination AoSoA. + else if ( test_type == 1 ) + { + Cabana::AoSoA data_dst( "data_dst", + num_data ); + Cabana::gridMigrate( *local_grid, data_src, + std::integral_constant(), + data_dst, test_halo_width, force_comm ); + + data_host.resize( data_dst.size() ); + Cabana::deep_copy( data_host, data_dst ); + } + // Do the migration with separate slices (need to use gridDistributor + // directly since slices can't be resized). + else if ( test_type == 2 ) + { + auto pos_src = Cabana::slice( data_src ); + int comm_count = 0; + if ( !force_comm ) + { + // Check to see if we need to communicate. 
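            // migrateCount() reduces the per-rank count with MPI_Allreduce,
            // so every rank sees the same value and all ranks make the same
            // decision about entering the collective gridDistributor() path
            // below.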
+ comm_count = migrateCount( *local_grid, pos_src, test_halo_width ); + } + + if ( force_comm || comm_count > 0 ) + { + auto distributor = Cabana::gridDistributor( *local_grid, pos_src ); + Cabana::AoSoA data_dst( + "data_dst", distributor.totalNumImport() ); + auto pos_dst = Cabana::slice( data_dst ); + Cabana::migrate( distributor, pos_src, pos_dst ); + + data_host.resize( data_dst.size() ); + Cabana::deep_copy( data_host, data_dst ); + } + else + { + data_host.resize( data_src.size() ); + Cabana::deep_copy( data_host, data_src ); + } + } + + // Check the results. + int new_num_data = data_host.size(); + auto pos_host = Cabana::slice( data_host ); + + for ( int i = 0; i < new_num_data; ++i ) + { + // Make sure particles haven't moved if within allowable halo mesh and + // migrate is not being forced. + if ( !force_comm && test_halo_width < halo_width ) + { + EXPECT_DOUBLE_EQ( pos_host( i, Cajita::Dim::I ), + pos_initial( i, Cajita::Dim::I ) ); + EXPECT_DOUBLE_EQ( pos_host( i, Cajita::Dim::J ), + pos_initial( i, Cajita::Dim::J ) ); + EXPECT_DOUBLE_EQ( pos_host( i, Cajita::Dim::K ), + pos_initial( i, Cajita::Dim::K ) ); + } + else + { + // Make sure everything was wrapped into the global domain. + EXPECT_LE( pos_host( i, Cajita::Dim::I ), + global_mesh->highCorner( Cajita::Dim::I ) ); + EXPECT_LE( pos_host( i, Cajita::Dim::J ), + global_mesh->highCorner( Cajita::Dim::J ) ); + EXPECT_LE( pos_host( i, Cajita::Dim::K ), + global_mesh->highCorner( Cajita::Dim::K ) ); + EXPECT_GE( pos_host( i, Cajita::Dim::I ), + global_mesh->lowCorner( Cajita::Dim::I ) ); + EXPECT_GE( pos_host( i, Cajita::Dim::J ), + global_mesh->lowCorner( Cajita::Dim::J ) ); + EXPECT_GE( pos_host( i, Cajita::Dim::K ), + global_mesh->lowCorner( Cajita::Dim::K ) ); + + // Make sure everything was wrapped into the local domain. + EXPECT_LE( pos_host( i, Cajita::Dim::I ), hi_x ); + EXPECT_LE( pos_host( i, Cajita::Dim::J ), hi_y ); + EXPECT_LE( pos_host( i, Cajita::Dim::K ), hi_z ); + EXPECT_GE( pos_host( i, Cajita::Dim::I ), lo_x ); + EXPECT_GE( pos_host( i, Cajita::Dim::J ), lo_y ); + EXPECT_GE( pos_host( i, Cajita::Dim::K ), lo_z ); + } + } + + // Make sure the particles are still unique. + int final_total = 0; + int initial_total = 0; + MPI_Allreduce( &new_num_data, &final_total, 1, MPI_INT, MPI_SUM, + MPI_COMM_WORLD ); + MPI_Allreduce( &num_data, &initial_total, 1, MPI_INT, MPI_SUM, + MPI_COMM_WORLD ); + EXPECT_EQ( final_total, initial_total ); +} + +//---------------------------------------------------------------------------// +void testHalo( const int halo_width, const int test_type ) +{ + // Create the MPI partitions. + Cajita::UniformDimPartitioner partitioner; + + // Create the global MPI decomposition mesh. + std::array lo_corner = {0, 0, 0}; + std::array hi_corner = {1, 1, 1}; + std::array num_cell = {10, 10, 10}; + auto global_mesh = + Cajita::createUniformGlobalMesh( lo_corner, hi_corner, num_cell ); + + // Create the global grid. + std::array is_periodic = {true, true, true}; + auto global_grid = Cajita::createGlobalGrid( MPI_COMM_WORLD, global_mesh, + is_periodic, partitioner ); + + // Create a grid local_grid. + auto local_grid = Cajita::createLocalGrid( global_grid, 4 ); + auto local_mesh = Cajita::createLocalMesh( *local_grid ); + + // Make some data to migrate, one for each rank neighbor. This tests + // only for neighbors within one ghost shell. 
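    // 27 = 3 x 3 x 3: one particle per (i,j,k) neighbor offset in {-1,0,1}^3,
    // including the center particle that stays on this rank. Each particle is
    // placed half the owned extent minus ( halo_width - 0.1 ) cells from the
    // center, i.e. just inside the owned boundary and within halo_width cells
    // of it, so it should be ghosted to the corresponding neighbor(s).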
+ int num_data = 27; + const int pos_index = 1; + using DataTypes = Cabana::MemberTypes; + Cabana::AoSoA data_host( "host", num_data ); + auto id = Cabana::slice<0>( data_host ); + auto pos = Cabana::slice( data_host ); + + // Get mesh info. + auto dx = local_grid->globalGrid().globalMesh().cellSize( Cajita::Dim::I ); + auto dy = local_grid->globalGrid().globalMesh().cellSize( Cajita::Dim::J ); + auto dz = local_grid->globalGrid().globalMesh().cellSize( Cajita::Dim::K ); + auto lo_x = local_mesh.lowCorner( Cajita::Own(), Cajita::Dim::I ); + auto lo_y = local_mesh.lowCorner( Cajita::Own(), Cajita::Dim::J ); + auto lo_z = local_mesh.lowCorner( Cajita::Own(), Cajita::Dim::K ); + auto hi_x = local_mesh.highCorner( Cajita::Own(), Cajita::Dim::I ); + auto hi_y = local_mesh.highCorner( Cajita::Own(), Cajita::Dim::J ); + auto hi_z = local_mesh.highCorner( Cajita::Own(), Cajita::Dim::K ); + auto width_x = local_mesh.extent( Cajita::Own(), Cajita::Dim::I ) / 2.0; + auto width_y = local_mesh.extent( Cajita::Own(), Cajita::Dim::J ) / 2.0; + auto width_z = local_mesh.extent( Cajita::Own(), Cajita::Dim::K ) / 2.0; + auto center_x = width_x + lo_x; + auto center_y = width_y + lo_y; + auto center_z = width_z + lo_z; + auto shift_x = width_x - ( halo_width - 0.1 ) * dx; + auto shift_y = width_y - ( halo_width - 0.1 ) * dy; + auto shift_z = width_z - ( halo_width - 0.1 ) * dz; + + // Fill the data. Add particles near the local domain boundary in each + // direction. + int nr = 0; + for ( int k = -1; k < 2; ++k ) + for ( int j = -1; j < 2; ++j ) + for ( int i = -1; i < 2; ++i, ++nr ) + { + id( nr ) = nr; + pos( nr, 0 ) = center_x + i * shift_x; + pos( nr, 1 ) = center_y + j * shift_y; + pos( nr, 2 ) = center_z + k * shift_z; + } + + // Add a particle on rank zero to force some resizing for sends. + int my_rank = -1; + MPI_Comm_rank( MPI_COMM_WORLD, &my_rank ); + if ( my_rank == 0 ) + { + num_data++; + data_host.resize( num_data ); + auto pos = Cabana::slice( data_host ); + pos( num_data - 1, 0 ) = center_x + shift_x; + pos( num_data - 1, 1 ) = center_y + shift_y; + pos( num_data - 1, 2 ) = center_z + shift_z; + } + + Cabana::AoSoA initial( "initial", num_data ); + Cabana::deep_copy( initial, data_host ); + auto pos_initial = Cabana::slice( initial ); + + // Copy to the device. + Cabana::AoSoA data_src( "data_src", num_data ); + Cabana::deep_copy( data_src, data_host ); + + if ( test_type == 0 ) + { + // Do the gather with the AoSoA. + auto periodic_halo = createPeriodicHalo( + *local_grid, data_src, + std::integral_constant(), halo_width ); + gridGather( *periodic_halo, data_src ); + Cabana::deep_copy( data_host, data_src ); + } + + // Check the results. + int new_num_data = data_host.size(); + auto pos_host = Cabana::slice( data_host ); + + for ( int i = 0; i < num_data; ++i ) + { + // Make sure the original particles are unchanged. + for ( int d = 0; d < 3; ++d ) + EXPECT_FLOAT_EQ( pos_host( i, d ), pos_initial( i, d ) ); + } + + for ( int i = num_data; i < new_num_data; ++i ) + { + // Make sure all ghosts are within halo region. + EXPECT_GE( pos_host( i, Cajita::Dim::I ), hi_x + shift_x ); + EXPECT_LE( pos_host( i, Cajita::Dim::J ), hi_y + shift_y ); + EXPECT_LE( pos_host( i, Cajita::Dim::K ), hi_z + shift_z ); + EXPECT_GE( pos_host( i, Cajita::Dim::I ), lo_x - shift_x ); + EXPECT_GE( pos_host( i, Cajita::Dim::J ), lo_y - shift_y ); + EXPECT_GE( pos_host( i, Cajita::Dim::K ), lo_z - shift_z ); + } + + // TODO: more checks... 
+} + +//---------------------------------------------------------------------------// +// RUN TESTS +//---------------------------------------------------------------------------// + +TEST( TEST_CATEGORY, periodic_test_migrate_inplace ) +{ + // Call with varied halo data and halo width, without forced communication + for ( int i = 1; i < 4; i++ ) + for ( int j = 0; j < 4; j++ ) + testMigrate( i, j, false, 0 ); + + // Call with forced communication + testMigrate( 1, 1, true, 0 ); +} + +TEST( TEST_CATEGORY, periodic_test_migrate_separate ) +{ + for ( int i = 1; i < 4; i++ ) + for ( int j = 1; j < 4; j++ ) + testMigrate( i, j, false, 1 ); + + testMigrate( 1, 1, true, 1 ); +} + +TEST( TEST_CATEGORY, periodic_test_migrate_slice ) +{ + for ( int i = 1; i < 4; i++ ) + for ( int j = 1; j < 4; j++ ) + testMigrate( i, j, false, 2 ); + + testMigrate( 1, 1, true, 2 ); +} + +TEST( TEST_CATEGORY, periodic_test_gather_inplace ) +{ + for ( int i = 1; i < 2; i++ ) + testHalo( i, 0 ); +} +//---------------------------------------------------------------------------// + +} // end namespace Test