diff --git a/.github/workflows/AT2.yml b/.github/workflows/AT2.yml index 01937667d612..b232051eddf2 100644 --- a/.github/workflows/AT2.yml +++ b/.github/workflows/AT2.yml @@ -6,8 +6,8 @@ on: - opened - synchronize branches: - - master - - develop + - master + - develop workflow_dispatch: # Cancels any in progress 'workflows' associated with this PR diff --git a/.github/workflows/spack.yml b/.github/workflows/spack.yml index c722c1287ed1..59976c1d9b3e 100644 --- a/.github/workflows/spack.yml +++ b/.github/workflows/spack.yml @@ -4,9 +4,9 @@ on: types: - opened - synchronize - branches: - - master - - develop + branches: + - master + - develop workflow_dispatch: # Cancels any in progress 'workflow' associated with this PR diff --git a/packages/PyTrilinos2/CMakeLists.txt b/packages/PyTrilinos2/CMakeLists.txt index d912b7efe078..3813a95d5cf0 100644 --- a/packages/PyTrilinos2/CMakeLists.txt +++ b/packages/PyTrilinos2/CMakeLists.txt @@ -29,7 +29,7 @@ PYTRILINOS2_CMAKE_ERROR TRIBITS_ADD_OPTION_AND_DEFINE(PyTrilinos2_BINDER_VERBOSE PYTRILINOS2_B_VERBOSE "Increase the verbosity of binder." 
- OFF ) + OFF ) SET(PyTrilinos2_BINDER_NUM_FILES "100" CACHE STRING "Maxinum number of generated files by binder.") @@ -184,7 +184,7 @@ FOREACH(line IN LISTS eti_files_without_dir) ENDFOREACH(line) file(WRITE ${all_ETI_files_list} ${CONTENTS}) -SET(ETI_classes "Tpetra_CrsMatrix;Tpetra_Vector;Tpetra_MultiVector") +SET(ETI_classes "Tpetra_CrsMatrix;Tpetra_Vector;Tpetra_MultiVector;Tpetra_FEMultiVector;Tpetra_FECrsMatrix") SET(CONTENTS "") FOREACH(line IN LISTS ETI_classes) SET(CONTENTS "${CONTENTS}${line}\n") @@ -229,7 +229,7 @@ IF(PYTRILINOS2_B_VERBOSE) ENDIF() IF(PYTRILINOS2_SUPPRESS_ERRORS) list(APPEND BINDER_OPTIONS --suppress-errors) -ENDIF() +ENDIF() list(APPEND BINDER_OPTIONS --config ${CMAKE_CURRENT_SOURCE_DIR}/scripts/PyTrilinos2_config.cfg) list(APPEND BINDER_OPTIONS --) IF(TPL_ENABLE_CUDA) @@ -241,6 +241,14 @@ if (NOT(MPI_BASE_DIR STREQUAL "")) list(APPEND BINDER_OPTIONS -I${MPI_BASE_DIR}/include) ENDIF() list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/include_tmp) +list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/include_tmp/mdspan) +list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/include_tmp/View/MDSpan) +list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/include_tmp/experimental) +list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/include_tmp/experimental/__p0009_bits) +list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/include_tmp/experimental/__p1684_bits) +list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/include_tmp/experimental/__p2389_bits) +list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/include_tmp/experimental/__p2630_bits) +list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/include_tmp/experimental/__p2642_bits) list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/src) list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_SOURCE_DIR}/src) IF(NOT DEFINED PyTrilinos2_BINDER_GCC_TOOLCHAIN) diff --git a/packages/framework/ini-files/config-specs.ini 
b/packages/framework/ini-files/config-specs.ini index 2c0ce84d57bf..de052bca3530 100644 --- a/packages/framework/ini-files/config-specs.ini +++ b/packages/framework/ini-files/config-specs.ini @@ -1248,6 +1248,7 @@ opt-set-cmake-var Tpetra_INST_SERIAL BOOL FORCE : ON opt-set-cmake-var Zoltan_ENABLE_Scotch BOOL FORCE : OFF [CUDA11-RUN-SERIAL-TESTS] +opt-set-cmake-var Kokkos_CoreUnitTest_Cuda1_SET_RUN_SERIAL BOOL FORCE : ON opt-set-cmake-var KokkosKernels_sparse_cuda_MPI_1_SET_RUN_SERIAL BOOL FORCE : ON opt-set-cmake-var KokkosKernels_batched_dla_cuda_MPI_1_SET_RUN_SERIAL BOOL FORCE : ON opt-set-cmake-var Intrepid2_unit-test_MonolithicExecutable_Intrepid2_Tests_MPI_1_SET_RUN_SERIAL BOOL FORCE : ON diff --git a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp index 5e8378c027cb..ed918212c44d 100644 --- a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp +++ b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp @@ -165,6 +165,8 @@ class RBILUK : virtual public Ifpack2::RILUK< Tpetra::RowMatrix< typename Matrix // kk_handle_type;//test Teuchos::RCP KernelHandle_; + Teuchos::RCP L_Sptrsv_KernelHandle_; + Teuchos::RCP U_Sptrsv_KernelHandle_; //@} @@ -336,6 +338,8 @@ class RBILUK : virtual public Ifpack2::RILUK< Tpetra::RowMatrix< typename Matrix //! 
The inverse of the diagonal Teuchos::RCP D_block_inverse_; + + Kokkos::View tmp_; }; diff --git a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp index f68d8d96a793..592d4dcfafd2 100644 --- a/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp +++ b/packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp @@ -18,7 +18,7 @@ #include "Ifpack2_LocalFilter.hpp" #include "Ifpack2_Utilities.hpp" #include "Ifpack2_RILUK.hpp" -#include "KokkosSparse_trsv.hpp" +#include "KokkosSparse_sptrsv.hpp" //#define IFPACK2_RBILUK_INITIAL //#define IFPACK2_RBILUK_INITIAL_NOKK @@ -194,6 +194,11 @@ void RBILUK::allocate_L_and_U_blocks () U_block_->setAllToScalar (STM::zero ()); D_block_->setAllToScalar (STM::zero ()); + // Allocate temp space for apply + if (this->isKokkosKernelsSpiluk_) { + const auto numRows = L_block_->getLocalNumRows(); + tmp_ = decltype(tmp_)("RBILUK::tmp_", numRows * blockSize_); + } } this->isAllocated_ = true; } @@ -322,12 +327,21 @@ void RBILUK::initialize () if (this->isKokkosKernelsSpiluk_) { this->KernelHandle_ = Teuchos::rcp (new kk_handle_type ()); + const auto numRows = this->A_local_->getLocalNumRows(); KernelHandle_->create_spiluk_handle( KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1, - this->A_local_->getLocalNumRows(), + numRows, 2*this->A_local_->getLocalNumEntries()*(this->LevelOfFill_+1), 2*this->A_local_->getLocalNumEntries()*(this->LevelOfFill_+1), blockSize_); this->Graph_->initialize(KernelHandle_); // this calls spiluk_symbolic + + this->L_Sptrsv_KernelHandle_ = Teuchos::rcp (new kk_handle_type ()); + this->U_Sptrsv_KernelHandle_ = Teuchos::rcp (new kk_handle_type ()); + + KokkosSparse::Experimental::SPTRSVAlgorithm alg = KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1; + + this->L_Sptrsv_KernelHandle_->create_sptrsv_handle(alg, numRows, true /*lower*/, blockSize_); + this->U_Sptrsv_KernelHandle_->create_sptrsv_handle(alg, numRows, false 
/*upper*/, blockSize_); } else { this->Graph_->initialize (); @@ -914,6 +928,10 @@ void RBILUK::compute () KokkosSparse::Experimental::spiluk_numeric( KernelHandle_.getRawPtr(), this->LevelOfFill_, A_local_rowmap, A_local_entries, A_local_values, L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values ); + + // Now call symbolic for sptrsvs + KokkosSparse::Experimental::sptrsv_symbolic(L_Sptrsv_KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values); + KokkosSparse::Experimental::sptrsv_symbolic(U_Sptrsv_KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values); } } // Stop timing @@ -1070,7 +1088,7 @@ apply (const Tpetra::MultiVectorgetCrsGraph().getLocalRowPtrsHost(); - auto L_entries_host = L_block_->getCrsGraph().getLocalIndicesHost(); - auto U_row_ptrs_host = U_block_->getCrsGraph().getLocalRowPtrsHost(); - auto U_entries_host = U_block_->getCrsGraph().getLocalIndicesHost(); - auto L_values_host = L_block_->getValuesHost(); - auto U_values_host = U_block_->getValuesHost(); - - row_map_type* L_row_ptrs_host_ri = reinterpret_cast(&L_row_ptrs_host); - index_type* L_entries_host_ri = reinterpret_cast(&L_entries_host); - row_map_type* U_row_ptrs_host_ri = reinterpret_cast(&U_row_ptrs_host); - index_type* U_entries_host_ri = reinterpret_cast(&U_entries_host); - values_type* L_values_host_ri = reinterpret_cast(&L_values_host); - values_type* U_values_host_ri = reinterpret_cast(&U_values_host); + // Kokkos kernels impl. 
+ auto X_views = X.getLocalViewDevice(Tpetra::Access::ReadOnly); + auto Y_views = Y.getLocalViewDevice(Tpetra::Access::ReadWrite); - const auto numRows = L_block_->getLocalNumRows(); - local_matrix_host_type L_block_local_host("L_block_local_host", numRows, numRows, L_entries_host.size(), *L_values_host_ri, *L_row_ptrs_host_ri, *L_entries_host_ri, blockSize_); - local_matrix_host_type U_block_local_host("U_block_local_host", numRows, numRows, U_entries_host.size(), *U_values_host_ri, *U_row_ptrs_host_ri, *U_entries_host_ri, blockSize_); + auto lclL = L_block_->getLocalMatrixDevice(); + auto L_rowmap = lclL.graph.row_map; + auto L_entries = lclL.graph.entries; + auto L_values = lclL.values; + + auto lclU = U_block_->getLocalMatrixDevice(); + auto U_rowmap = lclU.graph.row_map; + auto U_entries = lclU.graph.entries; + auto U_values = lclU.values; if (mode == Teuchos::NO_TRANS) { - KokkosSparse::trsv("L", "N", "N", L_block_local_host, X_view, Y_view); - KokkosSparse::trsv("U", "N", "N", U_block_local_host, Y_view, Y_view); - KokkosBlas::axpby(alpha, Y_view, beta, Y_view); + { + const LO numVecs = X.getNumVectors(); + for (LO vec = 0; vec < numVecs; ++vec) { + auto X_view = Kokkos::subview(X_views, Kokkos::ALL(), vec); + auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), vec); + KokkosSparse::Experimental::sptrsv_solve(L_Sptrsv_KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values, X_view, tmp_); + } + } + + { + const LO numVecs = X.getNumVectors(); + for (LO vec = 0; vec < numVecs; ++vec) { + auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), vec); + KokkosSparse::Experimental::sptrsv_solve(U_Sptrsv_KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values, tmp_, Y_view); + } + } + + KokkosBlas::axpby(alpha, Y_views, beta, Y_views); } else { - KokkosSparse::trsv("U", "T", "N", U_block_local_host, X_view, Y_view); - KokkosSparse::trsv("L", "T", "N", L_block_local_host, Y_view, Y_view); - KokkosBlas::axpby(alpha, Y_view, beta, Y_view); + 
TEUCHOS_TEST_FOR_EXCEPTION( + true, std::runtime_error, + "Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm"); } //Y.getWrappedDualView().sync(); diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_Basis.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_Basis.hpp index 74b34efb6681..5779d95741e8 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_Basis.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_Basis.hpp @@ -379,6 +379,61 @@ using HostBasisPtr = BasisPtrinputPoints is only used to deduce the type of the points where to evaluate basis functions. + The rank of inputPoints and its size are not relevant, however, + when using DFAD types, inputPoints cannot be empty, + otherwise the size of the scratch space needed won't be deduced correctly. + + \param inputPoints [in] - view used only to deduce the type of the evaluation points + \param perTeamSpaceSize [out] - size of the scratch space needed per team + \param perThreadSpaceSize [out] - size of the scratch space needed per thread + */ + virtual + void getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType = OPERATOR_VALUE) const { + INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE( true, std::logic_error, + ">>> ERROR (Basis::getValuesScratchSpace): this method is not supported or should be overridden accordingly by derived classes."); + } + + + /** \brief Team-level evaluation of basis functions on a reference cell. + + Returns values of operatorType acting on basis functions for a set of + points in the reference cell for which the basis is defined. + + The interface also allows selecting the basis functions associated with a particular entity. + As an example, if subcellDim==1 (edges) and subcellOrdinal==0, outputValues will contain all the basis functions associated with the first edge. 
+ outputValues will contain all the cell basis functions when the default value (-1) is used for subcellDim and subcellOrdinal + + \param outputValues [out] - variable rank array with the basis values + \param inputPoints [in] - rank-2 array (P,D) with the evaluation points + \param operatorType [in] - the operator acting on the basis functions + \param teamMember [in] - team member of the Kokkos::TeamPolicy + \param scratchStorage [in] - scratch space to be used by each team + \param subcellDim [in] - the dimension of the subcells, the default value of -1 returns basis functions associated with subcells of all dimensions + \param subcellOrdinal [in] - the ordinal of the subcell, the default value of -1 returns basis functions associated with subcells of all ordinals + + \remark This function is supposed to be called within a TeamPolicy kernel. + The size of the required scratch space is determined by the getScratchSpaceSize function. + */ + KOKKOS_INLINE_FUNCTION + virtual + void getValues( OutputViewType /* outputValues */, + const PointViewType /* inputPoints */, + const EOperator /* operatorType */, + const typename Kokkos::TeamPolicy::member_type& teamMember, + const typename ExecutionSpace::scratch_memory_space &scratchStorage, + const ordinal_type subcellDim=-1, + const ordinal_type subcellOrdinal=-1) const { + INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE( true, std::logic_error, + ">>> ERROR (Basis::getValues): this method is not supported or should be overridden accordingly by derived classes."); + } + /** \brief Evaluation of a FEM basis on a reference cell. 
Returns values of operatorType acting on FEM basis functions for a set of diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_I1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_I1_FEM.hpp index 299054557fca..72d0e9112c01 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_I1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_I1_FEM.hpp @@ -185,6 +185,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_I1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_I1_FEMDef.hpp index 7eff91667e1b..71ea78656fc1 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_I1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_I1_FEMDef.hpp @@ -330,6 +330,57 @@ namespace Intrepid2 { } + template + void + Basis_HCURL_HEX_I1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HCURL_HEX_I1_FEM::getValues( + OutputViewType outputValues, + const 
PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HCURL_HEX_I1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HCURL_HEX_I1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HCURL_HEX_I1_FEM::Serial::getValues( output, input); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HCURL_HEX_I1_FEM::getValues), Operator Type not supported."); + } + } + } + }// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_In_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_In_FEM.hpp index 1af120be9949..64327bb29c08 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_In_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_In_FEM.hpp @@ -148,20 +148,21 @@ namespace Intrepid2 { class Basis_HCURL_HEX_In_FEM : public Basis { 
public: - using OrdinalTypeArray1DHost = typename Basis::OrdinalTypeArray1DHost; - using OrdinalTypeArray2DHost = typename Basis::OrdinalTypeArray2DHost; - using OrdinalTypeArray3DHost = typename Basis::OrdinalTypeArray3DHost; + using BasisBase = Basis; + using OrdinalTypeArray1DHost = typename BasisBase::OrdinalTypeArray1DHost; + using OrdinalTypeArray2DHost = typename BasisBase::OrdinalTypeArray2DHost; + using OrdinalTypeArray3DHost = typename BasisBase::OrdinalTypeArray3DHost; /** \brief Constructor. */ Basis_HCURL_HEX_In_FEM(const ordinal_type order, const EPointType pointType = POINTTYPE_EQUISPACED); - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; + using OutputViewType = typename BasisBase::OutputViewType; + using PointViewType = typename BasisBase::PointViewType; + using ScalarViewType = typename BasisBase::ScalarViewType; - using Basis::getValues; + using BasisBase::getValues; virtual void @@ -184,6 +185,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_In_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_In_FEMDef.hpp index 1d18b7887096..182c05d721b0 100644 --- 
a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_In_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_HEX_In_FEMDef.hpp @@ -21,19 +21,19 @@ namespace Intrepid2 { // ------------------------------------------------------------------------------------- namespace Impl { - template + template template + typename InputViewType, + typename WorkViewType, + typename VinvViewType> KOKKOS_INLINE_FUNCTION void - Basis_HCURL_HEX_In_FEM::Serial:: + Basis_HCURL_HEX_In_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType vinvLine, - const vinvViewType vinvBubble) { + const InputViewType input, + WorkViewType work, + const VinvViewType vinvLine, + const VinvViewType vinvBubble) { const ordinal_type cardLine = vinvLine.extent(0); const ordinal_type cardBubble = vinvBubble.extent(0); @@ -44,22 +44,22 @@ namespace Intrepid2 { const auto input_y = Kokkos::subview(input, Kokkos::ALL(), range_type(1,2)); const auto input_z = Kokkos::subview(input, Kokkos::ALL(), range_type(2,3)); - const ordinal_type dim_s = get_dimension_scalar(work); + const ordinal_type dim_s = get_dimension_scalar(input); auto ptr0 = work.data(); auto ptr1 = work.data() + cardLine*npts*dim_s; auto ptr2 = work.data() + 2*cardLine*npts*dim_s; auto ptr3 = work.data() + 3*cardLine*npts*dim_s; - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - viewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); - viewType outputLine_A(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); - viewType outputLine_B(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); - viewType outputBubble(Kokkos::view_wrap(ptr3, vcprop), cardBubble, npts); + ViewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, 
npts); + ViewType outputLine_A(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); + ViewType outputLine_B(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); + ViewType outputBubble(Kokkos::view_wrap(ptr3, vcprop), cardBubble, npts); // tensor product ordinal_type idx = 0; @@ -142,12 +142,12 @@ namespace Intrepid2 { auto ptr4 = work.data() + 4*cardLine*npts*dim_s; auto ptr5 = work.data() + 5*cardLine*npts*dim_s; - viewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); - viewType outputLine_A(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); - viewType outputLine_B(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); - viewType outputLine_DA(Kokkos::view_wrap(ptr3, vcprop), cardLine, npts, 1); - viewType outputLine_DB(Kokkos::view_wrap(ptr4, vcprop), cardLine, npts, 1); - viewType outputBubble(Kokkos::view_wrap(ptr5, vcprop), cardBubble, npts); + ViewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType outputLine_A(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); + ViewType outputLine_B(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); + ViewType outputLine_DA(Kokkos::view_wrap(ptr3, vcprop), cardLine, npts, 1); + ViewType outputLine_DB(Kokkos::view_wrap(ptr4, vcprop), cardLine, npts, 1); + ViewType outputBubble(Kokkos::view_wrap(ptr5, vcprop), cardBubble, npts); // tensor product ordinal_type idx = 0; @@ -588,6 +588,70 @@ namespace Intrepid2 { this->dofCoeffs_ = Kokkos::create_mirror_view(typename DT::memory_space(), dofCoeffsHost); Kokkos::deep_copy(this->dofCoeffs_, dofCoeffsHost); } -} + + template + void + Basis_HCURL_HEX_In_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + ordinal_type scalarWorkViewExtent = (operatorType == OPERATOR_VALUE) ? 
+ 3*this->vinvLine_.extent(0)+this->vinvBubble_.extent(0): + 5*this->vinvLine_.extent(0)+this->vinvBubble_.extent(0); + perThreadSpaceSize = scalarWorkViewExtent*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HCURL_HEX_In_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HCURL_HEX_In_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + ordinal_type scalarSizePerPoint = (operatorType == OPERATOR_VALUE) ? 
+ 3*this->vinvLine_.extent(0)+this->vinvBubble_.extent(0): + 5*this->vinvLine_.extent(0)+this->vinvBubble_.extent(0); + ordinal_type sizePerPoint = scalarSizePerPoint*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HCURL_HEX_In_FEM::Serial::getValues( output, input, work, this->vinvLine_, this->vinvBubble_ ); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HCURL_HEX_In_FEM::Serial::getValues( output, input, work, this->vinvLine_, this->vinvBubble_ ); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HCURL_HEX_In_FEM): getValues not implemented for this operator"); + } + } + } + +} // namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_I1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_I1_FEM.hpp index 15f266e2db91..24c4b26bf746 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_I1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_I1_FEM.hpp @@ -144,20 +144,21 @@ namespace Intrepid2 { 
typename pointValueType = double> class Basis_HCURL_QUAD_I1_FEM : public Basis { public: - using OrdinalTypeArray1DHost = typename Basis::OrdinalTypeArray1DHost; - using OrdinalTypeArray2DHost = typename Basis::OrdinalTypeArray2DHost; - using OrdinalTypeArray3DHost = typename Basis::OrdinalTypeArray3DHost; + using BasisBase = Basis; + using OrdinalTypeArray1DHost = typename BasisBase::OrdinalTypeArray1DHost; + using OrdinalTypeArray2DHost = typename BasisBase::OrdinalTypeArray2DHost; + using OrdinalTypeArray3DHost = typename BasisBase::OrdinalTypeArray3DHost; /** \brief Constructor. */ Basis_HCURL_QUAD_I1_FEM(); - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; + using OutputViewType = typename BasisBase::OutputViewType; + using PointViewType = typename BasisBase::PointViewType; + using ScalarViewType = typename BasisBase::ScalarViewType; - using Basis::getValues; + using BasisBase::getValues; virtual void @@ -178,6 +179,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_I1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_I1_FEMDef.hpp index 548929fb74cc..8380a4665a05 100644 --- 
a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_I1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_I1_FEMDef.hpp @@ -60,7 +60,7 @@ namespace Intrepid2 { default: { INTREPID2_TEST_FOR_ABORT( opType != OPERATOR_VALUE && opType != OPERATOR_CURL, - ">>> ERROR: (Intrepid2::Basis_HGRAD_QUAD_C1_FEM::Serial::getValues) operator is not supported"); + ">>> ERROR: (Intrepid2::Basis_HCURL_QUAD_I1_FEM::Serial::getValues) operator is not supported"); } } //end switch } @@ -219,7 +219,56 @@ namespace Intrepid2 { } + template + void + Basis_HCURL_QUAD_I1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } -}// namespace Intrepid2 + template + KOKKOS_INLINE_FUNCTION + void + Basis_HCURL_QUAD_I1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HCURL_QUAD_I1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HCURL_QUAD_I1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_CURL: + 
Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HCURL_QUAD_I1_FEM::Serial::getValues( output, input); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HCURL_QUAD_I1_FEM::getValues), Operator Type not supported."); + } + } + } + +}// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_In_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_In_FEM.hpp index 077f6de07afb..13d0c227d421 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_In_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_In_FEM.hpp @@ -136,20 +136,21 @@ namespace Intrepid2 { class Basis_HCURL_QUAD_In_FEM : public Basis { public: - using OrdinalTypeArray1DHost = typename Basis::OrdinalTypeArray1DHost; - using OrdinalTypeArray2DHost = typename Basis::OrdinalTypeArray2DHost; - using OrdinalTypeArray3DHost = typename Basis::OrdinalTypeArray3DHost; + using BasisBase = Basis; + using OrdinalTypeArray1DHost = typename BasisBase::OrdinalTypeArray1DHost; + using OrdinalTypeArray2DHost = typename BasisBase::OrdinalTypeArray2DHost; + using OrdinalTypeArray3DHost = typename BasisBase::OrdinalTypeArray3DHost; /** \brief Constructor. 
*/ Basis_HCURL_QUAD_In_FEM(const ordinal_type order, const EPointType pointType = POINTTYPE_EQUISPACED); - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; + using OutputViewType = typename BasisBase::OutputViewType; + using PointViewType = typename BasisBase::PointViewType; + using ScalarViewType = typename BasisBase::ScalarViewType; - using Basis::getValues; + using BasisBase::getValues; virtual void @@ -172,6 +173,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_In_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_In_FEMDef.hpp index 13a732abb88d..b00248a51fc8 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_In_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_QUAD_In_FEMDef.hpp @@ -21,19 +21,19 @@ namespace Intrepid2 { // ------------------------------------------------------------------------------------- namespace Impl { - template + template template + typename InputViewType, + typename WorkViewType, + typename VinvViewType> KOKKOS_INLINE_FUNCTION void - Basis_HCURL_QUAD_In_FEM::Serial:: + 
Basis_HCURL_QUAD_In_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType vinvLine, - const vinvViewType vinvBubble) { + const InputViewType input, + WorkViewType work, + const VinvViewType vinvLine, + const VinvViewType vinvBubble) { const ordinal_type cardLine = vinvLine.extent(0); const ordinal_type cardBubble = vinvBubble.extent(0); @@ -43,19 +43,19 @@ namespace Intrepid2 { const auto input_x = Kokkos::subview(input, Kokkos::ALL(), range_type(0,1)); const auto input_y = Kokkos::subview(input, Kokkos::ALL(), range_type(1,2)); - const int dim_s = get_dimension_scalar(work); + const int dim_s = get_dimension_scalar(input); auto ptr0 = work.data(); auto ptr1 = work.data()+cardLine*npts*dim_s; auto ptr2 = work.data()+2*cardLine*npts*dim_s; - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - viewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); - viewType outputLine(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); - viewType outputBubble(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); + ViewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType outputLine(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); + ViewType outputBubble(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); // tensor product ordinal_type idx = 0; @@ -101,11 +101,11 @@ namespace Intrepid2 { case OPERATOR_CURL: { ordinal_type idx = 0; { // x - component - viewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); // x bubble value - viewType output_x(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); + ViewType output_x(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); // y line grad - viewType 
output_y(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts,1); + ViewType output_y(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts,1); Impl::Basis_HGRAD_LINE_Cn_FEM::Serial:: getValues(output_x, input_x, workLine, vinvBubble); @@ -120,11 +120,11 @@ namespace Intrepid2 { output.access(idx,k) = -output_x.access(i,k)*output_y.access(j,k,0); } { // y - component - viewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); // x line grad - viewType output_x(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts,1); + ViewType output_x(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts,1); // y bubble value - viewType output_y(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); + ViewType output_y(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); Impl::Basis_HGRAD_LINE_Cn_FEM::Serial:: getValues(output_y, input_y, workLine, vinvBubble); @@ -386,6 +386,63 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoeffs_, dofCoeffsHost); } -} + template + void + Basis_HCURL_QUAD_In_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = (2*this->vinvLine_.extent(0)+this->vinvBubble_.extent(0))*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HCURL_QUAD_In_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HCURL_QUAD_In_FEM::getValues), The capability of selecting subsets of basis 
functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + ordinal_type sizePerPoint = (2*this->vinvLine_.extent(0)+this->vinvBubble_.extent(0))*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HCURL_QUAD_In_FEM::Serial::getValues( output, input, work, this->vinvLine_, this->vinvBubble_ ); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HCURL_QUAD_In_FEM::Serial::getValues( output, input, work, this->vinvLine_, this->vinvBubble_ ); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HCURL_QUAD_In_FEM): getValues not implemented for this operator"); + } + } + } + +} // namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_I1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_I1_FEM.hpp index 6d90318a4961..d293da0e38c0 100644 --- 
a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_I1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_I1_FEM.hpp @@ -184,6 +184,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_I1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_I1_FEMDef.hpp index 9c3d2b2d1c23..4d38583f8e49 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_I1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_I1_FEMDef.hpp @@ -255,6 +255,57 @@ namespace Intrepid2 { } + template + void + Basis_HCURL_TET_I1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + template + KOKKOS_INLINE_FUNCTION + void + Basis_HCURL_TET_I1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + 
INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HCURL_TET_I1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HCURL_TET_I1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HCURL_TET_I1_FEM::Serial::getValues( output, input); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HCURL_TET_I1_FEM::getValues), Operator Type not supported."); + } + } + } + }// namespace Intrepid2 + #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_In_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_In_FEM.hpp index cae49e5b09a5..ed253d57ec13 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_In_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_In_FEM.hpp @@ -217,9 +217,26 @@ class Basis_HCURL_TET_In_FEM operatorType); } - virtual - void - getDofCoords( ScalarViewType dofCoords ) const override { + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + 
KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + + virtual + void + getDofCoords( ScalarViewType dofCoords ) const override { #ifdef HAVE_INTREPID2_DEBUG // Verify rank of output array. INTREPID2_TEST_FOR_EXCEPTION( rank(dofCoords) != 2, std::invalid_argument, diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_In_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_In_FEMDef.hpp index 8bb82254291f..56149a4a1820 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_In_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TET_In_FEMDef.hpp @@ -26,18 +26,18 @@ namespace Intrepid2 { namespace Impl { -template +template template +typename InputViewType, +typename WorkViewType, +typename VinvViewType> KOKKOS_INLINE_FUNCTION void -Basis_HCURL_TET_In_FEM::Serial:: +Basis_HCURL_TET_In_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType coeffs ) { + const InputViewType input, + WorkViewType work, + const VinvViewType coeffs ) { constexpr ordinal_type spaceDim = 3; const ordinal_type @@ -54,17 +54,17 @@ getValues( OutputViewType output, } } - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); auto ptr = work.data(); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - const viewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); - workViewType dummyView; + const ViewType phis(Kokkos::view_wrap(ptr, vcprop), 
card, npts); + ViewType dummyView; Impl::Basis_HGRAD_TET_Cn_FEM_ORTH:: - Serial::getValues(phis, input, dummyView, order); + Serial::getValues(phis, input, dummyView, order); for (ordinal_type i=0;i::getValues(phis, input, workView, order); @@ -282,7 +282,7 @@ Basis_HCURL_TET_In_FEM( const ordinal_type order, #ifdef HAVE_INTREPID2_DEBUG ordinal_type num_nonzero_sv = 0; for (int i=0;i tolerence()); + num_nonzero_sv += (S(i,0) > 10*tolerence()); INTREPID2_TEST_FOR_EXCEPTION( num_nonzero_sv != card, std::invalid_argument, ">>> ERROR: (Intrepid2::Basis_HCURL_TET_In_FEM( order, pointType), Matrix V1 should have rank equal to the cardinality of HCURL space"); @@ -562,5 +562,64 @@ Basis_HCURL_TET_In_FEM( const ordinal_type order, posDfOrd); } } + +template +void +Basis_HCURL_TET_In_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + ordinal_type scalarWorkViewExtent = (operatorType == OPERATOR_VALUE) ? 
this->basisCardinality_ : 7*this->basisCardinality_; + perThreadSpaceSize = scalarWorkViewExtent*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); +} + +template +KOKKOS_INLINE_FUNCTION +void +Basis_HCURL_TET_In_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HCURL_TET_In_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + ordinal_type scalarSizePerPoint = (operatorType == OPERATOR_VALUE) ? 
this->basisCardinality_ : 7*this->basisCardinality_; + ordinal_type sizePerPoint = scalarSizePerPoint*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HCURL_TET_In_FEM::Serial::getValues( output, input, work, this->coeffs_ ); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HCURL_TET_In_FEM::Serial::getValues( output, input, work, this->coeffs_ ); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HCURL_TET_In_FEM): getValues not implemented for this operator"); + } + } +} } // namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_I1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_I1_FEM.hpp index 816b999560a6..109c96988649 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_I1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_I1_FEM.hpp @@ -187,6 +187,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, 
+ const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_I1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_I1_FEMDef.hpp index 85e639ea8f10..813b764608db 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_I1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_I1_FEMDef.hpp @@ -208,7 +208,56 @@ namespace Intrepid2 { } + template + void + Basis_HCURL_TRI_I1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HCURL_TRI_I1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HCURL_TRI_I1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + 
switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HCURL_TRI_I1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HCURL_TRI_I1_FEM::Serial::getValues( output, input); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HCURL_TRI_!1_FEM::getValues), Operator Type not supported."); + } + } + } + }// namespace Intrepid2 #endif - diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_In_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_In_FEM.hpp index 3c34d125847a..a030f292fb50 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_In_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_In_FEM.hpp @@ -209,9 +209,26 @@ class Basis_HCURL_TRI_In_FEM operatorType); } - virtual - void - getDofCoords( ScalarViewType dofCoords ) const override { + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const 
ordinal_type subcellOrdinal = -1) const override; + + virtual + void + getDofCoords( ScalarViewType dofCoords ) const override { #ifdef HAVE_INTREPID2_DEBUG // Verify rank of output array. INTREPID2_TEST_FOR_EXCEPTION( rank(dofCoords) != 2, std::invalid_argument, diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_In_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_In_FEMDef.hpp index 7d10682a5e45..6cb65ab386de 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_In_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_TRI_In_FEMDef.hpp @@ -25,18 +25,18 @@ namespace Intrepid2 { namespace Impl { - template + template template + typename InputViewType, + typename WorkViewType, + typename VinvViewType> KOKKOS_INLINE_FUNCTION void - Basis_HCURL_TRI_In_FEM::Serial:: + Basis_HCURL_TRI_In_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType coeffs ) { + const InputViewType input, + WorkViewType work, + const VinvViewType coeffs ) { constexpr ordinal_type spaceDim = 2; const ordinal_type @@ -53,17 +53,16 @@ namespace Intrepid2 { } } - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); auto ptr = work.data(); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - const viewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); - workViewType dummyView; + const ViewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts), dummyView; Impl::Basis_HGRAD_TRI_Cn_FEM_ORTH:: - Serial::getValues(phis, input, dummyView, order); + Serial::getValues(phis, input, dummyView, order); for (ordinal_type i=0;i::getValues(phis, input, workView, order); @@ -452,5 +451,66 @@ namespace Intrepid2 { posDfOrd); } } + + template + void + 
Basis_HCURL_TRI_In_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + ordinal_type scalarWorkViewExtent = (operatorType == OPERATOR_VALUE) ? this->basisCardinality_ : 5*this->basisCardinality_; + perThreadSpaceSize = scalarWorkViewExtent*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HCURL_TRI_In_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HCURL_TRI_In_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + ordinal_type scalarSizePerPoint = (operatorType == OPERATOR_VALUE) ? 
this->basisCardinality_ : 5*this->basisCardinality_; + ordinal_type sizePerPoint = scalarSizePerPoint*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HCURL_TRI_In_FEM::Serial::getValues( output, input, work, this->coeffs_ ); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HCURL_TRI_In_FEM::Serial::getValues( output, input, work, this->coeffs_ ); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HCURL_TRI_In_FEM): getValues not implemented for this operator"); + } + } + } + } // namespace Intrepid2 + #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_WEDGE_I1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_WEDGE_I1_FEM.hpp index c7587cf3eec1..d2831d0ac47a 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_WEDGE_I1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_WEDGE_I1_FEM.hpp @@ -185,6 +185,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& 
perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_WEDGE_I1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_WEDGE_I1_FEMDef.hpp index 59ad4da436e8..754355ffbd7d 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_WEDGE_I1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HCURL_WEDGE_I1_FEMDef.hpp @@ -276,5 +276,57 @@ namespace Intrepid2 { } + template + void + Basis_HCURL_WEDGE_I1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HCURL_WEDGE_I1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HCURL_WEDGE_I1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int 
numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HCURL_WEDGE_I1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HCURL_WEDGE_I1_FEM::Serial::getValues( output, input); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HCURL_WEDGE_I1_FEM::getValues), Operator Type not supported."); + } + } + } + }// namespace Intrepid2 + #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_I1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_I1_FEM.hpp index 1de1d7c654c7..66ab525b3aec 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_I1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_I1_FEM.hpp @@ -190,6 +190,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const 
override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_I1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_I1_FEMDef.hpp index 79e9aaef60f8..b7e865178e64 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_I1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_I1_FEMDef.hpp @@ -236,5 +236,57 @@ namespace Intrepid2 { } + template + void + Basis_HDIV_HEX_I1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HDIV_HEX_I1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HDIV_HEX_I1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HDIV_HEX_I1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_DIV: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& 
pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HDIV_HEX_I1_FEM::Serial::getValues( output, input); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HDIV_HEX_I1_FEM::getValues), Operator Type not supported."); + } + } + } + }// namespace Intrepid2 + #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_In_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_In_FEM.hpp index 4ed98a89967f..f563bd998237 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_In_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_In_FEM.hpp @@ -138,20 +138,21 @@ namespace Intrepid2 { class Basis_HDIV_HEX_In_FEM : public Basis { public: - using OrdinalTypeArray1DHost = typename Basis::OrdinalTypeArray1DHost; - using OrdinalTypeArray2DHost = typename Basis::OrdinalTypeArray2DHost; - using OrdinalTypeArray3DHost = typename Basis::OrdinalTypeArray3DHost; + using BasisBase = Basis; + using OrdinalTypeArray1DHost = typename BasisBase::OrdinalTypeArray1DHost; + using OrdinalTypeArray2DHost = typename BasisBase::OrdinalTypeArray2DHost; + using OrdinalTypeArray3DHost = typename BasisBase::OrdinalTypeArray3DHost; /** \brief Constructor. 
*/ Basis_HDIV_HEX_In_FEM(const ordinal_type order, const EPointType pointType = POINTTYPE_EQUISPACED); - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; + using OutputViewType = typename BasisBase::OutputViewType; + using PointViewType = typename BasisBase::PointViewType; + using ScalarViewType = typename BasisBase::ScalarViewType; - using Basis::getValues; + using BasisBase::getValues; virtual void @@ -174,6 +175,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { @@ -254,8 +272,6 @@ namespace Intrepid2 { }// namespace Intrepid2 - - #include "Intrepid2_HDIV_HEX_In_FEMDef.hpp" #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_In_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_In_FEMDef.hpp index 0bae2c8b1b3d..0d5d25113bdb 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_In_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_HEX_In_FEMDef.hpp @@ -21,19 +21,19 @@ namespace Intrepid2 { // ------------------------------------------------------------------------------------- namespace Impl { - template + template template + typename InputViewType, + typename WorkViewType, + 
typename VinvViewType> KOKKOS_INLINE_FUNCTION void - Basis_HDIV_HEX_In_FEM::Serial:: + Basis_HDIV_HEX_In_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType vinvLine, - const vinvViewType vinvBubble) { + const InputViewType input, + WorkViewType work, + const VinvViewType vinvLine, + const VinvViewType vinvBubble) { const ordinal_type cardLine = vinvLine.extent(0); const ordinal_type cardBubble = vinvBubble.extent(0); @@ -44,21 +44,21 @@ namespace Intrepid2 { const auto input_y = Kokkos::subview(input, Kokkos::ALL(), range_type(1,2)); const auto input_z = Kokkos::subview(input, Kokkos::ALL(), range_type(2,3)); - const ordinal_type dim_s = get_dimension_scalar(work); + const ordinal_type dim_s = get_dimension_scalar(input); auto ptr0 = work.data(); auto ptr1 = work.data()+cardLine*npts*dim_s; auto ptr2 = work.data()+2*cardLine*npts*dim_s; auto ptr3 = work.data()+(2*cardLine+cardBubble)*npts*dim_s; - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - viewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); - viewType outputLine(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); - viewType outputBubble_A(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); - viewType outputBubble_B(Kokkos::view_wrap(ptr3, vcprop), cardBubble, npts); + ViewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType outputLine(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); + ViewType outputBubble_A(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); + ViewType outputBubble_B(Kokkos::view_wrap(ptr3, vcprop), cardBubble, npts); // tensor product ordinal_type idx = 0; @@ -138,13 +138,13 @@ namespace Intrepid2 { break; } case OPERATOR_DIV: { - viewType 
workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); // A line value - viewType outputBubble_A(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); + ViewType outputBubble_A(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); // B line value - viewType outputBubble_B(Kokkos::view_wrap(ptr3, vcprop), cardBubble, npts); + ViewType outputBubble_B(Kokkos::view_wrap(ptr3, vcprop), cardBubble, npts); // Line grad - viewType outputLine(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts, 1); + ViewType outputLine(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts, 1); // tensor product ordinal_type idx = 0; @@ -508,6 +508,64 @@ namespace Intrepid2 { this->dofCoeffs_ = Kokkos::create_mirror_view(typename DT::memory_space(), dofCoeffsHost); Kokkos::deep_copy(this->dofCoeffs_, dofCoeffsHost); } -} + + template + void + Basis_HDIV_HEX_In_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = (2*this->vinvLine_.extent(0)+2*this->vinvBubble_.extent(0))*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HDIV_HEX_In_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HDIV_HEX_In_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + 
using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + ordinal_type sizePerPoint = (2*this->vinvLine_.extent(0)+2*this->vinvBubble_.extent(0))*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HDIV_HEX_In_FEM::Serial::getValues( output, input, work, this->vinvLine_, this->vinvBubble_ ); + }); + break; + case OPERATOR_DIV: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HDIV_HEX_In_FEM::Serial::getValues( output, input, work, this->vinvLine_, this->vinvBubble_ ); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HDIV_HEX_In_FEM): getValues not implemented for this operator"); + } + } + } + +} // namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_I1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_I1_FEM.hpp index fab13618142c..66c5843d4da0 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_I1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_I1_FEM.hpp @@ 
-145,20 +145,21 @@ namespace Intrepid2 { typename pointValueType = double> class Basis_HDIV_QUAD_I1_FEM : public Basis { public: - using OrdinalTypeArray1DHost = typename Basis::OrdinalTypeArray1DHost; - using OrdinalTypeArray2DHost = typename Basis::OrdinalTypeArray2DHost; - using OrdinalTypeArray3DHost = typename Basis::OrdinalTypeArray3DHost; + using BasisBase = Basis; + using OrdinalTypeArray1DHost = typename BasisBase::OrdinalTypeArray1DHost; + using OrdinalTypeArray2DHost = typename BasisBase::OrdinalTypeArray2DHost; + using OrdinalTypeArray3DHost = typename BasisBase::OrdinalTypeArray3DHost; /** \brief Constructor. */ Basis_HDIV_QUAD_I1_FEM(); - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; + using OutputViewType = typename BasisBase::OutputViewType; + using PointViewType = typename BasisBase::PointViewType; + using ScalarViewType = typename BasisBase::ScalarViewType; - using Basis::getValues; + using BasisBase::getValues; virtual void @@ -179,6 +180,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_I1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_I1_FEMDef.hpp index 
1924df01978b..22eeef61929a 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_I1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_I1_FEMDef.hpp @@ -213,10 +213,59 @@ namespace Intrepid2 { this->dofCoeffs_ = Kokkos::create_mirror_view(typename DT::memory_space(), dofCoeffs); Kokkos::deep_copy(this->dofCoeffs_, dofCoeffs); - } + template + void + Basis_HDIV_QUAD_I1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + template + KOKKOS_INLINE_FUNCTION + void + Basis_HDIV_QUAD_I1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HDIV_QUAD_I1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HDIV_QUAD_I1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_DIV: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = 
Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HDIV_QUAD_I1_FEM::Serial::getValues( output, input); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HDIV_QUAD_I1_FEM::getValues), Operator Type not supported."); + } + } + } + }// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_In_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_In_FEM.hpp index 3db4472991f2..60da55f64220 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_In_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_In_FEM.hpp @@ -135,20 +135,21 @@ namespace Intrepid2 { class Basis_HDIV_QUAD_In_FEM : public Basis { public: - using OrdinalTypeArray1DHost = typename Basis::OrdinalTypeArray1DHost; - using OrdinalTypeArray2DHost = typename Basis::OrdinalTypeArray2DHost; - using OrdinalTypeArray3DHost = typename Basis::OrdinalTypeArray3DHost; + using BasisBase = Basis; + using OrdinalTypeArray1DHost = typename BasisBase::OrdinalTypeArray1DHost; + using OrdinalTypeArray2DHost = typename BasisBase::OrdinalTypeArray2DHost; + using OrdinalTypeArray3DHost = typename BasisBase::OrdinalTypeArray3DHost; /** \brief Constructor. 
*/ Basis_HDIV_QUAD_In_FEM(const ordinal_type order, const EPointType pointType = POINTTYPE_EQUISPACED); - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; + using OutputViewType = typename BasisBase::OutputViewType; + using PointViewType = typename BasisBase::PointViewType; + using ScalarViewType = typename BasisBase::ScalarViewType; - using Basis::getValues; + using BasisBase::getValues; virtual void @@ -170,6 +171,24 @@ namespace Intrepid2 { this->vinvBubble_, operatorType ); } + + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_In_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_In_FEMDef.hpp index f6958e8152c2..ee5bdc9458c4 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_In_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_QUAD_In_FEMDef.hpp @@ -21,19 +21,19 @@ namespace Intrepid2 { // ------------------------------------------------------------------------------------- namespace Impl { - template + template template + typename InputViewType, + typename WorkViewType, + typename VinvViewType> KOKKOS_INLINE_FUNCTION void - Basis_HDIV_QUAD_In_FEM::Serial:: + 
Basis_HDIV_QUAD_In_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType vinvLine, - const vinvViewType vinvBubble) { + const InputViewType input, + WorkViewType work, + const VinvViewType vinvLine, + const VinvViewType vinvBubble) { const ordinal_type cardLine = vinvLine.extent(0); const ordinal_type cardBubble = vinvBubble.extent(0); @@ -43,20 +43,19 @@ namespace Intrepid2 { const auto input_x = Kokkos::subview(input, Kokkos::ALL(), range_type(0,1)); const auto input_y = Kokkos::subview(input, Kokkos::ALL(), range_type(1,2)); - const int dim_s = get_dimension_scalar(work); + const ordinal_type dim_s = get_dimension_scalar(input); auto ptr0 = work.data(); auto ptr1 = work.data()+cardLine*npts*dim_s; auto ptr2 = work.data()+2*cardLine*npts*dim_s; - - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - viewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); - viewType outputLine(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); - viewType outputBubble(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); + ViewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType outputLine(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); + ViewType outputBubble(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); // tensor product ordinal_type idx = 0; @@ -100,11 +99,11 @@ namespace Intrepid2 { case OPERATOR_DIV: { ordinal_type idx = 0; { // x - component - viewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); // x bubble value - viewType output_x(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); + ViewType output_x(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); // y line grad - 
viewType output_y(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts,1); + ViewType output_y(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts,1); Impl::Basis_HGRAD_LINE_Cn_FEM::Serial:: getValues(output_x, input_x, workLine, vinvBubble); @@ -119,11 +118,11 @@ namespace Intrepid2 { output.access(idx,k) = output_x.access(i,k)*output_y.access(j,k,0); } { // y - component - viewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType workLine(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); // x line grad - viewType output_x(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts,1); + ViewType output_x(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts,1); // y bubble value - viewType output_y(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); + ViewType output_y(Kokkos::view_wrap(ptr2, vcprop), cardBubble, npts); Impl::Basis_HGRAD_LINE_Cn_FEM::Serial:: getValues(output_y, input_y, workLine, vinvBubble); @@ -381,6 +380,63 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoeffs_, dofCoeffsHost); } -} + template + void + Basis_HDIV_QUAD_In_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = (2*this->vinvLine_.extent(0)+this->vinvBubble_.extent(0))*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HDIV_QUAD_In_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HDIV_QUAD_In_FEM::getValues), The capability of selecting subsets of basis 
functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + ordinal_type sizePerPoint = (2*this->vinvLine_.extent(0)+this->vinvBubble_.extent(0))*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HDIV_QUAD_In_FEM::Serial::getValues( output, input, work, this->vinvLine_, this->vinvBubble_ ); + }); + break; + case OPERATOR_DIV: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HDIV_QUAD_In_FEM::Serial::getValues( output, input, work, this->vinvLine_, this->vinvBubble_ ); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HDIV_QUAD_In_FEM): getValues not implemented for this operator"); + } + } + } + +} // namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_I1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_I1_FEM.hpp index a5b72c79d540..40b58117d658 100644 --- 
a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_I1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_I1_FEM.hpp @@ -144,19 +144,21 @@ namespace Intrepid2 { typename pointValueType = double> class Basis_HDIV_TET_I1_FEM: public Basis { public: - using OrdinalTypeArray1DHost = typename Basis::OrdinalTypeArray1DHost; - using OrdinalTypeArray2DHost = typename Basis::OrdinalTypeArray2DHost; - using OrdinalTypeArray3DHost = typename Basis::OrdinalTypeArray3DHost; + using BasisBase = Basis; + + using typename BasisBase::OrdinalTypeArray1DHost; + using typename BasisBase::OrdinalTypeArray2DHost; + using typename BasisBase::OrdinalTypeArray3DHost; + + using typename BasisBase::OutputViewType; + using typename BasisBase::PointViewType ; + using typename BasisBase::ScalarViewType; /** \brief Constructor. */ Basis_HDIV_TET_I1_FEM(); - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; - - using Basis::getValues; + using BasisBase::getValues; virtual void @@ -177,6 +179,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_I1_FEMDef.hpp 
b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_I1_FEMDef.hpp index febd2f87333c..7aac987e9439 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_I1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_I1_FEMDef.hpp @@ -230,6 +230,58 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoeffs_, dofCoeffs); } + template + void + Basis_HDIV_TET_I1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HDIV_TET_I1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HDIV_TET_I1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HDIV_TET_I1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_DIV: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( 
inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HDIV_TET_I1_FEM::Serial::getValues( output, input); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HDIV_TET_I1_FEM::getValues), Operator Type not supported."); + } + } + } + }// namespace Intrepid2 + #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_In_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_In_FEM.hpp index c2f17ac83aef..9224322f9539 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_In_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_In_FEM.hpp @@ -165,131 +165,144 @@ template class Basis_HDIV_TET_In_FEM : public Basis { - public: - typedef typename Basis::OrdinalTypeArray1DHost OrdinalTypeArray1DHost; - typedef typename Basis::OrdinalTypeArray2DHost OrdinalTypeArray2DHost; - typedef typename Basis::OrdinalTypeArray3DHost OrdinalTypeArray3DHost; - - /** \brief Constructor. - */ - Basis_HDIV_TET_In_FEM(const ordinal_type order, - const EPointType pointType = POINTTYPE_EQUISPACED); - - - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; - - typedef typename Basis::scalarType scalarType; - - using Basis::getValues; - - virtual - void - getValues( /* */ OutputViewType outputValues, - const PointViewType inputPoints, - const EOperator operatorType = OPERATOR_VALUE) const override { + public: + using BasisBase = Basis; + using OrdinalTypeArray1DHost = typename BasisBase::OrdinalTypeArray1DHost; + using OrdinalTypeArray2DHost = typename BasisBase::OrdinalTypeArray2DHost; + using OrdinalTypeArray3DHost = typename BasisBase::OrdinalTypeArray3DHost; + + /** \brief Constructor. 
+ */ + Basis_HDIV_TET_In_FEM(const ordinal_type order, + const EPointType pointType = POINTTYPE_EQUISPACED); + + using OutputViewType = typename BasisBase::OutputViewType; + using PointViewType = typename BasisBase::PointViewType; + using ScalarViewType = typename BasisBase::ScalarViewType; + using scalarType = typename BasisBase::scalarType; + using BasisBase::getValues; + + virtual + void + getValues( /* */ OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType = OPERATOR_VALUE) const override { #ifdef HAVE_INTREPID2_DEBUG - Intrepid2::getValues_HDIV_Args(outputValues, + Intrepid2::getValues_HDIV_Args(outputValues, inputPoints, operatorType, this->getBaseCellTopology(), this->getCardinality() ); #endif -constexpr ordinal_type numPtsPerEval = Parameters::MaxNumPtsPerBasisEval; -Impl::Basis_HDIV_TET_In_FEM:: -getValues( outputValues, - inputPoints, - this->coeffs_, - operatorType); - } - - virtual - void - getDofCoords( ScalarViewType dofCoords ) const override { + constexpr ordinal_type numPtsPerEval = Parameters::MaxNumPtsPerBasisEval; + Impl::Basis_HDIV_TET_In_FEM:: + getValues( outputValues, + inputPoints, + this->coeffs_, + operatorType); + } + + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + + virtual + void + getDofCoords( ScalarViewType dofCoords ) const override { #ifdef HAVE_INTREPID2_DEBUG - // Verify rank of output array. 
- INTREPID2_TEST_FOR_EXCEPTION( dofCoords.rank() != 2, std::invalid_argument, - ">>> ERROR: (Intrepid2::Basis_HDIV_TET_In_FEM::getDofCoords) rank = 2 required for dofCoords array"); - // Verify 0th dimension of output array. - INTREPID2_TEST_FOR_EXCEPTION( static_cast(dofCoords.extent(0)) != this->getCardinality(), std::invalid_argument, - ">>> ERROR: (Intrepid2::Basis_HDIV_TET_In_FEM::getDofCoords) mismatch in number of dof and 0th dimension of dofCoords array"); - // Verify 1st dimension of output array. - INTREPID2_TEST_FOR_EXCEPTION( dofCoords.extent(1) != this->getBaseCellTopology().getDimension(), std::invalid_argument, - ">>> ERROR: (Intrepid2::Basis_HDIV_TET_In_FEM::getDofCoords) incorrect reference cell (1st) dimension in dofCoords array"); + // Verify rank of output array. + INTREPID2_TEST_FOR_EXCEPTION( dofCoords.rank() != 2, std::invalid_argument, + ">>> ERROR: (Intrepid2::Basis_HDIV_TET_In_FEM::getDofCoords) rank = 2 required for dofCoords array"); + // Verify 0th dimension of output array. + INTREPID2_TEST_FOR_EXCEPTION( static_cast(dofCoords.extent(0)) != this->getCardinality(), std::invalid_argument, + ">>> ERROR: (Intrepid2::Basis_HDIV_TET_In_FEM::getDofCoords) mismatch in number of dof and 0th dimension of dofCoords array"); + // Verify 1st dimension of output array. + INTREPID2_TEST_FOR_EXCEPTION( dofCoords.extent(1) != this->getBaseCellTopology().getDimension(), std::invalid_argument, + ">>> ERROR: (Intrepid2::Basis_HDIV_TET_In_FEM::getDofCoords) incorrect reference cell (1st) dimension in dofCoords array"); #endif - Kokkos::deep_copy(dofCoords, this->dofCoords_); - } + Kokkos::deep_copy(dofCoords, this->dofCoords_); + } - virtual - void - getDofCoeffs( ScalarViewType dofCoeffs ) const override { -#ifdef HAVE_INTREPID2_DEBUG - // Verify rank of output array. 
- INTREPID2_TEST_FOR_EXCEPTION( dofCoeffs.rank() != 2, std::invalid_argument, - ">>> ERROR: (Intrepid2::Basis_HDIV_TET_In_FEM::getDofCoeffs) rank = 2 required for dofCoeffs array"); - // Verify 0th dimension of output array. - INTREPID2_TEST_FOR_EXCEPTION( static_cast(dofCoeffs.extent(0)) != this->getCardinality(), std::invalid_argument, + virtual + void + getDofCoeffs( ScalarViewType dofCoeffs ) const override { + #ifdef HAVE_INTREPID2_DEBUG + // Verify rank of output array. + INTREPID2_TEST_FOR_EXCEPTION( dofCoeffs.rank() != 2, std::invalid_argument, + ">>> ERROR: (Intrepid2::Basis_HDIV_TET_In_FEM::getDofCoeffs) rank = 2 required for dofCoeffs array"); + // Verify 0th dimension of output array. + INTREPID2_TEST_FOR_EXCEPTION( static_cast(dofCoeffs.extent(0)) != this->getCardinality(), std::invalid_argument, ">>> ERROR: (Intrepid2::Basis_HDIV_TET_In_FEM::getDofCoeffs) mismatch in number of dof and 0th dimension of dofCoeffs array"); - // Verify 1st dimension of output array. - INTREPID2_TEST_FOR_EXCEPTION( dofCoeffs.extent(1) != this->getBaseCellTopology().getDimension(), std::invalid_argument, + // Verify 1st dimension of output array. + INTREPID2_TEST_FOR_EXCEPTION( dofCoeffs.extent(1) != this->getBaseCellTopology().getDimension(), std::invalid_argument, ">>> ERROR: (Intrepid2::Basis_HDIV_TET_In_FEM::getDofCoeffs) incorrect reference cell (1st) dimension in dofCoeffs array"); #endif - Kokkos::deep_copy(dofCoeffs, this->dofCoeffs_); - } - - void - getExpansionCoeffs( ScalarViewType coeffs ) const { - // has to be same rank and dimensions - Kokkos::deep_copy(coeffs, this->coeffs_); - } - - virtual - const char* - getName() const override { - return "Intrepid2_HDIV_TET_In_FEM"; - } - - virtual - bool - requireOrientation() const override { - return true; - } - - /** \brief returns the basis associated to a subCell. - - The bases of the subCell are the restriction to the subCell of the bases of the parent cell, - projected along normal to the subCell. 
- - \param [in] subCellDim - dimension of subCell - \param [in] subCellOrd - position of the subCell among of the subCells having the same dimension - \return pointer to the subCell basis of dimension subCellDim and position subCellOrd - */ - BasisPtr - getSubCellRefBasis(const ordinal_type subCellDim, const ordinal_type subCellOrd) const override{ + Kokkos::deep_copy(dofCoeffs, this->dofCoeffs_); + } - if(subCellDim == 2) { - return Teuchos::rcp(new - Basis_HVOL_TRI_Cn_FEM - (this->basisDegree_-1, pointType_)); + void + getExpansionCoeffs( ScalarViewType coeffs ) const { + // has to be same rank and dimensions + Kokkos::deep_copy(coeffs, this->coeffs_); } - INTREPID2_TEST_FOR_EXCEPTION(true,std::invalid_argument,"Input parameters out of bounds"); - } - BasisPtr - getHostBasis() const override{ - return Teuchos::rcp(new Basis_HDIV_TET_In_FEM(this->basisDegree_, pointType_)); - } - private: + virtual + const char* + getName() const override { + return "Intrepid2_HDIV_TET_In_FEM"; + } - /** \brief expansion coefficients of the nodal basis in terms of the - orthgonal one */ - Kokkos::DynRankView coeffs_; + virtual + bool + requireOrientation() const override { + return true; + } - /** \brief type of lattice used for creating the DoF coordinates */ - EPointType pointType_; + /** \brief returns the basis associated to a subCell. -}; + The bases of the subCell are the restriction to the subCell of the bases of the parent cell, + projected along normal to the subCell. 
+ + \param [in] subCellDim - dimension of subCell + \param [in] subCellOrd - position of the subCell among the subCells having the same dimension + \return pointer to the subCell basis of dimension subCellDim and position subCellOrd + */ + BasisPtr + getSubCellRefBasis(const ordinal_type subCellDim, const ordinal_type subCellOrd) const override{ + + if(subCellDim == 2) { + return Teuchos::rcp(new + Basis_HVOL_TRI_Cn_FEM + (this->basisDegree_-1, pointType_)); + } + INTREPID2_TEST_FOR_EXCEPTION(true,std::invalid_argument,"Input parameters out of bounds"); + } + + BasisPtr + getHostBasis() const override{ + return Teuchos::rcp(new Basis_HDIV_TET_In_FEM(this->basisDegree_, pointType_)); + } + private: + + /** \brief expansion coefficients of the nodal basis in terms of the orthogonal one */ + Kokkos::DynRankView coeffs_; + + /** \brief type of lattice used for creating the DoF coordinates */ + EPointType pointType_; + }; }// namespace Intrepid2 diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_In_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_In_FEMDef.hpp index e8c97199198f..96e0e7cf2267 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_In_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TET_In_FEMDef.hpp @@ -25,18 +25,18 @@ namespace Intrepid2 { namespace Impl { -template +template template +typename InputViewType, +typename WorkViewType, +typename VinvViewType> KOKKOS_INLINE_FUNCTION void -Basis_HDIV_TET_In_FEM::Serial:: +Basis_HDIV_TET_In_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType coeffs ) { + const InputViewType input, + WorkViewType work, + const VinvViewType coeffs ) { constexpr ordinal_type spaceDim = 3; const ordinal_type @@ -53,17 +53,17 @@ getValues( OutputViewType output, } } - typedef typename Kokkos::DynRankView viewType; - auto vcprop = 
Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); auto ptr = work.data(); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - const viewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); - workViewType dummyView; + const ViewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); + ViewType dummyView; Impl::Basis_HGRAD_TET_Cn_FEM_ORTH:: - Serial::getValues(phis, input, dummyView, order); + Serial::getValues(phis, input, dummyView, order); for (ordinal_type i=0;i::getValues(phis, input, workView, order); @@ -104,10 +104,10 @@ typename inputPointValueType, class ...inputPointProperties, typename vinvValueType, class ...vinvProperties> void Basis_HDIV_TET_In_FEM:: -getValues( /* */ Kokkos::DynRankView outputValues, - const Kokkos::DynRankView inputPoints, - const Kokkos::DynRankView coeffs, - const EOperator operatorType) { +getValues( Kokkos::DynRankView outputValues, + const Kokkos::DynRankView inputPoints, + const Kokkos::DynRankView coeffs, + const EOperator operatorType) { typedef Kokkos::DynRankView outputValueViewType; typedef Kokkos::DynRankView inputPointViewType; typedef Kokkos::DynRankView vinvViewType; @@ -450,5 +450,64 @@ Basis_HDIV_TET_In_FEM( const ordinal_type order, posDfOrd); } } + +template +void +Basis_HDIV_TET_In_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + ordinal_type scalarWorkViewExtent = (operatorType == OPERATOR_VALUE) ? 
this->basisCardinality_ : 7*this->basisCardinality_; + perThreadSpaceSize = scalarWorkViewExtent*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); +} + +template +KOKKOS_INLINE_FUNCTION +void +Basis_HDIV_TET_In_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HDIV_TET_In_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + ordinal_type scalarSizePerPoint = (operatorType == OPERATOR_VALUE) ? 
this->basisCardinality_ : 7*this->basisCardinality_; + ordinal_type sizePerPoint = scalarSizePerPoint*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HDIV_TET_In_FEM::Serial::getValues( output, input, work, this->coeffs_ ); + }); + break; + case OPERATOR_DIV: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HDIV_TET_In_FEM::Serial::getValues( output, input, work, this->coeffs_ ); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HDIV_TET_In_FEM): getValues not implemented for this operator"); + } + } +} } // namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_I1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_I1_FEM.hpp index 6f6596778567..8315ac027c92 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_I1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_I1_FEM.hpp @@ -142,19 +142,21 @@ namespace Intrepid2 { typename pointValueType = double> class Basis_HDIV_TRI_I1_FEM: public Basis { public: - using OrdinalTypeArray1DHost = typename 
Basis::OrdinalTypeArray1DHost; - using OrdinalTypeArray2DHost = typename Basis::OrdinalTypeArray2DHost; - using OrdinalTypeArray3DHost = typename Basis::OrdinalTypeArray3DHost; + using BasisBase = Basis; + + using typename BasisBase::OrdinalTypeArray1DHost; + using typename BasisBase::OrdinalTypeArray2DHost; + using typename BasisBase::OrdinalTypeArray3DHost; + + using typename BasisBase::OutputViewType; + using typename BasisBase::PointViewType ; + using typename BasisBase::ScalarViewType; /** \brief Constructor. */ Basis_HDIV_TRI_I1_FEM(); - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; - - using Basis::getValues; + using BasisBase::getValues; virtual void @@ -175,6 +177,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_I1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_I1_FEMDef.hpp index a7c13864b3a2..ef00a009fdc7 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_I1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_I1_FEMDef.hpp @@ -72,6 +72,7 @@ namespace Intrepid2 { typedef Kokkos::DynRankView inputPointViewType; 
typedef typename ExecSpace::ExecSpaceType ExecSpaceType; + // Number of evaluation points = dim 0 of inputPoints const auto loopSize = inputPoints.extent(0); Kokkos::RangePolicy > policy(0, loopSize); @@ -215,5 +216,56 @@ namespace Intrepid2 { } + template + void + Basis_HDIV_TRI_I1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HDIV_TRI_I1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HDIV_TRI_I1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HDIV_TRI_I1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_DIV: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HDIV_TRI_I1_FEM::Serial::getValues( output, input); + }); + break; + default: { + 
INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HDIV_TRI_I1_FEM::getValues), Operator Type not supported."); + } + } + } + }// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_In_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_In_FEM.hpp index 71fd4f8ae99d..1ca324c938ef 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_In_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_In_FEM.hpp @@ -169,31 +169,30 @@ template class Basis_HDIV_TRI_In_FEM : public Basis { - public: - typedef typename Basis::OrdinalTypeArray1DHost OrdinalTypeArray1DHost; - typedef typename Basis::OrdinalTypeArray2DHost OrdinalTypeArray2DHost; - typedef typename Basis::OrdinalTypeArray3DHost OrdinalTypeArray3DHost; - - /** \brief Constructor. - */ - Basis_HDIV_TRI_In_FEM(const ordinal_type order, + public: + using BasisBase = Basis; + using OrdinalTypeArray1DHost = typename BasisBase::OrdinalTypeArray1DHost; + using OrdinalTypeArray2DHost = typename BasisBase::OrdinalTypeArray2DHost; + using OrdinalTypeArray3DHost = typename BasisBase::OrdinalTypeArray3DHost; + + /** \brief Constructor. 
+ */ + Basis_HDIV_TRI_In_FEM(const ordinal_type order, const EPointType pointType = POINTTYPE_EQUISPACED); - using HostBasis = Basis_HDIV_TRI_In_FEM; + using HostBasis = Basis_HDIV_TRI_In_FEM; - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; - - typedef typename Basis::scalarType scalarType; - - using Basis::getValues; + using OutputViewType = typename BasisBase::OutputViewType; + using PointViewType = typename BasisBase::PointViewType; + using ScalarViewType = typename BasisBase::ScalarViewType; + using scalarType = typename BasisBase::scalarType; + using BasisBase::getValues; virtual void - getValues( /* */ OutputViewType outputValues, - const PointViewType inputPoints, - const EOperator operatorType = OPERATOR_VALUE) const override { + getValues( OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType = OPERATOR_VALUE) const override { #ifdef HAVE_INTREPID2_DEBUG Intrepid2::getValues_HDIV_Args(outputValues, inputPoints, @@ -209,9 +208,26 @@ class Basis_HDIV_TRI_In_FEM operatorType); } - virtual - void - getDofCoords( ScalarViewType dofCoords ) const override { + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + + virtual + void + getDofCoords( ScalarViewType dofCoords ) const override { #ifdef HAVE_INTREPID2_DEBUG // Verify rank of output array. 
INTREPID2_TEST_FOR_EXCEPTION( dofCoords.rank() != 2, std::invalid_argument, diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_In_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_In_FEMDef.hpp index e412989caa56..89c86f5274e9 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_In_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_TRI_In_FEMDef.hpp @@ -24,18 +24,18 @@ namespace Intrepid2 { // ------------------------------------------------------------------------------------- namespace Impl { -template +template template +typename InputViewType, +typename WorkViewType, +typename VinvViewType> KOKKOS_INLINE_FUNCTION void -Basis_HDIV_TRI_In_FEM::Serial:: -getValues( /* */ OutputViewType output, - const inputViewType input, - /* */ workViewType work, - const vinvViewType coeffs ) { +Basis_HDIV_TRI_In_FEM::Serial:: +getValues( OutputViewType output, + const InputViewType input, + WorkViewType work, + const VinvViewType coeffs ) { constexpr ordinal_type spaceDim = 2; const ordinal_type @@ -52,17 +52,17 @@ getValues( /* */ OutputViewType output, } } - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); auto ptr = work.data(); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - const viewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); - workViewType dummyView; + const ViewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); + ViewType dummyView; Impl::Basis_HGRAD_TRI_Cn_FEM_ORTH:: - Serial::getValues(phis, input, dummyView, order); + Serial::getValues(phis, input, dummyView, order); for (ordinal_type i=0;i::getValues(phis, input, workView, order); @@ -447,5 +447,65 @@ Basis_HDIV_TRI_In_FEM( const ordinal_type order, posDfOrd); } } + + template + void + Basis_HDIV_TRI_In_FEM::getScratchSpaceSize( + 
ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + ordinal_type scalarWorkViewExtent = (operatorType == OPERATOR_VALUE) ? this->basisCardinality_ : 5*this->basisCardinality_; + perThreadSpaceSize = scalarWorkViewExtent*get_dimension_scalar(inputPoints)*sizeof(scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HDIV_TRI_In_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HDIV_TRI_In_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using WorkViewType = Kokkos::DynRankView< scalarType, typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + ordinal_type scalarSizePerPoint = (operatorType == OPERATOR_VALUE) ? 
this->basisCardinality_ : 5*this->basisCardinality_; + ordinal_type sizePerPoint = scalarSizePerPoint*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HDIV_TRI_In_FEM::Serial::getValues( output, input, work, this->coeffs_ ); + }); + break; + case OPERATOR_DIV: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HDIV_TRI_In_FEM::Serial::getValues( output, input, work, this->coeffs_ ); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HDIV_TRI_In_FEM): getValues not implemented for this operator"); + } + } + } + } // namespace Intrepid2 + #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_WEDGE_I1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_WEDGE_I1_FEM.hpp index d30c7d1ee4a3..56a257b9ce54 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_WEDGE_I1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_WEDGE_I1_FEM.hpp @@ -159,6 +159,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& 
perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_WEDGE_I1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_WEDGE_I1_FEMDef.hpp index 79dabbc6bfa6..ceefba8ac59e 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_WEDGE_I1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HDIV_WEDGE_I1_FEMDef.hpp @@ -183,7 +183,60 @@ namespace Intrepid2 { this->dofCoeffs_ = Kokkos::create_mirror_view(typename DT::memory_space(), dofCoeffs); Kokkos::deep_copy(this->dofCoeffs_, dofCoeffs); + + } + + template + void + Basis_HDIV_WEDGE_I1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; } + template + KOKKOS_INLINE_FUNCTION + void + Basis_HDIV_WEDGE_I1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HDIV_WEDGE_I1_FEM::getValues), The capability of selecting 
subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HDIV_WEDGE_I1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_DIV: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HDIV_WEDGE_I1_FEM::Serial::getValues( output, input); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HDIV_WEDGE_I1_FEM::getValues), Operator Type not supported."); + } + } + } + }// namespace Intrepid2 + #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C1_FEM.hpp index 2ea99d81a411..59dfdfff0451 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C1_FEM.hpp @@ -177,6 +177,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename 
DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C1_FEMDef.hpp index 96a5fcf39429..3c2813faa81e 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C1_FEMDef.hpp @@ -413,6 +413,55 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoords_, dofCoords); } + template + void + Basis_HGRAD_HEX_C1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_HEX_C1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim <= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_HEX_C1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, 
Kokkos::ALL() ); + Impl::Basis_HGRAD_HEX_C1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_HEX_C1_FEM::Serial::getValues( output, input); + }); + break; + default: {} + } + } + }// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C2_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C2_FEM.hpp index 9823b1fe23bd..1af419c4300e 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C2_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C2_FEM.hpp @@ -245,6 +245,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C2_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C2_FEMDef.hpp index 22e1a54b74b4..c5a5ec102087 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C2_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_C2_FEMDef.hpp 
@@ -1626,8 +1626,57 @@ namespace Intrepid2 { this->dofCoords_ = Kokkos::create_mirror_view(typename DT::memory_space(), dofCoords); Kokkos::deep_copy(this->dofCoords_, dofCoords); + } + template + void + Basis_HGRAD_HEX_DEG2_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; } + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_HEX_DEG2_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim <= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_HEX_DEG2_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + using SerialValue = typename Impl::Basis_HGRAD_HEX_DEG2_FEM::template Serial; + SerialValue::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + using SerialGrad = typename Impl::Basis_HGRAD_HEX_DEG2_FEM::template Serial; + 
SerialGrad::getValues( output, input); + }); + break; + default: {} + } + } }// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_Cn_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_Cn_FEM.hpp index 4bcefb52d286..987206b0a1e0 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_Cn_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_Cn_FEM.hpp @@ -184,6 +184,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_Cn_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_Cn_FEMDef.hpp index ac50cde72fb3..36139dfb95f4 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_Cn_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_HEX_Cn_FEMDef.hpp @@ -44,14 +44,14 @@ namespace Intrepid2 { const auto input_y = Kokkos::subview(input, Kokkos::ALL(), range_type(1,2)); const auto input_z = Kokkos::subview(input, Kokkos::ALL(), range_type(2,3)); - const ordinal_type dim_s = get_dimension_scalar(work); + const ordinal_type dim_s = get_dimension_scalar(input); auto ptr0 = work.data(); auto ptr1 = work.data()+cardLine*npts*dim_s; auto ptr2 = work.data()+2*cardLine*npts*dim_s; auto ptr3 = 
work.data()+3*cardLine*npts*dim_s; - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView viewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); switch (opType) { case OPERATOR_VALUE: { @@ -382,7 +382,64 @@ namespace Intrepid2 { this->dofCoords_ = Kokkos::create_mirror_view(typename DT::memory_space(), dofCoordsHost); Kokkos::deep_copy(this->dofCoords_, dofCoordsHost); } - + + template + void + Basis_HGRAD_HEX_Cn_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + (void) operatorType; //avoid warning for unused variable + perTeamSpaceSize = 0; + perThreadSpaceSize = 4*this->vinv_.extent(0)*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_HEX_Cn_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_HEX_Cn_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + ordinal_type sizePerPoint = 4*this->vinv_.extent(0)*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + + switch(operatorType) { + case 
OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HGRAD_HEX_Cn_FEM::Serial::getValues( output, input, work, this->vinv_ ); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HGRAD_HEX_Cn_FEM::Serial::getValues( output, input, work, this->vinv_ ); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HGRAD_TET_Cn_FEM): getValues not implemented for this operator"); + } + } + } }// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C1_FEM.hpp index c07fdfd71f04..71ee77e7d55a 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C1_FEM.hpp @@ -161,6 +161,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename 
Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C1_FEMDef.hpp index 4572b5b5c899..05291e830a3d 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C1_FEMDef.hpp @@ -177,6 +177,55 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoords_, dofCoords); } -} + template + void + Basis_HGRAD_LINE_C1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_LINE_C1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim <= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_LINE_C1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, 
Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_LINE_C1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_LINE_C1_FEM::Serial::getValues( output, input); + }); + break; + default: {} + } + } + +}// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C2_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C2_FEM.hpp index 595cc815e9be..4e141fc5ad41 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C2_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C2_FEM.hpp @@ -161,6 +161,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C2_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C2_FEMDef.hpp index c08bd3692a27..cb3157028b0e 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C2_FEMDef.hpp 
+++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_C2_FEMDef.hpp @@ -173,6 +173,55 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoords_, dofCoords); } -} + template + void + Basis_HGRAD_LINE_C2_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_LINE_C2_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_LINE_C2_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_LINE_C2_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_LINE_C2_FEM::Serial::getValues( output, input); + }); + break; + default: {} + } + } + +}// namespace Intrepid2 #endif diff --git 
a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_Cn_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_Cn_FEM.hpp index c5e0e2308545..ac2bfdbcae0b 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_Cn_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_Cn_FEM.hpp @@ -192,6 +192,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_Cn_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_Cn_FEMDef.hpp index fc116eb32ebe..6140c13821cb 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_Cn_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_LINE_Cn_FEMDef.hpp @@ -23,16 +23,16 @@ namespace Intrepid2 { template template + typename InputViewType, + typename WorkViewType, + typename VinvViewType> KOKKOS_INLINE_FUNCTION void Basis_HGRAD_LINE_Cn_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType vinv, + const InputViewType input, + WorkViewType work, + const VinvViewType vinv, const ordinal_type operatorDn ) { ordinal_type opDn = operatorDn; @@ -42,12 +42,12 @@ namespace Intrepid2 { const ordinal_type 
order = card - 1; const double alpha = 0.0, beta = 0.0; - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); switch (opType) { case OPERATOR_VALUE: { - viewType phis(Kokkos::view_wrap(work.data(), vcprop), card, npts); + ViewType phis(Kokkos::view_wrap(work.data(), vcprop), card, npts); Impl::Basis_HGRAD_LINE_Cn_FEM_JACOBI:: Serial::getValues(phis, input, order, alpha, beta); @@ -75,7 +75,7 @@ namespace Intrepid2 { case OPERATOR_Dn: { // dkcard is always 1 for 1D element const ordinal_type dkcard = 1; - viewType phis(Kokkos::view_wrap(work.data(), vcprop), card, npts, dkcard); + ViewType phis(Kokkos::view_wrap(work.data(), vcprop), card, npts, dkcard); Impl::Basis_HGRAD_LINE_Cn_FEM_JACOBI:: Serial::getValues(phis, input, order, alpha, beta, opDn); @@ -326,22 +326,64 @@ namespace Intrepid2 { posDfOrd); } } + + template + void + Basis_HGRAD_LINE_Cn_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = this->vinv_.extent(0)*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_LINE_Cn_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_LINE_Cn_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = 
inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + ordinal_type sizePerPoint = this->vinv_.extent(0)*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HGRAD_LINE_Cn_FEM::Serial::getValues( output, input, work, this->vinv_ ); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HGRAD_LINE_Cn_FEM::Serial::getValues( output, input, work, this->vinv_ ); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HGRAD_LINE_Cn_FEM): getValues not implemented for this operator"); + } + } + } }// namespace Intrepid2 #endif - - - - - - - - - - - - - - - diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_C1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_C1_FEM.hpp index 36463fc8baec..7f00c2a90a2d 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_C1_FEM.hpp +++ 
b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_C1_FEM.hpp @@ -163,6 +163,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_C1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_C1_FEMDef.hpp index e25212499fbd..078ad0e97178 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_C1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_C1_FEMDef.hpp @@ -256,6 +256,54 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoords_, dofCoords); } -} + template + void + Basis_HGRAD_PYR_C1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_PYR_C1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim 
<= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_PYR_C1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_PYR_C1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_PYR_C1_FEM::Serial::getValues( output, input); + }); + break; + default: {} + } + } + +}// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_I2_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_I2_FEM.hpp index aad5bae97d00..81a09cc56c91 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_I2_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_I2_FEM.hpp @@ -180,6 +180,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename 
DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_I2_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_I2_FEMDef.hpp index 3247c5aaa0b7..a6ce124d1c2b 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_I2_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_PYR_I2_FEMDef.hpp @@ -371,6 +371,54 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoords_, dofCoords); } -} + template + void + Basis_HGRAD_PYR_I2_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_PYR_I2_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim <= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_PYR_I2_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, 
Kokkos::ALL() ); + Impl::Basis_HGRAD_PYR_I2_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_PYR_I2_FEM::Serial::getValues( output, input); + }); + break; + default: {} + } + } + +}// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C1_FEM.hpp index 17af8c1c8685..e6b9d7b10c44 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C1_FEM.hpp @@ -167,6 +167,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C1_FEMDef.hpp index 39504493f063..246a92ac44d5 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C1_FEMDef.hpp +++ 
b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C1_FEMDef.hpp @@ -245,5 +245,63 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoords_, dofCoords); } + template + void + Basis_HGRAD_QUAD_C1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_QUAD_C1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim <= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_QUAD_C1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_QUAD_C1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_QUAD_C1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange 
(team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_QUAD_C1_FEM::Serial::getValues( output, input); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HGRAD_QUAD_C1_FEM::getValues), Operator Type not supported."); + } + } + } + }// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C2_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C2_FEM.hpp index d82127eb1ead..038214825966 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C2_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C2_FEM.hpp @@ -194,6 +194,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C2_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C2_FEMDef.hpp index 4ec411a0ffc1..36ae4378091c 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C2_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_C2_FEMDef.hpp @@ -557,5 +557,66 @@ namespace 
Intrepid2 { Kokkos::deep_copy(this->dofCoords_, dofCoords); } + template + void + Basis_HGRAD_QUAD_DEG2_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_QUAD_DEG2_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim <= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_QUAD_DEG2_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + using SerialValue = typename Impl::Basis_HGRAD_QUAD_DEG2_FEM::template Serial; + SerialValue::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + using SerialGrad = typename Impl::Basis_HGRAD_QUAD_DEG2_FEM::template Serial; + SerialGrad::getValues( output, input); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange 
(team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + using SerialCurl = typename Impl::Basis_HGRAD_QUAD_DEG2_FEM::template Serial; + SerialCurl::getValues( output, input); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HGRAD_QUAD_DEG2_FEM::getValues), Operator Type not supported."); + } + } + } + }// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_Cn_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_Cn_FEM.hpp index d07b46cafb28..00a206c12ec8 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_Cn_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_Cn_FEM.hpp @@ -181,6 +181,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_Cn_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_Cn_FEMDef.hpp index 582b465a6c57..9e232352285e 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_Cn_FEMDef.hpp +++ 
b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_QUAD_Cn_FEMDef.hpp @@ -21,18 +21,18 @@ namespace Intrepid2 { // ------------------------------------------------------------------------------------- namespace Impl { - template + template template + typename InputViewType, + typename WorkViewType, + typename VinvViewType> KOKKOS_INLINE_FUNCTION void - Basis_HGRAD_QUAD_Cn_FEM::Serial:: + Basis_HGRAD_QUAD_Cn_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType vinv, + const InputViewType input, + WorkViewType work, + const VinvViewType vinv, const ordinal_type operatorDn ) { ordinal_type opDn = operatorDn; @@ -43,19 +43,19 @@ namespace Intrepid2 { const auto input_x = Kokkos::subview(input, Kokkos::ALL(), range_type(0,1)); const auto input_y = Kokkos::subview(input, Kokkos::ALL(), range_type(1,2)); - const int dim_s = get_dimension_scalar(work); + const int dim_s = get_dimension_scalar(input); auto ptr0 = work.data(); auto ptr1 = work.data()+cardLine*npts*dim_s; auto ptr2 = work.data()+2*cardLine*npts*dim_s; - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - viewType work_line(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); - viewType output_x(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); - viewType output_y(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); + ViewType work_line(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType output_x(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); + ViewType output_y(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); Impl::Basis_HGRAD_LINE_Cn_FEM::Serial:: getValues(output_x, input_x, work_line, vinv); @@ -73,29 +73,29 @@ namespace Intrepid2 { } case OPERATOR_CURL: { for (auto l=0;l<2;++l) { - viewType 
work_line(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType work_line(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); - viewType output_x, output_y; + ViewType output_x, output_y; - typename workViewType::value_type s = 0.0; + typename WorkViewType::value_type s = 0.0; if (l) { // l = 1 - output_x = viewType(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts, 1); + output_x = ViewType(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts, 1); Impl::Basis_HGRAD_LINE_Cn_FEM::Serial:: getValues(output_x, input_x, work_line, vinv, 1); - output_y = viewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); + output_y = ViewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); Impl::Basis_HGRAD_LINE_Cn_FEM::Serial:: getValues(output_y, input_y, work_line, vinv); s = -1.0; } else { // l = 0 - output_x = viewType(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); + output_x = ViewType(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); Impl::Basis_HGRAD_LINE_Cn_FEM::Serial:: getValues(output_x, input_x, work_line, vinv); - output_y = viewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts, 1); + output_y = ViewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts, 1); Impl::Basis_HGRAD_LINE_Cn_FEM::Serial:: getValues(output_y, input_y, work_line, vinv, 1); @@ -122,33 +122,33 @@ namespace Intrepid2 { case OPERATOR_D8: case OPERATOR_D9: case OPERATOR_D10: - opDn = getOperatorOrder(opType); + opDn = getOperatorOrder(OpType); case OPERATOR_Dn: { const auto dkcard = opDn + 1; for (auto l=0;l:: getValues(output_x, input_x, work_line, vinv, mult_x); } else { - output_x = viewType(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); + output_x = ViewType(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); Impl::Basis_HGRAD_LINE_Cn_FEM::Serial:: getValues(output_x, input_x, work_line, vinv); } if (mult_y) { - output_y = viewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts, 1); + output_y = ViewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts, 1); 
Impl::Basis_HGRAD_LINE_Cn_FEM::Serial:: getValues(output_y, input_y, work_line, vinv, mult_y); } else { - output_y = viewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); + output_y = ViewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); Impl::Basis_HGRAD_LINE_Cn_FEM::Serial:: getValues(output_y, input_y, work_line, vinv); } @@ -357,7 +357,72 @@ namespace Intrepid2 { this->dofCoords_ = Kokkos::create_mirror_view(typename DT::memory_space(), dofCoordsHost); Kokkos::deep_copy(this->dofCoords_, dofCoordsHost); } - -}// namespace Intrepid2 + + template + void + Basis_HGRAD_QUAD_Cn_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 3*this->vinv_.extent(0)*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_QUAD_Cn_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_QUAD_Cn_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + ordinal_type sizePerPoint = 3*this->vinv_.extent(0)*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + + 
switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HGRAD_QUAD_Cn_FEM::Serial::getValues( output, input, work, this->vinv_ ); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HGRAD_QUAD_Cn_FEM::Serial::getValues( output, input, work, this->vinv_ ); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HGRAD_QUAD_Cn_FEM::Serial::getValues( output, input, work, this->vinv_ ); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HGRAD_QUAD_Cn_FEM): getValues not implemented for this operator"); + } + } + } + +} // namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C1_FEM.hpp index a2bd5cce665e..d8ed43bad7c7 100644 --- 
a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C1_FEM.hpp @@ -164,6 +164,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C1_FEMDef.hpp index 65d632b1b578..c8e0cc996c65 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C1_FEMDef.hpp @@ -202,5 +202,54 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoords_, dofCoords); } + template + void + Basis_HGRAD_TET_C1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_TET_C1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const 
ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim <= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_TET_C1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_TET_C1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_TET_C1_FEM::Serial::getValues( output, input); + }); + break; + default: {} + } + } + }// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C2_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C2_FEM.hpp index 3b544a29b8b9..703eef86b224 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C2_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C2_FEM.hpp @@ -183,6 +183,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename 
Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C2_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C2_FEMDef.hpp index 36aa3d7df7b8..7f4f39634cd0 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C2_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_C2_FEMDef.hpp @@ -323,5 +323,54 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoords_, dofCoords); } + template + void + Basis_HGRAD_TET_C2_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_TET_C2_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim <= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_TET_C2_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() 
); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_TET_C2_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_TET_C2_FEM::Serial::getValues( output, input); + }); + break; + default: {} + } + } + }// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_COMP12_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_COMP12_FEM.hpp index 2bd2814a2d6e..6cb9802be376 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_COMP12_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_COMP12_FEM.hpp @@ -203,12 +203,23 @@ namespace Intrepid2 { operatorType); } - /** \brief Returns spatial locations (coordinates) of degrees of freedom on a - reference Tetrahedron. 
+ virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; - \param DofCoords [out] - array with the coordinates of degrees of freedom, - dimensioned (F,D) - */ virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_COMP12_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_COMP12_FEMDef.hpp index ca7c10d67005..09888eddc924 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_COMP12_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_COMP12_FEMDef.hpp @@ -408,6 +408,55 @@ namespace Intrepid2 { this->dofCoords_ = Kokkos::create_mirror_view(typename DT::memory_space(), dofCoords); Kokkos::deep_copy(this->dofCoords_, dofCoords); } -} + template + void + Basis_HGRAD_TET_COMP12_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_TET_COMP12_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const 
ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim <= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_TET_COMP12_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_TET_COMP12_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_TET_COMP12_FEM::Serial::getValues( output, input); + }); + break; + default: {} + } + } + +}// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_Cn_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_Cn_FEM.hpp index 39b8900291f4..d23d95594bdb 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_Cn_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_Cn_FEM.hpp @@ -62,53 +62,57 @@ namespace Intrepid2 { /** \brief See Intrepid2::Basis_HGRAD_TET_Cn_FEM */ - template + template struct Serial { - template + template KOKKOS_INLINE_FUNCTION static void - getValues( outputValueViewType outputValues, - const inputPointViewType inputPoints, - workViewType work, - const vinvViewType vinv ); + getValues( OutputValueViewType outputValues, + const InputPointViewType inputPoints, + WorkViewType work, + const 
VinvViewType vinv, + const ordinal_type order); }; template + typename OutputValueValueType, class ...OutputValueProperties, + typename InputPointValueType, class ...InputPointProperties, + typename VinvValueType, class ...VinvProperties> static void getValues( const typename DeviceType::execution_space& space, - Kokkos::DynRankView outputValues, - const Kokkos::DynRankView inputPoints, - const Kokkos::DynRankView vinv, + Kokkos::DynRankView outputValues, + const Kokkos::DynRankView inputPoints, + const Kokkos::DynRankView vinv, + const ordinal_type order, const EOperator operatorType); /** \brief See Intrepid2::Basis_HGRAD_TET_Cn_FEM */ - template struct Functor { - outputValueViewType _outputValues; - const inputPointViewType _inputPoints; - const vinvViewType _vinv; - workViewType _work; + OutputValueViewType _outputValues; + const InputPointViewType _inputPoints; + const VinvViewType _vinv; + WorkViewType _work; + const ordinal_type _order; KOKKOS_INLINE_FUNCTION - Functor( outputValueViewType outputValues_, - inputPointViewType inputPoints_, - vinvViewType vinv_, - workViewType work_) + Functor( OutputValueViewType outputValues_, + InputPointViewType inputPoints_, + VinvViewType vinv_, + WorkViewType work_, + ordinal_type order_) : _outputValues(outputValues_), _inputPoints(inputPoints_), - _vinv(vinv_), _work(work_) {} + _vinv(vinv_), _work(work_), _order(order_) {} KOKKOS_INLINE_FUNCTION void operator()(const size_type iter) const { @@ -118,15 +122,15 @@ namespace Intrepid2 { const auto ptRange = Kokkos::pair(ptBegin, ptEnd); const auto input = Kokkos::subview( _inputPoints, ptRange, Kokkos::ALL() ); - typename workViewType::pointer_type ptr = _work.data() + _work.extent(0)*ptBegin*get_dimension_scalar(_work); + typename WorkViewType::pointer_type ptr = _work.data() + _work.extent(0)*ptBegin*get_dimension_scalar(_work); auto vcprop = Kokkos::common_view_alloc_prop(_work); - workViewType work(Kokkos::view_wrap(ptr,vcprop), (ptEnd-ptBegin)*_work.extent(0)); + 
WorkViewType work(Kokkos::view_wrap(ptr,vcprop), (ptEnd-ptBegin)*_work.extent(0)); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE : { auto output = Kokkos::subview( _outputValues, Kokkos::ALL(), ptRange ); - Serial::getValues( output, input, work, _vinv ); + Serial::getValues( output, input, work, _vinv, _order ); break; } case OPERATOR_GRAD : @@ -135,7 +139,7 @@ namespace Intrepid2 { //case OPERATOR_D3 : { auto output = Kokkos::subview( _outputValues, Kokkos::ALL(), ptRange, Kokkos::ALL() ); - Serial::getValues( output, input, work, _vinv ); + Serial::getValues( output, input, work, _vinv, _order ); break; } default: { @@ -204,9 +208,29 @@ namespace Intrepid2 { outputValues, inputPoints, this->vinv_, + this->basisDegree_, operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + + + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_Cn_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_Cn_FEMDef.hpp index c2c5aaf6cbe3..1c8715525bc0 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_Cn_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TET_Cn_FEMDef.hpp @@ -24,44 +24,36 @@ namespace Intrepid2 { // ------------------------------------------------------------------------------------- namespace Impl { -template +template template 
+typename InputViewType, +typename WorkViewType, +typename VinvViewType> KOKKOS_INLINE_FUNCTION void -Basis_HGRAD_TET_Cn_FEM::Serial:: +Basis_HGRAD_TET_Cn_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType vinv ) { + const InputViewType input, + WorkViewType work, + const VinvViewType vinv, + const ordinal_type order ) { constexpr ordinal_type spaceDim = 3; const ordinal_type card = vinv.extent(0), npts = input.extent(0); - // compute order - ordinal_type order = 0; - for (ordinal_type p=0;p<=Parameters::MaxOrder;++p) { - if (card == Intrepid2::getPnCardinality(p)) { - order = p; - break; - } - } - - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); auto ptr = work.data(); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - const viewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); - viewType dummyView; + const ViewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); + ViewType dummyView; Impl::Basis_HGRAD_TET_Cn_FEM_ORTH:: - Serial::getValues(phis, input, dummyView, order); + Serial::getValues(phis, input, dummyView, order); for (ordinal_type i=0;i::getValues(phis, input, workView, order); + Serial::getValues(phis, input, workView, order); for (ordinal_type i=0;i(); //(orDn + 1); - const viewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts, dkcard); - viewType dummyView; + const ordinal_type dkcard = getDkCardinality(); //(orDn + 1); + const ViewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts, dkcard); + ViewType dummyView; Impl::Basis_HGRAD_TET_Cn_FEM_ORTH:: - Serial::getValues(phis, input, dummyView, order); + Serial::getValues(phis, input, dummyView, order); for (ordinal_type i=0;i outputValues, const Kokkos::DynRankView inputPoints, const Kokkos::DynRankView vinv, + const ordinal_type order, const EOperator 
operatorType) { typedef Kokkos::DynRankView outputValueViewType; typedef Kokkos::DynRankView inputPointViewType; @@ -156,7 +149,7 @@ getValues( workViewType work(Kokkos::view_alloc(space, "Basis_HGRAD_TET_Cn_FEM::getValues::work", vcprop), cardinality, inputPoints.extent(0)); typedef Functor FunctorType; - Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work) ); + Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work, order) ); break; } case OPERATOR_GRAD: @@ -164,23 +157,16 @@ getValues( workViewType work(Kokkos::view_alloc(space, "Basis_HGRAD_TET_Cn_FEM::getValues::work", vcprop), cardinality*(2*spaceDim+1), inputPoints.extent(0)); typedef Functor FunctorType; - Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work) ); + Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work, order) ); break; } case OPERATOR_D2: { typedef Functor FunctorType; workViewType work(Kokkos::view_alloc(space, "Basis_HGRAD_TET_Cn_FEM::getValues::work", vcprop), cardinality*outputValues.extent(2), inputPoints.extent(0)); - Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work) ); + Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work, order) ); break; } - /* case OPERATOR_D3: { - typedef Functor FunctorType; - workViewType work(Kokkos::view_alloc("Basis_HGRAD_TET_Cn_FEM::getValues::work", vcprop), cardinality, inputPoints.extent(0), outputValues.extent(2)); - Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work) ); - break; - }*/ default: { INTREPID2_TEST_FOR_EXCEPTION( true , std::invalid_argument, ">>> ERROR (Basis_HGRAD_TET_Cn_FEM): Operator type not implemented" ); @@ -431,5 +417,65 @@ Basis_HGRAD_TET_Cn_FEM( const ordinal_type order, posDfOrd); } } + + template + void + Basis_HGRAD_TET_Cn_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const 
PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = getWorkSizePerPoint(operatorType)*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_TET_Cn_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_TET_Cn_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + constexpr ordinal_type spaceDim = 3; + auto sizePerPoint = (operatorType==OPERATOR_VALUE) ? 
+ this->vinv_.extent(0)*get_dimension_scalar(inputPoints) : + (2*spaceDim+1)*this->vinv_.extent(0)*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HGRAD_TET_Cn_FEM::Serial::getValues( output, input, work, this->vinv_, this->basisDegree_); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HGRAD_TET_Cn_FEM::Serial::getValues( output, input, work, this->vinv_, this->basisDegree_); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HGRAD_TET_Cn_FEM): getValues not implemented for this operator"); + } + } + } + } // namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C1_FEM.hpp index 46349310b210..81439bdf3f50 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C1_FEM.hpp @@ -162,6 +162,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + 
ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C1_FEMDef.hpp index e771ae7a3ee0..bc926788c290 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C1_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C1_FEMDef.hpp @@ -204,5 +204,63 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoords_, dofCoords); } + template + void + Basis_HGRAD_TRI_C1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_TRI_C1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim <= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_TRI_C1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; 
//avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_TRI_C1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_TRI_C1_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_TRI_C1_FEM::Serial::getValues( output, input); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HGRAD_TRI_C1_FEM::getValues), Operator Type not supported."); + } + } + } + }// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C2_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C2_FEM.hpp index 9eb45f9c2716..627fa113720e 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C2_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C2_FEM.hpp @@ -173,6 +173,23 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const 
override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C2_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C2_FEMDef.hpp index 87bb96e2bbe0..86df77f41a27 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C2_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_C2_FEMDef.hpp @@ -263,5 +263,63 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoords_, dofCoords); } + template + void + Basis_HGRAD_TRI_C2_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_TRI_C2_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim <= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_TRI_C2_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: 
+ Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_TRI_C2_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_TRI_C2_FEM::Serial::getValues( output, input); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_TRI_C2_FEM::Serial::getValues( output, input); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HGRAD_TRI_C2_FEM::getValues), Operator Type not supported."); + } + } + } + }// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_Cn_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_Cn_FEM.hpp index c8bc97c3fb76..17ada895efe0 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_Cn_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_Cn_FEM.hpp @@ -60,53 +60,57 @@ namespace Intrepid2 { work is a rank 1 view having the same value_type of inputPoints and having size equal to getWorkSizePerPoint()*inputPoints.extent(0); */ - template + template struct Serial { - template + template KOKKOS_INLINE_FUNCTION static void - getValues( outputValueViewType outputValues, - const inputPointViewType inputPoints, - workViewType work, - const 
vinvViewType vinv ); + getValues( OutputValueViewType outputValues, + const InputPointViewType inputPoints, + WorkViewType work, + const VinvViewType vinv, + const ordinal_type order); }; template + typename OutputValueValueType, class ...OutputValueProperties, + typename InputPointValueType, class ...InputPointProperties, + typename VinvValueType, class ...VinvProperties> static void - getValues(const typename DeviceType::execution_space& space, - Kokkos::DynRankView outputValues, - const Kokkos::DynRankView inputPoints, - const Kokkos::DynRankView vinv, - const EOperator operatorType); + getValues( const typename DeviceType::execution_space& space, + Kokkos::DynRankView outputValues, + const Kokkos::DynRankView inputPoints, + const Kokkos::DynRankView vinv, + const ordinal_type order, + const EOperator operatorType); /** \brief See Intrepid2::Basis_HGRAD_TRI_Cn_FEM */ - template struct Functor { - outputValueViewType _outputValues; - const inputPointViewType _inputPoints; - const vinvViewType _vinv; - workViewType _work; + OutputValueViewType _outputValues; + const InputPointViewType _inputPoints; + const VinvViewType _vinv; + WorkViewType _work; + const ordinal_type _order; KOKKOS_INLINE_FUNCTION - Functor( outputValueViewType outputValues_, - inputPointViewType inputPoints_, - vinvViewType vinv_, - workViewType work_) + Functor( OutputValueViewType outputValues_, + InputPointViewType inputPoints_, + VinvViewType vinv_, + WorkViewType work_, + ordinal_type order_) : _outputValues(outputValues_), _inputPoints(inputPoints_), - _vinv(vinv_), _work(work_) {} + _vinv(vinv_), _work(work_), _order(order_) {} KOKKOS_INLINE_FUNCTION void operator()(const size_type iter) const { @@ -116,22 +120,22 @@ namespace Intrepid2 { const auto ptRange = Kokkos::pair(ptBegin, ptEnd); const auto input = Kokkos::subview( _inputPoints, ptRange, Kokkos::ALL() ); - typename workViewType::pointer_type ptr = _work.data() + _work.extent(0)*ptBegin*get_dimension_scalar(_work); + typename 
WorkViewType::pointer_type ptr = _work.data() + _work.extent(0)*ptBegin*get_dimension_scalar(_work); auto vcprop = Kokkos::common_view_alloc_prop(_work); - workViewType work(Kokkos::view_wrap(ptr,vcprop), (ptEnd-ptBegin)*_work.extent(0)); + WorkViewType work(Kokkos::view_wrap(ptr,vcprop), (ptEnd-ptBegin)*_work.extent(0)); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE : { auto output = Kokkos::subview( _outputValues, Kokkos::ALL(), ptRange ); - Serial::getValues( output, input, work, _vinv ); + Serial::getValues( output, input, work, _vinv, _order ); break; } case OPERATOR_CURL: case OPERATOR_D1: case OPERATOR_D2: { auto output = Kokkos::subview( _outputValues, Kokkos::ALL(), ptRange, Kokkos::ALL() ); - Serial::getValues( output, input, work, _vinv ); + Serial::getValues( output, input, work, _vinv, _order ); break; } default: { @@ -200,9 +204,29 @@ namespace Intrepid2 { outputValues, inputPoints, this->vinv_, + this->basisDegree_, operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + + + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_Cn_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_Cn_FEMDef.hpp index 681148713a06..c7b7a40cfa7b 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_Cn_FEMDef.hpp +++ 
b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_Cn_FEMDef.hpp @@ -23,44 +23,36 @@ namespace Intrepid2 { // ------------------------------------------------------------------------------------- namespace Impl { -template +template template +typename InputViewType, +typename WorkViewType, +typename VinvViewType> KOKKOS_INLINE_FUNCTION void -Basis_HGRAD_TRI_Cn_FEM::Serial:: +Basis_HGRAD_TRI_Cn_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType vinv ) { + const InputViewType input, + WorkViewType work, + const VinvViewType vinv, + const ordinal_type order ) { constexpr ordinal_type spaceDim = 2; const ordinal_type card = vinv.extent(0), npts = input.extent(0); - // compute order - ordinal_type order = 0; - for (ordinal_type p=0;p<=Parameters::MaxOrder;++p) { - if (card == Intrepid2::getPnCardinality(p) ) { - order = p; - break; - } - } - - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); auto ptr = work.data(); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - const viewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); - viewType dummyView; + const ViewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); + ViewType dummyView; Impl::Basis_HGRAD_TRI_Cn_FEM_ORTH:: - Serial::getValues(phis, input, dummyView, order); + Serial::getValues(phis, input, dummyView, order); for (ordinal_type i=0;i::getValues(phis, input, workView, order); + Serial::getValues(phis, input, workView, order); for (ordinal_type i=0;i(); //(orDn + 1); - const viewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts, dkcard); - viewType dummyView; + const ordinal_type dkcard = getDkCardinality(); //(orDn + 1); + const ViewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts, dkcard); + ViewType dummyView; Impl::Basis_HGRAD_TRI_Cn_FEM_ORTH:: - 
Serial::getValues(phis, input, dummyView, order); + Serial::getValues(phis, input, dummyView, order); for (ordinal_type i=0;i outputValues, const Kokkos::DynRankView inputPoints, const Kokkos::DynRankView vinv, + const ordinal_type order, const EOperator operatorType) { typedef Kokkos::DynRankView outputValueViewType; typedef Kokkos::DynRankView inputPointViewType; @@ -175,7 +168,7 @@ getValues( workViewType work(Kokkos::view_alloc(space, "Basis_HGRAD_TRI_Cn_FEM::getValues::work", vcprop), cardinality, inputPoints.extent(0)); typedef Functor FunctorType; - Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work) ); + Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work, order) ); break; } case OPERATOR_GRAD: @@ -183,30 +176,23 @@ getValues( workViewType work(Kokkos::view_alloc(space, "Basis_HGRAD_TRI_Cn_FEM::getValues::work", vcprop), cardinality*(2*spaceDim+1), inputPoints.extent(0)); typedef Functor FunctorType; - Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work) ); + Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work, order) ); break; } case OPERATOR_CURL: { workViewType work(Kokkos::view_alloc(space, "Basis_HGRAD_TRI_Cn_FEM::getValues::work", vcprop), cardinality*(2*spaceDim+1), inputPoints.extent(0)); typedef Functor FunctorType; - Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work) ); + Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work, order) ); break; } case OPERATOR_D2: { typedef Functor FunctorType; workViewType work(Kokkos::view_alloc(space, "Basis_HGRAD_TRI_Cn_FEM::getValues::work", vcprop), cardinality*outputValues.extent(2), inputPoints.extent(0)); - Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work) ); + Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work, order) ); break; } - /* case OPERATOR_D3: { - typedef Functor 
FunctorType; - workViewType work(Kokkos::view_alloc("Basis_HGRAD_TRI_Cn_FEM::getValues::work", vcprop), cardinality, inputPoints.extent(0), outputValues.extent(2)); - Kokkos::parallel_for( policy, FunctorType(outputValues, inputPoints, vinv, work) ); - break; - }*/ default: { INTREPID2_TEST_FOR_EXCEPTION( true , std::invalid_argument, ">>> ERROR (Basis_HGRAD_TRI_Cn_FEM): Operator type not implemented" ); @@ -242,7 +228,7 @@ Basis_HGRAD_TRI_Cn_FEM( const ordinal_type order, PointTools::getLattice( dofCoords, cellTopo, order, offset, - pointType_ ); + this->pointType_ ); this->dofCoords_ = Kokkos::create_mirror_view(typename DT::memory_space(), dofCoords); Kokkos::deep_copy(this->dofCoords_, dofCoords); @@ -384,5 +370,74 @@ Basis_HGRAD_TRI_Cn_FEM( const ordinal_type order, posDfOrd); } } + + template + void + Basis_HGRAD_TRI_Cn_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = getWorkSizePerPoint(operatorType)*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_TRI_Cn_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_TRI_Cn_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename 
DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + constexpr ordinal_type spaceDim = 2; + auto sizePerPoint = (operatorType==OPERATOR_VALUE) ? + this->vinv_.extent(0)*get_dimension_scalar(inputPoints) : + (2*spaceDim+1)*this->vinv_.extent(0)*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HGRAD_TRI_Cn_FEM::Serial::getValues( output, input, work, this->vinv_, this->basisDegree_); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HGRAD_TRI_Cn_FEM::Serial::getValues( output, input, work, this->vinv_, this->basisDegree_); + }); + break; + case OPERATOR_CURL: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type(pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt,pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HGRAD_TRI_Cn_FEM::Serial::getValues( output, input, work, this->vinv_, this->basisDegree_); + }); + 
break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HGRAD_TRI_Cn_FEM): getValues not implemented for this operator"); + } + } + } + } // namespace Intrepid2 + #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_Cn_FEM_ORTHDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_Cn_FEM_ORTHDef.hpp index 6f5e5abb1dd0..a2b4271518ba 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_Cn_FEM_ORTHDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_TRI_Cn_FEM_ORTHDef.hpp @@ -198,69 +198,8 @@ void OrthPolynomialTri::ge const inputViewType /* input */, workViewType /* work */, const ordinal_type /* order */ ) { -#if 0 //#ifdef HAVE_INTREPID2_SACADO - -constexpr ordinal_type spaceDim = 2; -constexpr ordinal_type maxCard = Intrepid2::getPnCardinality(); - -typedef typename OutputViewType::value_type value_type; -typedef Sacado::Fad::SFad fad_type; - -const ordinal_type -npts = input.extent(0), -card = output.extent(0); - -// use stack buffer -fad_type inBuf[Parameters::MaxNumPtsPerBasisEval][spaceDim], -outBuf[maxCard][Parameters::MaxNumPtsPerBasisEval][n]; - -typedef typename inputViewType::memory_space memory_space; -typedef typename Kokkos::View outViewType; -typedef typename Kokkos::View inViewType; -auto vcprop = Kokkos::common_view_alloc_prop(input); - -inViewType in(Kokkos::view_wrap((value_type*)&inBuf[0][0], vcprop), npts, spaceDim); -outViewType out(Kokkos::view_wrap((value_type*)&outBuf[0][0][0], vcprop), card, npts, n); - -for (ordinal_type i=0;i outViewType_; -outViewType_ workView; -if (n==2) { - //char outBuf[bufSize*sizeof(typename inViewType::value_type)]; - fad_type outBuf[maxCard][Parameters::MaxNumPtsPerBasisEval][spaceDim+1]; - auto vcprop = Kokkos::common_view_alloc_prop(in); - workView = outViewType_( Kokkos::view_wrap((value_type*)&outBuf[0][0][0], vcprop), card, npts, spaceDim+1); -} -OrthPolynomialTri::generate(out, in, workView, 
order); - -for (ordinal_type i=0;i 0) { - //n=2: (f_x)_x, (f_y)_x - //n=3: (f_xx)_x, (f_xy)_x, (f_yy)_x - ordinal_type i_Dnm1 = i_dy; - output.access(i,j,i_Dn) = out(i,j,i_Dnm1).dx(0); - } - else { - //n=2: (f_y)_y, (f_z)_y - //n=3: (f_yy)_y - ordinal_type i_Dnm1 = i_dy-1; - output.access(i,j,i_Dn) = out(i,j,i_Dnm1).dx(1); - } - } - } -#else INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HGRAD_TRI_Cn_FEM_ORTH::OrthPolynomialTri) Computing of second and higher-order derivatives is not currently supported"); -#endif } diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C1_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C1_FEM.hpp index 15daedfbfe49..5b8b73634bb4 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C1_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C1_FEM.hpp @@ -166,6 +166,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C1_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C1_FEMDef.hpp index 9d2c461edca2..8d76318a49e0 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C1_FEMDef.hpp +++ 
b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C1_FEMDef.hpp @@ -245,5 +245,54 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoords_, dofCoords); } + template + void + Basis_HGRAD_WEDGE_C1_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; /* no scratch is requested: the team-level getValues of this basis discards scratchStorage */ + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_WEDGE_C1_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim <= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_WEDGE_C1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); /* one TeamThreadRange iteration per input point */ + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_WEDGE_C1_FEM::template Serial::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + Impl::Basis_HGRAD_WEDGE_C1_FEM::template Serial::getValues( output, input); + }); + break; + default: { /* unsupported operator: abort instead of returning with outputValues left unwritten */ INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HGRAD_WEDGE_C1_FEM::getValues), Operator Type not supported."); } + } + } + }// namespace Intrepid2 #endif 
diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C2_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C2_FEM.hpp index d4cb38e7ca55..c952afcf0e6e 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C2_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C2_FEM.hpp @@ -215,6 +215,23 @@ namespace Intrepid2 { operatorType );; } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C2_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C2_FEMDef.hpp index 363d21ad19ea..9f5327d94187 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C2_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HGRAD_WEDGE_C2_FEMDef.hpp @@ -30,12 +30,13 @@ namespace Intrepid2 { Basis_HGRAD_WEDGE_DEG2_FEM::Serial:: getValues( OutputViewType output, const inputViewType input ) { + typedef typename inputViewType::value_type value_type; switch (opType) { case OPERATOR_VALUE: { - const auto x = input(0); - const auto y = input(1); - const auto z = input(2); - const auto w = 1.0 - x - y; + const value_type x = input(0); + const value_type y = input(1); + const value_type z = input(2); + const value_type w = 1.0 - x - y; // 
output is a rank-1 array with dimensions (basisCardinality_) if constexpr (!serendipity) { @@ -80,9 +81,9 @@ namespace Intrepid2 { break; } case OPERATOR_GRAD: { - const auto x = input(0); - const auto y = input(1); - const auto z = input(2); + const value_type x = input(0); + const value_type y = input(1); + const value_type z = input(2); if constexpr (!serendipity) { output.access(0, 0) = ((-3 + 4*x + 4*y)*(-1 + z)*z)/2.; @@ -158,7 +159,7 @@ namespace Intrepid2 { output.access(17, 1) = 4*(-1 + x + 2*y)*(-1 + z*z); output.access(17, 2) = 8*y*(-1 + x + y)*z; } else { - const auto w = 1.0 - x - y; + const value_type w = 1.0 - x - y; output.access(0, 0) = -(2.0*w - 1.0 - 0.5*z)*(1.0 - z); output.access(0, 1) = -(2.0*w - 1.0 - 0.5*z)*(1.0 - z); @@ -223,9 +224,9 @@ namespace Intrepid2 { break; } case OPERATOR_D2: { - const auto x = input(0); - const auto y = input(1); - const auto z = input(2); + const value_type x = input(0); + const value_type y = input(1); + const value_type z = input(2); if constexpr (!serendipity) { output.access(0, 0) = 2.*(-1. 
+ z)*z; @@ -356,7 +357,7 @@ namespace Intrepid2 { } else { //serendipity element - const auto w = 1.0 - x - y; + const value_type w = 1.0 - x - y; output.access(0, 0) = 2.0*(1.0 - z); output.access(0, 1) = 2.0*(1.0 - z); output.access(0, 2) = 2.0*w - 0.5 - z; @@ -466,9 +467,9 @@ namespace Intrepid2 { } case OPERATOR_D3: { if constexpr (!serendipity) { - const auto x = input(0); - const auto y = input(1); - const auto z = input(2); + const value_type x = input(0); + const value_type y = input(1); + const value_type z = input(2); output.access(0, 0) = 0.; output.access(0, 1) = 0.; @@ -1082,5 +1083,56 @@ namespace Intrepid2 { Kokkos::deep_copy(this->dofCoords_, dofCoords); } + template + void + Basis_HGRAD_WEDGE_DEG2_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 0; + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HGRAD_WEDGE_DEG2_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim <= 0) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HGRAD_WEDGE_DEG2_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + (void) scratchStorage; //avoid unused variable warning + + const int numPoints = inputPoints.extent(0); + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + 
using SerialValue = typename Impl::Basis_HGRAD_WEDGE_DEG2_FEM::template Serial; + SerialValue::getValues( output, input); + }); + break; + case OPERATOR_GRAD: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), pt, Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, pt, Kokkos::ALL() ); + using SerialGrad = typename Impl::Basis_HGRAD_WEDGE_DEG2_FEM::template Serial; + SerialGrad::getValues( output, input); + }); + break; + default: {} + } + } + }// namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_HEX_Cn_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_HEX_Cn_FEM.hpp index 96cf0a64405b..388eb9ccdd1b 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_HEX_Cn_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_HEX_Cn_FEM.hpp @@ -137,20 +137,22 @@ namespace Intrepid2 { class Basis_HVOL_HEX_Cn_FEM : public Basis { public: - using OrdinalTypeArray1DHost = typename Basis::OrdinalTypeArray1DHost; - using OrdinalTypeArray2DHost = typename Basis::OrdinalTypeArray2DHost; - using OrdinalTypeArray3DHost = typename Basis::OrdinalTypeArray3DHost; + using BasisBase = Basis; + + using typename BasisBase::OrdinalTypeArray1DHost; + using typename BasisBase::OrdinalTypeArray2DHost; + using typename BasisBase::OrdinalTypeArray3DHost; + + using typename BasisBase::OutputViewType; + using typename BasisBase::PointViewType ; + using typename BasisBase::ScalarViewType; /** \brief Constructor. 
*/ Basis_HVOL_HEX_Cn_FEM(const ordinal_type order, const EPointType pointType = POINTTYPE_EQUISPACED); - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; - - using Basis::getValues; + using BasisBase::getValues; virtual void @@ -172,6 +174,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_HEX_Cn_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_HEX_Cn_FEMDef.hpp index 652df8ee2689..617eeb9cad84 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_HEX_Cn_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_HEX_Cn_FEMDef.hpp @@ -9,7 +9,7 @@ /** \file Intrepid2_HVOL_HEX_Cn_FEMDef.hpp \brief Definition file for FEM basis functions of degree n for H(vol) functions on HEX cells - \author Created by M. Perego, based on the Intrepid2::HGRAD_HEX_Cn_FEM class + \author Created by M. 
Perego, based on the Intrepid2::HVOL_HEX_Cn_FEM class */ #ifndef __INTREPID2_HVOL_HEX_CN_FEMDEF_HPP__ @@ -20,18 +20,18 @@ namespace Intrepid2 { // ------------------------------------------------------------------------------------- namespace Impl { - template + template template + typename InputViewType, + typename WorkViewType, + typename VinvViewType> KOKKOS_INLINE_FUNCTION void - Basis_HVOL_HEX_Cn_FEM::Serial:: + Basis_HVOL_HEX_Cn_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType vinv, + const InputViewType input, + WorkViewType work, + const VinvViewType vinv, const ordinal_type operatorDn ) { ordinal_type opDn = operatorDn; @@ -43,21 +43,21 @@ namespace Intrepid2 { const auto input_y = Kokkos::subview(input, Kokkos::ALL(), range_type(1,2)); const auto input_z = Kokkos::subview(input, Kokkos::ALL(), range_type(2,3)); - const ordinal_type dim_s = get_dimension_scalar(work); + const ordinal_type dim_s = get_dimension_scalar(input); auto ptr0 = work.data(); auto ptr1 = work.data()+cardLine*npts*dim_s; auto ptr2 = work.data()+2*cardLine*npts*dim_s; auto ptr3 = work.data()+3*cardLine*npts*dim_s; - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - viewType work_line(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); - viewType output_x(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); - viewType output_y(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); - viewType output_z(Kokkos::view_wrap(ptr3, vcprop), cardLine, npts); + ViewType work_line(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType output_x(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); + ViewType output_y(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); + ViewType output_z(Kokkos::view_wrap(ptr3, vcprop), 
cardLine, npts); Impl::Basis_HVOL_LINE_Cn_FEM::Serial:: getValues(output_x, input_x, work_line, vinv); @@ -88,7 +88,7 @@ namespace Intrepid2 { case OPERATOR_D8: case OPERATOR_D9: case OPERATOR_D10: - opDn = getOperatorOrder(opType); + opDn = getOperatorOrder(OpType); case OPERATOR_Dn: { const ordinal_type dkcard = opDn + 1; @@ -105,35 +105,35 @@ namespace Intrepid2 { if (mult_x < 0) { // pass } else { - viewType work_line(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType work_line(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); decltype(work_line) output_x, output_y, output_z; if (mult_x) { - output_x = viewType(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts, 1); + output_x = ViewType(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts, 1); Impl::Basis_HVOL_LINE_Cn_FEM::Serial:: getValues(output_x, input_x, work_line, vinv, mult_x); } else { - output_x = viewType(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); + output_x = ViewType(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); Impl::Basis_HVOL_LINE_Cn_FEM::Serial:: getValues(output_x, input_x, work_line, vinv); } if (mult_y) { - output_y = viewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts, 1); + output_y = ViewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts, 1); Impl::Basis_HVOL_LINE_Cn_FEM::Serial:: getValues(output_y, input_y, work_line, vinv, mult_y); } else { - output_y = viewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); + output_y = ViewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); Impl::Basis_HVOL_LINE_Cn_FEM::Serial:: getValues(output_y, input_y, work_line, vinv); } if (mult_z) { - output_z = viewType(Kokkos::view_wrap(ptr3, vcprop), cardLine, npts, 1); + output_z = ViewType(Kokkos::view_wrap(ptr3, vcprop), cardLine, npts, 1); Impl::Basis_HVOL_LINE_Cn_FEM::Serial:: getValues(output_z, input_z, work_line, vinv, mult_z); } else { - output_z = viewType(Kokkos::view_wrap(ptr3, vcprop), cardLine, npts); + output_z = ViewType(Kokkos::view_wrap(ptr3, vcprop), cardLine, 
npts); Impl::Basis_HVOL_LINE_Cn_FEM::Serial:: getValues(output_z, input_z, work_line, vinv); } @@ -316,7 +316,55 @@ namespace Intrepid2 { this->dofCoords_ = Kokkos::create_mirror_view(typename DT::memory_space(), dofCoordsHost); Kokkos::deep_copy(this->dofCoords_, dofCoordsHost); } - -}// namespace Intrepid2 + + template + void + Basis_HVOL_HEX_Cn_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 4*this->vinv_.extent(0)*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HVOL_HEX_Cn_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HVOL_HEX_Cn_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + auto sizePerPoint = 4*this->vinv_.extent(0)*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = 
Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HVOL_HEX_Cn_FEM::Serial::getValues( output, input, work, this->vinv_, this->basisDegree_); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HVOL_HEX_Cn_FEM): getValues not implemented for this operator"); + } + } + } + +} // namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_LINE_Cn_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_LINE_Cn_FEM.hpp index 380438f33bb4..0be4ce27fba8 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_LINE_Cn_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_LINE_Cn_FEM.hpp @@ -145,15 +145,16 @@ namespace Intrepid2 { : public Basis { public: using BasisBase = Basis; + using HostBasis = Basis_HVOL_LINE_Cn_FEM; - - using OrdinalTypeArray1DHost = typename BasisBase::OrdinalTypeArray1DHost; - using OrdinalTypeArray2DHost = typename BasisBase::OrdinalTypeArray2DHost; - using OrdinalTypeArray3DHost = typename BasisBase::OrdinalTypeArray3DHost; - - using OutputViewType = typename BasisBase::OutputViewType; - using PointViewType = typename BasisBase::PointViewType ; - using ScalarViewType = typename BasisBase::ScalarViewType; + + using typename BasisBase::OrdinalTypeArray1DHost; + using typename BasisBase::OrdinalTypeArray2DHost; + using typename BasisBase::OrdinalTypeArray3DHost; + + using typename BasisBase::OutputViewType; + using typename BasisBase::PointViewType ; + using typename BasisBase::ScalarViewType; /** \brief Constructor. 
*/ @@ -182,6 +183,23 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPointsconst, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_LINE_Cn_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_LINE_Cn_FEMDef.hpp index 3d742d4a30a4..dc8f25d3cd7e 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_LINE_Cn_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_LINE_Cn_FEMDef.hpp @@ -9,7 +9,7 @@ /** \file Intrepid2_HVOL_LINE_Cn_FEMDef.hpp \brief Definition file for FEM basis functions of degree n for H(vol) functions on LINE. - \author Created by M. Perego, based on the Intrepid2::HGRAD_LINE_Cn_FEM class + \author Created by M. 
Perego, based on the Intrepid2::HVOL_LINE_Cn_FEM class */ #ifndef __INTREPID2_HVOL_LINE_CN_FEM_DEF_HPP__ @@ -22,16 +22,16 @@ namespace Intrepid2 { template template + typename InputViewType, + typename WorkViewType, + typename VinvViewType> KOKKOS_INLINE_FUNCTION void Basis_HVOL_LINE_Cn_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType vinv, + const InputViewType input, + WorkViewType work, + const VinvViewType vinv, const ordinal_type operatorDn ) { ordinal_type opDn = operatorDn; @@ -41,12 +41,12 @@ namespace Intrepid2 { const ordinal_type order = card - 1; const double alpha = 0.0, beta = 0.0; - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); switch (opType) { case OPERATOR_VALUE: { - viewType phis(Kokkos::view_wrap(work.data(), vcprop), card, npts); + ViewType phis(Kokkos::view_wrap(work.data(), vcprop), card, npts); Impl::Basis_HGRAD_LINE_Cn_FEM_JACOBI:: Serial::getValues(phis, input, order, alpha, beta); @@ -74,7 +74,7 @@ namespace Intrepid2 { case OPERATOR_Dn: { // dkcard is always 1 for 1D element const ordinal_type dkcard = 1; - viewType phis(Kokkos::view_wrap(work.data(), vcprop), card, npts, dkcard); + ViewType phis(Kokkos::view_wrap(work.data(), vcprop), card, npts, dkcard); Impl::Basis_HGRAD_LINE_Cn_FEM_JACOBI:: Serial::getValues(phis, input, order, alpha, beta, opDn); @@ -289,22 +289,56 @@ namespace Intrepid2 { posDfOrd); } } + + template + void + Basis_HVOL_LINE_Cn_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = this->vinv_.extent(0)*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + 
Basis_HVOL_LINE_Cn_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HVOL_LINE_Cn_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + ordinal_type sizePerPoint = this->vinv_.extent(0)*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HVOL_LINE_Cn_FEM::Serial::getValues( output, input, work, this->vinv_ ); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HVOL_LINE_Cn_FEM): getValues not implemented for this operator"); + } + } + } }// namespace Intrepid2 #endif - - - - - - - - - - - - - - - diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_QUAD_Cn_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_QUAD_Cn_FEM.hpp index 496522a6278c..6329c3cb30ce 100644 --- 
a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_QUAD_Cn_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_QUAD_Cn_FEM.hpp @@ -132,20 +132,22 @@ namespace Intrepid2 { class Basis_HVOL_QUAD_Cn_FEM : public Basis { public: - using OrdinalTypeArray1DHost = typename Basis::OrdinalTypeArray1DHost; - using OrdinalTypeArray2DHost = typename Basis::OrdinalTypeArray2DHost; - using OrdinalTypeArray3DHost = typename Basis::OrdinalTypeArray3DHost; + using BasisBase = Basis; + + using typename BasisBase::OrdinalTypeArray1DHost; + using typename BasisBase::OrdinalTypeArray2DHost; + using typename BasisBase::OrdinalTypeArray3DHost; + + using typename BasisBase::OutputViewType; + using typename BasisBase::PointViewType ; + using typename BasisBase::ScalarViewType; /** \brief Constructor. */ Basis_HVOL_QUAD_Cn_FEM(const ordinal_type order, const EPointType pointType = POINTTYPE_EQUISPACED); - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; - - using Basis::getValues; + using BasisBase::getValues; virtual void @@ -167,6 +169,24 @@ namespace Intrepid2 { operatorType ); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_QUAD_Cn_FEMDef.hpp 
b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_QUAD_Cn_FEMDef.hpp index 2a9e2678b771..f492b6a65f7c 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_QUAD_Cn_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_QUAD_Cn_FEMDef.hpp @@ -19,22 +19,22 @@ namespace Intrepid2 { // ------------------------------------------------------------------------------------- namespace Impl { - - template + + template template + typename InputViewType, + typename WorkViewType, + typename VinvViewType> KOKKOS_INLINE_FUNCTION void - Basis_HVOL_QUAD_Cn_FEM::Serial:: + Basis_HVOL_QUAD_Cn_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType vinv, + const InputViewType input, + WorkViewType work, + const VinvViewType vinv, const ordinal_type operatorDn ) { ordinal_type opDn = operatorDn; - + const ordinal_type cardLine = vinv.extent(0); const ordinal_type npts = input.extent(0); @@ -42,19 +42,19 @@ namespace Intrepid2 { const auto input_x = Kokkos::subview(input, Kokkos::ALL(), range_type(0,1)); const auto input_y = Kokkos::subview(input, Kokkos::ALL(), range_type(1,2)); - const int dim_s = get_dimension_scalar(work); + const ordinal_type dim_s = get_dimension_scalar(input); auto ptr0 = work.data(); auto ptr1 = work.data()+cardLine*npts*dim_s; auto ptr2 = work.data()+2*cardLine*npts*dim_s; - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); - - switch (opType) { + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); + + switch (OpType) { case OPERATOR_VALUE: { - viewType work_line(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); - viewType output_x(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); - viewType output_y(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); + ViewType work_line(Kokkos::view_wrap(ptr0, vcprop), cardLine, npts); + ViewType 
output_x(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); + ViewType output_y(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); Impl::Basis_HVOL_LINE_Cn_FEM::Serial:: getValues(output_x, input_x, work_line, vinv); @@ -81,33 +81,33 @@ namespace Intrepid2 { case OPERATOR_D8: case OPERATOR_D9: case OPERATOR_D10: - opDn = getOperatorOrder(opType); + opDn = getOperatorOrder(OpType); case OPERATOR_Dn: { const auto dkcard = opDn + 1; for (auto l=0;l:: getValues(output_x, input_x, work_line, vinv, mult_x); } else { - output_x = viewType(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); + output_x = ViewType(Kokkos::view_wrap(ptr1, vcprop), cardLine, npts); Impl::Basis_HVOL_LINE_Cn_FEM::Serial:: getValues(output_x, input_x, work_line, vinv); } if (mult_y) { - output_y = viewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts, 1); + output_y = ViewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts, 1); Impl::Basis_HVOL_LINE_Cn_FEM::Serial:: getValues(output_y, input_y, work_line, vinv, mult_y); } else { - output_y = viewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); + output_y = ViewType(Kokkos::view_wrap(ptr2, vcprop), cardLine, npts); Impl::Basis_HVOL_LINE_Cn_FEM::Serial:: getValues(output_y, input_y, work_line, vinv); } @@ -282,7 +282,55 @@ namespace Intrepid2 { this->dofCoords_ = Kokkos::create_mirror_view(typename DT::memory_space(), dofCoordsHost); Kokkos::deep_copy(this->dofCoords_, dofCoordsHost); } - -} + + template + void + Basis_HVOL_QUAD_Cn_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = 3*this->vinv_.extent(0)*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HVOL_QUAD_Cn_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename 
Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HVOL_QUAD_Cn_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + auto sizePerPoint = 3*this->vinv_.extent(0)*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HVOL_QUAD_Cn_FEM::Serial::getValues( output, input, work, this->vinv_, this->basisDegree_); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HVOL_QUAD_Cn_FEM): getValues not implemented for this operator"); + } + } + } + +} // namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TET_Cn_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TET_Cn_FEM.hpp index d47afbf7724f..8f9010f619b8 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TET_Cn_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TET_Cn_FEM.hpp @@ -156,23 +156,23 @@ namespace Intrepid2 { 
class Basis_HVOL_TET_Cn_FEM : public Basis { public: - using OrdinalTypeArray1DHost = typename Basis::OrdinalTypeArray1DHost; - using OrdinalTypeArray2DHost = typename Basis::OrdinalTypeArray2DHost; - using OrdinalTypeArray3DHost = typename Basis::OrdinalTypeArray3DHost; + using BasisBase = Basis; + + using typename BasisBase::OrdinalTypeArray1DHost; + using typename BasisBase::OrdinalTypeArray2DHost; + using typename BasisBase::OrdinalTypeArray3DHost; + + using typename BasisBase::OutputViewType; + using typename BasisBase::PointViewType ; + using typename BasisBase::ScalarViewType; /** \brief Constructor. */ Basis_HVOL_TET_Cn_FEM(const ordinal_type order, const EPointType pointType = POINTTYPE_EQUISPACED); - - - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; - - typedef typename Basis::scalarType scalarType; - - using Basis::getValues; + + using scalarType = typename BasisBase::scalarType; + using BasisBase::getValues; virtual void @@ -194,6 +194,24 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TET_Cn_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TET_Cn_FEMDef.hpp index 
a0945a008159..7927a1e124f6 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TET_Cn_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TET_Cn_FEMDef.hpp @@ -23,18 +23,18 @@ namespace Intrepid2 { namespace Impl { - template + template template + typename InputViewType, + typename WorkViewType, + typename VinvViewType> KOKKOS_INLINE_FUNCTION void - Basis_HVOL_TET_Cn_FEM::Serial:: + Basis_HVOL_TET_Cn_FEM::Serial:: getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType vinv ) { + const InputViewType input, + WorkViewType work, + const VinvViewType vinv ) { constexpr ordinal_type spaceDim = 3; const ordinal_type @@ -50,17 +50,17 @@ namespace Intrepid2 { } } - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); auto ptr = work.data(); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - const viewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); - workViewType dummyView; + const ViewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); + ViewType dummyView; Impl::Basis_HGRAD_TET_Cn_FEM_ORTH:: - Serial::getValues(phis, input, dummyView, order); + Serial::getValues(phis, input, dummyView, order); for (ordinal_type i=0;i::getValues(phis, input, workView, order); + Serial::getValues(phis, input, workView, order); for (ordinal_type i=0;i(); //(orDn + 1); - const viewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts, dkcard); - workViewType dummyView; + const ordinal_type dkcard = getDkCardinality(); //(orDn + 1); + const + ViewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts, dkcard); + ViewType dummyView; Impl::Basis_HGRAD_TET_Cn_FEM_ORTH:: - Serial::getValues(phis, input, dummyView, order); + Serial::getValues(phis, input, dummyView, order); for (ordinal_type i=0;i + void + 
Basis_HVOL_TET_Cn_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = this->vinv_.extent(0)*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HVOL_TET_Cn_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HVOL_TET_Cn_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + auto sizePerPoint = this->vinv_.extent(0)*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HVOL_TET_Cn_FEM::Serial::getValues( output, input, work, this->vinv_); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR 
(Basis_HVOL_TET_Cn_FEM): getValues not implemented for this operator"); + } + } + } + } // namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TRI_Cn_FEM.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TRI_Cn_FEM.hpp index 43a2161c9050..ff20e7426957 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TRI_Cn_FEM.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TRI_Cn_FEM.hpp @@ -151,25 +151,24 @@ namespace Intrepid2 { class Basis_HVOL_TRI_Cn_FEM : public Basis { public: + using BasisBase = Basis; using HostBasis = Basis_HVOL_TRI_Cn_FEM; - - using OrdinalTypeArray1DHost = typename Basis::OrdinalTypeArray1DHost; - using OrdinalTypeArray2DHost = typename Basis::OrdinalTypeArray2DHost; - using OrdinalTypeArray3DHost = typename Basis::OrdinalTypeArray3DHost; + + using typename BasisBase::OrdinalTypeArray1DHost; + using typename BasisBase::OrdinalTypeArray2DHost; + using typename BasisBase::OrdinalTypeArray3DHost; + + using typename BasisBase::OutputViewType; + using typename BasisBase::PointViewType ; + using typename BasisBase::ScalarViewType; /** \brief Constructor. 
*/ Basis_HVOL_TRI_Cn_FEM(const ordinal_type order, const EPointType pointType = POINTTYPE_EQUISPACED); - - - using OutputViewType = typename Basis::OutputViewType; - using PointViewType = typename Basis::PointViewType; - using ScalarViewType = typename Basis::ScalarViewType; - - typedef typename Basis::scalarType scalarType; - - using Basis::getValues; + + using scalarType = typename BasisBase::scalarType; + using BasisBase::getValues; virtual void @@ -191,6 +190,24 @@ namespace Intrepid2 { operatorType); } + virtual void + getScratchSpaceSize( ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType = OPERATOR_VALUE) const override; + + KOKKOS_INLINE_FUNCTION + virtual void + getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename Kokkos::TeamPolicy::member_type& team_member, + const typename DeviceType::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim = -1, + const ordinal_type subcellOrdinal = -1) const override; + + virtual void getDofCoords( ScalarViewType dofCoords ) const override { diff --git a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TRI_Cn_FEMDef.hpp b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TRI_Cn_FEMDef.hpp index f870940f506b..aa6f54065ff6 100644 --- a/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TRI_Cn_FEMDef.hpp +++ b/packages/intrepid2/src/Discretization/Basis/Intrepid2_HVOL_TRI_Cn_FEMDef.hpp @@ -22,18 +22,18 @@ namespace Intrepid2 { // ------------------------------------------------------------------------------------- namespace Impl { -template -template -KOKKOS_INLINE_FUNCTION -void -Basis_HVOL_TRI_Cn_FEM::Serial:: -getValues( OutputViewType output, - const inputViewType input, - workViewType work, - const vinvViewType vinv ) { + template + template + KOKKOS_INLINE_FUNCTION + void + Basis_HVOL_TRI_Cn_FEM::Serial:: 
+ getValues( OutputViewType output, + const InputViewType input, + WorkViewType work, + const VinvViewType vinv ) { constexpr ordinal_type spaceDim = 2; const ordinal_type @@ -49,17 +49,17 @@ getValues( OutputViewType output, } } - typedef typename Kokkos::DynRankView viewType; - auto vcprop = Kokkos::common_view_alloc_prop(work); + typedef typename Kokkos::DynRankView ViewType; + auto vcprop = Kokkos::common_view_alloc_prop(input); auto ptr = work.data(); - switch (opType) { + switch (OpType) { case OPERATOR_VALUE: { - const viewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); - workViewType dummyView; + const ViewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts); + ViewType dummyView; Impl::Basis_HGRAD_TRI_Cn_FEM_ORTH:: - Serial::getValues(phis, input, dummyView, order); + Serial::getValues(phis, input, dummyView, order); for (ordinal_type i=0;i::getValues(phis, input, workView, order); + Serial::getValues(phis, input, workView, order); for (ordinal_type i=0;i(); //(orDn + 1); - const viewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts, dkcard); - workViewType dummyView; + const ordinal_type dkcard = getDkCardinality(); //(orDn + 1); + const ViewType phis(Kokkos::view_wrap(ptr, vcprop), card, npts, dkcard); + ViewType dummyView; Impl::Basis_HGRAD_TRI_Cn_FEM_ORTH:: - Serial::getValues(phis, input, dummyView, order); + Serial::getValues(phis, input, dummyView, order); for (ordinal_type i=0;i + void + Basis_HVOL_TRI_Cn_FEM::getScratchSpaceSize( + ordinal_type& perTeamSpaceSize, + ordinal_type& perThreadSpaceSize, + const PointViewType inputPoints, + const EOperator operatorType) const { + perTeamSpaceSize = 0; + perThreadSpaceSize = this->vinv_.extent(0)*get_dimension_scalar(inputPoints)*sizeof(typename BasisBase::scalarType); + } + + template + KOKKOS_INLINE_FUNCTION + void + Basis_HVOL_TRI_Cn_FEM::getValues( + OutputViewType outputValues, + const PointViewType inputPoints, + const EOperator operatorType, + const typename 
Kokkos::TeamPolicy::member_type& team_member, + const typename DT::execution_space::scratch_memory_space & scratchStorage, + const ordinal_type subcellDim, + const ordinal_type subcellOrdinal) const { + + INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)), + ">>> ERROR: (Intrepid2::Basis_HVOL_TRI_Cn_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet."); + + const int numPoints = inputPoints.extent(0); + using ScalarType = typename ScalarTraits::scalar_type; + using WorkViewType = Kokkos::DynRankView< ScalarType,typename DT::execution_space::scratch_memory_space,Kokkos::MemoryTraits >; + auto sizePerPoint = this->vinv_.extent(0)*get_dimension_scalar(inputPoints); + WorkViewType workView(scratchStorage, sizePerPoint*team_member.team_size()); + using range_type = Kokkos::pair; + switch(operatorType) { + case OPERATOR_VALUE: + Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, numPoints), [=] (ordinal_type& pt) { + auto output = Kokkos::subview( outputValues, Kokkos::ALL(), range_type (pt,pt+1), Kokkos::ALL() ); + const auto input = Kokkos::subview( inputPoints, range_type(pt, pt+1), Kokkos::ALL() ); + WorkViewType work(workView.data() + sizePerPoint*team_member.team_rank(), sizePerPoint); + Impl::Basis_HVOL_TRI_Cn_FEM::Serial::getValues( output, input, work, this->vinv_); + }); + break; + default: { + INTREPID2_TEST_FOR_ABORT( true, + ">>> ERROR (Basis_HVOL_TRI_Cn_FEM): getValues not implemented for this operator"); + } + } + } + } // namespace Intrepid2 #endif diff --git a/packages/intrepid2/src/Discretization/Integration/Intrepid2_CubatureControlVolumeSideDef.hpp b/packages/intrepid2/src/Discretization/Integration/Intrepid2_CubatureControlVolumeSideDef.hpp index ad2308807bf7..cb90d54c2c08 100644 --- a/packages/intrepid2/src/Discretization/Integration/Intrepid2_CubatureControlVolumeSideDef.hpp +++ 
b/packages/intrepid2/src/Discretization/Integration/Intrepid2_CubatureControlVolumeSideDef.hpp @@ -113,7 +113,7 @@ namespace Intrepid2 { const auto numSideNodeMaps = (spaceDim == 2 ? 1 : 2); const ordinal_type sideOrd[2] = { 1, 5 }; - Kokkos::pair nodeRangePerSide[2]; + Kokkos::pair nodeRangePerSide[2]={}; // the second rage is cell specific to handle remained sides switch (primaryCellTopo_.getKey()) { diff --git a/packages/intrepid2/src/Shared/Intrepid2_PolylibDef.hpp b/packages/intrepid2/src/Shared/Intrepid2_PolylibDef.hpp index 26d71b652d17..7a69b74f9fa7 100644 --- a/packages/intrepid2/src/Shared/Intrepid2_PolylibDef.hpp +++ b/packages/intrepid2/src/Shared/Intrepid2_PolylibDef.hpp @@ -226,21 +226,22 @@ namespace Intrepid2 { } else { const double one = 1.0, two = 2.0; - typename zViewType::value_type pd_buf[MaxPolylibPoint]; - Kokkos::View - pd((typename zViewType::pointer_type)&pd_buf[0], MaxPolylibPoint); - + auto pd = Kokkos::subview(D, np-1, Kokkos::pair(0,np)); JacobiPolynomialDerivative(np, z, pd, np, alpha, beta); - for (ordinal_type i = 0; i < np; ++i) - for (ordinal_type j = 0; j < np; ++j) - if (i != j) - //D(i*np+j) = pd(j)/(pd(i)*(z(j)-z(i))); <--- This is either a bug, or the derivative matrix is not defined consistently. 
- D(i,j) = pd(i)/(pd(j)*(z(i)-z(j))); - else - D(i,j) = (alpha - beta + (alpha + beta + two)*z(j))/ - (two*(one - z(j)*z(j))); + // The temporary view pd is stored in the last row of the matrix D + // This loop is designed so that we do not overwrite pd entries before we read them + for (ordinal_type i = 0; i < np; ++i) { + const auto & pd_i = pd(i); + const auto & z_i = z(i); + for (ordinal_type j = 0; j < i; ++j) { + const auto & pd_j = pd(j); + const auto & z_j = z(j); + D(j,i) = pd_j/(pd_i*(z_j-z_i)); + D(i,j) = pd_i/(pd_j*(z_i-z_j)); + } + D(i,i) = (alpha - beta + (alpha + beta + two)*z_i) / (two*(one - z_i*z_i)); + } } } @@ -260,13 +261,8 @@ namespace Intrepid2 { } else { const double one = 1.0, two = 2.0; - typename zViewType::value_type pd_buf[MaxPolylibPoint]; - Kokkos::View - pd((typename zViewType::pointer_type)&pd_buf[0], MaxPolylibPoint); - - pd(0) = pow(-one,np-1)*GammaFunction(np+beta+one); - pd(0) /= GammaFunction(np)*GammaFunction(beta+two); + auto pd = Kokkos::subview(D, np-1, Kokkos::pair(0,np)); + pd(0) = pow(-one,np-1)*GammaFunction(np+beta+one) / (GammaFunction(np)*GammaFunction(beta+two)); auto pd_plus_1 = Kokkos::subview(pd, Kokkos::pair(1, pd.extent(0))); auto z_plus_1 = Kokkos::subview( z, Kokkos::pair(1, z.extent(0))); @@ -275,17 +271,22 @@ namespace Intrepid2 { for(ordinal_type i = 1; i < np; ++i) pd(i) *= (1+z(i)); - for (ordinal_type i = 0; i < np; ++i) - for (ordinal_type j = 0; j < np; ++j) - if (i != j) - D(i,j) = pd(i)/(pd(j)*(z(i)-z(j))); - else - if (j == 0) - D(i,j) = -(np + alpha + beta + one)*(np - one)/ - (two*(beta + two)); - else - D(i,j) = (alpha - beta + one + (alpha + beta + one)*z(j))/ - (two*(one - z(j)*z(j))); + // The temporary view pd is stored in the last row of the matrix D + // This loop is designed so that we do not overwrite pd entries before we read them + for (ordinal_type i = 0; i < np; ++i) { + const auto & pd_i = pd(i); + const auto & z_i = z(i); + for (ordinal_type j = 0; j < i; ++j) { + const auto & pd_j 
= pd(j); + const auto & z_j = z(j); + D(j,i) = pd_j/(pd_i*(z_j-z_i)); + D(i,j) = pd_i/(pd_j*(z_i-z_j)); + } + if (i == 0) + D(i,i) = -(np + alpha + beta + one)*(np - one) / (two*(beta + two)); + else + D(i,i) = (alpha - beta + one + (alpha + beta + one)*z_i) / (two*(one - z_i*z_i)); + } } } @@ -305,29 +306,30 @@ namespace Intrepid2 { } else { const double one = 1.0, two = 2.0; - typename zViewType::value_type pd_buf[MaxPolylibPoint]; - Kokkos::View - pd((typename zViewType::pointer_type)&pd_buf[0], MaxPolylibPoint); + auto pd = Kokkos::subview(D, np-1, Kokkos::pair(0,np)); JacobiPolynomialDerivative(np-1, z, pd, np-1, alpha+1, beta); for (ordinal_type i = 0; i < np-1; ++i) pd(i) *= (1-z(i)); - pd(np-1) = -GammaFunction(np+alpha+one); - pd(np-1) /= GammaFunction(np)*GammaFunction(alpha+two); - - for (ordinal_type i = 0; i < np; ++i) - for (ordinal_type j = 0; j < np; ++j) - if (i != j) - D(i,j) = pd(i)/(pd(j)*(z(i)-z(j))); - else - if (j == np-1) - D(i,j) = (np + alpha + beta + one)*(np - one)/ - (two*(alpha + two)); - else - D(i,j) = (alpha - beta - one + (alpha + beta + one)*z(j))/ - (two*(one - z(j)*z(j))); + pd(np-1) = -GammaFunction(np+alpha+one) / (GammaFunction(np)*GammaFunction(alpha+two)); + + // The temporary view pd is stored in the last row of the matrix D + // This loop is designed so that we do not overwrite pd entries before we read them + for (ordinal_type i = 0; i < np; ++i) { + const auto & pd_i = pd(i); + const auto & z_i = z(i); + for (ordinal_type j = 0; j < i; ++j) { + const auto & pd_j = pd(j); + const auto & z_j = z(j); + D(j,i) = pd_j/(pd_i*(z_j-z_i)); + D(i,j) = pd_i/(pd_j*(z_i-z_j)); + } + if (i == np-1) + D(i,i) = (np + alpha + beta + one)*(np - one) / (two*(alpha + two)); + else + D(i,i) = (alpha - beta - one + (alpha + beta + one)*z_i) / (two*(one - z_i*z_i)); + } } } @@ -347,10 +349,7 @@ namespace Intrepid2 { } else { const double one = 1.0, two = 2.0; - typename zViewType::value_type pd_buf[MaxPolylibPoint]; - Kokkos::View - 
pd((typename zViewType::pointer_type)&pd_buf[0], MaxPolylibPoint); + auto pd = Kokkos::subview(D, np-1, Kokkos::pair(0,np)); pd(0) = two*pow(-one,np)*GammaFunction(np + beta); pd(0) /= GammaFunction(np - one)*GammaFunction(beta + two); @@ -359,24 +358,32 @@ namespace Intrepid2 { auto z_plus_1 = Kokkos::subview( z, Kokkos::pair(1, z.extent(0))); JacobiPolynomialDerivative(np-2, z_plus_1, pd_plus_1, np-2, alpha+1, beta+1); - for (ordinal_type i = 1; i < np-1; ++i) - pd(i) *= (one-z(i)*z(i)); + for (ordinal_type i = 1; i < np-1; ++i) { + const auto & z_i = z(i); + pd(i) *= (one-z_i*z_i); + } pd(np-1) = -two*GammaFunction(np + alpha); pd(np-1) /= GammaFunction(np - one)*GammaFunction(alpha + two); - for (ordinal_type i = 0; i < np; ++i) - for (ordinal_type j = 0; j < np; ++j) - if (i != j) - D(i,j) = pd(i)/(pd(j)*(z(i)-z(j))); - else - if (j == 0) - D(i,j) = (alpha - (np-1)*(np + alpha + beta))/(two*(beta+ two)); - else if (j == np-1) - D(i,j) =-(beta - (np-1)*(np + alpha + beta))/(two*(alpha+ two)); - else - D(i,j) = (alpha - beta + (alpha + beta)*z(j))/ - (two*(one - z(j)*z(j))); + // The temporary view pd is stored in the last row of the matrix D + // This loop is designed so that we do not overwrite pd entries before we read them + for (ordinal_type i = 0; i < np; ++i) { + const auto & pd_i = pd(i); + const auto & z_i = z(i); + for (ordinal_type j = 0; j < i; ++j) { + const auto & pd_j = pd(j); + const auto & z_j = z(j); + D(j,i) = pd_j/(pd_i*(z_j-z_i)); + D(i,j) = pd_i/(pd_j*(z_i-z_j)); + } + if (i == 0) + D(i,i) = (alpha - (np-1)*(np + alpha + beta))/(two*(beta+ two)); + else if (i == np-1) + D(i,i) =-(beta - (np-1)*(np + alpha + beta))/(two*(alpha+ two)); + else + D(i,i) = (alpha - beta + (alpha + beta)*z_i)/(two*(one - z_i*z_i)); + } } } @@ -591,57 +598,51 @@ namespace Intrepid2 { for (ordinal_type i = 0; i < np; ++i) polyd(i) = 0.5*(alpha + beta + two); } else { - double a1, a2, a3, a4; - const double apb = alpha + beta; + INTREPID2_TEST_FOR_ABORT(polyd.data() 
&& !polyd.data() , + ">>> ERROR (Polylib::Serial::JacobiPolynomial): polyi view needed to compute polyd view."); + if(!polyi.data()) return; - typename polyiViewType::value_type - poly[MaxPolylibPoint]={}, polyn1[MaxPolylibPoint]={}, polyn2[MaxPolylibPoint]={}; + constexpr ordinal_type maxOrder = 2*MaxPolylibPoint-1; - if (polyi.data()) - for (ordinal_type i=0;i>> ERROR (Polylib::Serial::JacobiPolynomial): Requested order exceeds maxOrder ."); + + double a2[maxOrder-1]={}, a3[maxOrder-1]={}, a4[maxOrder-1]={}; + double ad1(0.0), ad2(0.0), ad3(0.0); + const double apb = alpha + beta; + const double amb = alpha - beta; - for (ordinal_type i = 0; i < np; ++i) { - polyn2[i] = one; - polyn1[i] = 0.5*(alpha - beta + (alpha + beta + two)*z(i)); - } for (auto k = 2; k <= n; ++k) { - a1 = two*k*(k + apb)*(two*k + apb - two); - a2 = (two*k + apb - one)*(alpha*alpha - beta*beta); - a3 = (two*k + apb - two)*(two*k + apb - one)*(two*k + apb); - a4 = two*(k + alpha - one)*(k + beta - one)*(two*k + apb); - - a2 /= a1; - a3 /= a1; - a4 /= a1; - - for (ordinal_type i = 0; i < np; ++i) { - poly [i] = (a2 + a3*z(i))*polyn1[i] - a4*polyn2[i]; - polyn2[i] = polyn1[i]; - polyn1[i] = poly [i]; - } + double a1 = two*k*(k + apb)*(two*k + apb - two); + a2[k-2] = (two*k + apb - one)*(apb*amb)/a1; + a3[k-2] = (two*k + apb - two)*(two*k + apb - one)*(two*k + apb)/a1; + a4[k-2] = two*(k + alpha - one)*(k + beta - one)*(two*k + apb)/a1; } if (polyd.data()) { - a1 = n*(alpha - beta); - a2 = n*(two*n + alpha + beta); - a3 = two*(n + alpha)*(n + beta); - a4 = (two*n + alpha + beta); - a1 /= a4; - a2 /= a4; - a3 /= a4; - - // note polyn2 points to polyn1 at end of poly iterations - for (ordinal_type i = 0; i < np; ++i) { - polyd(i) = (a1- a2*z(i))*poly[i] + a3*polyn2[i]; - polyd(i) /= (one - z(i)*z(i)); - } + double ad4 = (two*n + alpha + beta); + ad1 = n*(alpha - beta)/ad4; + ad2 = n*(two*n + alpha + beta)/ad4; + ad3 = two*(n + alpha)*(n + beta)/ad4; } - if (polyi.data()) - for (ordinal_type 
i=0;i::value && std::is_trivial::value) ? 0 : get_dimension_scalar(view); } + + /// Struct for deleting device instantiation + template + struct DeviceDeleter { + template + void operator()(T* ptr) { + Kokkos::parallel_for(Kokkos::RangePolicy(0,1), + KOKKOS_LAMBDA (const int i) { ptr->~T(); }); + typename Device::execution_space().fence(); + Kokkos::kokkos_free(ptr); + } + }; + + /// Function for creating a vtable on device (requires copy ctor for + /// derived object). Allocates device memory and must be called from + /// host. + template + std::unique_ptr> + copy_virtual_class_to_device(const Derived& host_source) + { + auto* p = static_cast(Kokkos::kokkos_malloc(sizeof(Derived))); + Kokkos::parallel_for(Kokkos::RangePolicy(0,1), + KOKKOS_LAMBDA (const int i) {new (p) Derived(host_source); }); + typename Device::execution_space().fence(); + return std::unique_ptr>(p); + } } // end namespace Intrepid2 #endif diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_I1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_I1_FEM/CMakeLists.txt index f080139e2292..f0c9e31cd911 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_I1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_I1_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,7 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") - +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ 
-68,3 +71,76 @@ FOREACH(I RANGE ${ETI_DEVICE_COUNT}) ENDFOREACH() ENDFOREACH() + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS 
Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HCURL_HEX_I1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_I1_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_I1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..cd4d6dabdf43 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_I1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HCURL_HEX_I1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HCURL_HEX_I1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_I1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_I1_FEM/test_02.hpp new file mode 100644 index 000000000000..7f4e2807360d --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_I1_FEM/test_02.hpp @@ -0,0 +1,187 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HCURL_HEX_I1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HCURL_HEX_I1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ template + int HCURL_HEX_I1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HCURL_HEX_I1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HCURL_HEX_I1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and curls for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = 
getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_CURL); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and curls for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Comparing values and curls on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j,0)<< ", " << outputCurlsA_Host(ic,i,j,1) << ", " << outputCurlsA_Host(ic,i,j,2) << "]" + << ", curls B: [" << outputCurlsB_Host(i,j,0) << ", " << outputCurlsB_Host(i,j,1)<< ", " << outputCurlsB_Host(i,j,2) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_In_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_In_FEM/CMakeLists.txt index b682181c9d5b..2e5f6844ed27 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_In_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_In_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,7 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# test -SET(Intrepid2_TEST_ETI_FILE "test_01") - +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -83,3 +86,75 @@ FOREACH(I RANGE ${ETI_DEVICE_COUNT}) ENDFOREACH() ENDFOREACH() + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + 
LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HCURL_HEX_In_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + diff --git 
a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_In_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_In_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..41f15e65574e --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_In_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HCURL_HEX_In_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HCURL_HEX_In_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_In_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_In_FEM/test_02.hpp new file mode 100644 index 000000000000..e9ecf8ca65a4 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_HEX_In_FEM/test_02.hpp @@ -0,0 +1,203 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HCURL_HEX_In_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HCURL_HEX_In_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HCURL_HEX_In_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HCURL_HEX_In_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + constexpr int maxOrder = 9; + try { + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HCURL_HEX_In_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values and curls for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = 
getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + // avoid using a team size larger than needed, to reduce allocated scratch space memory + ordinal_type team_size = teamPolicy.team_size_recommended(functor, Kokkos::ParallelForTag()); + *outStream << "Max Recommended team size: " << team_size << ", Requested team size: " << npts <(ncells, team_size,vectorSize); + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; + + // avoid using a team size larger than needed, to reduce allocated scratch space memory + ordinal_type team_size = teamPolicy.team_size_recommended(functor, Kokkos::ParallelForTag()); + *outStream << "Max Recommended team size: " << team_size << ", Requested team size: " << npts <(ncells, team_size,vectorSize); + + //Get the required size of the scratch space per team and per thread.
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_CURL); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values and curls for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Order: " << order << ": Comparing values and curls on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j,0)<< ", " << outputCurlsA_Host(ic,i,j,1) << ", " << outputCurlsA_Host(ic,i,j,2) << "]" + << ", curls B: [" << outputCurlsB_Host(i,j,0) << ", " << outputCurlsB_Host(i,j,1)<< ", " << outputCurlsB_Host(i,j,2) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_I1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_I1_FEM/CMakeLists.txt index 716000daf9b3..89117d0742fb 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_I1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_I1_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,7 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") - +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -68,3 +71,75 @@ FOREACH(I RANGE ${ETI_DEVICE_COUNT}) ENDFOREACH() ENDFOREACH() + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + 
LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HCURL_QUAD_I1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + diff --git 
a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_I1_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_I1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..01d5359f6b02 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_I1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HCURL_QUAD_I1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...)
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HCURL_QUAD_I1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_I1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_I1_FEM/test_02.hpp new file mode 100644 index 000000000000..ecef9a26d9d4 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_I1_FEM/test_02.hpp @@ -0,0 +1,185 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HCURL_QUAD_I1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HCURL_QUAD_I1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HCURL_QUAD_I1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HCURL_QUAD_I1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HCURL_QUAD_I1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and curls for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto 
functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_CURL); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and curls for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Comparing values and curls on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j)<<"]" + << ", curls B: [" << outputCurlsB_Host(i,j) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_In_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_In_FEM/CMakeLists.txt index c831e83ec896..2f44c158238f 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_In_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_In_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HCURL_QUAD_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HCURL_QUAD_In_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + # test -SET(Intrepid2_TEST_ETI_FILE "test_01") +SET(Intrepid2_TEST_ETI_FILE "test_02") +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -60,7 +136,6 @@ MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") FOREACH(I 
RANGE ${ETI_DEVICE_COUNT}) LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) - #MESSAGE(STATUS "Generating TEST HCURL_QUAD_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_In_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_In_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..daa3176be226 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_In_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HCURL_QUAD_In_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...)
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HCURL_QUAD_In_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_In_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_In_FEM/test_02.hpp new file mode 100644 index 000000000000..2ae8438a11ac --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_QUAD_In_FEM/test_02.hpp @@ -0,0 +1,189 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HCURL_QUAD_In_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HCURL_QUAD_In_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ template + int HCURL_QUAD_In_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HCURL_QUAD_In_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + constexpr int maxOrder = 9; + try { + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HCURL_QUAD_In_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values and curls for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = 
basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_CURL); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values and curls for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Order: " << order << ": Comparing values and curls on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j)<<"]" + << ", curls B: [" << outputCurlsB_Host(i,j) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_I1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_I1_FEM/CMakeLists.txt index 46e84774b70d..234b8e2d6fed 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_I1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_I1_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,7 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") - +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -68,3 +71,76 @@ FOREACH(I RANGE ${ETI_DEVICE_COUNT}) ENDFOREACH() ENDFOREACH() + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND 
Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HCURL_TET_I1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_I1_FEM/eti/test_02_ETI.in 
b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_I1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..8fadc4a2c865 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_I1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HCURL_TET_I1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HCURL_TET_I1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_I1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_I1_FEM/test_02.hpp new file mode 100644 index 000000000000..9c112664aff0 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_I1_FEM/test_02.hpp @@ -0,0 +1,187 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HCURL_TET_I1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HCURL_TET_I1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HCURL_TET_I1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HCURL_TET_I1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HCURL_TET_I1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and curls for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute 
values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_CURL); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and curls for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Comparing values and curls on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << ", " << outputValuesA_Host(ic,i,j,2) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << ", " << outputValuesB_Host(i,j,2) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j,0)<< ", " << outputCurlsA_Host(ic,i,j,1) << ", " << outputCurlsA_Host(ic,i,j,2) << "]" + << ", curls B: [" << outputCurlsB_Host(i,j,0) << ", " << outputCurlsB_Host(i,j,1)<< ", " << outputCurlsB_Host(i,j,2) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_In_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_In_FEM/CMakeLists.txt index 46e4453c0d57..c40f3503ccf9 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_In_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_In_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HCURL_TET_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HCURL_TET_In_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + # test -SET(Intrepid2_TEST_ETI_FILE "test_01") +SET(Intrepid2_TEST_ETI_FILE "test_02") +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -60,7 +136,6 @@ MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") FOREACH(I 
RANGE ${ETI_DEVICE_COUNT}) LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) - #MESSAGE(STATUS "Generating TEST HCURL_TET_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_In_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_In_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..278f9326b54c --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_In_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HCURL_TET_In_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HCURL_TET_In_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_In_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_In_FEM/test_02.hpp new file mode 100644 index 000000000000..d51a4ed29ae1 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TET_In_FEM/test_02.hpp @@ -0,0 +1,205 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HCURL_TET_In_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HCURL_TET_In_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ template + int HCURL_TET_In_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HCURL_TET_In_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + constexpr int maxOrder = 7; + + try { + for (int order=1;order <= maxOrder;++order) { + using BasisType = Basis_HCURL_TET_In_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values and curls for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = 
basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + // avoid using a team size larger than needed, to reduce allocated scrach space memory + ordinal_type team_size = teamPolicy.team_size_recommended(functor, Kokkos::ParallelForTag()); + *outStream << "Max Recommended team size: " << team_size << ", Requested team size: " << npts <(ncells, team_size,vectorSize); + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; + + // avoid using a team size larger than needed, to reduce allocated scrach space memory + ordinal_type team_size = teamPolicy.team_size_recommended(functor, Kokkos::ParallelForTag()); + *outStream << "Max Recommended team size: " << team_size << ", Requested team size: " << npts <(ncells, team_size,vectorSize); + + //Get the required size of the 
scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_CURL); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values and curls for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Order: " << order << ": Comparing values and curls on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + //Note, the PR intel 2021 serial build shows substantially higher errors (possibly due to operation rearrangements). 
+ auto tol = 1.0e6*epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j,0)<< ", " << outputCurlsA_Host(ic,i,j,1) << ", " << outputCurlsA_Host(ic,i,j,2) << "]" + << ", curls B: [" << outputCurlsB_Host(i,j,0) << ", " << outputCurlsB_Host(i,j,1)<< ", " << outputCurlsB_Host(i,j,2) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! ----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_I1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_I1_FEM/CMakeLists.txt index 2bf7bfdee691..b87adda0a338 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_I1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_I1_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,7 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") - +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE 
"") IF(Kokkos_ENABLE_SERIAL) @@ -68,3 +71,75 @@ FOREACH(I RANGE ${ETI_DEVICE_COUNT}) ENDFOREACH() ENDFOREACH() + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + 
FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HCURL_TRI_I1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_I1_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_I1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..2fe8396db2d1 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_I1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HCURL_TRI_I1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HCURL_TRI_I1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_I1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_I1_FEM/test_02.hpp new file mode 100644 index 000000000000..3b255303dd1f --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_I1_FEM/test_02.hpp @@ -0,0 +1,185 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HCURL_TRI_I1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HCURL_TRI_I1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ template + int HCURL_TRI_I1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HCURL_TRI_I1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HCURL_TRI_I1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and curls for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); 
+ Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_CURL); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and curls for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Comparing values and curls on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j)<<"]" + << ", curls B: [" << outputCurlsB_Host(i,j) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_In_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_In_FEM/CMakeLists.txt index 49f5b786efc9..ec30d2154004 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_In_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_In_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HCURL_TRI_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HCURL_TRI_In_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + # test -SET(Intrepid2_TEST_ETI_FILE "test_01") +SET(Intrepid2_TEST_ETI_FILE "test_02") +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -60,7 +136,6 @@ MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") FOREACH(I 
RANGE ${ETI_DEVICE_COUNT}) LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) - #MESSAGE(STATUS "Generating TEST HCURL_TRI_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_In_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_In_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..509fff60809f --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_In_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02_ETI.in + \brief Unit test of Intrepid2::Basis_HCURL_TRI_In_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HCURL_TRI_In_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_In_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_In_FEM/test_02.hpp new file mode 100644 index 000000000000..aa19ce2114e0 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_TRI_In_FEM/test_02.hpp @@ -0,0 +1,189 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HCURL_TRI_In_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HCURL_TRI_In_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ template + int HCURL_TRI_In_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HCURL_TRI_In_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + constexpr int maxOrder = 9; + try { + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HCURL_TRI_In_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values and curls for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = 
basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_CURL); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values and curls for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Order: " << order << ": Comparing values and curls on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j)<<"]" + << ", curls B: [" << outputCurlsB_Host(i,j) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_WEDGE_I1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_WEDGE_I1_FEM/CMakeLists.txt index ba0496748a48..cb1ebf7b3de1 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_WEDGE_I1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_WEDGE_I1_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,7 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") - +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -68,3 +71,76 @@ FOREACH(I RANGE ${ETI_DEVICE_COUNT}) ENDFOREACH() ENDFOREACH() + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + 
LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HCURL_WEDGE_I1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + diff --git 
a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_WEDGE_I1_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_WEDGE_I1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..1b963155651c --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_WEDGE_I1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02_ETI.in + \brief Unit test of Intrepid2::Basis_HCURL_WEDGE_I1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HCURL_WEDGE_I1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HCURL_WEDGE_I1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_WEDGE_I1_FEM/test_02.hpp new file mode 100644 index 000000000000..de75f4cf2d72 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HCURL_WEDGE_I1_FEM/test_02.hpp @@ -0,0 +1,188 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HCURL_WEDGE_I1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HCURL_WEDGE_I1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HCURL_WEDGE_I1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HCURL_WEDGE_I1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HCURL_WEDGE_I1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and curls for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { 
//compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_CURL); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and curls for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Comparing values and curls on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j,0)<< ", " << outputCurlsA_Host(ic,i,j,1) << ", " << outputCurlsA_Host(ic,i,j,2) << "]" + << ", curls B: [" << outputCurlsB_Host(i,j,0) << ", " << outputCurlsB_Host(i,j,1)<< ", " << outputCurlsB_Host(i,j,2) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_I1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_I1_FEM/CMakeLists.txt index fd4d688a591d..3fb8fc747f9c 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_I1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_I1_FEM/CMakeLists.txt @@ -1,13 +1,18 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") - + IF (HAVE_INTREPID2_SACADO) # LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DOUBLE") # LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad") @@ -17,9 +22,7 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") - +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -68,3 +71,76 @@ FOREACH(I RANGE ${ETI_DEVICE_COUNT}) ENDFOREACH() ENDFOREACH() + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND 
Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HDIV_HEX_I1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + 
PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_I1_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_I1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..ab24cfec247d --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_I1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HDIV_HEX_I1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HDIV_HEX_I1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_I1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_I1_FEM/test_02.hpp new file mode 100644 index 000000000000..fb05ad186945 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_I1_FEM/test_02.hpp @@ -0,0 +1,186 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HDIV_HEX_I1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HDIV_HEX_I1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HDIV_HEX_I1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HDIV_HEX_I1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HDIV_HEX_I1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and divergences for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { 
//compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute divergences + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto divergencesACell = Kokkos::subview(outputDivergencesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(divergencesACell, inputPoints, OPERATOR_DIV, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_DIV); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and divergences for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputDivergencesB, inputPoints, OPERATOR_DIV); + + *outStream << "Comparing values and divergences on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << ", " << outputValuesA_Host(ic,i,j,2) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << ", " << outputValuesB_Host(i,j,2) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare divergences + const auto outputDivergencesA_Host = Kokkos::create_mirror_view(outputDivergencesA); Kokkos::deep_copy(outputDivergencesA_Host, outputDivergencesA); + const auto outputDivergencesB_Host = Kokkos::create_mirror_view(outputDivergencesB); Kokkos::deep_copy(outputDivergencesB_Host, outputDivergencesB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", divergence A: " << outputDivergencesA_Host(ic,i,j) + << ", divergence B: " << outputDivergencesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_In_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_In_FEM/CMakeLists.txt index a0e677500751..7a81181c8403 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_In_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_In_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HDIV_HEX_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HDIV_HEX_In_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + # test -SET(Intrepid2_TEST_ETI_FILE "test_01") +SET(Intrepid2_TEST_ETI_FILE "test_02") +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -60,7 +136,6 @@ MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") FOREACH(I 
RANGE ${ETI_DEVICE_COUNT}) LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) - #MESSAGE(STATUS "Generating TEST HDIV_HEX_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_In_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_In_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..71b715c78833 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_In_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HDIV_HEX_In_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HDIV_HEX_In_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_In_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_In_FEM/test_02.hpp new file mode 100644 index 000000000000..61c3d844f5dd --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_HEX_In_FEM/test_02.hpp @@ -0,0 +1,190 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HDIV_HEX_In_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HDIV_HEX_In_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ + template + int HDIV_HEX_In_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HDIV_HEX_In_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + constexpr int maxOrder = 9; + try { + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HDIV_HEX_In_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values and divergences for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = 
basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute divergences + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto divergencesACell = Kokkos::subview(outputDivergencesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(divergencesACell, inputPoints, OPERATOR_DIV, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_DIV); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values and divergences for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputDivergencesB, inputPoints, OPERATOR_DIV); + + *outStream << "Order: " << order << ": Comparing values and divergences on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << ", " << outputValuesA_Host(ic,i,j,2) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << ", " << outputValuesB_Host(i,j,2) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare divergences + const auto outputDivergencesA_Host = Kokkos::create_mirror_view(outputDivergencesA); Kokkos::deep_copy(outputDivergencesA_Host, outputDivergencesA); + const auto outputDivergencesB_Host = Kokkos::create_mirror_view(outputDivergencesB); Kokkos::deep_copy(outputDivergencesB_Host, outputDivergencesB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", divergence A: " << outputDivergencesA_Host(ic,i,j) + << ", divergence B: " << outputDivergencesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_I1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_I1_FEM/CMakeLists.txt index 5900fa72e32a..b21760f88ec4 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_I1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_I1_FEM/CMakeLists.txt @@ -1,13 +1,18 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") - + IF (HAVE_INTREPID2_SACADO) # LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DOUBLE") # LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad") @@ -17,9 +22,7 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") - +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -68,3 +71,76 @@ FOREACH(I RANGE ${ETI_DEVICE_COUNT}) ENDFOREACH() ENDFOREACH() + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") 
+LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HDIV_QUAD_I1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + 
PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_I1_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_I1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..5e1eb4c2dc79 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_I1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HDIV_QUAD_I1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HDIV_QUAD_I1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_I1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_I1_FEM/test_02.hpp new file mode 100644 index 000000000000..a811df7230c7 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_I1_FEM/test_02.hpp @@ -0,0 +1,185 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HDIV_QUAD_I1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HDIV_QUAD_I1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HDIV_QUAD_I1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HDIV_QUAD_I1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HDIV_QUAD_I1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and divergences for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { 
//compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute divergences + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto divergencesACell = Kokkos::subview(outputDivergencesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(divergencesACell, inputPoints, OPERATOR_DIV, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_DIV); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and divergences for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputDivergencesB, inputPoints, OPERATOR_DIV); + + *outStream << "Comparing values and divergences on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare divergences + const auto outputDivergencesA_Host = Kokkos::create_mirror_view(outputDivergencesA); Kokkos::deep_copy(outputDivergencesA_Host, outputDivergencesA); + const auto outputDivergencesB_Host = Kokkos::create_mirror_view(outputDivergencesB); Kokkos::deep_copy(outputDivergencesB_Host, outputDivergencesB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", divergence A: " << outputDivergencesA_Host(ic,i,j) + << ", divergence B: " << outputDivergencesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_In_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_In_FEM/CMakeLists.txt index 59d15e7c716c..cdc2989d6036 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_In_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_In_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HDIV_QUAD_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HDIV_QUAD_In_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + # test -SET(Intrepid2_TEST_ETI_FILE "test_01") +SET(Intrepid2_TEST_ETI_FILE "test_02") +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -60,7 +136,6 @@ MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") FOREACH(I 
RANGE ${ETI_DEVICE_COUNT}) LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) - #MESSAGE(STATUS "Generating TEST HDIV_QUAD_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_In_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_In_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..328d40fda920 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_In_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HDIV_QUAD_In_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HDIV_QUAD_In_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_In_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_In_FEM/test_02.hpp new file mode 100644 index 000000000000..529007a97787 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_QUAD_In_FEM/test_02.hpp @@ -0,0 +1,190 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HDIV_QUAD_In_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HDIV_QUAD_In_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ template + int HDIV_QUAD_In_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HDIV_QUAD_In_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + constexpr int maxOrder = 9; + try { + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HDIV_QUAD_In_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values and divergences for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = 
basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute divergences + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto divergencesACell = Kokkos::subview(outputDivergencesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(divergencesACell, inputPoints, OPERATOR_DIV, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_DIV); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values and divergences for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputDivergencesB, inputPoints, OPERATOR_DIV); + + *outStream << "Order: " << order << ": Comparing values and divergences on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare divergences + const auto outputDivergencesA_Host = Kokkos::create_mirror_view(outputDivergencesA); Kokkos::deep_copy(outputDivergencesA_Host, outputDivergencesA); + const auto outputDivergencesB_Host = Kokkos::create_mirror_view(outputDivergencesB); Kokkos::deep_copy(outputDivergencesB_Host, outputDivergencesB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", divergence A: " << outputDivergencesA_Host(ic,i,j) + << ", divergence B: " << outputDivergencesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_I1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_I1_FEM/CMakeLists.txt index ea49b4cde715..6b8c89a459f9 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_I1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_I1_FEM/CMakeLists.txt @@ -1,13 +1,18 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") - + IF (HAVE_INTREPID2_SACADO) # LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DOUBLE") # LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad") @@ -17,9 +22,7 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") - +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -68,3 +71,76 @@ FOREACH(I RANGE ${ETI_DEVICE_COUNT}) ENDFOREACH() ENDFOREACH() + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND 
Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HDIV_TET_I1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + 
PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_I1_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_I1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..72f12aed7a9b --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_I1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HDIV_TET_I1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HDIV_TET_I1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_I1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_I1_FEM/test_02.hpp new file mode 100644 index 000000000000..2da333826892 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_I1_FEM/test_02.hpp @@ -0,0 +1,185 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HDIV_TET_I1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HDIV_TET_I1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HDIV_TET_I1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HDIV_TET_I1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HDIV_TET_I1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and divergences for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute 
values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute divergences + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto divergencesACell = Kokkos::subview(outputDivergencesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(divergencesACell, inputPoints, OPERATOR_DIV, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_DIV); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and divergences for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputDivergencesB, inputPoints, OPERATOR_DIV); + + *outStream << "Comparing values and divergences on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << ", " << outputValuesA_Host(ic,i,j,2) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << ", " << outputValuesB_Host(i,j,2) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare divergences + const auto outputDivergencesA_Host = Kokkos::create_mirror_view(outputDivergencesA); Kokkos::deep_copy(outputDivergencesA_Host, outputDivergencesA); + const auto outputDivergencesB_Host = Kokkos::create_mirror_view(outputDivergencesB); Kokkos::deep_copy(outputDivergencesB_Host, outputDivergencesB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", divergence A: " << outputDivergencesA_Host(ic,i,j) + << ", divergence B: " << outputDivergencesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_In_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_In_FEM/CMakeLists.txt index 264ce7c056ff..f4a2093e0e4f 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_In_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_In_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HDIV_TET_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HDIV_TET_In_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + # test -SET(Intrepid2_TEST_ETI_FILE "test_01") +SET(Intrepid2_TEST_ETI_FILE "test_02") +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -60,7 +136,6 @@ MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") FOREACH(I 
RANGE ${ETI_DEVICE_COUNT}) LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) - #MESSAGE(STATUS "Generating TEST HDIV_TET_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_In_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_In_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..c08e06044acf --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_In_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HDIV_TET_In_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HDIV_TET_In_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_In_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_In_FEM/test_02.hpp new file mode 100644 index 000000000000..1d5f9059327c --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TET_In_FEM/test_02.hpp @@ -0,0 +1,190 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HDIV_TET_In_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HDIV_TET_In_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ template + int HDIV_TET_In_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HDIV_TET_In_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + constexpr int maxOrder = 7; + try { + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HDIV_TET_In_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values and divergences for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = 
basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute divergences + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto divergencesACell = Kokkos::subview(outputDivergencesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(divergencesACell, inputPoints, OPERATOR_DIV, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_DIV); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values and divergences for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputDivergencesB, inputPoints, OPERATOR_DIV); + + *outStream << "Order: " << order << ": Comparing values and divergences on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << ", " << outputValuesA_Host(ic,i,j,2) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << ", " << outputValuesB_Host(i,j,2) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare divergences + const auto outputDivergencesA_Host = Kokkos::create_mirror_view(outputDivergencesA); Kokkos::deep_copy(outputDivergencesA_Host, outputDivergencesA); + const auto outputDivergencesB_Host = Kokkos::create_mirror_view(outputDivergencesB); Kokkos::deep_copy(outputDivergencesB_Host, outputDivergencesB); + + OutValueType diff = 0; + //Note, the PR intel 2021 serial build shows substantially higher errors (possibly due to operation rearrangements). 
+ auto tol = 1e6*epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", divergence A: " << outputDivergencesA_Host(ic,i,j) + << ", divergence B: " << outputDivergencesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! ----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_I1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_I1_FEM/CMakeLists.txt index 4f47ee20c141..581f594311e0 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_I1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_I1_FEM/CMakeLists.txt @@ -1,13 +1,18 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") - + IF (HAVE_INTREPID2_SACADO) # LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DOUBLE") # LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad") @@ -17,9 +22,7 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") - +# device 
SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -68,3 +71,76 @@ FOREACH(I RANGE ${ETI_DEVICE_COUNT}) ENDFOREACH() ENDFOREACH() + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} 
ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HDIV_TRI_I1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_I1_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_I1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..99b3fb273163 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_I1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HDIV_TRI_I1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HDIV_TRI_I1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_I1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_I1_FEM/test_02.hpp new file mode 100644 index 000000000000..15b4152a781a --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_I1_FEM/test_02.hpp @@ -0,0 +1,185 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HDIV_TRI_I1_FEM class. 
+ \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HDIV_TRI_I1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HDIV_TRI_I1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HDIV_TRI_I1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HDIV_TRI_I1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View 
inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and divergences for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute divergences + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto divergencesACell = Kokkos::subview(outputDivergencesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(divergencesACell, inputPoints, OPERATOR_DIV, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_DIV); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and divergences for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputDivergencesB, inputPoints, OPERATOR_DIV); + + *outStream << "Comparing values and divergences on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare divergences + const auto outputDivergencesA_Host = Kokkos::create_mirror_view(outputDivergencesA); Kokkos::deep_copy(outputDivergencesA_Host, outputDivergencesA); + const auto 
outputDivergencesB_Host = Kokkos::create_mirror_view(outputDivergencesB); Kokkos::deep_copy(outputDivergencesB_Host, outputDivergencesB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", divergence A: " << outputDivergencesA_Host(ic,i,j) + << ", divergence B: " << outputDivergencesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! ----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_In_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_In_FEM/CMakeLists.txt index 4f11a0b1e70c..f06b5f1bb859 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_In_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_In_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND 
Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HDIV_TRI_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HDIV_TRI_In_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + # test -SET(Intrepid2_TEST_ETI_FILE "test_01") +SET(Intrepid2_TEST_ETI_FILE "test_02") +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO 
"0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -60,7 +136,6 @@ MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") FOREACH(I RANGE ${ETI_DEVICE_COUNT}) LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) - #MESSAGE(STATUS "Generating TEST HDIV_TRI_In_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_In_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_In_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..060c322dc641 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_In_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HDIV_TRI_In_FEM team-level getValues. 
+ \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HDIV_TRI_In_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_In_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_In_FEM/test_02.hpp new file mode 100644 index 000000000000..1d3c940090d7 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_TRI_In_FEM/test_02.hpp @@ -0,0 +1,189 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HDIV_TRI_In_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HDIV_TRI_In_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HDIV_TRI_In_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HDIV_TRI_In_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + constexpr int maxOrder = 9; + try { + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HDIV_TRI_In_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesA, ncells, 
basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values and divergences for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute divergences + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto divergencesACell = Kokkos::subview(outputDivergencesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(divergencesACell, inputPoints, OPERATOR_DIV, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_DIV); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values and divergences for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputDivergencesB, inputPoints, OPERATOR_DIV); + + *outStream << "Order: " << order << ": Comparing values and divergences on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare divergences + const auto outputDivergencesA_Host = Kokkos::create_mirror_view(outputDivergencesA); 
Kokkos::deep_copy(outputDivergencesA_Host, outputDivergencesA); + const auto outputDivergencesB_Host = Kokkos::create_mirror_view(outputDivergencesB); Kokkos::deep_copy(outputDivergencesB_Host, outputDivergencesB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", divergence A: " << outputDivergencesA_Host(ic,i,j) + << ", divergence B: " << outputDivergencesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! ----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_WEDGE_I1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_WEDGE_I1_FEM/CMakeLists.txt index 67fcf71311f7..d801c634869e 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_WEDGE_I1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_WEDGE_I1_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,7 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE 
"test_01") - +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -68,3 +71,75 @@ FOREACH(I RANGE ${ETI_DEVICE_COUNT}) ENDFOREACH() ENDFOREACH() + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET 
Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HDIV_WEDGE_In_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_WEDGE_I1_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_WEDGE_I1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..16906a746c00 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_WEDGE_I1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HDIV_WEDGE_I1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...)
obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HDIV_WEDGE_I1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HDIV_WEDGE_I1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_WEDGE_I1_FEM/test_02.hpp new file mode 100644 index 000000000000..c991769a4852 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HDIV_WEDGE_I1_FEM/test_02.hpp @@ -0,0 +1,185 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HDIV_WEDGE_I1_FEM class. 
+ \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HDIV_WEDGE_I1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HDIV_WEDGE_I1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HDIV_WEDGE_I1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HDIV_WEDGE_I1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputDivergencesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View 
inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and divergences for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute divergences + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto divergencesACell = Kokkos::subview(outputDivergencesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(divergencesACell, inputPoints, OPERATOR_DIV, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_DIV); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and divergences for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputDivergencesB, inputPoints, OPERATOR_DIV); + + *outStream << "Comparing values and divergences on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: [" << outputValuesA_Host(ic,i,j,0) << ", " << outputValuesA_Host(ic,i,j,1) << ", " << outputValuesA_Host(ic,i,j,2) << "]" + << ", val B: [" << outputValuesB_Host(i,j,0) << ", " << outputValuesB_Host(i,j,1) << ", " << outputValuesB_Host(i,j,2) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare divergences + const auto outputDivergencesA_Host = Kokkos::create_mirror_view(outputDivergencesA); 
Kokkos::deep_copy(outputDivergencesA_Host, outputDivergencesA); + const auto outputDivergencesB_Host = Kokkos::create_mirror_view(outputDivergencesB); Kokkos::deep_copy(outputDivergencesB_Host, outputDivergencesB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", divergence A: " << outputDivergencesA_Host(ic,i,j) + << ", divergence B: " << outputDivergencesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! ----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C1_FEM/CMakeLists.txt index 88da0999c2ab..4982bd4f8dff 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C1_FEM/CMakeLists.txt @@ -3,6 +3,7 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -67,6 +68,7 @@ SET(Intrepid2_TEST_ETI_FILE "test_01") SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") + IF(Kokkos_ENABLE_CUDA) LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") @@ -106,3 
+108,75 @@ IF (${ETI_DEVICE_COUNT} GREATER_EQUAL 0) ENDFOREACH() ENDFOREACH() ENDIF() + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_03") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_HEX_C1_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} 
ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_HEX_C1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C1_FEM/eti/test_03_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C1_FEM/eti/test_03_ETI.in new file mode 100644 index 000000000000..a88bf31183c7 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C1_FEM/eti/test_03_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_03.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_HEX_C1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_03.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_HEX_C1_FEM_Test03<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C1_FEM/test_03.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C1_FEM/test_03.hpp new file mode 100644 index 000000000000..9d326a80cf33 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C1_FEM/test_03.hpp @@ -0,0 +1,184 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_03.hpp + \brief Unit tests for the Intrepid2::HGRAD_HEX_C1_FEM class. 
+ \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_HEX_C1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HGRAD_HEX_C1_FEM_Test03(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_HEX_C1_FEM, Test 3", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_HEX_C1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", 
npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << ", " << outputGradsA_Host(ic,i,j,2) <<"]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << ", " << outputGradsB_Host(i,j,2) <<"]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C2_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C2_FEM/CMakeLists.txt index a9bfc7f38abb..29e3244386c9 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C2_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C2_FEM/CMakeLists.txt @@ -1,8 +1,16 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "") +LIST(APPEND Intrepid2_TEST_ETI_FILE + "test_01" + "test_01_Serendipity") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,12 +25,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "") -LIST(APPEND Intrepid2_TEST_ETI_FILE - "test_01" - "test_01_Serendipity") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") 
+ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_HEX_C2_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_HEX_C2_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") 
SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C2_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C2_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..f10b05aa223e --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C2_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_HEX_C2_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_HEX_C2_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C2_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C2_FEM/test_02.hpp new file mode 100644 index 000000000000..a29875462280 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_C2_FEM/test_02.hpp @@ -0,0 +1,184 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_HEX_C2_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_HEX_C2_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HGRAD_HEX_C2_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_HEX_C2_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_HEX_C2_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto 
functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << ", " << outputGradsA_Host(ic,i,j,2) <<"]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << ", " << outputGradsB_Host(i,j,2) <<"]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/CMakeLists.txt index 793d773f707f..ba86fece89df 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/CMakeLists.txt @@ -7,6 +7,7 @@ SET(Intrepid2_TEST_ETI_FILE "test_01") # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -94,11 +95,18 @@ SET(Intrepid2_TEST_ETI_FILE "test_02") # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/eti/test_01_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/eti/test_01_ETI.in index 
487708632660..25426631d6de 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/eti/test_01_ETI.in +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/eti/test_01_ETI.in @@ -28,9 +28,8 @@ constexpr int num_deriv = 10; #define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #elif (ETI_SACADO == 23) -constexpr int num_deriv = 9; -constexpr int max_deriv = 10; -#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, max_deriv+1) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #elif (ETI_SACADO == 20) constexpr int num_deriv = 2; diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/eti/test_02_ETI.in index 6a200d58b21d..d314677fd1db 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/eti/test_02_ETI.in +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/eti/test_02_ETI.in @@ -7,22 +7,45 @@ // ***************************************************************************** // @HEADER -/** \file test_01.cpp - \brief Unit test of serial interface Intrepid2::Basis_HGRAD_QUAD_Cn_FEM. +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_HEX_Cn_FEM team-level getValues. \author Kyungjoo Kim */ #include "Kokkos_Core.hpp" +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + #include "test_02.hpp" int main(int argc, char *argv[]) { + const bool verbose = (argc-1) > 0; Kokkos::initialize(); - { - const bool verbose = (argc-1) > 0; - Intrepid2::Test::HGRAD_HEX_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); - } + + Intrepid2::Test::HGRAD_HEX_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + Kokkos::finalize(); return 0; } diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/test_02.hpp index b98955113bde..e392f1540447 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/test_02.hpp +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_HEX_Cn_FEM/test_02.hpp @@ -7,9 +7,9 @@ // ***************************************************************************** // @HEADER -/** \file test_01.hpp +/** \file test_02.hpp \brief Unit tests for the Intrepid2::HGRAD_HEX_Cn_FEM class. - \author Created by P. Bochev, D. Ridzal, K. 
Peterson, Kyungjoo Kim + \author Created by Kyungjoo Kim, Mauro Perego */ @@ -23,100 +23,151 @@ #include "Intrepid2_Utils.hpp" #include "Intrepid2_HGRAD_HEX_Cn_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" namespace Intrepid2 { namespace Test { - // This code provides an example to use serial interface of high order elements + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. template int HGRAD_HEX_Cn_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_HEX_Cn_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + using DeviceSpaceType = typename DeviceType::execution_space; Kokkos::print_configuration(std::cout, false); int errorFlag = 0; - + constexpr int maxOrder = 9; try { - // for higher orders in certain environments, this test can take a while to run in ctest. So we reduce the number of points as we go beyond 10th order. Also, @mperego is replacing this test, so for now we just restrict to the 10 orders we supported until recently. - for (int order=1;order<10;++order) { - Basis_HGRAD_HEX_Cn_FEM basis(order); + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HGRAD_HEX_Cn_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); - // problem setup - // let's say we want to evaluate 1000 points in parallel. output values are stored in outputValuesA and B. - // A is compuated via serial interface and B is computed with top-level interface. 
- const int npts = 1000, ndim = 3; - Kokkos::DynRankView outputValuesA("outputValuesA", basis.getCardinality(), npts); - Kokkos::DynRankView outputValuesB("outputValuesB", basis.getCardinality(), npts); + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; - Kokkos::View inputPointsViewToUseRandom("inputPoints", npts, ndim); - Kokkos::DynRankView inputPoints (inputPointsViewToUseRandom.data(), npts, ndim); + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); - // random values between (-1,1) x (-1,1) + // random values between (0,1) Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); - // compute setup - // we need vinv and workspace - const auto vinv = basis.getVandermondeInverse(); - - // worksize - // workspace per thread is required for serial interface. - // parallel_for with range policy would be good to use stack workspace - // as team policy only can create shared memory - // this part would be tricky as the max size should be determined at compile time - // let's think about this and find out the best practice. for now I use the following. 
- constexpr int worksize = (Parameters::MaxOrder+1)*4; - - // if you use team policy, worksize can be gathered from the basis object and use - // kokkos shmem_size APIs to create workspace per team or per thread. - //const auto worksize_for_teampolicy = basis.getWorksizePerPoint(OPERATOR_VALUE); - - // extract point range to be evaluated in each thread - typedef Kokkos::pair range_type; - - // parallel execution with serial interface - Kokkos::RangePolicy policy(0, npts); - Kokkos::parallel_for(policy, KOKKOS_LAMBDA(int i) { - // we evaluate a single point - const range_type pointRange = range_type(i,i+1); - - // out (# dofs, # pts), input (# pts, # dims) - auto output = Kokkos::subview(outputValuesA, Kokkos::ALL(), pointRange); - auto input = Kokkos::subview(inputPoints, pointRange, Kokkos::ALL()); + + *outStream << "Order: " << order << ": Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; - // wrap static workspace with a view; serial interface has a template view interface. - // either view or dynrankview with a right size is okay. - OutValueType workbuf[worksize]; - Kokkos::View work(&workbuf[0], worksize); + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; - // evaluate basis using serial interface - Impl::Basis_HGRAD_HEX_Cn_FEM - ::Serial::getValues(output, input, work, vinv); - }); - - // evaluation using high level interface - basis.getValues(outputValuesB, inputPoints, OPERATOR_VALUE); - - // compare - const auto outputValuesA_Host = Kokkos::create_mirror_view(outputValuesA); Kokkos::deep_copy(outputValuesA_Host, outputValuesA); - const auto outputValuesB_Host = Kokkos::create_mirror_view(outputValuesB); Kokkos::deep_copy(outputValuesB_Host, outputValuesB); - - double sum = 0, diff = 0; - for (size_t i=0;i 1.0e-9)) { - std::cout << " order = " << order - << " i = " << i << " j = " << j - << " val A = " << outputValuesA_Host(i,j) - << " val B = " << outputValuesB_Host(i,j) - << " diff = " << (outputValuesA_Host(i,j) - outputValuesB_Host(i,j)) - << std::endl; - } + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); } - if (diff/sum > 1.0e-9) { - errorFlag = -1; + } + + *outStream << "Order: " << order << ": Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Order: " << order << ": Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << ", " << outputGradsA_Host(ic,i,j,2) <<"]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << ", " << outputGradsB_Host(i,j,2) <<"]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } } } } catch (std::exception &err) { diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C1_FEM/CMakeLists.txt 
b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C1_FEM/CMakeLists.txt index 940d4ad3ebb4..186c0369d09e 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C1_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + # MESSAGE(STATUS "Generating TEST HGRAD_LINE_C1_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME 
${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_LINE_C1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C1_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..69bb74d6a746 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_LINE_C1_FEM team-level get Values. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_LINE_C1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C1_FEM/test_02.hpp new file mode 100644 index 000000000000..c0d1db740ce9 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C1_FEM/test_02.hpp @@ -0,0 +1,185 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_LINE_C1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_LINE_C1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + template + int HGRAD_LINE_C1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_LINE_C1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_LINE_C1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + // problem setup + // we evaluate npts points on each of ncells cells in parallel. output values are stored in outputValuesA and B. + // A is computed via the team-level interface and B is computed with the top-level interface. + const int ncells = 5, npts = 10, ndim = 1; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = 
basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << "]" + << ", grads B: [" << outputGradsB_Host(i,j,0) <<"]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C2_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C2_FEM/CMakeLists.txt index f26e93eb35f6..47fc663ffd89 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C2_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C2_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE 
"Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_LINE_C2_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_LINE_C2_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) diff --git 
a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C2_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C2_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..76e7d225aa79 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C2_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_LINE_C2_FEM team-level get Values. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_LINE_C2_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C2_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C2_FEM/test_02.hpp new file mode 100644 index 000000000000..7c40e6e00dd1 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_C2_FEM/test_02.hpp @@ -0,0 +1,184 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_LINE_C2_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_LINE_C2_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HGRAD_LINE_C2_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_LINE_C2_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_LINE_C2_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 1; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + 
auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << "]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/CMakeLists.txt index 40be3eb7ddf0..088e2285ac8f 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_LINE_Cn_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_LINE_Cn_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + # test -SET(Intrepid2_TEST_ETI_FILE "test_01") +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) diff --git 
a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/eti/test_01_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/eti/test_01_ETI.in index 070fba1f3916..b662965e7493 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/eti/test_01_ETI.in +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/eti/test_01_ETI.in @@ -28,10 +28,8 @@ constexpr int num_deriv = 10; #define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #elif (ETI_SACADO == 23) -/// Mauro, the master branch uses this derivative dimension which sounds dummy -constexpr int num_deriv = 0; //9; -constexpr int max_deriv = 1; //10; -#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, max_deriv+1) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #elif (ETI_SACADO == 20) constexpr int num_deriv = 2; diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..2ff629694b47 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_LINE_Cn_FEM team-level get Values. 
+ \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_LINE_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/test_02.hpp new file mode 100644 index 000000000000..23dafa935f7e --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_LINE_Cn_FEM/test_02.hpp @@ -0,0 +1,188 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. 
+// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_LINE_Cn_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_LINE_Cn_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HGRAD_LINE_Cn_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_LINE_Cn_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + for (int order=1;order<=Parameters::MaxOrder;++order) { + using BasisType = Basis_HGRAD_LINE_Cn_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + const int ncells = 5, npts = 10, ndim = 1; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, 
ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Order: " << order << ": Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); 
Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << "]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! ----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_C1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_C1_FEM/CMakeLists.txt index aac1913c1e91..60b3eaa1ed20 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_C1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_C1_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME 
"Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_PYR_C1_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_PYR_C1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND 
Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_C1_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_C1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..ac8e1393df20 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_C1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_PYR_C1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_PYR_C1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_C1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_C1_FEM/test_02.hpp new file mode 100644 index 000000000000..ae1ba8b9b47d --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_C1_FEM/test_02.hpp @@ -0,0 +1,184 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_PYR_C1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_PYR_C1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ template + int HGRAD_PYR_C1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_PYR_C1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_PYR_C1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = 
getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << ", " << outputGradsA_Host(ic,i,j,2) <<"]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << ", " << outputGradsB_Host(i,j,2) <<"]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_I2_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_I2_FEM/CMakeLists.txt index fdbf58124c2e..813910ef9f3d 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_I2_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_I2_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,7 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") - +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -68,6 +71,79 @@ FOREACH(I RANGE ${ETI_DEVICE_COUNT}) ENDFOREACH() ENDFOREACH() + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND 
Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_TET_C1_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_TET_C1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + 
TRIBITS_COPY_FILES_TO_BINARY_DIR(HGRAD_PYR_I2TestDataCopy SOURCE_FILES PYR_I2_D2Vals.dat diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_I2_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_I2_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..b1bc686c303d --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_I2_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_PYR_I2_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_PYR_I2_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_I2_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_I2_FEM/test_02.hpp new file mode 100644 index 000000000000..39b7903b384a --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_PYR_I2_FEM/test_02.hpp @@ -0,0 +1,184 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_PYR_I2_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_PYR_I2_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HGRAD_PYR_I2_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_PYR_I2_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_PYR_I2_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto 
functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << ", " << outputGradsA_Host(ic,i,j,2) <<"]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << ", " << outputGradsB_Host(i,j,2) <<"]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C1_FEM/CMakeLists.txt index ef324d6c681f..593042946b82 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C1_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE 
"Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_QUAD_C1_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_QUAD_C1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) diff --git 
a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C1_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..7650cb60968c --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of serial interface Intrepid2::Basis_HGRAD_QUAD_C1_FEM. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_QUAD_C1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C1_FEM/test_02.hpp new file mode 100644 index 000000000000..2dba47d6f022 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C1_FEM/test_02.hpp @@ -0,0 +1,228 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_QUAD_C1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_QUAD_C1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HGRAD_QUAD_C1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_QUAD_C1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_QUAD_C1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device 
= basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_CURL); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, 
outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << "]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j,0) << ", " << outputCurlsA_Host(ic,i,j,1) <<"]" + << ", curls B: [" << outputCurlsB_Host(i,j,0) << ", " << outputCurlsB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C2_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C2_FEM/CMakeLists.txt index 6d92bb337ac4..14d863a19fc7 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C2_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C2_FEM/CMakeLists.txt @@ -1,8 +1,16 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "") +LIST(APPEND Intrepid2_TEST_ETI_FILE + "test_01" + "test_01_Serendipity") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,12 +25,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "") -LIST(APPEND Intrepid2_TEST_ETI_FILE - "test_01" - "test_01_Serendipity") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") 
+ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_QUAD_C2_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_QUAD_C2_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") 
SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C2_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C2_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..3dac2095b0b6 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C2_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of serial interface Intrepid2::Basis_HGRAD_QUAD_C2_FEM. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_QUAD_C2_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C2_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C2_FEM/test_02.hpp new file mode 100644 index 000000000000..cf2ba0043d7b --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_C2_FEM/test_02.hpp @@ -0,0 +1,228 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_QUAD_C2_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_QUAD_C2_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + namespace Test { + + template + int HGRAD_QUAD_C2_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_QUAD_C2_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_QUAD_C2_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device 
= basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_CURL); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, 
outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << "]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j,0) << ", " << outputCurlsA_Host(ic,i,j,1) <<"]" + << ", curls B: [" << outputCurlsB_Host(i,j,0) << ", " << outputCurlsB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/CMakeLists.txt index 9a6190ea4405..50f38bf22177 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/CMakeLists.txt @@ -7,6 +7,7 @@ SET(Intrepid2_TEST_ETI_FILE "test_01") # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -94,11 +95,18 @@ SET(Intrepid2_TEST_ETI_FILE "test_02") # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/eti/test_01_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/eti/test_01_ETI.in index 
46bd4b13ccfb..0cae06ee9e31 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/eti/test_01_ETI.in +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/eti/test_01_ETI.in @@ -28,9 +28,8 @@ constexpr int num_deriv = 10; #define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #elif (ETI_SACADO == 23) -constexpr int num_deriv = 9; -constexpr int max_deriv = 10; -#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, max_deriv+1) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #elif (ETI_SACADO == 20) constexpr int num_deriv = 2; diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/eti/test_02_ETI.in index ccb60ba60798..07adf2c5e888 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/eti/test_02_ETI.in +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/eti/test_02_ETI.in @@ -8,21 +8,44 @@ // @HEADER /** \file test_01.cpp - \brief Unit test of serial interface Intrepid2::Basis_HGRAD_QUAD_Cn_FEM. + \brief Unit test of Intrepid2::Basis_HGRAD_QUAD_Cn_FEM team-level getValues. \author Kyungjoo Kim */ #include "Kokkos_Core.hpp" +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + #include "test_02.hpp" int main(int argc, char *argv[]) { + const bool verbose = (argc-1) > 0; Kokkos::initialize(); - { - const bool verbose = (argc-1) > 0; - Intrepid2::Test::HGRAD_QUAD_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); - } + + Intrepid2::Test::HGRAD_QUAD_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + Kokkos::finalize(); return 0; } diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/test_02.hpp index 36a858dec901..2e2bdf715470 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/test_02.hpp +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_QUAD_Cn_FEM/test_02.hpp @@ -7,9 +7,9 @@ // ***************************************************************************** // @HEADER -/** \file test_01.hpp +/** \file test_02.hpp \brief Unit tests for the Intrepid2::HGRAD_QUAD_Cn_FEM class. - \author Created by P. Bochev, D. Ridzal, K. 
Peterson, Kyungjoo Kim + \author Created by Kyungjoo Kim, Mauro Perego */ @@ -23,98 +23,196 @@ #include "Intrepid2_Utils.hpp" #include "Intrepid2_HGRAD_QUAD_Cn_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" namespace Intrepid2 { namespace Test { - // This code provides an example to use serial interface of high order elements + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. template int HGRAD_QUAD_Cn_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_QUAD_Cn_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; Kokkos::print_configuration(std::cout, false); int errorFlag = 0; - + constexpr int maxOrder = 9; try { - for (int order=1;order basis(order); + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HGRAD_QUAD_Cn_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); - // problem setup - // let's say we want to evaluate 1000 points in parallel. output values are stored in outputValuesA and B. - // A is compuated via serial interface and B is computed with top-level interface. 
- const int npts = 1000, ndim = 2; - Kokkos::DynRankView outputValuesA("outputValuesA", basis.getCardinality(), npts); - Kokkos::DynRankView outputValuesB("outputValuesB", basis.getCardinality(), npts); + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); - Kokkos::View inputPointsViewToUseRandom("inputPoints", npts, ndim); - Kokkos::DynRankView inputPoints (inputPointsViewToUseRandom.data(), npts, ndim); + using ScalarType = typename ScalarTraits::scalar_type; - // random values between (-1,1) x (-1,1) + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); - // compute setup - // we need vinv and workspace - const auto vinv = basis.getVandermondeInverse(); - - // worksize - // workspace per thread is required for serial interface. 
- // parallel_for with range policy would be good to use stack workspace - // as team policy only can create shared memory - // this part would be tricky as the max size should be determined at compile time - // let's think about this and find out the best practice. for now I use the following. - constexpr int worksize = (Parameters::MaxOrder+1)*3; - - // if you use team policy, worksize can be gathered from the basis object and use - // kokkos shmem_size APIs to create workspace per team or per thread. - //const auto worksize_for_teampolicy = basis.getWorksizePerPoint(OPERATOR_VALUE); - - // extract point range to be evaluated in each thread - typedef Kokkos::pair range_type; - - // parallel execution with serial interface - Kokkos::RangePolicy policy(0, npts); - Kokkos::parallel_for(policy, KOKKOS_LAMBDA(int i) { - // we evaluate a single point - const range_type pointRange = range_type(i,i+1); + + *outStream << "Order: " << order << ": Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; - // out (# dofs, # pts), input (# pts, # dims) - auto output = Kokkos::subview(outputValuesA, Kokkos::ALL(), pointRange); - auto input = Kokkos::subview(inputPoints, pointRange, Kokkos::ALL()); + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; - // wrap static workspace with a view; serial interface has a template view interface. - // either view or dynrankview with a right size is okay. - OutValueType workbuf[worksize]; - Kokkos::View work(&workbuf[0], worksize); + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; - // evaluate basis using serial interface - Impl::Basis_HGRAD_QUAD_Cn_FEM - ::Serial::getValues(output, input, work, vinv); - }); - - // evaluation using high level interface - basis.getValues(outputValuesB, inputPoints, OPERATOR_VALUE); - - // compare - const auto outputValuesA_Host = 
Kokkos::create_mirror_view(outputValuesA); Kokkos::deep_copy(outputValuesA_Host, outputValuesA); - const auto outputValuesB_Host = Kokkos::create_mirror_view(outputValuesB); Kokkos::deep_copy(outputValuesB_Host, outputValuesB); - - double sum = 0, diff = 0; - for (size_t i=0;igetValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Order: " << order << ": Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << "]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << 
ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j,0) << ", " << outputCurlsA_Host(ic,i,j,1) <<"]" + << ", curls B: [" << outputCurlsB_Host(i,j,0) << ", " << outputCurlsB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } } } } catch (std::exception &err) { diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C1_FEM/CMakeLists.txt index 37135caa841f..325ab37afd81 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C1_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_TET_C1_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_TET_C1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C1_FEM/eti/test_02_ETI.in 
b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..7fab72655932 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_TET_C1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_TET_C1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C1_FEM/test_02.hpp new file mode 100644 index 000000000000..48d5b3eb9e2f --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C1_FEM/test_02.hpp @@ -0,0 +1,184 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_TET_C1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_TET_C1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HGRAD_TET_C1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_TET_C1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_TET_C1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto 
functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << ", " << outputGradsA_Host(ic,i,j,2) <<"]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << ", " << outputGradsB_Host(i,j,2) <<"]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C2_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C2_FEM/CMakeLists.txt index 2d0041218982..cac75a8505b4 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C2_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C2_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE 
"Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_TET_C2_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_TET_C2_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) diff --git 
a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C2_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C2_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..b01c59418753 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C2_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_TET_C2_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_TET_C2_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C2_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C2_FEM/test_02.hpp new file mode 100644 index 000000000000..5b788cc85328 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_C2_FEM/test_02.hpp @@ -0,0 +1,184 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_TET_C2_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_TET_C2_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + namespace Test { + + template + int HGRAD_TET_C2_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_TET_C2_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_TET_C2_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto 
functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << ", " << outputGradsA_Host(ic,i,j,2) <<"]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << ", " << outputGradsB_Host(i,j,2) <<"]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_COMP12_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_COMP12_FEM/CMakeLists.txt index 09fddb77ac02..3bc181264b2b 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_COMP12_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_COMP12_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND 
Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_TET_COMP12_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_TET_COMP12_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) diff --git 
a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_COMP12_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_COMP12_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..c7a155d2f8a9 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_COMP12_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_TET_COMP12_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_TET_COMP12_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_COMP12_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_COMP12_FEM/test_02.hpp new file mode 100644 index 000000000000..9120a9bf8b53 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_COMP12_FEM/test_02.hpp @@ -0,0 +1,184 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_TET_COMP12_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_TET_COMP12_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HGRAD_TET_COMP12_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_TET_COMP12_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_TET_COMP12_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute 
values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << ", " << outputGradsA_Host(ic,i,j,2) <<"]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << ", " << outputGradsB_Host(i,j,2) <<"]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/CMakeLists.txt index cb2c34d9f2e6..b669b1b2ba1f 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/CMakeLists.txt @@ -7,6 +7,7 @@ SET(Intrepid2_TEST_ETI_FILE "test_01") # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -94,11 +95,18 @@ SET(Intrepid2_TEST_ETI_FILE "test_02") # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/eti/test_01_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/eti/test_01_ETI.in index 
74f1bccc00db..c997523b3120 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/eti/test_01_ETI.in +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/eti/test_01_ETI.in @@ -28,10 +28,8 @@ constexpr int num_deriv = 10; #define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #elif (ETI_SACADO == 23) -/// Mauro, the master branch uses this derivative dimension which sounds dummy -constexpr int num_deriv = 0; //9; -constexpr int max_deriv = 1; //10; -#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, max_deriv+1) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #elif (ETI_SACADO == 20) constexpr int num_deriv = 2; diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/eti/test_02_ETI.in index c26586d323cf..a6b3263eb7c0 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/eti/test_02_ETI.in +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/eti/test_02_ETI.in @@ -8,21 +8,44 @@ // @HEADER /** \file test_01.cpp - \brief Unit test of serial interface Intrepid2::Basis_HGRAD_TET_Cn_FEM. + \brief Unit test of Intrepid2::Basis_HGRAD_TET_Cn_FEM team-level getValues. \author Kyungjoo Kim */ #include "Kokkos_Core.hpp" +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + #include "test_02.hpp" int main(int argc, char *argv[]) { + const bool verbose = (argc-1) > 0; Kokkos::initialize(); - { - const bool verbose = (argc-1) > 0; - Intrepid2::Test::HGRAD_TET_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); - } + + Intrepid2::Test::HGRAD_TET_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + Kokkos::finalize(); return 0; } diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/test_02.hpp index 4f6c6c3a33e0..711fd6d35bdb 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/test_02.hpp +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TET_Cn_FEM/test_02.hpp @@ -7,9 +7,9 @@ // ***************************************************************************** // @HEADER -/** \file test_01.hpp +/** \file test_02.hpp \brief Unit tests for the Intrepid2::HGRAD_TET_Cn_FEM class. - \author Created by P. Bochev, D. Ridzal, K. 
Peterson, Kyungjoo Kim + \author Created by Kyungjoo Kim, Mauro Perego */ @@ -23,99 +23,153 @@ #include "Intrepid2_Utils.hpp" #include "Intrepid2_HGRAD_TET_Cn_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" namespace Intrepid2 { namespace Test { - // This code provides an example to use serial interface of high order elements + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. template int HGRAD_TET_Cn_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_TET_Cn_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + using DeviceSpaceType = typename DeviceType::execution_space; Kokkos::print_configuration(std::cout, false); int errorFlag = 0; - + constexpr int maxOrder = 7; try { - for (int order=1;order<10;++order) { - Basis_HGRAD_TET_Cn_FEM basis(order); + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HGRAD_TET_Cn_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); - // problem setup - // let's say we want to evaluate 1000 points in parallel. output values are stored in outputValuesA and B. - // A is compuated via serial interface and B is computed with top-level interface. 
- const int npts = 1000, ndim = 3; - Kokkos::DynRankView outputValuesA("outputValuesA", basis.getCardinality(), npts); - Kokkos::DynRankView outputValuesB("outputValuesB", basis.getCardinality(), npts); + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; - Kokkos::View inputPointsViewToUseRandom("inputPoints", npts, ndim); - Kokkos::DynRankView inputPoints (inputPointsViewToUseRandom.data(), npts, ndim); + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); - // random values between (-1,1) x (-1,1) + // random values between (0,1) Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); - // compute setup - // we need vinv and workspace - const auto vinv = basis.getVandermondeInverse(); - - // worksize - // workspace per thread is required for serial interface. - // parallel_for with range policy would be good to use stack workspace - // as team policy only can create shared memory - // this part would be tricky as the max size should be determined at compile time - // let's think about this and find out the best practice. for now I use the following. 
- constexpr int worksize = (Parameters::MaxOrder+1)*(Parameters::MaxOrder+1)*(Parameters::MaxOrder+1); - - // if you use team policy, worksize can be gathered from the basis object and use - // kokkos shmem_size APIs to create workspace per team or per thread. - //const auto worksize_for_teampolicy = basis.getWorksizePerPoint(OPERATOR_VALUE); - - // extract point range to be evaluated in each thread - typedef Kokkos::pair range_type; - - // parallel execution with serial interface - Kokkos::RangePolicy policy(0, npts); - Kokkos::parallel_for(policy, KOKKOS_LAMBDA(int i) { - // we evaluate a single point - const range_type pointRange = range_type(i,i+1); - - // out (# dofs, # pts), input (# pts, # dims) - auto output = Kokkos::subview(outputValuesA, Kokkos::ALL(), pointRange); - auto input = Kokkos::subview(inputPoints, pointRange, Kokkos::ALL()); + + *outStream << "Order: " << order << ": Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; - // wrap static workspace with a view; serial interface has a template view interface. - // either view or dynrankview with a right size is okay. - OutValueType workbuf[worksize]; - Kokkos::View work(&workbuf[0], worksize); + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; - // evaluate basis using serial interface - Impl::Basis_HGRAD_TET_Cn_FEM - ::Serial::getValues(output, input, work, vinv); - }); - - // evaluation using high level interface - basis.getValues(outputValuesB, inputPoints, OPERATOR_VALUE); - - // compare - const auto outputValuesA_Host = Kokkos::create_mirror_view(outputValuesA); Kokkos::deep_copy(outputValuesA_Host, outputValuesA); - const auto outputValuesB_Host = Kokkos::create_mirror_view(outputValuesB); Kokkos::deep_copy(outputValuesB_Host, outputValuesB); - - double sum = 0, diff = 0; - for (size_t i=0;igetValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Order: " << order << ": Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); 
Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + + //Note, the PR intel 2021 serial build shows substantially higher errors (possibly due to operation rearrangements). + auto tol = 1.0e6*epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << ", " << outputGradsA_Host(ic,i,j,2) <<"]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << ", " << outputGradsB_Host(i,j,2) <<"]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } } } } catch (std::exception &err) { diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C1_FEM/CMakeLists.txt index eb726da6bb26..c5f307a89f52 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C1_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME 
"OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_TRI_C1_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_TRI_C1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND 
Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C1_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..1a918203d7cc --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_01.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_TRI_C1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_TRI_C1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C1_FEM/test_02.hpp new file mode 100644 index 000000000000..928394ded0a4 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C1_FEM/test_02.hpp @@ -0,0 +1,228 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_TRI_C1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_TRI_C1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HGRAD_TRI_C1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_TRI_C1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_TRI_C1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = 
basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_CURL); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, 
outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << "]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j,0) << ", " << outputCurlsA_Host(ic,i,j,1) <<"]" + << ", curls B: [" << outputCurlsB_Host(i,j,0) << ", " << outputCurlsB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C2_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C2_FEM/CMakeLists.txt index 21c4f220d58a..ae831c937e39 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C2_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C2_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE 
"Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_TRI_C2_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_TRI_C2_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) diff --git 
a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C2_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C2_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..cd49ca800b02 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C2_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_TRI_C2_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_TRI_C2_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C2_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C2_FEM/test_02.hpp new file mode 100644 index 000000000000..60b8c49aa454 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_C2_FEM/test_02.hpp @@ -0,0 +1,228 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_TRI_C2_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_TRI_C2_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HGRAD_TRI_C2_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_TRI_C2_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_TRI_C2_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = 
basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_CURL); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, 
outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << "]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j,0) << ", " << outputCurlsA_Host(ic,i,j,1) <<"]" + << ", curls B: [" << outputCurlsB_Host(i,j,0) << ", " << outputCurlsB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/CMakeLists.txt index 28b96612c334..4855e54c85a1 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/CMakeLists.txt @@ -7,6 +7,7 @@ SET(Intrepid2_TEST_ETI_FILE "test_01") # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -94,11 +95,18 @@ SET(Intrepid2_TEST_ETI_FILE "test_02") # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/eti/test_01_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/eti/test_01_ETI.in index 
513cf7cb37bc..eaaead469fb6 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/eti/test_01_ETI.in +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/eti/test_01_ETI.in @@ -14,7 +14,6 @@ #include "Kokkos_Core.hpp" - #define ETI_SACADO @ETI_SACADO@ #if (ETI_SACADO != 0) /// SACADO #include "Kokkos_ViewFactory.hpp" @@ -29,16 +28,15 @@ constexpr int num_deriv = 10; #define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #elif (ETI_SACADO == 23) -/// Mauro, the master branch uses this derivative dimension which sounds dummy -constexpr int num_deriv = 0; //9; -constexpr int max_deriv = 1; //10; -#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, max_deriv+1) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #elif (ETI_SACADO == 20) constexpr int num_deriv = 2; #define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) #define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) #endif + #include "test_01.hpp" int main(int argc, char *argv[]) { diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/eti/test_02_ETI.in index c78997d6ea0c..a5343a485d3a 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/eti/test_02_ETI.in +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/eti/test_02_ETI.in @@ -14,15 +14,38 @@ #include "Kokkos_Core.hpp" +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + #include "test_02.hpp" int main(int argc, char *argv[]) { + const bool verbose = (argc-1) > 0; Kokkos::initialize(); - { - const bool verbose = (argc-1) > 0; - Intrepid2::Test::HGRAD_TRI_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); - } + + Intrepid2::Test::HGRAD_TRI_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + Kokkos::finalize(); return 0; } diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/test_02.hpp index 80d75c9bf099..a82178b45f9a 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/test_02.hpp +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_TRI_Cn_FEM/test_02.hpp @@ -7,9 +7,9 @@ // ***************************************************************************** // @HEADER -/** \file test_01.hpp +/** \file test_02.hpp \brief Unit tests for the Intrepid2::HGRAD_TRI_Cn_FEM class. - \author Created by P. Bochev, D. Ridzal, K. 
Peterson, Kyungjoo Kim + \author Created by Kyungjoo Kim, Mauro Perego */ @@ -23,99 +23,196 @@ #include "Intrepid2_Utils.hpp" #include "Intrepid2_HGRAD_TRI_Cn_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" namespace Intrepid2 { namespace Test { - // This code provides an example to use serial interface of high order elements + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. template int HGRAD_TRI_Cn_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_TRI_Cn_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + using DeviceSpaceType = typename DeviceType::execution_space; Kokkos::print_configuration(std::cout, false); int errorFlag = 0; - + constexpr int maxOrder = 9; try { - for (int order=1;order basis(order); + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HGRAD_TRI_Cn_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); - // problem setup - // let's say we want to evaluate 1000 points in parallel. output values are stored in outputValuesA and B. - // A is compuated via serial interface and B is computed with top-level interface. 
- const int npts = 1000, ndim = 2; - Kokkos::DynRankView outputValuesA("outputValuesA", basis.getCardinality(), npts); - Kokkos::DynRankView outputValuesB("outputValuesB", basis.getCardinality(), npts); + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputCurlsB, basisPtr->getCardinality(), npts, ndim); + + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); - Kokkos::View inputPointsViewToUseRandom("inputPoints", npts, ndim); - Kokkos::DynRankView inputPoints (inputPointsViewToUseRandom.data(), npts, ndim); + using ScalarType = typename ScalarTraits::scalar_type; - // random values between (-1,1) x (-1,1) + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) Kokkos::Random_XorShift64_Pool random(13718); Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); - // compute setup - // we need vinv and workspace - const auto vinv = basis.getVandermondeInverse(); - - // worksize - // workspace per thread is required for serial interface. 
- // parallel_for with range policy would be good to use stack workspace - // as team policy only can create shared memory - // this part would be tricky as the max size should be determined at compile time - // let's think about this and find out the best practice. for now I use the following. - constexpr int worksize = (Parameters::MaxOrder+1)*(Parameters::MaxOrder+1); - - // if you use team policy, worksize can be gathered from the basis object and use - // kokkos shmem_size APIs to create workspace per team or per thread. - //const auto worksize_for_teampolicy = basis.getWorksizePerPoint(OPERATOR_VALUE); - - // extract point range to be evaluated in each thread - typedef Kokkos::pair range_type; - - // parallel execution with serial interface - Kokkos::RangePolicy policy(0, npts); - Kokkos::parallel_for(policy, KOKKOS_LAMBDA(int i) { - // we evaluate a single point - const range_type pointRange = range_type(i,i+1); + + *outStream << "Order: " << order << ": Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; - // out (# dofs, # pts), input (# pts, # dims) - auto output = Kokkos::subview(outputValuesA, Kokkos::ALL(), pointRange); - auto input = Kokkos::subview(inputPoints, pointRange, Kokkos::ALL()); + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; - // wrap static workspace with a view; serial interface has a template view interface. - // either view or dynrankview with a right size is okay. - OutValueType workbuf[worksize]; - Kokkos::View work(&workbuf[0], worksize); + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute curls + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto curlsACell = Kokkos::subview(outputCurlsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(curlsACell, inputPoints, OPERATOR_CURL, team_member, team_member.team_scratch(scratch_space_level)); + }; - // evaluate basis using serial interface - Impl::Basis_HGRAD_TRI_Cn_FEM - ::Serial::getValues(output, input, work, vinv); - }); - - // evaluation using high level interface - basis.getValues(outputValuesB, inputPoints, OPERATOR_VALUE); - - // compare - const auto outputValuesA_Host = 
Kokkos::create_mirror_view(outputValuesA); Kokkos::deep_copy(outputValuesA_Host, outputValuesA); - const auto outputValuesB_Host = Kokkos::create_mirror_view(outputValuesB); Kokkos::deep_copy(outputValuesB_Host, outputValuesB); - - double sum = 0, diff = 0; - for (size_t i=0;igetValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + basisPtr->getValues(outputCurlsB, inputPoints, OPERATOR_CURL); + + *outStream << "Order: " << order << ": Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << "]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare curls + const auto outputCurlsA_Host = Kokkos::create_mirror_view(outputCurlsA); Kokkos::deep_copy(outputCurlsA_Host, outputCurlsA); + const auto outputCurlsB_Host = Kokkos::create_mirror_view(outputCurlsB); Kokkos::deep_copy(outputCurlsB_Host, outputCurlsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << 
ic << ", i: " << i << ", j: " << j + << ", curls A: [" << outputCurlsA_Host(ic,i,j,0) << ", " << outputCurlsA_Host(ic,i,j,1) <<"]" + << ", curls B: [" << outputCurlsB_Host(i,j,0) << ", " << outputCurlsB_Host(i,j,1) << "]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } } } } catch (std::exception &err) { diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C1_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C1_FEM/CMakeLists.txt index a32463e45988..4b1a7626d4ff 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C1_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C1_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,9 +22,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "test_01") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_WEDGE_C1_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_WEDGE_C1_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C1_FEM/eti/test_02_ETI.in 
b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C1_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..759d2a05ad2a --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C1_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_WEDGE_C1_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_WEDGE_C1_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C1_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C1_FEM/test_02.hpp new file mode 100644 index 000000000000..33f9bb85137d --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C1_FEM/test_02.hpp @@ -0,0 +1,184 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_WEDGE_C1_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_WEDGE_C1_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. + template + int HGRAD_WEDGE_C1_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_WEDGE_C1_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_WEDGE_C1_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + 
auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << ", " << outputGradsA_Host(ic,i,j,2) <<"]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << ", " << outputGradsB_Host(i,j,2) <<"]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C2_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C2_FEM/CMakeLists.txt index 759b1bc6bc22..f82d6a4f7fc4 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C2_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C2_FEM/CMakeLists.txt @@ -1,8 +1,16 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "") +LIST(APPEND Intrepid2_TEST_ETI_FILE + "test_01" + "test_01_Serendipity") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double") @@ -17,12 +25,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") -# Host test -SET(Intrepid2_TEST_ETI_FILE "") -LIST(APPEND Intrepid2_TEST_ETI_FILE - "test_01" - "test_01_Serendipity") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE 
"Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HGRAD_WEDGE_C2_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HGRAD_WEDGE_C2_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + +# test +SET(Intrepid2_TEST_ETI_FILE "test_02") + +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device 
SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -83,4 +159,3 @@ TRIBITS_COPY_FILES_TO_BINARY_DIR(HGRAD_WEDGE_C2TestDataCopy SOURCE_DIR "${CMAKE_CURRENT_SOURCE_DIR}/testdata" DEST_DIR "${CMAKE_CURRENT_BINARY_DIR}/testdata" ) - diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C2_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C2_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..86de2ff60329 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C2_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HGRAD_WEDGE_C2_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HGRAD_WEDGE_C2_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C2_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C2_FEM/test_02.hpp new file mode 100644 index 000000000000..35f0e18c3dbe --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HGRAD_WEDGE_C2_FEM/test_02.hpp @@ -0,0 +1,182 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HGRAD_WEDGE_C2_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HGRAD_WEDGE_C2_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + template + int HGRAD_WEDGE_C2_FEM_Test02(const bool verbose) { + + //! Setup test output stream. 
+ Teuchos::RCP outStream = setup_output_stream( + verbose, "HGRAD_WEDGE_C2_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + try { + using BasisType = Basis_HGRAD_WEDGE_C2_FEM; + auto basisPtr = Teuchos::rcp(new BasisType()); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsA, ncells, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelOutView(outputGradsB, basisPtr->getCardinality(), npts, ndim); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Computing values and gradients for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + 
auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + + { //compute gradients + auto functor = KOKKOS_LAMBDA (typename Kokkos::TeamPolicy::member_type team_member) { + auto gradsACell = Kokkos::subview(outputGradsA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(gradsACell, inputPoints, OPERATOR_GRAD, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. 
+ int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_GRAD); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Computing values and gradients for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + basisPtr->getValues(outputGradsB, inputPoints, OPERATOR_GRAD); + + *outStream << "Comparing values and gradients on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + + { + // compare grads + const auto outputGradsA_Host = Kokkos::create_mirror_view(outputGradsA); Kokkos::deep_copy(outputGradsA_Host, outputGradsA); + const auto outputGradsB_Host = Kokkos::create_mirror_view(outputGradsB); Kokkos::deep_copy(outputGradsB_Host, outputGradsB); + + OutValueType diff = 0; + auto tol = epsilon(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", grads A: [" << outputGradsA_Host(ic,i,j,0) << ", " << outputGradsA_Host(ic,i,j,1) << ", " << outputGradsA_Host(ic,i,j,2) <<"]" + << ", grads B: [" << outputGradsB_Host(i,j,0) << ", " << outputGradsB_Host(i,j,1) << ", " << outputGradsB_Host(i,j,2) <<"]" + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_HEX_Cn_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_HEX_Cn_FEM/CMakeLists.txt index ebf2144cc03f..fa03caf7d02b 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_HEX_Cn_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_HEX_Cn_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HVOL_HEX_Cn_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HVOL_HEX_Cn_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + # test -SET(Intrepid2_TEST_ETI_FILE "test_01") +SET(Intrepid2_TEST_ETI_FILE "test_02") +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -60,7 +136,6 @@ MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") FOREACH(I 
RANGE ${ETI_DEVICE_COUNT}) LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) - #MESSAGE(STATUS "Generating TEST HVOL_HEX_Cn_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_HEX_Cn_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_HEX_Cn_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..766fea986506 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_HEX_Cn_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HVOL_HEX_Cn_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HVOL_HEX_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_HEX_Cn_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_HEX_Cn_FEM/test_02.hpp new file mode 100644 index 000000000000..115a371bc82a --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_HEX_Cn_FEM/test_02.hpp @@ -0,0 +1,144 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HVOL_HEX_Cn_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HVOL_HEX_Cn_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ template + int HVOL_HEX_Cn_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HVOL_HEX_Cn_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + constexpr int maxOrder = 9; + try { + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HVOL_HEX_Cn_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA 
(typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + + *outStream << "Order: " << order << ": Comparing values on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_LINE_Cn_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_LINE_Cn_FEM/CMakeLists.txt index f5caa97dcaf1..24d663415a74 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_LINE_Cn_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_LINE_Cn_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HVOL_LINE_Cn_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HVOL_LINE_Cn_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + # test -SET(Intrepid2_TEST_ETI_FILE "test_01") +SET(Intrepid2_TEST_ETI_FILE "test_02") +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -60,7 +136,6 @@ MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") FOREACH(I 
RANGE ${ETI_DEVICE_COUNT}) LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) - #MESSAGE(STATUS "Generating TEST HVOL_LINE_Cn_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_LINE_Cn_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_LINE_Cn_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..a6f42f8ba7b8 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_LINE_Cn_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HVOL_LINE_Cn_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HVOL_LINE_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_LINE_Cn_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_LINE_Cn_FEM/test_02.hpp new file mode 100644 index 000000000000..c71f401eda49 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_LINE_Cn_FEM/test_02.hpp @@ -0,0 +1,144 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HVOL_LINE_Cn_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HVOL_LINE_Cn_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ template + int HVOL_LINE_Cn_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HVOL_LINE_Cn_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + constexpr int maxOrder = 9; + try { + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HVOL_LINE_Cn_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + const int ncells = 5, npts = 10, ndim = 1; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA 
(typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + + *outStream << "Order: " << order << ": Comparing values on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_QUAD_Cn_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_QUAD_Cn_FEM/CMakeLists.txt index 769157316641..aedb132dcc82 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_QUAD_Cn_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_QUAD_Cn_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HVOL_QUAD_Cn_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HVOL_QUAD_Cn_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + # test -SET(Intrepid2_TEST_ETI_FILE "test_01") +SET(Intrepid2_TEST_ETI_FILE "test_02") +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -60,7 +136,6 @@ MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") FOREACH(I 
RANGE ${ETI_DEVICE_COUNT}) LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) - #MESSAGE(STATUS "Generating TEST HVOL_QUAD_Cn_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_QUAD_Cn_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_QUAD_Cn_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..1c01cd896135 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_QUAD_Cn_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HVOL_QUAD_Cn_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HVOL_QUAD_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_QUAD_Cn_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_QUAD_Cn_FEM/test_02.hpp new file mode 100644 index 000000000000..044f8fad53a0 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_QUAD_Cn_FEM/test_02.hpp @@ -0,0 +1,144 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HVOL_QUAD_Cn_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HVOL_QUAD_Cn_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ template + int HVOL_QUAD_Cn_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HVOL_QUAD_Cn_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + constexpr int maxOrder = 9; + try { + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HVOL_QUAD_Cn_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA 
(typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + + *outStream << "Order: " << order << ": Comparing values on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TET_Cn_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TET_Cn_FEM/CMakeLists.txt index 0f61f9a7cfff..7dfea7c9986c 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TET_Cn_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TET_Cn_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HVOL_TET_Cn_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HVOL_TET_Cn_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + # test -SET(Intrepid2_TEST_ETI_FILE "test_01") +SET(Intrepid2_TEST_ETI_FILE "test_02") +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -60,7 +136,6 @@ MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") FOREACH(I 
RANGE ${ETI_DEVICE_COUNT}) LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) - #MESSAGE(STATUS "Generating TEST HVOL_TET_Cn_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TET_Cn_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TET_Cn_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..d15050227457 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TET_Cn_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HVOL_TET_Cn_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HVOL_TET_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TET_Cn_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TET_Cn_FEM/test_02.hpp new file mode 100644 index 000000000000..d27c666355c1 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TET_Cn_FEM/test_02.hpp @@ -0,0 +1,144 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HVOL_TET_Cn_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HVOL_TET_Cn_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ template + int HVOL_TET_Cn_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HVOL_TET_Cn_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + constexpr int maxOrder = 9; + try { + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HVOL_TET_Cn_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + const int ncells = 5, npts = 10, ndim = 3; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA 
(typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + + *outStream << "Order: " << order << ": Comparing values on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TRI_Cn_FEM/CMakeLists.txt b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TRI_Cn_FEM/CMakeLists.txt index 700140bb8337..49398919d584 100644 --- a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TRI_Cn_FEM/CMakeLists.txt +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TRI_Cn_FEM/CMakeLists.txt @@ -1,8 +1,13 @@ TRIBITS_INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# test +SET(Intrepid2_TEST_ETI_FILE "test_01") + # value types SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") @@ -32,9 +37,80 @@ ENDIF() LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") +# device +SET(Intrepid2_TEST_ETI_DEVICE_NAME "") +SET(Intrepid2_TEST_ETI_DEVICE "") +IF(Kokkos_ENABLE_SERIAL) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "Serial") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_OPENMP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "OpenMP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_CUDA) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "CUDA") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() +IF(Kokkos_ENABLE_HIP) + LIST(APPEND Intrepid2_TEST_ETI_DEVICE_NAME "HIP") + LIST(APPEND Intrepid2_TEST_ETI_DEVICE "Kokkos::Device") +ENDIF() + +LIST(LENGTH 
Intrepid2_TEST_ETI_DEVICE_NAME ETI_DEVICE_COUNT) +MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") + +FOREACH(I RANGE ${ETI_DEVICE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) + LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) + #MESSAGE(STATUS "Generating TEST HVOL_TRI_Cn_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") + FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) + LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) + LIST(GET Intrepid2_TEST_ETI_SACADO ${J} ETI_SACADO) + FOREACH(ETI_FILE IN LISTS Intrepid2_TEST_ETI_FILE) + SET(ETI_NAME "${ETI_FILE}_${ETI_DEVICE_NAME}_${ETI_VALUETYPE_NAME}") + MESSAGE(STATUS "Generating TEST: HVOL_TRI_Cn_FEM ${ETI_NAME}.cpp") + CONFIGURE_FILE(eti/${ETI_FILE}_ETI.in ${ETI_NAME}.cpp) + + TRIBITS_ADD_EXECUTABLE_AND_TEST( + ${ETI_NAME} + SOURCES ${ETI_NAME}.cpp + ARGS PrintItAll + NUM_MPI_PROCS 1 + PASS_REGULAR_EXPRESSION "TEST PASSED" + ADD_DIR_TO_NAME + ) + + ENDFOREACH() + ENDFOREACH() +ENDFOREACH() + + + + # test -SET(Intrepid2_TEST_ETI_FILE "test_01") +SET(Intrepid2_TEST_ETI_FILE "test_02") +# value types +SET(Intrepid2_TEST_ETI_VALUETYPE_NAME "") +SET(Intrepid2_TEST_ETI_VALUETYPE "") +SET(Intrepid2_TEST_ETI_SACADO "") + +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DOUBLE_DOUBLE") +LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "double,double") +LIST(APPEND Intrepid2_TEST_ETI_SACADO "0") + +IF (HAVE_INTREPID2_SACADO) + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE_NAME "DFAD_DFAD") + LIST(APPEND Intrepid2_TEST_ETI_VALUETYPE "Sacado::Fad::DFad,Sacado::Fad::DFad ") + LIST(APPEND Intrepid2_TEST_ETI_SACADO "33") +ENDIF() + +LIST(LENGTH Intrepid2_TEST_ETI_VALUETYPE_NAME ETI_VALUETYPE_COUNT) +MATH(EXPR ETI_VALUETYPE_COUNT "${ETI_VALUETYPE_COUNT}-1") + +# device SET(Intrepid2_TEST_ETI_DEVICE_NAME "") SET(Intrepid2_TEST_ETI_DEVICE "") IF(Kokkos_ENABLE_SERIAL) @@ -60,7 +136,6 @@ MATH(EXPR ETI_DEVICE_COUNT "${ETI_DEVICE_COUNT}-1") FOREACH(I 
RANGE ${ETI_DEVICE_COUNT}) LIST(GET Intrepid2_TEST_ETI_DEVICE_NAME ${I} ETI_DEVICE_NAME) LIST(GET Intrepid2_TEST_ETI_DEVICE ${I} ETI_DEVICE) - #MESSAGE(STATUS "Generating TEST HVOL_TRI_Cn_FEM for ${ETI_DEVICE_NAME} with ${ETI_DEVICE}") FOREACH(J RANGE ${ETI_VALUETYPE_COUNT}) LIST(GET Intrepid2_TEST_ETI_VALUETYPE_NAME ${J} ETI_VALUETYPE_NAME) LIST(GET Intrepid2_TEST_ETI_VALUETYPE ${J} ETI_VALUETYPE) diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TRI_Cn_FEM/eti/test_02_ETI.in b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TRI_Cn_FEM/eti/test_02_ETI.in new file mode 100644 index 000000000000..4b98bc03263a --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TRI_Cn_FEM/eti/test_02_ETI.in @@ -0,0 +1,52 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.cpp + \brief Unit test of Intrepid2::Basis_HVOL_TRI_Cn_FEM team-level getValues. + \author Kyungjoo Kim +*/ + +#include "Kokkos_Core.hpp" + +#define ETI_SACADO @ETI_SACADO@ +#if (ETI_SACADO != 0) /// SACADO +#include "Kokkos_ViewFactory.hpp" +#include "Sacado.hpp" +#endif + +#if (ETI_SACADO == 0) /// double double +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#elif (ETI_SACADO == 11 /* SFAD SFAD */ || ETI_SACADO == 33 /* DFAD DFAD */) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 23) +constexpr int num_deriv = 3; +#define ConstructWithLabelOutView(obj, ...) 
obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#elif (ETI_SACADO == 20) +constexpr int num_deriv = 2; +#define ConstructWithLabelOutView(obj, ...) obj(#obj, __VA_ARGS__, num_deriv+1) +#define ConstructWithLabelPointView(obj, ...) obj(#obj, __VA_ARGS__) +#endif + +#include "test_02.hpp" + +int main(int argc, char *argv[]) { + + const bool verbose = (argc-1) > 0; + Kokkos::initialize(); + + Intrepid2::Test::HVOL_TRI_Cn_FEM_Test02<@ETI_VALUETYPE@,@ETI_DEVICE@>(verbose); + + Kokkos::finalize(); + return 0; +} + diff --git a/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TRI_Cn_FEM/test_02.hpp b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TRI_Cn_FEM/test_02.hpp new file mode 100644 index 000000000000..aaefaa951c33 --- /dev/null +++ b/packages/intrepid2/unit-test/Discretization/Basis/HVOL_TRI_Cn_FEM/test_02.hpp @@ -0,0 +1,145 @@ +// @HEADER +// ***************************************************************************** +// Intrepid2 Package +// +// Copyright 2007 NTESS and the Intrepid2 contributors. +// SPDX-License-Identifier: BSD-3-Clause +// ***************************************************************************** +// @HEADER + +/** \file test_02.hpp + \brief Unit tests for the Intrepid2::HVOL_TRI_Cn_FEM class. + \author Created by Kyungjoo Kim, Mauro Perego + */ + + +#include "Intrepid2_config.h" +#include "Kokkos_Random.hpp" +#ifdef HAVE_INTREPID2_DEBUG +#define INTREPID2_TEST_FOR_DEBUG_ABORT_OVERRIDE_TO_CONTINUE +#endif + +#include "Intrepid2_Types.hpp" +#include "Intrepid2_Utils.hpp" + +#include "Intrepid2_HVOL_TRI_Cn_FEM.hpp" +#include "packages/intrepid2/unit-test/Discretization/Basis/Setup.hpp" + +namespace Intrepid2 { + + namespace Test { + + // This test evaluates the basis functions at a set of points on a batch of cells using the team-level getValues, + // and compares the results with those obtained using the classic getValues function. 
+ template + int HVOL_TRI_Cn_FEM_Test02(const bool verbose) { + + //! Setup test output stream. + Teuchos::RCP outStream = setup_output_stream( + verbose, "HVOL_TRI_Cn_FEM, Test 2", {} + ); + + *outStream + << "\n" + << "===============================================================================\n" + << "| Testing Team-level Implemntation of getValues |\n" + << "===============================================================================\n"; + + using DeviceSpaceType = typename DeviceType::execution_space; + Kokkos::print_configuration(std::cout, false); + + int errorFlag = 0; + + constexpr int maxOrder = 9; + try { + for (int order=1;order<=maxOrder;++order) { + using BasisType = Basis_HVOL_TRI_Cn_FEM; + auto basisPtr = Teuchos::rcp(new BasisType(order)); + + const int ncells = 5, npts = 10, ndim = 2; + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesA, ncells, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelOutView(outputValuesB, basisPtr->getCardinality(), npts); + Kokkos::DynRankView ConstructWithLabelPointView(point, 1); + + using ScalarType = typename ScalarTraits::scalar_type; + + Kokkos::View inputPointsViewToUseRandom("inputPoints", npts*ndim*get_dimension_scalar(point)); + auto vcprop = Kokkos::common_view_alloc_prop(point); + Kokkos::DynRankView inputPoints (Kokkos::view_wrap(inputPointsViewToUseRandom.data(), vcprop), npts, ndim); + + // random values between (0,1) + Kokkos::Random_XorShift64_Pool random(13718); + Kokkos::fill_random(inputPointsViewToUseRandom, random, 1.0); + + + *outStream << "Order: " << order << ": Computing values for " << ncells << " cells and " << npts << " points using team-level getValues function" <(*basisPtr); + auto basisRawPtr_device = basisPtr_device.get(); + + int scratch_space_level =1; + const int vectorSize = getVectorSizeForHierarchicalParallelism(); + Kokkos::TeamPolicy teamPolicy(ncells, Kokkos::AUTO,vectorSize); + + { //compute values + auto functor = KOKKOS_LAMBDA 
(typename Kokkos::TeamPolicy::member_type team_member) { + auto valsACell = Kokkos::subview(outputValuesA, team_member.league_rank(), Kokkos::ALL(), Kokkos::ALL(), Kokkos::ALL()); + basisRawPtr_device->getValues(valsACell, inputPoints, OPERATOR_VALUE, team_member, team_member.team_scratch(scratch_space_level)); + }; + + //Get the required size of the scratch space per team and per thread. + int perThreadSpaceSize(0), perTeamSpaceSize(0); + basisPtr->getScratchSpaceSize(perTeamSpaceSize,perThreadSpaceSize,inputPoints, OPERATOR_VALUE); + teamPolicy.set_scratch_size(scratch_space_level, Kokkos::PerTeam(perTeamSpaceSize), Kokkos::PerThread(perThreadSpaceSize)); + + Kokkos::parallel_for (teamPolicy,functor); + } + } + + *outStream << "Order: " << order << ": Computing values for " << npts << " points using high-level getValues function" <getValues(outputValuesB, inputPoints, OPERATOR_VALUE); + + *outStream << "Order: " << order << ": Comparing values on host" <(); + for (size_t ic=0;ic tol) { + ++errorFlag; + std::cout << " order: " << order + << ", ic: " << ic << ", i: " << i << ", j: " << j + << ", val A: " << outputValuesA_Host(ic,i,j) + << ", val B: " << outputValuesB_Host(i,j) + << ", |diff|: " << diff + << ", tol: " << tol + << std::endl; + } + } + } + } + } catch (std::exception &err) { + std::cout << "UNEXPECTED ERROR !!! 
----------------------------------------------------------\n"; + std::cout << err.what() << '\n'; + std::cout << "-------------------------------------------------------------------------------" << "\n\n"; + errorFlag = -1000; + }; + + if (errorFlag != 0) + std::cout << "End Result: TEST FAILED\n"; + else + std::cout << "End Result: TEST PASSED\n"; + + return errorFlag; + } + } +} diff --git a/packages/intrepid2/unit-test/Shared/Polylib/test_01.hpp b/packages/intrepid2/unit-test/Shared/Polylib/test_01.hpp index 5f71f3b5e376..4dcc02da30ce 100644 --- a/packages/intrepid2/unit-test/Shared/Polylib/test_01.hpp +++ b/packages/intrepid2/unit-test/Shared/Polylib/test_01.hpp @@ -233,7 +233,8 @@ namespace Intrepid2 { outStream->precision(5); - const ordinal_type npLower = 5, npUpper = Polylib::MaxPolylibPoint; // npUpper: 31 right now + const ordinal_type npLower = 5, npUpper = Polylib::MaxPolylibPoint; + const ordinal_type npUpperStep1 = 21; // we cover all np values from npLower to npUpperStep1; we only cover every 5th one after that const ValueType tol = 1000.0 * tolerence(); const double lowOrderTol = tol; const double highOrderTol = tol * 100; @@ -268,7 +269,8 @@ namespace Intrepid2 { while (alpha <= 5.0) { ValueType beta = -0.5; while (beta <= 5.0) { - for (auto np = npLower; np <= npUpper; ++np){ + ordinal_type npStep = 1; + for (auto np = npLower; np <= npUpper; np += npStep){ const double localTol = (np > 20) ? 
highOrderTol : lowOrderTol; Polylib::Serial::getCubature(z, w, np, alpha, beta, poly); @@ -281,6 +283,7 @@ namespace Intrepid2 { ", np = " << np << ", n = " << n << " integral was " << sum << "\n"; } } + if (np == npUpperStep1) npStep = 5; } beta += 0.5; } @@ -296,7 +299,8 @@ namespace Intrepid2 { while (alpha <= 5.0) { ValueType beta = -0.5; while (beta <= 5.0) { - for (auto np = npLower; np <= npUpper; ++np) { + ordinal_type npStep = 1; + for (auto np = npLower; np <= npUpper; np += npStep) { Polylib::Serial::getCubature(z, w, np, alpha, beta, poly); const double localTol = (np > 20) ? highOrderTol : lowOrderTol; @@ -316,6 +320,7 @@ namespace Intrepid2 { ", np = " << np << ", n = " << n << " difference " << sum << "\n"; } } + if (np == npUpperStep1) npStep = 5; } beta += 0.5; } @@ -331,8 +336,8 @@ namespace Intrepid2 { while (alpha <= 5.0) { ValueType beta = -0.5; while (beta <= 5.0) { - - for (auto np = npLower; np <= npUpper; ++np) { + ordinal_type npStep = 1; + for (auto np = npLower; np <= npUpper; np += npStep) { const double localTol = (np > 20) ? 
highOrderTol : lowOrderTol; Polylib::Serial::getCubature(z, w, np, alpha, beta, poly); @@ -353,6 +358,7 @@ namespace Intrepid2 { ", np = " << np << ", n = " << n << " difference " << sum << "\n"; } } + if (np == npUpperStep1) npStep = 5; } beta += 0.5; } diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_decl.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_decl.hpp index 94f4ba1df086..09e3f6f98382 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_decl.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_decl.hpp @@ -1160,8 +1160,6 @@ namespace BaskerNS void printMTX(std::string fname, BASKER_MATRIX &M); void printMTX(std::string fname, BASKER_MATRIX &M, BASKER_BOOL off); void readMTX(std::string fname, BASKER_MATRIX &M); - int printRHS(); - int printSOL(); void printTree(); BASKER_INLINE @@ -1494,13 +1492,6 @@ namespace BaskerNS //end NDE - - //RHS and solutions (These are not used anymore) - ENTRY_2DARRAY rhs; - ENTRY_2DARRAY sol; - Int nrhs; - - BASKER_TREE part_tree; BASKER_TREE tree; BASKER_SYMBOLIC_TREE stree; diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_def.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_def.hpp index c1b92347a094..35d8588b0bd9 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_def.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_def.hpp @@ -92,16 +92,8 @@ namespace BaskerNS BASKER_INLINE void Basker::Finalize() { - //finalize all matrices - A.Finalize(); - At.Finalize(); //??? 
is At even used - BTF_A.Finalize(); - BTF_C.Finalize(); - BTF_B.Finalize(); - BTF_D.Finalize(); - BTF_E.Finalize(); - //finalize array of 2d matrics + // Actually, Finalize is called by the destructor FREE_MATRIX_2DARRAY(AVM, tree.nblks); FREE_MATRIX_2DARRAY(ALM, tree.nblks); @@ -120,7 +112,6 @@ namespace BaskerNS //Thread Array FREE_THREAD_1DARRAY(thread_array); - basker_barrier.Finalize(); //S (Check on this) FREE_INT_2DARRAY(S, tree.nblks); @@ -187,12 +178,6 @@ namespace BaskerNS FREE_ENTRY_1DARRAY(x_view_ptr_scale); FREE_ENTRY_1DARRAY(y_view_ptr_scale); - - //Structures - part_tree.Finalize(); - tree.Finalize(); - stree.Finalize(); - stats.Finalize(); }//end Finalize() @@ -239,7 +224,7 @@ namespace BaskerNS //Option = 2, BTF BASKER if(option == 1) - { + { default_order(); } else if(option == 2) @@ -475,12 +460,16 @@ namespace BaskerNS //Find BTF ordering if(btf_order2() != BASKER_SUCCESS) { + if(Options.verbose == BASKER_TRUE) + { + printf("Basker Ordering Failed \n"); fflush(stdout); + } return BASKER_ERROR; } if(Options.verbose == BASKER_TRUE) { - printf("Basker Ordering Found \n"); + printf("Basker Ordering Found \n"); fflush(stdout); } /*if((Options.btf == BASKER_TRUE) && (btf_tabs_offset != 0)) @@ -512,7 +501,7 @@ namespace BaskerNS if(symb_flag == BASKER_TRUE) { if(Options.verbose == BASKER_TRUE) { - printf("BASKER: YOU CANNOT RERUN SFACTOR\n"); + printf("BASKER: YOU CANNOT RERUN SFACTOR\n"); fflush(stdout); } return BASKER_ERROR; } @@ -547,7 +536,7 @@ namespace BaskerNS if(Options.verbose == BASKER_TRUE) { - printf(" == Basker Symbolic Done ==\n\n"); + printf(" == Basker Symbolic Done ==\n\n"); fflush(stdout); } #ifdef BASKER_TIMER @@ -1573,7 +1562,7 @@ namespace BaskerNS #endif } - // ---------------------------------------------------------------------------------------------- + // ---------------------------------------------------------------------------------------------- // 'sort' rows of BTF_A into ND structure #if 0 for (Int i = 0; i < BTF_A.nnz; ++i)
{ @@ -1621,6 +1610,7 @@ namespace BaskerNS symmetric_sfactor(); if(Options.verbose == BASKER_TRUE) { std::cout<< " > Basker Factor: Time for symbolic after ND on a big block A: " << nd_symbolic_timer.seconds() << std::endl; + fflush(stdout); } Kokkos::Timer nd_last_dense_timer; @@ -1628,16 +1618,23 @@ namespace BaskerNS btf_last_dense(flag); if(Options.verbose == BASKER_TRUE) { std::cout<< " > Basker Factor: Time for last-dense after ND on a big block A: " << nd_last_dense_timer.seconds() << std::endl; + fflush(stdout); } #ifdef BASKER_KOKKOS // ---------------------------------------------------------------------------------------------- // Allocate & Initialize blocks + #ifdef BASKER_PARALLEL_INIT_FACTOR kokkos_sfactor_init_factor iF(this); Kokkos::parallel_for(TeamPolicy(num_threads,1), iF); Kokkos::fence(); + #else + for (Int p = 0; p < num_threads; p++) { + this->t_init_factor(p); + } + #endif /*kokkos_sfactor_init_workspace iWS(flag, this); @@ -1950,10 +1947,16 @@ namespace BaskerNS }*/ Kokkos::Timer nd_setup2_timer; +#ifdef BASKER_PARALLEL_INIT_WORKSPACE kokkos_sfactor_init_workspace iWS(flag, this); Kokkos::parallel_for(TeamPolicy(num_threads,1), iWS); Kokkos::fence(); +#else + for (Int p = 0; p < num_threads; p++) { + this->t_init_workspace(flag, p); + } +#endif if(Options.verbose == BASKER_TRUE) { std::cout<< " > Basker Factor: Time for workspace allocation after ND on a big block A: " << nd_setup2_timer.seconds() << std::endl; } @@ -2304,10 +2307,6 @@ namespace BaskerNS printU(); printUMTX(); std::cout << "U printed" << std::endl; - //printRHS(); - std::cout << "RHS printed" << std::endl; - //printSOL(); - std::cout << "SOL printed" << std::endl; //printTree(); std::cout << "Tree printed" << std::endl; diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp index a6e1f5c41e91..cd2c9f57bf0a 100644 --- 
a/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_error_manager.hpp @@ -95,7 +95,7 @@ namespace BaskerNS Int blkcol = thread_array(ti).error_blk; Int blkUrow = LU_size(blkcol)-1; if(LL(blkcol)(0).nnz >= - LU(blkcol)(blkUrow).nnz) + LU(blkcol)(blkUrow).nnz) { resize_U = thread_array(ti).error_info; } diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_matrix_decl.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_matrix_decl.hpp index 02a896d957c0..4bbd86507d9d 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_matrix_decl.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_matrix_decl.hpp @@ -95,6 +95,9 @@ namespace BaskerNS BASKER_INLINE int fill(); + BASKER_INLINE + void init_ptr(); + BASKER_INLINE void init_inc_lvl(); diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_matrix_def.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_matrix_def.hpp index 4f12887c87ed..e40361e6f988 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_matrix_def.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_matrix_def.hpp @@ -328,7 +328,7 @@ namespace BaskerNS if(nnz == _nnz) { copy_vec(_row_idx, _nnz, row_idx); - copy_vec(_val,_nnz, val); + copy_vec(_val, _nnz, val); } else { @@ -498,6 +498,13 @@ namespace BaskerNS return 0; } + template + BASKER_INLINE + void BaskerMatrix::init_ptr() + { + for (Int i = 0; i < ncol+1; i ++) col_ptr(i) = 0; + } + template BASKER_INLINE void BaskerMatrix::convert2D diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor.hpp index d2c6a5690528..cef593230d5e 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor.hpp @@ -171,13 +171,9 @@ namespace BaskerNS }//end while if(Options.verbose == BASKER_TRUE) { - printf("Time DOMAIN: %lf \n", timer.seconds()); + 
printf("Time DOMAIN: %lf \n\n", timer.seconds()); timer.reset(); } - #ifdef BASKER_TIMER - printf("Time DOMAIN: %lf \n", timer.seconds()); - timer.reset(); - #endif #else// else basker_kokkos #pragma omp parallel @@ -282,13 +278,9 @@ namespace BaskerNS //printf( " End Sep: info = %d (%d, %d)\n",info,BASKER_SUCCESS,BASKER_ERROR ); if(Options.verbose == BASKER_TRUE) { - printf("Time SEP: %lf \n", timer.seconds()); + printf("Time SEP: %lf \n\n", timer.seconds()); timer.reset(); } - #ifdef BASKER_TIMER - printf("Time SEP: %lf \n", timer.seconds()); - timer.reset(); - #endif } // ---------------------------------------------------------------------------------------- // @@ -363,11 +355,8 @@ namespace BaskerNS if(Options.verbose == BASKER_TRUE) { - printf("Time BTF: %lf \n", timer.seconds()); + printf("Time BTF: %lf \n\n", timer.seconds()); } - #ifdef BASKER_TIMER - printf("Time BTF: %lf \n", timer.seconds()); - #endif }//end btf call Kokkos::Timer tzback; diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp index 499e00edd417..2e0434796e33 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk.hpp @@ -149,7 +149,7 @@ namespace BaskerNS const Mag normA = BTF_A.gnorm; const Mag normA_blk = BTF_A.anorm; - Int b = S[0][kid]; //Which blk from schedule + Int b = S(0)(kid); //Which blk from schedule BASKER_MATRIX &L = LL(b)(0); BASKER_MATRIX &U = LU(b)(LU_size(b)-1); BASKER_MATRIX &M = ALM(b)(0); //A->blk @@ -159,9 +159,9 @@ namespace BaskerNS ENTRY_1DARRAY X = LL(b)(0).ews; Int ws_size = LL(b)(0).iws_size; #else //else if BASKER_2DL - INT_1DARRAY ws = thread_array[kid].iws; - ENTRY_1DARRAY X = thread_array[kid].ews; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + ENTRY_1DARRAY X = thread_array(kid).ews; + Int ws_size = thread_array(kid).iws_size; #endif 
//Int bcol = L.scol; //begining col //NOT UD Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A @@ -1286,8 +1286,8 @@ namespace BaskerNS INT_1DARRAY ws = LL(wsb)(l).iws; const Int ws_size = LL(wsb)(l).iws_size; #else - INT_1DARRAY ws = thread_array[kid].iws; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + Int ws_size = thread_array(kid).iws_size; #endif const Int scol_top = btf_tabs[btf_top_tabs_offset]; // the first column index of A @@ -1460,9 +1460,9 @@ namespace BaskerNS ENTRY_1DARRAY X = LL(wsb)(l).ews; Int ws_size = LL(wsb)(l).iws_size; #else - INT_1DARRAY ws = thread_array[kid].iws; - ENTRY_1DARRAY X = thread_array[kid].ews; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + ENTRY_1DARRAY X = thread_array(kid).ews; + Int ws_size = thread_array(kid).iws_size; #endif const Entry zero (0.0); @@ -1607,7 +1607,7 @@ namespace BaskerNS if (blkcol == 2 && blkrow == 1) printf( " L.colptr(%d) = %d\n",k+1,lnnz ); #endif - //LL[X_col][X_row].p_size = 0; + //LL(X_col)(X_row).p_size = 0; LL(X_col)(X_row).p_size = 0; return 0; @@ -1831,7 +1831,7 @@ namespace BaskerNS }//over all nonzero in left #ifdef BASKER_2DL - //LL[X_col][X_row].p_size = nnz; + //LL(X_col)(X_row).p_size = nnz; LL(X_col)(X_row).p_size = nnz; #endif @@ -2056,7 +2056,7 @@ namespace BaskerNS nnz, kid, X_col, X_row); printf("kid %d Ending nnz: %d \n",kid, nnz); #endif - //LL[X_col][X_row].p_size = nnz; + //LL(X_col)(X_row).p_size = nnz; LL(X_col)(X_row).p_size = nnz; #endif diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp index 1fb5dc3fcc2b..c9e696f50786 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_blk_inc.hpp @@ -1555,7 +1555,7 @@ namespace BaskerNS BASKER_BOOL A_option ) { - BASKER_MATRIX &L = 
LL(blkcol)(blkrow); + BASKER_MATRIX &L = LL(blkcol)(blkrow); BASKER_MATRIX &B = ALM(blkcol)(blkrow); INT_1DARRAY ws = LL(X_col)(X_row).iws; @@ -1717,7 +1717,7 @@ namespace BaskerNS BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &L = LL(blkcol)(blkrow); BASKER_MATRIX &B = ALM(blkcol)(blkrow); INT_1DARRAY ws = LL(X_col)(X_row).iws; @@ -1846,7 +1846,7 @@ namespace BaskerNS nnz, kid, X_col, X_row); printf("kid %d Ending nnz: %d \n",kid, nnz); #endif - LL[X_col][X_row].p_size = nnz; + LL(X_col)(X_row).p_size = nnz; #endif return 0; @@ -1869,7 +1869,7 @@ namespace BaskerNS BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &L = LL(blkcol)(blkrow); BASKER_MATRIX &B = ALM(blkcol)(blkrow); INT_1DARRAY ws = LL(X_col)(X_row).iws; @@ -2046,7 +2046,7 @@ namespace BaskerNS nnz, kid, X_col, X_row); printf("kid %d Ending nnz: %d \n",kid, nnz); #endif - LL[X_col][X_row].p_size = nnz; + LL(X_col)(X_row).p_size = nnz; #endif return 0; @@ -2176,18 +2176,18 @@ namespace BaskerNS BASKER_INLINE int Basker::t_nfactor_blk_old(Int kid) { - Int b = S[0][kid]; //Which blk from schedule - BASKER_MATRIX &L = LL[b][0]; - BASKER_MATRIX &U = LU[b][LU_size[b]-1]; + Int b = S(0)(kid); //Which blk from schedule + BASKER_MATRIX &L = LL(b)(0); + BASKER_MATRIX &U = LU(b)(LU_size[b]-1); #ifdef BASKER_2DL printf("Accessing blk: %d \n", b); - INT_1DARRAY ws = LL[b][0].iws; - ENTRY_1DARRAY X = LL[b][0].ews; - Int ws_size = LL[b][0].iws_size; + INT_1DARRAY ws = LL(b)(0).iws; + ENTRY_1DARRAY X = LL(b)(0).ews; + Int ws_size = LL(b)(0).iws_size; #else //else if BASKER_2DL - INT_1DARRAY ws = thread_array[kid].iws; - ENTRY_1DARRAY X = thread_array[kid].ews; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + ENTRY_1DARRAY X = thread_array(kid).ews; + Int ws_size = thread_array(kid).iws_size; #endif Int bcol = L.scol; //begining col @@ -2576,15 +2576,15 @@ namespace BaskerNS { //Setup variables - const Int b = 
S[lvl][kid]; - const Int wsb = S[0][kid]; - BASKER_MATRIX &L = LL[b][0]; + const Int b = S(lvl)(kid); + const Int wsb = S(0)(kid); + BASKER_MATRIX &L = LL(b)(0); #ifdef BASKER_2DL - INT_1DARRAY ws = LL[wsb][l].iws; - Int ws_size = LL[wsb][l].iws_size; + INT_1DARRAY ws = LL(wsb)(l).iws; + Int ws_size = LL(wsb)(l).iws_size; #else - INT_1DARRAY ws = thread_array[kid].iws; - Int ws_size = thread_array[kid].iws_size; + INT_1DARRAY ws = thread_array(kid).iws; + Int ws_size = thread_array(kid).iws_size; #endif const Int brow = L.srow; @@ -2729,8 +2729,8 @@ namespace BaskerNS BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); - BASKER_MATRIX &B = ALM(blkcol)(blkrow); + BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &B = ALM(blkcol)(blkrow); /* @@ -2756,11 +2756,10 @@ namespace BaskerNS - INT_1DARRAY ws = LL(X_col)(X_row).iws; - ENTRY_1DARRAY X = LL(X_col)(X_row).ews; - Int ws_size = LL(X_col)(X_row).iws_size; - - Int nnz = LL(X_col)(X_row).p_size; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; + Int ws_size = LL(X_col)(X_row).iws_size; + Int nnz = LL(X_col)(X_row).p_size; @@ -2969,7 +2968,7 @@ namespace BaskerNS Int x_size, Int x_offset, BASKER_BOOL A_option) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &L = LL(blkcol)(blkrow); BASKER_MATRIX &B = ALM(blkcol)(blkrow); INT_1DARRAY ws = LL(X_col)(X_row).iws; @@ -3315,7 +3314,7 @@ namespace BaskerNS const BASKER_BOOL A_option ) { - BASKER_MATRIX &L = LL(blkcol)(blkrow); + BASKER_MATRIX &L = LL(blkcol)(blkrow); BASKER_MATRIX &B = ALM(blkcol)(blkrow); INT_1DARRAY ws = LL(X_col)(X_row).iws; diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp index 650bc77a8de6..289ee65f7ccd 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col.hpp @@ -435,8 +435,8 @@ namespace BaskerNS for(Int 
l = 0; l < lvl; l++) { printf("OPS. KID : %d LVL: %d OPS : %d \n", - kid, l, thread_array[kid].ops_counts[l][0]); - thread_array[kid].ops_count[1][0] = 0; + kid, l, thread_array(kid).ops_counts[l][0]); + thread_array(kid).ops_count[1][0] = 0; } #endif @@ -480,7 +480,7 @@ namespace BaskerNS #endif //end get needed variables// - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU(U_col)(U_row); //Ask C++ guru if this is ok BASKER_MATRIX *Bp; @@ -493,7 +493,7 @@ namespace BaskerNS } else { - Bp = &(thread_array[kid].C); + Bp = &(thread_array(kid).C); //printf("Using temp matrix, kid: %d\n", kid); //Bp->print(); } @@ -613,7 +613,7 @@ namespace BaskerNS //Count ops to show imbalance #ifdef BASKER_COUNT_OPS - thread_array[kid].ops_counts[0][l] += xnnz; + thread_array(kid).ops_counts[0][l] += xnnz; #endif //WE SHOUD DO A UNNZ COUNT @@ -878,9 +878,9 @@ namespace BaskerNS #endif #ifdef BASKER_2DL - INT_1DARRAY ws = LL[X_col][X_row].iws; - const Int ws_size = LL[X_col][X_row].iws_size; - ENTRY_1DARRAY X = LL[X_col][X_row].ews; + INT_1DARRAY ws = LL(X_col)(X_row).iws; + const Int ws_size = LL(X_col)(X_row).iws_size; + ENTRY_1DARRAY X = LL(X_col)(X_row).ews; #else BASKER_ASSERT(0==1, "t_upper_col_factor_offdiag, only works with 2D layout"); #endif @@ -959,12 +959,12 @@ namespace BaskerNS ) { - Int b = S[l][kid]; - BASKER_MATRIX &L = LL[b][0]; - INT_1DARRAY ws = thread_array[kid].iws; - ENTRY_1DARRAY X = thread_array[team_leader].ews; - Int ws_size = thread_array[kid].iws_size; - Int ews_size = thread_array[team_leader].ews_size; + Int b = S(l)(kid); + BASKER_MATRIX &L = LL(b)(0); + INT_1DARRAY ws = thread_array(kid).iws; + ENTRY_1DARRAY X = thread_array(team_leader).ews; + Int ws_size = thread_array(kid).iws_size; + Int ews_size = thread_array(team_leader).ews_size; #ifdef BASKER_DEBUG_NFACTOR_COL if(kid>3) @@ -1237,7 +1237,7 @@ namespace BaskerNS #endif #ifdef BASKER_OPS_COUNT - thread_array[kid].ops_counts[0][l] += xnnz; + thread_array(kid).ops_counts[0][l] += xnnz; 
#endif t_back_solve(kid, lvl,l+1, k, top, xnnz); // note: l not lvl given @@ -1868,7 +1868,6 @@ namespace BaskerNS if(kid != team_leader) { - //LL[my_idx][blk].p_size = 0; LL(my_idx)(blk).p_size = 0; } else @@ -1877,7 +1876,6 @@ namespace BaskerNS printf("SETTING PS: %d L:%d %d kid: %d\n", p_sizeL, leader_idx, blk, kid); #endif - //LL[leader_idx][blk].p_size = p_sizeL; LL(leader_idx)(blk).p_size = p_sizeL; } p_size = 0; @@ -2035,7 +2033,6 @@ namespace BaskerNS if(kid != team_leader) { - //LL[my_idx][blk].p_size = 0; LL(my_idx)(blk).p_size = 0; } else @@ -2044,7 +2041,6 @@ namespace BaskerNS printf("SETTING PS: %d L:%d %d kid: %d\n", p_sizeL, leader_idx, blk, kid); #endif - //LL[leader_idx][blk].p_size = p_sizeL; LL(leader_idx)(blk).p_size = p_sizeL; } p_size = 0; @@ -2104,7 +2100,7 @@ namespace BaskerNS else { //printf("lower picked, kid: %d\n", kid); - Bp = &(ALM[A_col][0]); + Bp = &(ALM(A_col)(0)); } BASKER_MATRIX &B = *Bp; @@ -2181,7 +2177,7 @@ namespace BaskerNS const Int ws_size = LL(leader_idx)(bl).ews_size; const Int brow = LL(leader_idx)(bl).srow; const Int nrow = LL(leader_idx)(bl).nrow; - Int p_size = LL[leader_idx][bl].p_size; + Int p_size = LL(leader_idx)(bl).p_size; //For recounting patterns in dense blk //Need better sparse update @@ -2248,7 +2244,6 @@ namespace BaskerNS printf("SETTING move_over set 0, L: %d %d kid: %d \n", leader_idx, bl, kid); #endif - //LL[leader_idx][bl].p_size = 0; LL(leader_idx)(bl).p_size = 0; p_count =0; } @@ -2261,7 +2256,6 @@ namespace BaskerNS printf("SETTING Re-pop pattern: %d %d size: %d \n", leader_idx, bl, p_count); #endif - //LL[leader_idx][bl].p_size = p_count; LL(leader_idx)(bl).p_size = p_count; } @@ -2334,7 +2328,7 @@ namespace BaskerNS else { //printf("lower picked, kid: %d\n", kid); - Bp = &(ALM[A_col][0]); + Bp = &(ALM(A_col)(0)); } BASKER_MATRIX &B = *Bp; @@ -2345,17 +2339,11 @@ namespace BaskerNS //B.print(); team_leader = find_leader(kid, l); - //ENTRY_1DARRAY X = LL[team_leader][bl].ews; ENTRY_1DARRAY X = 
LL(leader_idx)(bl).ews; - //INT_1DARRAY ws = LL[team_leader][bl].iws; INT_1DARRAY ws = LL(leader_idx)(bl).iws; - //Int brow = LL[team_leader][bl].srow; - //Int nrow = LL[team_leader][bl].nrow; const Int brow = LL(leader_idx)(bl).srow; const Int nrow = LL(leader_idx)(bl).nrow; - //Int p_size = LL[team_leader][bl].p_size; Int p_size = LL(leader_idx)(bl).p_size; - //Int ws_size = LL[team_leader][bl].iws_size; const Int ws_size = LL(leader_idx)(bl).iws_size; Int *color = &(ws(0)); Int *pattern = &(color[ws_size]); @@ -2431,18 +2419,12 @@ namespace BaskerNS Int A_col = S(lvl)(kid); Int A_row = (lvl==1)?(2):S(bl)(kid)%(LU_size(A_col)); Int CM_idx = kid; - //ENTRY_1DARRAY X = LL[team_leader][bl].ews; ENTRY_1DARRAY X = LL(leader_idx)(bl).ews; - //INT_1DARRAY ws = LL[team_leader][bl].iws; INT_1DARRAY ws = LL(leader_idx)(bl).iws; - //Int ws_size =LL[team_leader][bl].ews_size; - const Int ws_size =LL(leader_idx)(bl).ews_size; - //Int brow = LL[team_leader][bl].srow; + const Int ws_size = LL(leader_idx)(bl).ews_size; const Int brow = LL(leader_idx)(bl).srow; - //Int nrow = LL[team_leader][bl].nrow; const Int nrow = LL(leader_idx)(bl).nrow; - //Int p_size = LL[team_leader][bl].p_size; - Int p_size = LL[leader_idx][bl].p_size; + Int p_size = LL(leader_idx)(bl).p_size; //For recounting patterns in dense blk //Need better sparse update @@ -2511,7 +2493,6 @@ namespace BaskerNS printf("SETTING move_over set 0, L: %d %d kid: %d \n", leader_idx, bl, kid); #endif - //LL[leader_idx][bl].p_size = 0; LL(leader_idx)(bl).p_size = 0; p_count =0; } @@ -2521,7 +2502,6 @@ namespace BaskerNS printf("SETTING Re-pop pattern: %d %d size: %d \n", leader_idx, bl, p_count); #endif - //LL[leader_idx][bl].p_size = p_count; LL(leader_idx)(bl).p_size = p_count; } @@ -2549,7 +2529,7 @@ namespace BaskerNS Int CM_idx = kid; BASKER_MATRIX_VIEW &B = AV[A_col][A_row]; - B.flip_base(&(thread_array[kid].C)); + B.flip_base(&(thread_array(kid).C)); B.k_offset = k; if(kid == 0) @@ -2630,8 +2610,8 @@ namespace 
BaskerNS /* Old Atomic Barrier BaskerBarrier BB; - BB.Barrier(thread_array[leader_kid].token[sublvl][function_n], - thread_array[leader_kid].token[sublvl][1], + BB.Barrier(thread_array(leader_kid).token[sublvl][function_n], + thread_array(leader_kid).token[sublvl][1], size); */ } diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col2.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col2.hpp index 801ad2ee6362..5e9345ed02ec 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col2.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col2.hpp @@ -886,7 +886,7 @@ namespace BaskerNS Int col_idx_offset = 0; //can get rid of? - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU(U_col)(U_row); pivot = U.tpivot; //BASKER_MATRIX &L = LL(L_col)(L_row); //NDE - warning: unused L diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp index 1425385d9f2e..c6ddadf55092 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_nfactor_col_inc.hpp @@ -654,7 +654,7 @@ namespace BaskerNS //end get needed variables// //BASKER_MATRIX &L = LL(L_col)(L_row); //NDE - warning: unused L - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU(U_col)(U_row); //Ask C++ guru if this is ok BASKER_MATRIX *Bp; @@ -664,7 +664,7 @@ namespace BaskerNS } else { - Bp = &(thread_array[kid].C); + Bp = &(thread_array(kid).C); } BASKER_MATRIX &B = *Bp; //if(kid ==0) @@ -2471,7 +2471,7 @@ namespace BaskerNS Int col_idx_offset = 0; //can get rid of? 
//BASKER_MATRIX &L = LL(L_col)(L_row); //NDE - warning: unused L - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU(U_col)(U_row); INT_1DARRAY ws = LL(X_col)(X_row).iws; //const Int ws_size = LL(X_col)(X_row).iws_size; @@ -2592,7 +2592,7 @@ namespace BaskerNS //Int col_idx_offset = 0; //can get rid of?//NDE - warning: unused //BASKER_MATRIX &L = LL(L_col)(L_row); //NDE - warning: unused - BASKER_MATRIX &U = LU(U_col)(U_row); + BASKER_MATRIX &U = LU(U_col)(U_row); INT_1DARRAY ws = LL(X_col)(X_row).iws; //const Int ws_size = LL(X_col)(X_row).iws_size; diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_order.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_order.hpp index 69d06a6bd72e..82ea04be3754 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_order.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_order.hpp @@ -1096,11 +1096,19 @@ static int basker_sort_matrix_col(const void *arg1, const void *arg2) find_2D_convert(BTF_A); //now we can fill submatrices #ifdef BASKER_KOKKOS - kokkos_order_init_2D iO(this); - Kokkos::parallel_for(TeamPolicy(num_threads,1), iO); - Kokkos::fence(); + #ifdef BASKER_PARALLEL_INIT_2D + kokkos_order_init_2D iO(this); + Kokkos::parallel_for(TeamPolicy(num_threads,1), iO); + Kokkos::fence(); + #else + bool alloc = true; + //bool keep_zeros = true; + for (Int p = 0; p < num_threads; p++) { + this->t_init_2DA(p, alloc, keep_zeros); + } + #endif #else - //Comeback + //Comeback #endif #ifdef BASKER_TIMER double init_2d_time = scotch_timer.seconds(); diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp index cc20d3b21e78..c955ff952551 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor.hpp @@ -117,9 +117,11 @@ namespace BaskerNS // thread.team_rank()); Int kid = basker->t_get_kid(thread); #endif + printf( " * 
kokkos_sfactor_init_factor(%d) *\n",kid ); fflush(stdout); basker->t_init_factor(kid); + printf( " * kokkos_sfactor_init_factor(%d) done *\n",kid ); fflush(stdout); //This needs to be done earlier in ordering now //basker->t_init_2DA(kid); @@ -159,7 +161,7 @@ int Basker::sfactor() printf("Total NNZ: %ld \n", (long)global_nnz); printf(" > blk_matching = %d\n", (int)Options.blk_matching ); printf("----------------------------------\n"); - printf("\n"); + printf("\n"); fflush(stdout); } } @@ -169,28 +171,45 @@ int Basker::sfactor() } //Allocate Factorspace - //printf(" >> kokkos_sfactor_init_factor( btf_tabs_offset = %d, allocate_nd_workspace = %d ) <<\n", - // btf_tabs_offset,allocate_nd_workspace); + #ifdef BASKER_TIMER + printf(" >> kokkos_sfactor_init_factor( btf_tabs_offset = %d, allocate_nd_workspace = %d ) <<\n", + btf_tabs_offset,allocate_nd_workspace); fflush(stdout); + #endif if(btf_tabs_offset != 0 && allocate_nd_workspace) { #ifdef BASKER_KOKKOS + #ifdef BASKER_PARALLEL_INIT_FACTOR kokkos_sfactor_init_factor iF(this); Kokkos::parallel_for(TeamPolicy(num_threads,1), iF); Kokkos::fence(); + #else + for (Int p = 0; p < num_threads; p++) { + this->t_init_factor(p); + } + #endif #else #endif } + #ifdef BASKER_TIMER + printf(" >> kokkos_sfactor_workspace <<\n"); fflush(stdout); + #endif //if(btf_tabs_offset != 0) { //Allocate workspace #ifdef BASKER_KOKKOS + #ifdef BASKER_PARALLEL_INIT_WORKSPACE typedef Kokkos::TeamPolicy TeamPolicy; kokkos_sfactor_init_workspace iWS(setup_flag, this); Kokkos::parallel_for(TeamPolicy(num_threads,1), iWS); Kokkos::fence(); + #else + for (Int p = 0; p < num_threads; p++) { + this->t_init_workspace(setup_flag, p); + } + #endif #endif } @@ -266,8 +285,8 @@ int Basker::sfactor() for(Int ii=0; ii < split_num; ii++) { BASKER_ASSERT(A.ncol > 0, "Basker symmetric_sfactor assert: A.ncol malloc > 0 failed"); - MALLOC_INT_1DARRAY(gScol[ii], A.ncol); - init_value(gScol[ii], A.ncol, (Int)0); + MALLOC_INT_1DARRAY(gScol(ii), A.ncol); + 
init_value(gScol(ii), A.ncol, (Int)0); } @@ -279,8 +298,8 @@ int Basker::sfactor() for(Int ii=0; ii < split_num; ii++) { BASKER_ASSERT(A.nrow > 0, "sfactor A.nrow malloc"); - MALLOC_INT_1DARRAY(gSrow[ii], A.nrow); - init_value(gSrow[ii], A.nrow, (Int)0); + MALLOC_INT_1DARRAY(gSrow(ii), A.nrow); + init_value(gSrow(ii), A.nrow, (Int)0); } #ifdef BASKER_TIMER @@ -292,7 +311,9 @@ int Basker::sfactor() double time2 = 0.0; double time3 = 0.0; Kokkos::Timer timer1; + Kokkos::Timer timer2; timer.reset(); + timer2.reset(); #endif //split_num = num_threads/2; @@ -303,7 +324,7 @@ int Basker::sfactor() printf("\n --------------- OVER DOMS ---------------\n"); printf("\n"); } - #define SHYLU_BASKER_STREE_LIST + //#define SHYLU_BASKER_STREE_LIST std::vector stree_list (num_threads); #ifdef SHYLU_BASKER_STREE_LIST Kokkos::parallel_for( @@ -323,7 +344,7 @@ int Basker::sfactor() //printf("\n\n STREE SIZE: %d \n", AL[blk][0].ncol); //printf("Here 0\n"); //Find nnz_counts for leafs - #ifdef BASKER_TIMER + #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) timer1.reset(); #endif #ifdef SHYLU_BASKER_STREE_LIST @@ -332,7 +353,7 @@ int Basker::sfactor() #else e_tree (ALM(blk)(0), stree, 1); #endif - #ifdef BASKER_TIMER + #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) time1_2 += timer1.seconds(); timer1.reset(); #endif @@ -341,7 +362,7 @@ int Basker::sfactor() #else post_order(ALM(blk)(0), stree); #endif - #ifdef BASKER_TIMER + #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) time1_3 += timer1.seconds(); timer1.reset(); #endif @@ -350,19 +371,19 @@ int Basker::sfactor() #else col_count (ALM(blk)(0), stree); #endif - #ifdef BASKER_TIMER + #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) time1 += timer1.seconds(); #endif //Assign nnz here - //leaf_assign_nnz(LL[blk][0], stree, 0); - //leaf_assign_nnz(LU[blk][LU_size[blk]-1], stree, 0); + //leaf_assign_nnz(LL(blk)(0), stree, 0); + //leaf_assign_nnz(LU(blk)(LU_size[blk]-1), stree, 0); 
if(Options.verbose == BASKER_TRUE) { printf( " >> leaf_assign_nnz(LL(%d)(%d))\n",(int)blk,0); - printf( " >> leaf_assign_nnz(LL(%d)(%d))\n",(int)blk,(int)LU_size(blk)-1); + printf( " >> leaf_assign_nnz(LU(%d)(%d))\n",(int)blk,(int)LU_size(blk)-1); } - #ifdef BASKER_TIMER + #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) timer1.reset(); #endif #ifdef SHYLU_BASKER_STREE_LIST @@ -372,7 +393,7 @@ int Basker::sfactor() leaf_assign_nnz(LL(blk)(0), stree, 0); leaf_assign_nnz(LU(blk)(LU_size(blk)-1), stree, 0); #endif - #ifdef BASKER_TIMER + #if defined(BASKER_TIMER) & !defined(SHYLU_BASKER_STREE_LIST) time2 += timer1.seconds(); #endif } @@ -380,6 +401,10 @@ int Basker::sfactor() ); Kokkos::fence(); #endif + #ifdef BASKER_TIMER + double dom_time = timer2.seconds(); + std::cout << " DOMAIN BLKs done : " << dom_time << std::endl << std::endl; + #endif for(Int p = 0; p < num_threads; ++p) { @@ -411,16 +436,16 @@ int Basker::sfactor() Int off_diag = 1; //printf( " U_blk_sfactor(AVM(%d,%d))\n",U_col,U_row ); //U_blk_sfactor(AV[U_col][U_row], stree, - // gScol[l], gSrow[glvl],0); + // gScol(l), gSrow(glvl),0); #ifdef BASKER_TIMER timer1.reset(); #endif #ifdef SHYLU_BASKER_STREE_LIST U_blk_sfactor(AVM(U_col)(U_row), stree_p, - gScol[l], gSrow[glvl], off_diag); + gScol(l), gSrow(glvl), off_diag); #else U_blk_sfactor(AVM(U_col)(U_row), stree, - gScol[l], gSrow[glvl], off_diag); + gScol(l), gSrow(glvl), off_diag); #endif #ifdef BASKER_TIMER time3 += timer1.seconds(); @@ -435,18 +460,17 @@ int Basker::sfactor() // stree, gScol, gSrow); //Assign nnz counts for leaf off-diag - //U_assign_nnz(LU[U_col][U_row], stree, 0); - //L_assign_nnz(LL[blk][l+1], stree, 0); - if(Options.verbose == BASKER_TRUE) - { - printf( " ++ leaf_assign_nnz(LU(%d, %d))\n",(int)U_col,(int)U_row); - printf( " ++ leaf_assign_nnz(LL(%d, %d))\n",(int)blk,(int)l+1); - } + //U_assign_nnz(LU(U_col)(U_row), stree, 0); + //L_assign_nnz(LL(blk)(l+1), stree, 0); #ifdef BASKER_TIMER timer1.reset(); #endif - 
//printf( " U_assign_nnz(LU(%d,%d))\n",U_col,U_row ); double fill_factor = BASKER_DOM_NNZ_OVER+Options.user_fill; + if(Options.verbose == BASKER_TRUE) + { + printf( " ++ U_assign_nnz(LU(%d, %d)) fill-factor x(%f+%f = %f)\n",(int)U_col,(int)U_row, BASKER_DOM_NNZ_OVER,Options.user_fill,fill_factor); + printf( " ++ L_assign_nnz(LL(%d, %d)) fill-factor x(%f+%f = %f)\n",(int)blk,(int)l+1, BASKER_DOM_NNZ_OVER,Options.user_fill,fill_factor); + } #ifdef SHYLU_BASKER_STREE_LIST U_assign_nnz(LU(U_col)(U_row), stree_p, fill_factor, 0); L_assign_nnz(LL(blk)(l+1), stree_p, fill_factor, 0); @@ -465,7 +489,7 @@ int Basker::sfactor() std::cout << " >> symmetric_sfactor::domain : " << timer.seconds() << " seconds" << std::endl; std::cout << " ++ symmetric_sfactor::domain::postorder : " << time1_2 << " + " << time1_3 << " + " << time1 << " seconds" << std::endl; std::cout << " ++ symmetric_sfactor::domain::init : " << time2 << " seconds" << std::endl; - std::cout << " ++ symmetric_sfactor::domain::sfactor : " << time3 << " seconds" << std::endl; + std::cout << " ++ symmetric_sfactor::domain::sfactor : " << time3 << " seconds" << std::endl << std::endl; timer.reset(); #endif @@ -484,13 +508,17 @@ int Basker::sfactor() //over all the seps in a lvle #ifdef SHYLU_BASKER_STREE_LIST + //printf( " parallel for \n" ); Kokkos::parallel_for( "permute_col", p, KOKKOS_LAMBDA(const int pp) #else + //printf( " serial for \n" ); for(Int pp = 0; pp < p; pp++) #endif { - //printf( " -- level = %d separator = %d --\n",lvl,pp ); + #ifdef BASKER_TIMER + printf( " -- level = %d/%d separator = %d/%d --\n",lvl,tree.nlvls, pp,p ); fflush(stdout); + #endif //S blks Int ppp; ppp = pp*pow(tree.nparts, lvl+1); @@ -509,9 +537,11 @@ int Basker::sfactor() Int U_row = 0; //S_blk_sfactor(AL[U_col][U_row], stree, - //gScol[lvl], gSrow[pp]); + //gScol(lvl), gSrow(pp)); - //printf( " >>> S_blk_sfactor( ALM(%d)(%d) with %dx%d and nnz=%d) <<<\n",U_col,U_row, 
ALM(U_col)(U_row).nrow,ALM(U_col)(U_row).ncol,ALM(U_col)(U_row).nnz ); + #ifdef BASKER_TIMER + printf( " >>> S_blk_sfactor( ALM(%d)(%d) with %dx%d and nnz=%d) <<<\n",U_col,U_row, ALM(U_col)(U_row).nrow,ALM(U_col)(U_row).ncol,ALM(U_col)(U_row).nnz ); fflush(stdout); + #endif #ifdef SHYLU_BASKER_STREE_LIST auto stree_p = stree_list[pp]; S_blk_sfactor(ALM(U_col)(U_row), stree_p, @@ -520,29 +550,34 @@ int Basker::sfactor() S_blk_sfactor(ALM(U_col)(U_row), stree, gScol(lvl), gSrow(pp)); #endif - //printf( " >>> -> nnz = %d\n",ALM(U_col)(U_row).nnz ); + #ifdef BASKER_TIMER + printf( " >>> -> nnz = %d\n",ALM(U_col)(U_row).nnz ); fflush(stdout); + #endif - //S_assign_nnz(LL[U_col][U_row], stree, 0); + //S_assign_nnz(LL(U_col)(U_row), stree, 0); if(Options.verbose == BASKER_TRUE) { - printf( " >> S_assign_nnz( LL(%d,%d) )\n",(int)U_col,(int)U_row ); + printf( " >> S_assign_nnz( LL(%d,%d) )\n",(int)U_col,(int)U_row ); fflush(stdout); } #ifdef SHYLU_BASKER_STREE_LIST S_assign_nnz(LL(U_col)(U_row), stree_p, 0); #else S_assign_nnz(LL(U_col)(U_row), stree, 0); #endif - //S_assign_nnz(LU[U_col][LU_size[U_col]-1], stree,0); + //S_assign_nnz(LU(U_col)(LU_size[U_col]-1), stree,0); //printf( " >>> S_assign_nnz( LU(%d,%d) )\n",U_col,LU_size(U_col)-1 ); if(Options.verbose == BASKER_TRUE) { - printf( " ++ S_assign_nnz(LU(%d, %d))\n",(int)U_col,(int)LU_size(U_col)-1); + printf( " ++ S_assign_nnz(LU(%d, %d))\n",(int)U_col,(int)LU_size(U_col)-1); fflush(stdout); } #ifdef SHYLU_BASKER_STREE_LIST S_assign_nnz(LU(U_col)(LU_size(U_col)-1), stree_p, 0); #else S_assign_nnz(LU(U_col)(LU_size(U_col)-1), stree, 0); #endif + #ifdef BASKER_TIMER + printf( " >>> -> nnz = %d\n",LU(U_col)(LU_size(U_col)-1).nnz); fflush(stdout); + #endif } #ifdef SHYLU_BASKER_STREE_LIST ); @@ -563,6 +598,7 @@ int Basker::sfactor() Int inner_blk = U_col; for(Int l = lvl+1; l < tree.nlvls; l++) { + //printf( " --- pp = %d/%d, l = %d/%d ---\n",pp,p, l,tree.nlvls ); fflush(stdout); U_col = S(l+1)(ppp); U_row = 
S(lvl+1)(ppp)%LU_size(U_col); @@ -594,12 +630,13 @@ int Basker::sfactor() //Assign nnz + double fill_factor = BASKER_SEP_NNZ_OVER+Options.user_fill; if(Options.verbose == BASKER_TRUE) { - printf( " ++ leaf_assign_nnz(LU(%d, %d))\n",(int)U_col,(int)U_row); - printf( " ++ leaf_assign_nnz(LL(%d, %d))\n",(int)inner_blk,(int)(l-lvl)); + printf( " ++ leaf_assign_nnz(LU(%d, %d)) fill-factor x(%d+%f = %f)\n",(int)U_col,(int)U_row, (int)BASKER_SEP_NNZ_OVER,Options.user_fill,fill_factor); + printf( " ++ leaf_assign_nnz(LL(%d, %d)) fill-factor x(%d+%f = %f)\n",(int)inner_blk,(int)(l-lvl), (int)BASKER_SEP_NNZ_OVER,Options.user_fill,fill_factor); + fflush(stdout); } - double fill_factor = BASKER_SEP_NNZ_OVER+Options.user_fill; #ifdef SHYLU_BASKER_STREE_LIST U_assign_nnz(LU(U_col)(U_row), stree_p, fill_factor, 0); L_assign_nnz(LL(inner_blk)(l-lvl), stree_p, fill_factor, 0); @@ -619,12 +656,15 @@ int Basker::sfactor() for(Int ii = 0 ; ii < split_num; ++ii) { //printf("split\n"); - FREE(gScol[ii]); - FREE(gSrow[ii]); + FREE(gScol(ii)); + FREE(gSrow(ii)); } FREE(gScol); FREE(gSrow); + #ifdef BASKER_TIMER + std::cout << " >> symmetric_sfactor done << " << std::endl; + #endif return 0; }//end symmetric_symbolic() @@ -1151,7 +1191,6 @@ int Basker::sfactor() BASKER_SYMBOLIC_TREE &ST ) { -printf( " col_count:: view \n" ); //Still like to find a way to do this without transpose BASKER_MATRIX Mt; matrix_transpose(MV, Mt); @@ -2220,6 +2259,9 @@ printf( " col_count:: view \n" ); Int option ) { + #ifdef BASKER_TIMER + printf("leaf_assign_nnz:\n"); + #endif if(option == 0) { const Int Int_MAX = std::numeric_limits::max(); @@ -2228,19 +2270,23 @@ printf( " col_count:: view \n" ); for(Int i = 0; i < M.ncol; i++) { if (t_nnz <= Int_MAX - ST.col_counts[i]) { + #ifdef BASKER_TIMER + //printf( " > %d: %d += %d\n",i,t_nnz, ST.col_counts[i] ); + #endif t_nnz += ST.col_counts[i]; } else { // let's just hope it is enough, if overflow break; } } - #ifdef BASKER_DEBUG_SFACTOR - printf("leaf nnz: %ld \n", 
(long)t_nnz); + #ifdef BASKER_TIMER + printf(" > leaf nnz: (%ld + %ld) / 2 = %ld\n", (long)t_nnz,(long)M.ncol,(long)(t_nnz+M.ncol)/2); #endif + t_nnz = long(t_nnz+M.ncol)/2; //double nnz_shoulder = 1.05; double fill_factor = BASKER_DOM_NNZ_OVER+Options.user_fill; // used to boost fill estimate - Int temp = fill_factor*t_nnz; + Int temp = fill_factor*t_nnz; // assuming (t_nnz/2) as triangular part if (temp > t_nnz) { M.nnz = temp; } else { @@ -2258,8 +2304,8 @@ printf( " col_count:: view \n" ); } if(Options.verbose == BASKER_TRUE) { - printf("leaf with elbow-room global_nnz = %ld, t_nnz = %ld, M.nnz = %ld (%ld x %ld)\n", - (long)global_nnz,(long)t_nnz,(long)M.nnz,(long)M.nrow,(long)M.ncol); + printf("leaf with elbow-room global_nnz = %ld, t_nnz = %ld, M.nnz = %ld (%ld x %ld) with fill-factor x(%d+%f = %f)\n", + (long)global_nnz,(long)t_nnz,(long)M.nnz,(long)M.nrow,(long)M.ncol,(int)BASKER_DOM_NNZ_OVER,Options.user_fill,fill_factor); } } }//end assign_leaf_nnz @@ -2290,12 +2336,12 @@ printf( " col_count:: view \n" ); } } - #ifdef BASKER_DEBUG_SFACTOR + #ifdef BASKER_TIMER printf("U_assing_nnz: %ld \n", t_nnz); #endif //double fill_factor = 1.05; - Int temp = fill_factor*t_nnz; + Int temp = min(M.nrow*M.ncol, Int(fill_factor*t_nnz)); if (temp >= t_nnz) { M.nnz = temp; } else { @@ -2312,8 +2358,8 @@ printf( " col_count:: view \n" ); #endif if(Options.verbose == BASKER_TRUE) { - printf("U_assing with elbow global_nnz = %ld, t_nnz = %ld (fill_factor = %f + %f = %f), M.nnz = %ld (%ld x %ld)\n", - (long)global_nnz,(long)t_nnz, BASKER_DOM_NNZ_OVER,Options.user_fill,fill_factor, (long)M.nnz,(long)M.nrow,(long)M.ncol); + printf("U_assing with elbow global_nnz = %ld, t_nnz = %ld (fill_factor = %f), M.nnz = %ld (%ld x %ld)\n", + (long)global_nnz,(long)t_nnz, fill_factor, (long)M.nnz,(long)M.nrow,(long)M.ncol); } } }//end assign_upper_nnz @@ -2344,13 +2390,13 @@ printf( " col_count:: view \n" ); } } - #ifdef BASKER_DEBUG_SFACTOR + #ifdef BASKER_TIMER printf("L_assign_nnz: %ld 
\n", t_nnz); #endif // double fill_factor = 2.05; double old_nnz = M.nnz; - Int temp = fill_factor*t_nnz; + Int temp = min(M.nrow*M.ncol, Int(fill_factor*t_nnz)); if (temp >= t_nnz) { M.nnz = temp; } else { @@ -2367,8 +2413,8 @@ printf( " col_count:: view \n" ); } if(Options.verbose == BASKER_TRUE) { - printf("L_assign with elbow global_nnz = %ld, t_nnz = %ld (fill_factor = %e + %e = %e), M.nnz = %ld -> %ld (%ld x %ld)\n", - (long)global_nnz,(long)t_nnz, BASKER_DOM_NNZ_OVER,Options.user_fill, fill_factor, (long)old_nnz,(long)M.nnz, (long)M.nrow,(long)M.ncol); + printf("L_assign with elbow global_nnz = %ld, t_nnz = %ld (fill_factor = %f), M.nnz = %ld -> %ld (%ld x %ld)\n", + (long)global_nnz,(long)t_nnz, fill_factor, (long)old_nnz,(long)M.nnz, (long)M.nrow,(long)M.ncol); } } }//end assign_lower_nnz @@ -2419,6 +2465,9 @@ printf( " col_count:: view \n" ); //printf("number of blks: %d \n", // btf_nblks-btf_tabs_offset); #endif + #ifdef BASKER_TIMER + printf( " > btf_last_dense(%s) <\n",(flag ? 
"true" : "false") ); fflush(stdout); + #endif Int max_blk_size = 0; #if defined(BASKER_SPLIT_A) @@ -2439,7 +2488,9 @@ printf( " col_count:: view \n" ); if ((double)nnz > ((double)lblk_size)*((double)lblk_size)) { nnz = lblk_size*lblk_size; } - //printf( " LBTF(%d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)nnz ); + #ifdef BASKER_TIMER + printf( " L_D[%d](%d, size = %d, nnz = %d)\n",i,(int)(i-btf_tabs_offset), (int)lblk_size, (int)nnz ); + #endif L_D(i).init_matrix("LBFT", btf_tabs(i), lblk_size, @@ -2450,6 +2501,9 @@ printf( " col_count:: view \n" ); //For pruning L_D(i).init_pend(); + #ifdef BASKER_TIMER + printf( " U_D[%d](%d, size = %d, nnz = %d)\n",i,(int)(i-btf_tabs_offset), (int)lblk_size, (int)nnz ); + #endif U_D(i).init_matrix("UBFT", btf_tabs(i), lblk_size, @@ -2459,6 +2513,9 @@ printf( " col_count:: view \n" ); }//over all blks } #endif + #ifdef BASKER_TIMER + printf( " > top blocks done <\n" ); fflush(stdout); + #endif //Malloc L and U #ifdef BASKER_DEBUG_SFACTOR @@ -2486,7 +2543,9 @@ printf( " col_count:: view \n" ); if ((double)nnz > ((double)lblk_size)*((double)lblk_size)) { nnz = lblk_size*lblk_size; } - //printf( " LBTF(%d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)nnz ); + #ifdef BASKER_TIMER + printf( " LBTF(%d, size = %d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)lblk_size, (int)nnz ); + #endif LBTF(i-btf_tabs_offset).init_matrix("LBFT", btf_tabs(i), lblk_size, @@ -2498,7 +2557,9 @@ printf( " col_count:: view \n" ); //printf( " LBTF(%d).init_pend()\n",(int)(i-btf_tabs_offset) ); LBTF(i-btf_tabs_offset).init_pend(); - //printf( " UBTF(%d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)nnz ); + #ifdef BASKER_TIMER + printf( " UBTF(%d, size = %d, nnz = %d)\n",(int)(i-btf_tabs_offset), (int)lblk_size, (int)nnz ); + #endif UBTF(i-btf_tabs_offset).init_matrix("UBFT", btf_tabs(i), lblk_size, @@ -2511,6 +2572,9 @@ printf( " col_count:: view \n" ); //MALLOC workspace }//over all blks } + #ifdef BASKER_TIMER + printf( " > left blocks done <\n" ); 
fflush(stdout); + #endif //JDB: This needs to be fixed max_blk_size = BTF_D.nrow + BTF_C.nrow; @@ -2530,23 +2594,27 @@ printf( " col_count:: view \n" ); //BASKER_ASSERT((thread_array(i).iws_size*thread_array(i).iws_mult) > 0, "Basker btf_last_dense assert: sfactor threads iws > 0 failed"); //BASKER_ASSERT((thread_array(i).ews_size*thread_array(i).ews_mult) > 0, "Basker btf_last_dense assert: sfactor threads ews > 0 failed"); - if (max_blk_size > 0) { - MALLOC_INT_1DARRAY(thread_array(i).iws, thread_array(i).iws_size*thread_array(i).iws_mult); - MALLOC_ENTRY_1DARRAY(thread_array(i).ews, thread_array(i).ews_size*thread_array(i).ews_mult); - } - #ifdef BASKER_DEBUG_SFACTOR + #ifdef BASKER_TIMER printf("Malloc Thread: %d iws: %d \n", i, (thread_array(i).iws_size* thread_array(i).iws_mult)); - printf("Malloc Thread: %d ews: %d \n", + printf("Malloc Thread: %d ews: %d \n", i, (thread_array(i).ews_size* thread_array(i).ews_mult)); #endif + if (max_blk_size > 0) { + MALLOC_INT_1DARRAY(thread_array(i).iws, thread_array(i).iws_size*thread_array(i).iws_mult); + MALLOC_ENTRY_1DARRAY(thread_array(i).ews, thread_array(i).ews_size*thread_array(i).ews_mult); + } } } + #ifdef BASKER_TIMER + printf( " > btf_last_dense done <\n" ); + #endif }//end btf_last_dense() }//end namespace Bakser +#undef BASKER_TIMER #endif//endif BASKER_SFACTOR_NEWFRM_HPP diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp index 64c041a6536c..622bdf39a0fd 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_sfactor_inc.hpp @@ -100,8 +100,8 @@ namespace BaskerNS for(Int p=0; p < num_threads; ++p) { Int blk = S(0)(p); - sfactor_nd_dom_estimate(ALM(blk)(0), - LL(blk)(0), + sfactor_nd_dom_estimate(ALM(blk)(0), + LL(blk)(0), LU(blk)(LU_size(blk)-1)); for(Int l=0; l < tree.nlvls; l++) @@ -156,7 +156,7 @@ namespace BaskerNS U_row = 
S(lvl+1)(ppp)%LU_size(U_col); if((S(lvl+1)(ppp) > 14) && - (S(lvl+1)(ppp) > LU_size(U_col)) + (S(lvl+1)(ppp) > LU_size(U_col)) ) { Int tm = (S(lvl+1)(ppp)+1)/16; @@ -172,7 +172,7 @@ namespace BaskerNS sfactor_nd_sep_lower_estimate( ALM(innerblk)(l-lvl), - LL(innerblk)(l-lvl)); + LL(innerblk)(l-lvl)); }//for - l }//for -p diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_stats.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_stats.hpp index 995bad188542..c7f804794f67 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_stats.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_stats.hpp @@ -148,8 +148,8 @@ namespace BaskerNS for(Int l = 0; l < tree.nblks; l++) { - MATRIX &myL = LL[l][0]; - stats.Lnnz += LL[l][0].nnz; + MATRIX &myL = LL(l)(0); + stats.Lnnz += LL(l)(0).nnz; }//over all Ls return stats.Lnnz; @@ -166,10 +166,10 @@ namespace BaskerNS for(Int l = 0; l < tree.nblks; l++) { - for(Int r=0; r 0) { FREE_INT_1DARRAY(roots); @@ -267,7 +265,7 @@ namespace BaskerNS ~basker_symbolic_tree() { - //Finalize(); + Finalize(); }//end ~basker_symbolic_tree BASKER_INLINE diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_thread.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_thread.hpp index ebce20c9875f..6e4d1554c754 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_thread.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_thread.hpp @@ -272,7 +272,7 @@ namespace BaskerNS BASKER_INLINE void atomic_barrier_fanout(volatile Int &value, const Int l_size) { - Kokkos::atomic_inc(&(value)) + Kokkos::atomic_inc(&(value)); while(value < l_size) { BASKER_NO_OP; diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp index be4c146e9c83..784df704eb59 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_tree.hpp @@ -118,7 +118,7 @@ namespace BaskerNS for(Int i =0; i < 
tree.nblks+1; i++) { BASKER_ASSERT(num_threads > 0, "tree num_threads"); - MALLOC_INT_1DARRAY(S[i], num_threads); + MALLOC_INT_1DARRAY(S(i), num_threads); } //this will want to be across all threads @@ -335,7 +335,7 @@ namespace BaskerNS l, t, lvl_counter ,lvl_idx, tree.nblks); #endif - S[l][t] = tree.lvlset[lvl_idx]; + S(l)(t) = tree.lvlset[lvl_idx]; if(lvl_counter >= (pow(tree.nparts,l)-1)) { lvl_idx++; @@ -356,7 +356,7 @@ namespace BaskerNS { for(Int t=0; t < num_threads; t++) { - cout << S[l][t] << " , " ; + cout << S(l)(t) << " , " ; }//end over nhreads cout << endl; }//end over nlvls @@ -368,11 +368,11 @@ namespace BaskerNS { for(Int t=0; t < num_threads; t++) { - Int s_element = S[l][t]; + Int s_element = S(l)(t); Int row_size = (tree.row_tabs[s_element+1] - tree.row_tabs[s_element]); - thread_array[t].iws_size += row_size; - thread_array[t].ews_size += row_size; + thread_array(t).iws_size += row_size; + thread_array(t).ews_size += row_size; }//end over threads }//end over lvls @@ -592,7 +592,7 @@ namespace BaskerNS l, t, lvl_counter ,lvl_idx, tree.nblks); #endif - S[l][t] = tree.lvlset[lvl_idx]; + S(l)(t) = tree.lvlset[lvl_idx]; if(lvl_counter >= (pow(tree.nparts,l)-1)) { lvl_idx++; @@ -611,7 +611,7 @@ namespace BaskerNS { for(Int t=0; t < num_threads; t++) { - cout << S[l][t] << " , " ; + cout << S(l)(t) << " , " ; }//end over nhreads cout << endl; }//end over nlvls @@ -624,10 +624,10 @@ namespace BaskerNS { for(Int t=0; t < num_threads; t++) { - Int s_element = S[l][t]; + Int s_element = S(l)(t); Int row_size = (tree.row_tabs[s_element+1] - tree.row_tabs[s_element]); - thread_array[t].iws_size += row_size; - thread_array[t].ews_size += row_size; + thread_array(t).iws_size += row_size; + thread_array(t).ews_size += row_size; }//end over threads }//end over lvls @@ -855,11 +855,11 @@ namespace BaskerNS #endif for(Int j=i; j != -flat.ncol; j=tree.treetab[j]) { - MATRIX_1DARRAY &UMtemp = AVM[j]; - MATRIX_1DARRAY &LMtemp = ALM[i]; + MATRIX_1DARRAY &UMtemp = 
AVM(j); + MATRIX_1DARRAY &LMtemp = ALM(i); - MATRIX_1DARRAY &LUtemp = LU[j]; - MATRIX_1DARRAY &LLtemp = LL[i]; + MATRIX_1DARRAY &LUtemp = LU(j); + MATRIX_1DARRAY &LLtemp = LL(i); #ifdef MY_DEBUG printf( " AVM(%d)(%d).set_shape(%dx%d)\n",j,U_view_count[j], tree.col_tabs[i+1]-tree.col_tabs[i],tree.col_tabs[j+1]-tree.col_tabs[j] ); @@ -1322,9 +1322,15 @@ namespace BaskerNS #ifdef BASKER_KOKKOS BASKER_BOOL keep_zeros = BASKER_FALSE; BASKER_BOOL alloc = alloc_BTFA; //BASKER_FALSE; - kokkos_order_init_2D iO(this, alloc, keep_zeros); // t_init_2DA; fill row_idx, vals into ALM, AVM calling convert2D - Kokkos::parallel_for(TeamPolicy(num_threads,1), iO); - Kokkos::fence(); + #ifdef BASKER_PARALLEL_INIT_2D + kokkos_order_init_2D iO(this, alloc, keep_zeros); // t_init_2DA; fill row_idx, vals into ALM, AVM calling convert2D + Kokkos::parallel_for(TeamPolicy(num_threads,1), iO); + Kokkos::fence(); + #else + for (Int p = 0; p < num_threads; p++) { + this->t_init_2DA(p, alloc, keep_zeros); + } + #endif #else //Comeback #endif diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp index 6009e346f73b..f57447b10906 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_types.hpp @@ -90,7 +90,7 @@ enum BASKER_INCOMPLETE_CODE #define BASKER_INC_TOL_VALUE 0.0001 //MACRO INC FILL (this will become dynamic in the future) -#define BASKER_FILL_USER 1.00 +#define BASKER_FILL_USER 0.00 #define BASKER_FILL_LESTIMATE 1.50 #define BASKER_FILL_UESTIMATE 1.50 #define BASKER_FILL_LLOWERESTIMATE 2.00 @@ -144,17 +144,17 @@ enum BASKER_INCOMPLETE_CODE #define BASKER_KOKKOS_NOINIT Kokkos::ViewAllocateWithoutInitializing #define INT_RANK2DARRAY Kokkos::View #define INT_1DARRAY Kokkos::View -#define INT_2DARRAY Kokkos::View #define ENTRY_1DARRAY Kokkos::View -#define ENTRY_2DARRAY Kokkos::View #define BOOL_1DARRAY Kokkos::View #define BOOL_2DARRAY 
Kokkos::View -#define MATRIX_1DARRAY Kokkos::View -#define MATRIX_2DARRAY Kokkos::View -#define MATRIX_VIEW_1DARRAY Kokkos::View -#define MATRIX_VIEW_2DARRAY Kokkos::View -#define THREAD_1DARRAY Kokkos::View -#define THREAD_2DARRAY Kokkos::View + +#define INT_2DARRAY Kokkos::View +#define ENTRY_2DARRAY Kokkos::View +#define MATRIX_1DARRAY Kokkos::View +#define MATRIX_2DARRAY Kokkos::View +#define MATRIX_VIEW_1DARRAY Kokkos::View +#define MATRIX_VIEW_2DARRAY Kokkos::View +#define THREAD_1DARRAY Kokkos::View #define INT_1DARRAY_PAIRS Kokkos::View*, BASKER_EXE_SPACE> //Macro Memory Calls @@ -163,7 +163,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC malloc_pairs_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = INT_1DARRAY_PAIRS(BASKER_KOKKOS_NOINIT("pairs_1d"),s); \ + Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -172,7 +172,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC int_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = INT_1DARRAY(BASKER_KOKKOS_NOINIT("int_1d"),s); \ + Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -181,7 +181,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s0>0, "BASKER ASSERT MALLOC int_rank2d: size to alloc > 0 fails"); \ BASKER_ASSERT(s1>0, "BASKER ASSERT MALLOC int_rank2d: size to alloc > 0 fails"); \ - a = INT_RANK2DARRAY(BASKER_KOKKOS_NOINIT("int_rank2d"),s0,s1); \ + Kokkos::resize(a, s0,s1); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } @@ -189,7 +189,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0,"BASKER ASSERT MALLOC int_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = INT_2DARRAY("int_2d",s); \ + a = INT_2DARRAY(Kokkos::view_alloc("int_2d", Kokkos::SequentialHostInit),s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -198,7 +198,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC entry_1d: size to alloc >= 0 fails"); \ if (s > 
0) { \ - a = ENTRY_1DARRAY(BASKER_KOKKOS_NOINIT("entry_1d"),s); \ + Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -207,7 +207,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC entry_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = ENTRY_2DARRAY("entry_2d",s); \ + a = ENTRY_2DARRAY(Kokkos::view_alloc("matrix_2d", Kokkos::SequentialHostInit),s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -216,7 +216,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC bool_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = BOOL_1DARRAY(BASKER_KOKKOS_NOINIT("bool_1d"), s); \ + Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -225,7 +225,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC bool_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = BOOL_2DARRAY("bool_2d", s); \ + Kokkos::resize(a, s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -234,7 +234,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = MATRIX_1DARRAY("matrix_1d",s); \ + a = MATRIX_1DARRAY(Kokkos::view_alloc("matrix_1d", Kokkos::SequentialHostInit),s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -243,7 +243,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = MATRIX_2DARRAY("matrix_2d",s); \ + a = MATRIX_2DARRAY(Kokkos::view_alloc("matrix_2d", Kokkos::SequentialHostInit),s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -252,7 +252,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_view_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = MATRIX_VIEW_1DARRAY("matrix_view_1d",s); \ + a = MATRIX_VIEW_1DARRAY(Kokkos::view_alloc("matrix_view_1d", Kokkos::SequentialHostInit),s); \ 
if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -261,7 +261,7 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC matrix_view_2d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = MATRIX_VIEW_2DARRAY("matrix_view_2d",s); \ + a = MATRIX_VIEW_2DARRAY(Kokkos::view_alloc("matrix_view_2d", Kokkos::SequentialHostInit),s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ @@ -270,33 +270,12 @@ enum BASKER_INCOMPLETE_CODE { \ BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC thread_1d: size to alloc >= 0 fails"); \ if (s > 0) { \ - a = THREAD_1DARRAY("thread_1d",s); \ + a = THREAD_1DARRAY(Kokkos::view_alloc("thread_1d", Kokkos::SequentialHostInit),s); \ if(a.data() == NULL) \ throw std::bad_alloc(); \ } \ } -#define MALLOC_THREAD_2DARRAY(a,s) \ - { \ - BASKER_ASSERT(s >= 0, "BASKER ASSERT MALLOC thread_2d: size to alloc >= 0 fails"); \ - if (s > 0) { \ - a = THREAD_2DARRAY("thread_2d",s); \ - if(a.data() == NULL) \ - throw std::bad_alloc(); \ - } \ - } -//RESIZE (with copy) -#define RESIZE_1DARRAY(a,os,s) \ - { \ - BASKER_ASSERT(s >= 0, "BASKER ASSERT RESIZE 1D ARRAY: size to alloc >= 0 fails"); \ - Kokkos::resize(a,s); \ - } -#define RESIZE_2DARRAY(a,os1,os2,s1,s2) \ - { \ - BASKER_ASSERT(s1 >= 0 && s2 >= 0, "BASKER ASSERT RESIZE 2D ARRAY: size to alloc >= 0 fails"); \ - Kokkos::resize(a,s1,s2); \ - } -#define RESIZE_INT_1DARRAY(a,os,s) RESIZE_1DARRAY(a,os,s) -#define RESIZE_ENTRY_1DARRAY(a,os,s) RESIZE_1DARRAY(a,os,s) + //REALLOC (no copy) #define REALLOC_1DARRAY(a,os,s) \ { \ @@ -310,6 +289,7 @@ enum BASKER_INCOMPLETE_CODE } #define REALLOC_INT_1DARRAY(a,os,s) REALLOC_1DARRAY(a,os,s) #define REALLOC_ENTRY_1DARRAY(a,os,s) REALLOC_1DARRAY(a,os,s) + //Set values #define SET_INT_1DARRAY(a, b, s) \ { \ @@ -334,77 +314,73 @@ enum BASKER_INCOMPLETE_CODE #define FREE(a) BASKER_NO_OP -#define FREE_INT_1DARRAY_PAIRS(a) \ - { \ - a = INT_1DARRAY_PAIRS(); \ +#define FREE_INT_1DARRAY_PAIRS(a) \ + { \ + Kokkos::resize(a,0); \ } -#define 
FREE_INT_1DARRAY(a) \ - { \ - a = INT_1DARRAY(); \ +#define FREE_INT_1DARRAY(a) \ + { \ + Kokkos::resize(a,0); \ } -#define FREE_INT_RANK2DARRAY(a) \ - { \ - a = INT_RANK2DARRAY(); \ +#define FREE_INT_RANK2DARRAY(a) \ + { \ + Kokkos::resize(a,0); \ } -#define FREE_INT_2DARRAY(a,n) \ - { \ - a = INT_2DARRAY(); \ +#define FREE_INT_2DARRAY(a,n) \ + { \ + Kokkos::resize(a,0); \ } -#define FREE_ENTRY_1DARRAY(a) \ - { \ - a = ENTRY_1DARRAY(); \ +#define FREE_ENTRY_1DARRAY(a) \ + { \ + Kokkos::resize(a,0); \ } -#define FREE_ENTRY_2DARRAY(a,n) \ - { \ - a = ENTRY_2DARRAY(); \ +#define FREE_ENTRY_2DARRAY(a,n) \ + { \ + Kokkos::resize(a,0); \ } -#define FREE_BOOL_1DARRAY(a) \ - { \ - a = BOOL_1DARRAY(); \ +#define FREE_BOOL_1DARRAY(a) \ + { \ + Kokkos::resize(a,0); \ } -#define FREE_BOOL_2DARRAY(a,n) \ - { \ - a = BOOL_2DARRAY(); \ +#define FREE_BOOL_2DARRAY(a,n) \ + { \ + Kokkos::resize(a,0); \ } -#define FREE_MATRIX_1DARRAY(a) \ - { \ - a = MATRIX_1DARRAY(); \ +#define FREE_MATRIX_1DARRAY(a) \ + { \ + Kokkos::resize(a,0); \ } -#define FREE_MATRIX_2DARRAY(a,n) \ - { \ - a = MATRIX_2DARRAY(); \ +#define FREE_MATRIX_2DARRAY(a,n) \ + { \ + Kokkos::resize(a,0); \ } #define FREE_MATRIX_VIEW_1DARRAY(a) \ - { \ - a = MATRIX_VIEW_1DARRAY(); \ + { \ + Kokkos::resize(a,0); \ } -#define FREE_MATRIX_VIEW_2DARRAY(a,n) \ - { \ - a = MATRIX_VIEW_2DARRAY(); \ +#define FREE_MATRIX_VIEW_2DARRAY(a,n) \ + { \ + Kokkos::resize(a,0); \ } #define FREE_THREAD_1DARRAY(a) \ - { \ - a = THREAD_1DARRAY(); \ + { \ + Kokkos::resize(a,0); \ } -#define FREE_THREAD_2DARRAY(a,n) \ - { \ - a = TRHEAD_2DARRAY(); \ - } +#else // not BASKER_KOKKOS -#else //Execution Space #define BASKER_EXE_SPACE void* //ReMacro Basker Classes @@ -428,7 +404,6 @@ enum BASKER_INCOMPLETE_CODE #define MATRIX_VIEW_1DARRAY BASKER_MATRIX_VIEW* #define MATRIX_VIEW_2DARRAY BASKER_MATRIX_VIEW** #define THREAD_1DARRAY BASKER_THREAD* -#define THREAD_2DARRAY BASKER_THREAD** //Macro Memory Calls //Malloc @@ -443,12 +418,6 @@ enum 
BASKER_INCOMPLETE_CODE #define MALLOC_MATRIX_VIEW_1DARRAY(a,s) a = new BASKER_MATRIX_VIEW [s] #define MALLOC_MATRIX_VIEW_2DARRAY(a,s) a = new MATRIX_VIEW_1DARRAY[s] #define MALLOC_THREAD_1DARRAY(a,s) a = new BASKER_THREAD [s] -#define MALLOC_THREAD_2DARRAY(a,s) a = new THREAD_1DARRAY [s] -//Resize (copy old data) (come back and add) -#define RESIZE_1DARRAY(a,os,s) BASKER_NO_OP -#define RESIZE_2DARRAY(a,os1,os2,s1,s2) BASKER_NO_OP -#define RESIZE_INT_1DARRAY(a,os,s) BASKER_NO_OP -#define RESIZE_ENTRY_1DARRAY(a,os,s) BASKER_NO_OP //Realloc (dont copy old data) #define REALLOC_1DARRAY(a,os,s) BASKER_NO_OP #define REALLOC_2DARRAY(a,os1,os2,s1,s2) BASKER_NO_OP @@ -525,13 +494,6 @@ enum BASKER_INCOMPLETE_CODE FREE(a); \ } -#define FREE_THREAD_2DARRAY(a,n) \ - { \ - for(BASKER_INT MACRO_I = 0; MACRO_I < s; MACRO_I++) \ - FREE(a[MACRO_I]); \ - FREE(a); \ - } - #endif //end ifdef BASKER_KOKKOS //Inline command diff --git a/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp b/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp index 130f62ea6127..455b76004a98 100644 --- a/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp +++ b/packages/shylu/shylu_node/basker/src/shylubasker_util.hpp @@ -252,11 +252,11 @@ namespace BaskerNS typedef Kokkos::TeamPolicy TeamPolicy; typedef typename TeamPolicy::member_type TeamMember; Kokkos::parallel_for( - TeamPolicy(Exe_Space::thread_pool_size(),1), - KOKKOS_LAMBDA(const TeamMember& thread) + TeamPolicy(Exe_Space::thread_pool_size(),1), + KOKKOS_LAMBDA(const TeamMember& thread) #else #pragma omp parallel - #endif + #endif { #ifdef BASKER_KOKKOS if(kid == thread.league_rank()) @@ -291,12 +291,11 @@ namespace BaskerNS #ifdef BASKER_KOKKOS typedef Kokkos::TeamPolicy TeamPolicy; typedef typename TeamPolicy::member_type TeamMember; - Kokkos::parallel_for( - TeamPolicy(Exe_Space::thread_pool_size(),1), - KOKKOS_LAMBDA(const TeamMember& thread) + Kokkos::parallel_for(TeamPolicy(Exe_Space::thread_pool_size(),1), + 
KOKKOS_LAMBDA(const TeamMember& thread) #else #pragma omp parallel - #endif + #endif { #ifdef BASKER_KOKKOS if(kid == thread.league_rank()) @@ -365,7 +364,7 @@ namespace BaskerNS { #ifdef BASKER_DEBUG_INIT printf("L Factor Init: %d %d , kid: %d, nnz: %ld \n", - b, row, kid, LL[b][row].nnz); + b, row, kid, LL(b)(row).nnz); #endif LL(b)(row).clear_pend(); @@ -384,7 +383,7 @@ namespace BaskerNS #ifdef BASKER_DEBUG_INIT printf("U Factor init: %d %d, nnz: %ld \n", b, LU_size[b]-1, - LU[b][LU_size[b]-1].nnz); + LU(b)(LU_size[b]-1).nnz); #endif //LU(b)(LU_size(b)-1).nnz = 0; @@ -417,7 +416,7 @@ namespace BaskerNS #ifdef BASKER_DEBUG_INIT printf("Init U: %d %d lvl: %d l: %d kid: %d nnz: %ld \n", U_col, U_row, lvl, l, kid, - LU[U_col][U_row].nnz); + LU(U_col)(U_row).nnz); #endif for(Int kk = 0; kk < LU(U_col)(U_row).ncol+1; kk++) @@ -455,8 +454,8 @@ namespace BaskerNS Kokkos::Timer timer_init_matrixL; Kokkos::Timer timer_fill_matrixL; timer_initL.reset(); + printf( " > t_init_factor( tid = %d, nlvls = %d ) <\n",kid,tree.nlvls+1 ); fflush(stdout); #endif - //printf( " > t_init_factor( tid = %d ) <\n",kid ); for(Int lvl = 0; lvl < tree.nlvls+1; lvl++) { if(kid%((Int)pow(2,lvl)) == 0) @@ -467,13 +466,13 @@ namespace BaskerNS { #ifdef BASKER_DEBUG_INIT printf("L Factor Init: %d %d , kid: %d, nnz: %ld \n", - b, row, kid, LL[b][row].nnz); + b, row, kid, LL(b)(row).nnz); #endif #ifdef BASKER_TIMER timer_init_matrixL.reset(); + printf( " ++ lvl=%d: LL(%d,%d): nnz=%d, mnnz=%d ++\n",(int)lvl, (int)b, (int)row, (int)LL(b)(row).nnz, (int)LL(b)(row).mnnz); fflush(stdout); #endif - //printf( " lvl=%d: LL(%d,%d): nnz=%d, mnnz=%d\n",(int)lvl, (int)b, (int)row, (int)LL(b)(row).nnz, (int)LL(b)(row).mnnz); LL(b)(row).init_matrix("Loffdig", LL(b)(row).srow, LL(b)(row).nrow, @@ -481,6 +480,7 @@ namespace BaskerNS LL(b)(row).ncol, LL(b)(row).nnz); #ifdef BASKER_TIMER + printf( " >> LL(%d,%d).init_matrix done <<\n",b,row ); fflush(stdout); init_matrixL_time += timer_init_matrixL.seconds(); #endif 
@@ -491,15 +491,19 @@ namespace BaskerNS } #ifdef BASKER_TIMER timer_fill_matrixL.reset(); + printf( " ++ zero out (%d) ++\n",int(LL(b)(row).col_ptr.extent(0)) ); fflush(stdout); #endif //LL(b)(row).fill(); - Kokkos::deep_copy(LL(b)(row).col_ptr, 0); + LL(b)(row).init_ptr(); + //Kokkos::deep_copy(LL(b)(row).col_ptr, 0); #ifdef BASKER_TIMER + printf( " LL(%d)(%d).init_pend(ncol = %d)\n",b,row,LL(b)(row).ncol ); fflush(stdout); fill_matrixL_time += timer_fill_matrixL.seconds(); #endif - //printf( " LL(%d)(%d).init_pend(ncol = %d)\n",b,row,LL(b)(row).ncol ); LL(b)(row).init_pend(); - + #ifdef BASKER_TIMER + printf( " (b=%d: row=%d) done\n\n",b,row ); fflush(stdout); + #endif }//end over all row }//end select which thread }//end for over all lvl @@ -508,6 +512,7 @@ namespace BaskerNS std::cout << " > Basker t_init_factor::initL(" << kid << "): time: " << initL_time << std::endl; std::cout << " > + Basker t_init_factor::initL::initMatrix(" << kid << "): time: " << init_matrixL_time << std::endl; std::cout << " > + Basker t_init_factor::initL::fillMatrix(" << kid << "): time: " << fill_matrixL_time << std::endl; + fflush(stdout); #endif //U @@ -524,10 +529,14 @@ namespace BaskerNS #ifdef BASKER_DEBUG_INIT printf("U Factor init: %d %d, nnz: %ld \n", b, LU_size[b]-1, - LU[b][LU_size[b]-1].nnz); + LU(b)(LU_size[b]-1).nnz); #endif - //printf( " lvl=%d: LU(%d,%d): nnz=%d, mnnz=%d\n", (int)lvl, (int)b, (int)LU_size(b)-1, (int)LU(b)(LU_size(b)-1).nnz, (int)LU(b)(LU_size(b)-1).mnnz); + #ifdef BASKER_TIMER + printf( " lvl=%d: LU(%d,%d): %dx%d, nnz=%d, mnnz=%d, at (%d,%d)\n", (int)lvl, (int)b, (int)LU_size(b)-1, + (int)LU(b)(LU_size(b)-1).nrow,(int)LU(b)(LU_size(b)-1).ncol,(int)LU(b)(LU_size(b)-1).nnz, (int)LU(b)(LU_size(b)-1).mnnz, + (int)LU(b)(LU_size(b)-1).srow,(int)LU(b)(LU_size(b)-1).scol); + #endif LU(b)(LU_size(b)-1).init_matrix("Udiag", LU(b)(LU_size(b)-1).srow, LU(b)(LU_size(b)-1).nrow, @@ -536,7 +545,8 @@ namespace BaskerNS LU(b)(LU_size(b)-1).nnz); 
//LU(b)(LU_size(b)-1).fill(); - Kokkos::deep_copy(LU(b)(LU_size(b)-1).col_ptr, 0); + LU(b)(LU_size(b)-1).init_ptr(); + //Kokkos::deep_copy(LU(b)(LU_size(b)-1).col_ptr, 0); for(Int l = lvl+1; l < tree.nlvls+1; l++) { @@ -573,10 +583,15 @@ namespace BaskerNS #ifdef BASKER_DEBUG_INIT printf("Init U: %d %d lvl: %d l: %d kid: %d nnz: %ld \n", U_col, U_row, lvl, l, kid, - LU[U_col][U_row].nnz); + LU(U_col)(U_row).nnz); #endif - //printf( " > l=%d: LU(%d,%d): nnz=%d, mnnz=%d\n", (int)l, (int)U_col, (int)U_row, (int)LU(U_col)(U_row).nnz, (int)LU(U_col)(U_row).mnnz); + #ifdef BASKER_TIMER + printf( " +++ l=%d: LU(%d,%d): %dx%d, nnz=%d, mnnz=%d at (%d,%d)\n", (int)l, (int)U_col, (int)U_row, + (int)LU(U_col)(U_row).nrow,(int)LU(U_col)(U_row).ncol, + (int)LU(U_col)(U_row).nnz, (int)LU(U_col)(U_row).mnnz, + (int)LU(U_col)(U_row).srow,(int)LU(U_col)(U_row).scol); + #endif LU(U_col)(U_row).init_matrix("Uoffdiag", LU(U_col)(U_row).srow, LU(U_col)(U_row).nrow, @@ -585,7 +600,8 @@ namespace BaskerNS LU(U_col)(U_row).nnz); //LU(U_col)(U_row).fill(); - Kokkos::deep_copy(LU(U_col)(U_row).col_ptr, 0); + LU(U_col)(U_row).init_ptr(); + //Kokkos::deep_copy(LU(U_col)(U_row).col_ptr, 0); if(Options.incomplete == BASKER_TRUE) { @@ -775,7 +791,7 @@ namespace BaskerNS { //printf(" %d: Using BTF AVM(%d,%d), %dx%d\n",kid,U_col,U_row, AVM(U_col)(U_row).nrow,AVM(U_col)(U_row).ncol); //printf("2nd convert AVM: %d %d size:%d kid: %d\n", - // U_col, U_row, AVM(U_col)(U_row).nnz, + // U_col, U_row, AVM(U_col)(U_row).nnz, // kid); AVM(U_col)(U_row).convert2D(BTF_A, alloc, kid); //printf(" %d: Using BTF AU(%d,%d) done\n",kid,U_col,U_row); @@ -859,19 +875,20 @@ namespace BaskerNS //printf( " kid=%d :: LL(%d, %d).fill\n",kid, b,l ); //LL(b)(l).fill(); - Kokkos::deep_copy(LL(b)(l).col_ptr, 0); + LL(b)(l).init_ptr(); + //Kokkos::deep_copy(LL(b)(l).col_ptr, 0); if(l==0) { //Also workspace matrix //This could be made smaller //printf("C: size: %d kid: %d \n", - // iws_size, kid); + // iws_size, kid); - 
//thread_array[kid].C.init_matrix("cwork", - // 0, iws_size, - // 0, 2, - // iws_size*2); + //thread_array(kid).C.init_matrix("cwork", + // 0, iws_size, + // 0, 2, + // iws_size*2); } } //end for l } @@ -888,19 +905,19 @@ namespace BaskerNS { // if any left over for BLK factorization if(Options.btf == BASKER_TRUE) { - Int iws_mult = thread_array[kid].iws_mult; - Int iws_size = thread_array[kid].iws_size; - Int ews_mult = thread_array[kid].ews_mult; - Int ews_size = thread_array[kid].ews_size; + Int iws_mult = thread_array(kid).iws_mult; + Int iws_size = thread_array(kid).iws_size; + Int ews_mult = thread_array(kid).ews_mult; + Int ews_size = thread_array(kid).ews_size; for(Int i=0; i < iws_mult*iws_size; i++) { - thread_array[kid].iws[i] = 0; + thread_array(kid).iws[i] = 0; } for(Int i = 0; i < ews_mult*ews_size; i++) { - thread_array[kid].ews[i] = 0.0; + thread_array(kid).ews[i] = 0.0; } } } @@ -920,14 +937,14 @@ namespace BaskerNS } } printf("init_workspace 1d, kid: %d size: %d %d %d %d \n", - kid, iws_mult, iws_size, ews_mult, ews_size); + kid, iws_mult, iws_size, ews_mult, ews_size); for(Int i=0; i< iws_mult*iws_size; i++) { - thread_array[kid].iws[i] = 0; + thread_array(kid).iws[i] = 0; } for(Int i = 0; i < ews_mult*ews_size; i++) { - thread_array[kid].ews[i] = 0; + thread_array(kid).ews[i] = 0; } #endif //endif def basker_2dl //return 0; @@ -995,7 +1012,7 @@ namespace BaskerNS for(Int l = 0; l < tree.nblks; l++) { - BASKER_MATRIX &myL = LL[l][0]; + BASKER_MATRIX &myL = LL(l)(0); for(Int k = 0; k < myL.ncol; k++) { @@ -1033,13 +1050,13 @@ namespace BaskerNS for(Int l = 0; l < tree.nblks; l++) { //over each column - for(Int k=0; k < LL[l][0].ncol; k++) + for(Int k=0; k < LL(l)(0).ncol; k++) { - fprintf(fp, "k=%ld \n", (long)k+LL[l][0].scol); + fprintf(fp, "k=%ld \n", (long)k+LL(l)(0).scol); for(Int r = 0; r < LL_size[l]; r++) { - BASKER_MATRIX &myL = LL[l][r]; + BASKER_MATRIX &myL = LL(l)(r); for(Int j = myL.col_ptr[k]; j < myL.col_ptr[k+1]; j++) { fprintf(fp, 
"(%ld , %ld , %ld, %ld, %ld) %g , ", @@ -1109,12 +1126,12 @@ namespace BaskerNS for(Int l = 0; l < tree.nblks; l++) { //over each column - for(Int k=0; k < LL[l][0].ncol; k++) + for(Int k=0; k < LL(l)(0).ncol; k++) { - //fprintf(fp, "k=%d \n", k+LL[l][0].scol); + //fprintf(fp, "k=%d \n", k+LL(l)(0).scol); for(Int r = 0; r < LL_size[l]; r++) { - BASKER_MATRIX &myL = LL[l][r]; + BASKER_MATRIX &myL = LL(l)(r); for(Int j = myL.col_ptr[k]; j < myL.col_ptr[k+1]; j++) { @@ -1167,12 +1184,12 @@ namespace BaskerNS for(Int l = 0; l < tree.nblks; l++) { //over each column - for(Int k = 0; k < LU[l][0].ncol; k++) + for(Int k = 0; k < LU(l)(0).ncol; k++) { //over each row of U for(Int r = 0; r < LU_size[l]; r++) { - BASKER_MATRIX &myU = LU[l][r]; + BASKER_MATRIX &myU = LU(l)(r); //over each nnz in column (k) of local U for(Int j = myU.col_ptr[k]; j < myU.col_ptr[k+1]; j++) @@ -1196,7 +1213,7 @@ namespace BaskerNS Int nblks = btf_nblks-btf_tabs_offset; for(Int i =0; i < nblks; i++) { - BASKER_MATRIX &myU = UBTF[i]; + BASKER_MATRIX &myU = UBTF(i); for(Int k = 0; k < myU.ncol; k++) { for(Int j = myU.col_ptr[k]; j< myU.col_ptr[k+1]; j++) @@ -1230,14 +1247,14 @@ namespace BaskerNS for(Int l = 0; l < tree.nblks; l++) { //over each column - for(Int k = 0; k < LU[l][0].ncol; k++) + for(Int k = 0; k < LU(l)(0).ncol; k++) { - fprintf(fp, "k=%ld \n", (long)k+LU[l][0].scol); + fprintf(fp, "k=%ld \n", (long)k+LU(l)(0).scol); //over each row of U for(Int r = 0; r < LU_size[l]; r++) { - BASKER_MATRIX &myU = LU[l][r]; + BASKER_MATRIX &myU = LU(l)(r); //over each nnz in column (k) of local U for(Int j = myU.col_ptr[k]; j < myU.col_ptr[k+1]; j++) @@ -1261,7 +1278,7 @@ namespace BaskerNS Int nblks = btf_nblks-btf_tabs_offset; for(Int i =0; i < nblks; i++) { - BASKER_MATRIX &myU = UBTF[i]; + BASKER_MATRIX &myU = UBTF(i); for(Int k = 0; k < myU.ncol; k++) { fprintf(fp, "k=%ld \n", (long)k+myU.scol); @@ -1304,7 +1321,7 @@ namespace BaskerNS fprintf(fp, "%%%%MatrixMarket matrix coordinate real 
general\n"); fprintf(fp, "%%Generated by **Basker** \n"); fprintf(fp, "%%Starting Row %ld Starting Col %ld \n", - (long)M.srow, (long)M.scol); + (long)M.srow, (long)M.scol); fprintf(fp, "%ld %ld %ld \n", (long)M.nrow, (long)M.ncol, (long)M.nnz); Int bcol=M.scol; @@ -1334,7 +1351,7 @@ namespace BaskerNS fprintf(fp, "%%%%MatrixMarket matrix coordinate real general\n"); fprintf(fp, "%%Generated by **Basker** \n"); fprintf(fp, "%%Starting Row %d Starting Col %d \n", - M.srow, M.scol); + M.srow, M.scol); fprintf(fp, "%ld %ld %ld \n", (long)M.nrow, (long)M.ncol, (long)M.nnz); Int bcol=M.scol; @@ -1455,61 +1472,6 @@ namespace BaskerNS }//end readMTX() - //Print out RHS RHS.txt - template - int Basker::printRHS() - { - if(solve_flag == false) - {return -1;} - - FILE *fp; - fp = fopen("RHS.txt", "w"); - - //over each row - for(Int r = 0; r < A.nrow; r++) - { - //over each column NOTE: come back to - //for(Int k = 0; k < rhs.size(); k++) - for(Int k = 0; k < 1; k++) - { - //fprintf(fp, "%ld %ld %f, ", (long)r, (long)gperm[r], rhs[k][r]); - fprintf(fp, "%ld %ld %.16e, ", (long)r, (long)gperm[r], rhs[k][r]); - }//end over each column - fprintf(fp, "\n"); - }//end over each row - - fclose(fp); - - return 0; - }//end printRHS() - - //Print solution SOL.txt - template - int Basker::printSOL() - { - if(solve_flag == false) - {return -1;} - - FILE *fp; - fp = fopen("SOL.txt", "w"); - - //over each row - for(Int r = 0; r < A.nrow; r++) - { - //over each column Note: come back to - //for(Int k = 0; k < rhs.size(); k++) - for(Int k = 0 ; k < 1; k++) - { - fprintf(fp, "%ld %ld %f, ", (long)r, (long)gperm[r], sol[k][r]); - }//end over each column - fprintf(fp, "\n"); - }//end over each row - - fclose(fp); - - return 0; - }//end printSOL() - //Prints the given tree into a file to analyze template void Basker::printTree() @@ -1721,7 +1683,7 @@ namespace BaskerNS { for(Int r = 0; r < LL_size(l); r++) { - BASKER_MATRIX &myL = LL[l][r]; + BASKER_MATRIX &myL = LL(l)(r); Int brow = myL.srow; 
Int bcol = myL.scol; @@ -2354,7 +2316,7 @@ namespace BaskerNS ) { return (Int)(thread.league_rank()*thread.team_size()+ - thread.team_rank()); + thread.team_rank()); }//end t_get_kid @@ -2477,4 +2439,5 @@ namespace BaskerNS }//end namespace basker +#undef BASKER_TIMER #endif //end basker_util_hpp diff --git a/packages/teuchos/core/src/Teuchos_ConstNonconstObjectContainer.hpp b/packages/teuchos/core/src/Teuchos_ConstNonconstObjectContainer.hpp index fae32b32a9f2..a882494097e3 100644 --- a/packages/teuchos/core/src/Teuchos_ConstNonconstObjectContainer.hpp +++ b/packages/teuchos/core/src/Teuchos_ConstNonconstObjectContainer.hpp @@ -328,9 +328,6 @@ class ConstNonconstObjectContainer { /** \brief Perform an implicit conversion to an RCP. */ operator RCP() const { return getConstObj(); } - /** \brief Return the internal count. */ - int count() const - { return constObj_.count(); } private: RCP constObj_; diff --git a/packages/tpetra/core/src/Tpetra_ConfigDefs.hpp b/packages/tpetra/core/src/Tpetra_ConfigDefs.hpp index 64269d97d7f9..ec909ed0aef9 100644 --- a/packages/tpetra/core/src/Tpetra_ConfigDefs.hpp +++ b/packages/tpetra/core/src/Tpetra_ConfigDefs.hpp @@ -198,6 +198,24 @@ namespace Tpetra { Backward, Symmetric }; + + // FE* enums + namespace FE { + + // Enum for activity + enum WhichActive + { + ACTIVE_OWNED, + ACTIVE_OWNED_PLUS_SHARED + }; + + enum class FillState + { + open, // matrix is "open". Values can freely summed in to and replaced + modify, // matrix is open for modification. 
*local* values can be replaced + closed + }; + } } // For backwards compatibility diff --git a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp index f0eef6b3b32e..a88b5ca649ba 100644 --- a/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_CrsMatrix_def.hpp @@ -47,6 +47,7 @@ #include "KokkosBlas1_scal.hpp" #include "KokkosSparse_getDiagCopy.hpp" #include "KokkosSparse_spmv.hpp" +#include "Kokkos_StdAlgorithms.hpp" #include #include @@ -8301,24 +8302,16 @@ CrsMatrix:: << std::endl; std::cerr << os.str (); } - // Make sure that host has the latest version, since we're - // using the version on host. If host has the latest - // version, syncing to host does nothing. - destMat->numExportPacketsPerLID_.sync_host (); - Teuchos::ArrayView numExportPacketsPerLID = - getArrayViewFromDualView (destMat->numExportPacketsPerLID_); - destMat->numImportPacketsPerLID_.sync_host (); - Teuchos::ArrayView numImportPacketsPerLID = - getArrayViewFromDualView (destMat->numImportPacketsPerLID_); - + destMat->numExportPacketsPerLID_.sync_device(); + auto numExportPacketsPerLID = destMat->numExportPacketsPerLID_.view_device(); + auto numImportPacketsPerLID = destMat->numImportPacketsPerLID_.view_device(); if (verbose) { std::ostringstream os; os << *verbosePrefix << "Calling 3-arg doReversePostsAndWaits" << std::endl; std::cerr << os.str (); } - Distor.doReversePostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1, - destMat->numImportPacketsPerLID_.view_host()); + Distor.doReversePostsAndWaits(numExportPacketsPerLID, 1, numImportPacketsPerLID); if (verbose) { std::ostringstream os; os << *verbosePrefix << "Finished 3-arg doReversePostsAndWaits" @@ -8326,34 +8319,26 @@ CrsMatrix:: std::cerr << os.str (); } - size_t totalImportPackets = 0; - for (Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) { - totalImportPackets += numImportPacketsPerLID[i]; - } + size_t totalImportPackets 
= Kokkos::Experimental::reduce(typename Node::execution_space(), numImportPacketsPerLID); // Reallocation MUST go before setting the modified flag, // because it may clear out the flags. destMat->reallocImportsIfNeeded (totalImportPackets, verbose, verbosePrefix.get ()); destMat->imports_.modify_host (); - auto hostImports = destMat->imports_.view_host(); - // This is a legacy host pack/unpack path, so use the host - // version of exports_. - destMat->exports_.sync_host (); - auto hostExports = destMat->exports_.view_host(); + auto deviceImports = destMat->imports_.view_device(); + auto deviceExports = destMat->exports_.view_device(); if (verbose) { std::ostringstream os; - os << *verbosePrefix << "Calling 4-arg doReversePostsAndWaits" + os << *verbosePrefix << "Calling 4-arg doReversePostsAndWaitsKokkos" << std::endl; std::cerr << os.str (); } - Distor.doReversePostsAndWaits (hostExports, - numExportPacketsPerLID, - hostImports, - numImportPacketsPerLID); + destMat->imports_.sync_device(); + Distor.doReversePostsAndWaitsKokkos (deviceExports, numExportPacketsPerLID, deviceImports, numImportPacketsPerLID); if (verbose) { std::ostringstream os; - os << *verbosePrefix << "Finished 4-arg doReversePostsAndWaits" + os << *verbosePrefix << "Finished 4-arg doReversePostsAndWaitsKokkos" << std::endl; std::cerr << os.str (); } @@ -8396,23 +8381,16 @@ CrsMatrix:: << std::endl; std::cerr << os.str (); } - // Make sure that host has the latest version, since we're - // using the version on host. If host has the latest - // version, syncing to host does nothing. 
- destMat->numExportPacketsPerLID_.sync_host (); - Teuchos::ArrayView numExportPacketsPerLID = - getArrayViewFromDualView (destMat->numExportPacketsPerLID_); - destMat->numImportPacketsPerLID_.sync_host (); - Teuchos::ArrayView numImportPacketsPerLID = - getArrayViewFromDualView (destMat->numImportPacketsPerLID_); + destMat->numExportPacketsPerLID_.sync_device (); + auto numExportPacketsPerLID = destMat->numExportPacketsPerLID_.view_device(); + auto numImportPacketsPerLID = destMat->numImportPacketsPerLID_.view_device(); if (verbose) { std::ostringstream os; os << *verbosePrefix << "Calling 3-arg doPostsAndWaits" << std::endl; std::cerr << os.str (); } - Distor.doPostsAndWaits(destMat->numExportPacketsPerLID_.view_host(), 1, - destMat->numImportPacketsPerLID_.view_host()); + Distor.doPostsAndWaits(numExportPacketsPerLID, 1, numImportPacketsPerLID); if (verbose) { std::ostringstream os; os << *verbosePrefix << "Finished 3-arg doPostsAndWaits" @@ -8420,34 +8398,26 @@ CrsMatrix:: std::cerr << os.str (); } - size_t totalImportPackets = 0; - for (Array_size_type i = 0; i < numImportPacketsPerLID.size (); ++i) { - totalImportPackets += numImportPacketsPerLID[i]; - } + size_t totalImportPackets = Kokkos::Experimental::reduce(typename Node::execution_space(), numImportPacketsPerLID); // Reallocation MUST go before setting the modified flag, // because it may clear out the flags. destMat->reallocImportsIfNeeded (totalImportPackets, verbose, verbosePrefix.get ()); destMat->imports_.modify_host (); - auto hostImports = destMat->imports_.view_host(); - // This is a legacy host pack/unpack path, so use the host - // version of exports_. 
- destMat->exports_.sync_host (); - auto hostExports = destMat->exports_.view_host(); + auto deviceImports = destMat->imports_.view_device(); + auto deviceExports = destMat->exports_.view_device(); if (verbose) { std::ostringstream os; - os << *verbosePrefix << "Calling 4-arg doPostsAndWaits" + os << *verbosePrefix << "Calling 4-arg doPostsAndWaitsKokkos" << std::endl; std::cerr << os.str (); } - Distor.doPostsAndWaits (hostExports, - numExportPacketsPerLID, - hostImports, - numImportPacketsPerLID); + destMat->imports_.sync_device (); + Distor.doPostsAndWaitsKokkos (deviceExports, numExportPacketsPerLID, deviceImports, numImportPacketsPerLID); if (verbose) { std::ostringstream os; - os << *verbosePrefix << "Finished 4-arg doPostsAndWaits" + os << *verbosePrefix << "Finished 4-arg doPostsAndWaitsKokkos" << std::endl; std::cerr << os.str (); } @@ -8494,12 +8464,6 @@ CrsMatrix:: Teuchos::Array RemotePids; if (runOnHost) { Teuchos::Array TargetPids; - // Backwards compatibility measure. We'll use this again below. - - // TODO JHU Need to track down why numImportPacketsPerLID_ has not been corrently marked as modified on host (which it has been) - // TODO JHU somewhere above, e.g., call to Distor.doPostsAndWaits(). - // TODO JHU This only becomes apparent as we begin to convert TAFC to run on device. - destMat->numImportPacketsPerLID_.modify_host(); //FIXME # ifdef HAVE_TPETRA_MMM_TIMINGS RCP tmCopySPRdata = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC unpack-count-resize + copy same-perm-remote data")))); @@ -8691,14 +8655,6 @@ CrsMatrix:: } else { // run on device - - // Backwards compatibility measure. We'll use this again below. - - // TODO JHU Need to track down why numImportPacketsPerLID_ has not been corrently marked as modified on host (which it has been) - // TODO JHU somewhere above, e.g., call to Distor.doPostsAndWaits(). - // TODO JHU This only becomes apparent as we begin to convert TAFC to run on device. 
- destMat->numImportPacketsPerLID_.modify_host(); //FIXME - # ifdef HAVE_TPETRA_MMM_TIMINGS RCP tmCopySPRdata = rcp(new TimeMonitor(*TimeMonitor::getNewTimer(prefix + std::string("TAFC unpack-count-resize + copy same-perm-remote data")))); # endif diff --git a/packages/tpetra/core/src/Tpetra_Details_DistributorActor.hpp b/packages/tpetra/core/src/Tpetra_Details_DistributorActor.hpp index 9b021ac53e9b..24e8351a6133 100644 --- a/packages/tpetra/core/src/Tpetra_Details_DistributorActor.hpp +++ b/packages/tpetra/core/src/Tpetra_Details_DistributorActor.hpp @@ -22,6 +22,7 @@ #include "Teuchos_Time.hpp" #include "Kokkos_TeuchosCommAdapters.hpp" +#include "Kokkos_StdAlgorithms.hpp" #ifdef HAVE_TPETRA_MPI #include "mpi.h" @@ -53,6 +54,13 @@ class DistributorActor { const ImpView &imports, const Teuchos::ArrayView& numImportPacketsPerLID); + template + void doPostsAndWaitsKokkos(const DistributorPlan& plan, + const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID); + template void doPosts(const DistributorPlan& plan, const ExpView& exports, @@ -66,6 +74,27 @@ class DistributorActor { const ImpView &imports, const Teuchos::ArrayView& numImportPacketsPerLID); + template + void doPostsKokkos(const DistributorPlan& plan, + const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID); + + template + void doPostsAllToAllKokkos( + const DistributorPlan &plan, const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID); + + template + void doPostsNbrAllToAllVKokkos( + const DistributorPlan &plan, const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID); + void doWaits(const DistributorPlan& plan); bool isReady() const; @@ -147,6 +176,22 
@@ void DistributorActor::doPostsAndWaits(const DistributorPlan& plan, doWaits(plan); } + +template +void DistributorActor::doPostsAndWaitsKokkos(const DistributorPlan& plan, + const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID) +{ + static_assert(areKokkosViews, + "Data arrays for DistributorActor::doPostsAndWaitsKokkos must be Kokkos::Views"); + static_assert(areKokkosViews, + "Num packets arrays for DistributorActor::doPostsAndWaitsKokkos must be Kokkos::Views"); + doPostsKokkos(plan, exports, numExportPacketsPerLID, imports, numImportPacketsPerLID); + doWaits(plan); +} + template using HostAccessibility = Kokkos::SpaceAccessibility; @@ -760,6 +805,140 @@ void DistributorActor::doPostsAllToAll( << "\"."); } +template +void DistributorActor::doPostsAllToAllKokkos( + const DistributorPlan &plan, const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID) { + TEUCHOS_TEST_FOR_EXCEPTION( + !plan.getIndicesTo().is_null(), std::runtime_error, + "Send Type=\"Alltoall\" only works for fast-path communication."); + + using size_type = Teuchos::Array::size_type; + using ExpExecSpace = typename ExpPacketsView::execution_space; + using ImpExecSpace = typename ImpPacketsView::execution_space; + + auto comm = plan.getComm(); + Kokkos::View sendcounts("sendcounts", comm->getSize()); + Kokkos::View sdispls("sdispls", comm->getSize()); + Kokkos::View recvcounts("recvcounts", comm->getSize()); + Kokkos::View rdispls("rdispls", comm->getSize()); + + auto sendcounts_d = Kokkos::create_mirror_view(ExpExecSpace(), sendcounts); + auto sdispls_d = Kokkos::create_mirror_view(ExpExecSpace(), sdispls); + auto recvcounts_d = Kokkos::create_mirror_view(ImpExecSpace(), recvcounts); + auto rdispls_d = Kokkos::create_mirror_view(ImpExecSpace(), rdispls); + + auto getStartsTo = 
Kokkos::Compat::getKokkosViewDeepCopy(plan.getStartsTo()); + auto getLengthsTo = Kokkos::Compat::getKokkosViewDeepCopy(plan.getLengthsTo()); + auto getProcsTo = Kokkos::Compat::getKokkosViewDeepCopy(plan.getProcsTo()); + + size_t curPKToffset = 0; + Kokkos::parallel_scan(Kokkos::RangePolicy(0, plan.getNumSends()), KOKKOS_LAMBDA(const size_t pp, size_t& offset, bool is_final) { + sdispls_d(getProcsTo(pp)) = offset; + size_t numPackets = 0; + for (size_t j = getStartsTo(pp); j < getStartsTo(pp) + getLengthsTo(pp); ++j) { + numPackets += numExportPacketsPerLID(j); + } + sendcounts_d(getProcsTo(pp)) = static_cast(numPackets); + offset += numPackets; + }, curPKToffset); + + int overflow; + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, plan.getNumSends()), KOKKOS_LAMBDA(const size_t pp, int& index) { + if(sendcounts_d(getProcsTo(pp)) < 0) { + index = pp+1; + } + }, overflow); + + // numPackets is converted down to int, so make sure it can be represented + TEUCHOS_TEST_FOR_EXCEPTION(overflow, std::logic_error, + "Tpetra::Distributor::doPostsKokkos(4 args, Kokkos): " + "Send count for send " + << overflow-1 << " is too large " + "to be represented as int."); + + const size_type actualNumReceives = + Teuchos::as(plan.getNumReceives()) + + Teuchos::as(plan.hasSelfMessage() ? 
1 : 0); + + auto getLengthsFrom = Kokkos::Compat::getKokkosViewDeepCopy(plan.getLengthsFrom()); + auto getProcsFrom = Kokkos::Compat::getKokkosViewDeepCopy(plan.getProcsFrom()); + + Kokkos::View curLIDoffset("curLIDoffset", actualNumReceives); + Kokkos::parallel_scan(Kokkos::RangePolicy(0, actualNumReceives), KOKKOS_LAMBDA(const size_type i, size_t& offset, bool is_final) { + if(is_final) curLIDoffset(i) = offset; + offset += getLengthsFrom(i); + }); + + Kokkos::parallel_scan(Kokkos::RangePolicy(0, actualNumReceives), KOKKOS_LAMBDA(const size_type i, size_t& curBufferOffset, bool is_final) { + size_t totalPacketsFrom_i = 0; + for(size_t j = 0; j < getLengthsFrom(i); j++) { + totalPacketsFrom_i += numImportPacketsPerLID(curLIDoffset(i) + j); + } + + if(is_final) rdispls_d(getProcsFrom(i)) = curBufferOffset; + if(is_final) recvcounts_d(getProcsFrom(i)) = static_cast(totalPacketsFrom_i); + curBufferOffset += totalPacketsFrom_i; + }); + + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, actualNumReceives), KOKKOS_LAMBDA(const size_type i, int& index) { + if(recvcounts_d(getProcsFrom(i)) < 0) { + index = i+1; + } + }, overflow); + + // totalPacketsFrom_i is converted down to int, so make sure it can be + // represented + TEUCHOS_TEST_FOR_EXCEPTION(overflow, std::logic_error, + "Tpetra::Distributor::doPostsKokkos(4 args, Kokkos): " + "Recv count for receive " + << overflow-1 << " is too large " + "to be represented as int."); + + Kokkos::deep_copy(sendcounts, sendcounts_d); + Kokkos::deep_copy(sdispls, sdispls_d); + Kokkos::deep_copy(recvcounts, recvcounts_d); + Kokkos::deep_copy(rdispls, rdispls_d); + + Teuchos::RCP> mpiComm = + Teuchos::rcp_dynamic_cast>(comm); + Teuchos::RCP> rawComm = + mpiComm->getRawMpiComm(); + using T = typename ExpView::non_const_value_type; + MPI_Datatype rawType = ::Tpetra::Details::MpiTypeTraits::getType(T()); + +#if defined(HAVE_TPETRACORE_MPI_ADVANCE) + if (Details::DISTRIBUTOR_MPIADVANCE_ALLTOALL == plan.getSendType()) { + MPIX_Comm 
*mpixComm = *plan.getMPIXComm(); + TEUCHOS_TEST_FOR_EXCEPTION(!mpixComm, std::runtime_error, + "MPIX_Comm is null in doPostsAllToAll \"" + << __FILE__ << ":" << __LINE__); + + const int err = MPIX_Alltoallv( + exports.data(), sendcounts.data(), sdispls.data(), rawType, + imports.data(), recvcounts.data(), rdispls.data(), rawType, mpixComm); + + TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, + "MPIX_Alltoallv failed with error \"" + << Teuchos::mpiErrorCodeToString(err) + << "\"."); + + return; + } +#endif // HAVE_TPETRACORE_MPI_ADVANCE + + const int err = MPI_Alltoallv( + exports.data(), sendcounts.data(), sdispls.data(), rawType, + imports.data(), recvcounts.data(), rdispls.data(), rawType, (*rawComm)()); + + TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, + "MPI_Alltoallv failed with error \"" + << Teuchos::mpiErrorCodeToString(err) + << "\"."); +} + #if defined(HAVE_TPETRACORE_MPI_ADVANCE) template void DistributorActor::doPostsNbrAllToAllV( @@ -840,6 +1019,117 @@ void DistributorActor::doPostsNbrAllToAllV( << Teuchos::mpiErrorCodeToString(err) << "\"."); } + +template +void DistributorActor::doPostsNbrAllToAllVKokkos( + const DistributorPlan &plan, const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID) { + TEUCHOS_TEST_FOR_EXCEPTION( + !plan.getIndicesTo().is_null(), std::runtime_error, + "Send Type=\"Alltoall\" only works for fast-path communication."); + + const Teuchos_Ordinal numSends = plan.getProcsTo().size(); + const Teuchos_Ordinal numRecvs = plan.getProcsFrom().size(); + + auto comm = plan.getComm(); + Kokkos::View sendcounts("sendcounts", comm->getSize()); + Kokkos::View sdispls("sdispls", comm->getSize()); + Kokkos::View recvcounts("recvcounts", comm->getSize()); + Kokkos::View rdispls("rdispls", comm->getSize()); + + auto sendcounts_d = Kokkos::create_mirror_view(ExpExecSpace(), sendcounts); + auto sdispls_d = 
Kokkos::create_mirror_view(ExpExecSpace(), sdispls); + auto recvcounts_d = Kokkos::create_mirror_view(ImpExecSpace(), recvcounts); + auto rdispls_d = Kokkos::create_mirror_view(ImpExecSpace(), rdispls); + + auto getStartsTo = Kokkos::Compat::getKokkosViewDeepCopy(plan.getStartsTo()); + auto getLengthsTo = Kokkos::Compat::getKokkosViewDeepCopy(plan.getLengthsTo()); + + Teuchos::RCP> mpiComm = + Teuchos::rcp_dynamic_cast>(comm); + Teuchos::RCP> rawComm = + mpiComm->getRawMpiComm(); + using T = typename ExpView::non_const_value_type; + using ExpExecSpace = typename ExpPacketsView::execution_space; + using ImpExecSpace = typename ImpPacketsView::execution_space; + MPI_Datatype rawType = ::Tpetra::Details::MpiTypeTraits::getType(T()); + + // unlike standard alltoall, entry `i` in sdispls and sendcounts + // refer to the ith participating rank, rather than rank i + Kokkos::parallel_scan(Kokkos::RangePolicy(0, numSends), KOKKOS_LAMBDA(const Teuchos_Ordinal pp, size_t& curPKToffset, bool is_final) { + sdispls_d(pp) = curPKToffset; + size_t numPackets = 0; + for (size_t j = getStartsTo(pp); j < getStartsTo(pp) + getLengthsTo(pp); ++j) { + numPackets += numExportPacketsPerLID(j); + } + sendcounts_d(pp) = static_cast(numPackets); + curPKToffset += numPackets; + }); + + int overflow; + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, numSends), KOKKOS_LAMBDA(const Teuchos_Ordinal pp, int& index) { + if(sendcounts_d(pp) < 0) { + index = i+1; + } + }, overflow); + + // numPackets is converted down to int, so make sure it can be represented + TEUCHOS_TEST_FOR_EXCEPTION(overflow, std::logic_error, + "Tpetra::Distributor::doPostsKokkos(4 args, Kokkos): " + "Send count for send " + << overflow-1 << " is too large " + "to be represented as int."); + + auto getLengthsFrom = Kokkos::Compat::getKokkosViewDeepCopy(plan.getLengthsFrom()); + + Kokkos::View curLIDoffset("curLIDoffset", numRecvs); + Kokkos::parallel_scan(Kokkos::RangePolicy(0, numRecvs), KOKKOS_LAMBDA(const Teuchos_Ordinal i, 
size_t& offset, bool is_final) { + if(is_final) curLIDoffset(i) = offset; + offset += getLengthsFrom(i); + }); + + Kokkos::parallel_scan(Kokkos::RangePolicy(0, numRecvs), KOKKOS_LAMBDA(const Teuchos_Ordinal i, size_t& curBufferOffset, bool is_final) { + rdispls_d(i) = curBufferOffset; + size_t totalPacketsFrom_i = 0; + for(size_t j = 0; j < getLengthsFrom(i); j++) { + totalPacketsFrom_i += numImportPacketsPerLID(curLIDoffset(i) + j); + } + + recvcounts_d(i) = static_cast(totalPacketsFrom_i); + curBufferOffset += totalPacketsFrom_i; + }); + + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, numRecvs), KOKKOS_LAMBDA(const Teuchos_Ordinal i, int& index) { + if(recvcounts_d(pp) < 0) { + index = i+1; + } + }, overflow); + + // totalPacketsFrom_i is converted down to int, so make sure it can be + // represented + TEUCHOS_TEST_FOR_EXCEPTION(overflow, std::logic_error, + "Tpetra::Distributor::doPostsKokkos(4 args, Kokkos): " + "Recv count for receive " + << overflow-1 << ") is too large " + "to be represented as int."); + + Kokkos::deep_copy(sendcounts, sendcounts_d); + Kokkos::deep_copy(sdispls, sdispls_d); + Kokkos::deep_copy(recvcounts, recvcounts_d); + Kokkos::deep_copy(rdispls, rdispls_d); + + MPIX_Comm *mpixComm = *plan.getMPIXComm(); + const int err = MPIX_Neighbor_alltoallv( + exports.data(), sendcounts.data(), sdispls.data(), rawType, + imports.data(), recvcounts.data(), rdispls.data(), rawType, mpixComm); + + TEUCHOS_TEST_FOR_EXCEPTION(err != MPI_SUCCESS, std::runtime_error, + "MPIX_Neighbor_alltoallv failed with error \"" + << Teuchos::mpiErrorCodeToString(err) + << "\"."); +} #endif // HAVE_TPETRACORE_MPI_ADVANCE #endif // HAVE_TPETRA_MPI // clang-format off @@ -1107,16 +1397,16 @@ void DistributorActor::doPosts(const DistributorPlan& plan, // This buffer is long enough for only one message at a time. // Thus, we use DISTRIBUTOR_SEND always in this case, regardless - // of sendType requested by user. + // of sendType requested by user. 
// This code path formerly errored out with message: - // Tpetra::Distributor::doPosts(4-arg, Kokkos): + // Tpetra::Distributor::doPosts(4-arg, Kokkos): // The "send buffer" code path // doesn't currently work with nonblocking sends. // Now, we opt to just do the communication in a way that works. #ifdef HAVE_TPETRA_DEBUG if (sendType != Details::DISTRIBUTOR_SEND) { if (plan.getComm()->getRank() == 0) - std::cout << "The requested Tpetra send type " + std::cout << "The requested Tpetra send type " << DistributorSendTypeEnumToString(sendType) << " requires Distributor data to be ordered by" << " the receiving processor rank. Since these" @@ -1125,7 +1415,7 @@ void DistributorActor::doPosts(const DistributorPlan& plan, } #endif - Kokkos::View sendArray ("sendArray", + Kokkos::View sendArray ("sendArray", maxNumPackets); Array indicesOffsets (numExportPacketsPerLID.size(), 0); @@ -1180,6 +1470,360 @@ void DistributorActor::doPosts(const DistributorPlan& plan, } } +template +void DistributorActor::doPostsKokkos(const DistributorPlan& plan, + const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID) +{ + static_assert(areKokkosViews, + "Data arrays for DistributorActor::doPostsKokkos must be Kokkos::Views"); + static_assert(areKokkosViews, + "Num packets arrays for DistributorActor::doPostsKokkos must be Kokkos::Views"); + using Teuchos::Array; + using Teuchos::as; + using Teuchos::ireceive; + using Teuchos::isend; + using Teuchos::send; + using Teuchos::TypeNameTraits; + using std::endl; + using Kokkos::Compat::create_const_view; + using Kokkos::Compat::create_view; + using Kokkos::Compat::subview_offset; + using Kokkos::Compat::deep_copy_offset; + using ExpExecSpace = typename ExpPacketsView::execution_space; + using ImpExecSpace = typename ImpPacketsView::execution_space; + typedef Array::size_type size_type; + typedef ExpView exports_view_type; + typedef ImpView 
imports_view_type; + +#ifdef KOKKOS_ENABLE_CUDA + static_assert (! std::is_same::value && + ! std::is_same::value, + "Please do not use Tpetra::Distributor with UVM " + "allocations. See GitHub issue #1088."); +#endif // KOKKOS_ENABLE_CUDA + +#ifdef KOKKOS_ENABLE_SYCL + static_assert (! std::is_same::value && + ! std::is_same::value, + "Please do not use Tpetra::Distributor with SharedUSM " + "allocations. See GitHub issue #1088 (corresponding to CUDA)."); +#endif // KOKKOS_ENABLE_SYCL + +#ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS + Teuchos::TimeMonitor timeMon (*timer_doPosts4KV_); +#endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS + + // Run-time configurable parameters that come from the input + // ParameterList set by setParameterList(). + const Details::EDistributorSendType sendType = plan.getSendType(); + +#ifdef HAVE_TPETRA_MPI + // All-to-all communication layout is quite different from + // point-to-point, so we handle it separately. + if (sendType == Details::DISTRIBUTOR_ALLTOALL) { + doPostsAllToAllKokkos(plan, exports, numExportPacketsPerLID, imports, numImportPacketsPerLID); + return; + } +#ifdef HAVE_TPETRACORE_MPI_ADVANCE + else if (sendType == Details::DISTRIBUTOR_MPIADVANCE_ALLTOALL) + { + doPostsAllToAllKokkos(plan, exports, numExportPacketsPerLID, imports, numImportPacketsPerLID); + return; + } else if (sendType == Details::DISTRIBUTOR_MPIADVANCE_NBRALLTOALLV) { + doPostsNbrAllToAllVKokkos(plan, exports, numExportPacketsPerLID, imports, numImportPacketsPerLID); + return; + } +#endif + +#else // HAVE_TPETRA_MPI + if (plan.hasSelfMessage()) { + size_t packetsPerSend; + Kokkos::parallel_reduce(Kokkos::RangePolicy(plan.getStartsTo()[0], plan.getStartsTo()[0]+plan.getLengthsTo()[0]), KOKKOS_LAMBDA(const size_t j, size_t& packets) { + packets += numExportPacketsPerLID(j); + }, packetsPerSend); + + deep_copy_offset(imports, exports, (size_t)0, (size_t)0, packetsPerSend); + } +#endif // HAVE_TPETRA_MPI + + const int myProcID = plan.getComm()->getRank (); + size_t 
selfReceiveOffset = 0; + +#ifdef HAVE_TPETRA_DEBUG + // Different messages may have different numbers of packets. + size_t totalNumImportPackets = Kokkos::Experimental::reduce(ImpExecSpace(), numImportPacketsPerLID); + TEUCHOS_TEST_FOR_EXCEPTION( + imports.extent (0) < totalNumImportPackets, std::runtime_error, + "Tpetra::Distributor::doPostsKokkos(4 args, Kokkos): The 'imports' array must have " + "enough entries to hold the expected number of import packets. " + "imports.extent(0) = " << imports.extent (0) << " < " + "totalNumImportPackets = " << totalNumImportPackets << "."); + TEUCHOS_TEST_FOR_EXCEPTION + (requests_.size () != 0, std::logic_error, "Tpetra::Distributor::" + "doPostsKokkos(4 args, Kokkos): Process " << myProcID << ": requests_.size () = " + << requests_.size () << " != 0."); +#endif // HAVE_TPETRA_DEBUG + // Distributor uses requests_.size() as the number of outstanding + // nonblocking message requests, so we resize to zero to maintain + // this invariant. + // + // getNumReceives() does _not_ include the self message, if there is + // one. Here, we do actually send a message to ourselves, so we + // include any self message in the "actual" number of receives to + // post. + // + // NOTE (mfh 19 Mar 2012): Epetra_MpiDistributor::DoPosts() + // doesn't (re)allocate its array of requests. That happens in + // CreateFromSends(), ComputeRecvs_(), DoReversePosts() (on + // demand), or Resize_(). + const size_type actualNumReceives = as (plan.getNumReceives()) + + as (plan.hasSelfMessage() ? 1 : 0); + requests_.resize (0); + + // Post the nonblocking receives. It's common MPI wisdom to post + // receives before sends. In MPI terms, this means favoring + // adding to the "posted queue" (of receive requests) over adding + // to the "unexpected queue" (of arrived messages not yet matched + // with a receive). 
+ { +#ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS + Teuchos::TimeMonitor timeMonRecvs (*timer_doPosts4KV_recvs_); +#endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS + + size_t curBufferOffset = 0; + size_t curLIDoffset = 0; + for (size_type i = 0; i < actualNumReceives; ++i) { + size_t totalPacketsFrom_i = 0; + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, plan.getLengthsFrom()[i]), KOKKOS_LAMBDA(const size_t j, size_t& total) { + total += numImportPacketsPerLID(curLIDoffset+j); + }, totalPacketsFrom_i); + // totalPacketsFrom_i is converted down to int, so make sure it can be represented + TEUCHOS_TEST_FOR_EXCEPTION(totalPacketsFrom_i > size_t(INT_MAX), + std::logic_error, "Tpetra::Distributor::doPostsKokkos(3 args, Kokkos): " + "Recv count for receive " << i << " (" << totalPacketsFrom_i << ") is too large " + "to be represented as int."); + curLIDoffset += plan.getLengthsFrom()[i]; + if (plan.getProcsFrom()[i] != myProcID && totalPacketsFrom_i) { + // If my process is receiving these packet(s) from another + // process (not a self-receive), and if there is at least + // one packet to receive: + // + // 1. Set up the persisting view (recvBuf) into the imports + // array, given the offset and size (total number of + // packets from process getProcsFrom()[i]). + // 2. Start the Irecv and save the resulting request. + imports_view_type recvBuf = + subview_offset (imports, curBufferOffset, totalPacketsFrom_i); + requests_.push_back (ireceive (recvBuf, plan.getProcsFrom()[i], + mpiTag_, *plan.getComm())); + } + else { // Receiving these packet(s) from myself + selfReceiveOffset = curBufferOffset; // Remember the offset + } + curBufferOffset += totalPacketsFrom_i; + } + } + +#ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS + Teuchos::TimeMonitor timeMonSends (*timer_doPosts4KV_sends_); +#endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS + + // setup views containing starting-offsets into exports for each send, + // and num-packets-to-send for each send. 
+ Kokkos::View sendPacketOffsets("sendPacketOffsets", plan.getNumSends()); + Kokkos::View packetsPerSend("packetsPerSend", plan.getNumSends()); + auto sendPacketOffsets_d = Kokkos::create_mirror_view(ExpExecSpace(), sendPacketOffsets); + auto packetsPerSend_d = Kokkos::create_mirror_view(ExpExecSpace(), packetsPerSend); + + auto starts = Kokkos::Compat::getKokkosViewDeepCopy(plan.getStartsTo()); + auto lengths = Kokkos::Compat::getKokkosViewDeepCopy(plan.getLengthsTo()); + + Kokkos::parallel_scan(Kokkos::RangePolicy(0, plan.getNumSends()), KOKKOS_LAMBDA(const size_t pp, size_t& curPKToffset, bool final_pass) { + if(final_pass) sendPacketOffsets_d(pp) = curPKToffset; + size_t numPackets = 0; + for(size_t j = starts(pp); j < starts(pp) + lengths(pp); j++) { + numPackets += numExportPacketsPerLID(j); + } + if(final_pass) packetsPerSend_d(pp) = numPackets; + curPKToffset += numPackets; + }); + + size_t maxNumPackets; + Kokkos::parallel_reduce(Kokkos::RangePolicy(0, plan.getNumSends()), KOKKOS_LAMBDA(const size_t pp, size_t& max) { + if(packetsPerSend_d(pp) > max) { + max = packetsPerSend_d(pp); + } + }, Kokkos::Max(maxNumPackets)); + + // numPackets will be used as a message length, so make sure it can be represented as int + TEUCHOS_TEST_FOR_EXCEPTION(maxNumPackets > size_t(INT_MAX), + std::logic_error, "Tpetra::Distributor::doPostsKokkos(4 args, Kokkos): " + "numPackets = " << maxNumPackets << " is too large " + "to be represented as int."); + + Kokkos::deep_copy(sendPacketOffsets, sendPacketOffsets_d); + Kokkos::deep_copy(packetsPerSend, packetsPerSend_d); + + // setup scan through getProcsTo() list starting with higher numbered procs + // (should help balance message traffic) + size_t numBlocks = plan.getNumSends() + plan.hasSelfMessage(); + size_t procIndex = 0; + while ((procIndex < numBlocks) && (plan.getProcsTo()[procIndex] < myProcID)) { + ++procIndex; + } + if (procIndex == numBlocks) { + procIndex = 0; + } + + size_t selfNum = 0; + size_t selfIndex = 0; + if 
(plan.getIndicesTo().is_null()) { + +#ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS + Teuchos::TimeMonitor timeMonSends2 (*timer_doPosts4KV_sends_fast_); +#endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS + + // Data are already blocked (laid out) by process, so we don't + // need a separate send buffer (besides the exports array). + for (size_t i = 0; i < numBlocks; ++i) { + size_t p = i + procIndex; + if (p > (numBlocks - 1)) { + p -= numBlocks; + } + + if (plan.getProcsTo()[p] != myProcID && packetsPerSend[p] > 0) { + exports_view_type tmpSend = + subview_offset(exports, sendPacketOffsets[p], packetsPerSend[p]); + + if (sendType == Details::DISTRIBUTOR_ISEND) { + exports_view_type tmpSendBuf = + subview_offset (exports, sendPacketOffsets[p], packetsPerSend[p]); + requests_.push_back (isend (tmpSendBuf, plan.getProcsTo()[p], + mpiTag_, *plan.getComm())); + } + else { // DISTRIBUTOR_SEND + send (tmpSend, + as (tmpSend.size ()), + plan.getProcsTo()[p], mpiTag_, *plan.getComm()); + } + } + else { // "Sending" the message to myself + selfNum = p; + } + } + + if (plan.hasSelfMessage()) { + deep_copy_offset(imports, exports, selfReceiveOffset, + sendPacketOffsets[selfNum], packetsPerSend[selfNum]); + } + } + else { // data are not blocked by proc, use send buffer + +#ifdef HAVE_TPETRA_DISTRIBUTOR_TIMINGS + Teuchos::TimeMonitor timeMonSends2 (*timer_doPosts4KV_sends_slow_); +#endif // HAVE_TPETRA_DISTRIBUTOR_TIMINGS + + // FIXME (mfh 05 Mar 2013) This may be broken for Isend. + typedef typename ExpView::non_const_value_type Packet; + typedef typename ExpView::array_layout Layout; + typedef typename ExpView::device_type Device; + typedef typename ExpView::memory_traits Mem; + + // This buffer is long enough for only one message at a time. + // Thus, we use DISTRIBUTOR_SEND always in this case, regardless + // of sendType requested by user. 
+ // This code path formerly errored out with message: + // Tpetra::Distributor::doPostsKokkos(4-arg, Kokkos): + // The "send buffer" code path + // doesn't currently work with nonblocking sends. + // Now, we opt to just do the communication in a way that works. +#ifdef HAVE_TPETRA_DEBUG + if (sendType != Details::DISTRIBUTOR_SEND) { + if (plan.getComm()->getRank() == 0) + std::cout << "The requested Tpetra send type " + << DistributorSendTypeEnumToString(sendType) + << " requires Distributor data to be ordered by" + << " the receiving processor rank. Since these" + << " data are not ordered, Tpetra will use Send" + << " instead." << std::endl; + } +#endif + + Kokkos::View sendArray ("sendArray", + maxNumPackets); + + Kokkos::View indicesOffsets ("indicesOffsets", numExportPacketsPerLID.extent(0)); + size_t ioffset = 0; + Kokkos::parallel_scan(Kokkos::RangePolicy(0, numExportPacketsPerLID.extent(0)), KOKKOS_LAMBDA(const size_t j, size_t& offset, bool is_final) { + if(is_final) indicesOffsets(j) = offset; + offset += numExportPacketsPerLID(j); + }, ioffset); + + for (size_t i = 0; i < numBlocks; ++i) { + size_t p = i + procIndex; + if (p > (numBlocks - 1)) { + p -= numBlocks; + } + + if (plan.getProcsTo()[p] != myProcID) { + size_t j = plan.getStartsTo()[p]; + size_t numPacketsTo_p = 0; + //mirror in case execspaces are different + auto sendArrayMirror = Kokkos::create_mirror_view_and_copy(ExpExecSpace(), sendArray); + auto exportsMirror = Kokkos::create_mirror_view_and_copy(ExpExecSpace(), exports); + Kokkos::parallel_scan(Kokkos::RangePolicy(0, plan.getLengthsTo()[p]), KOKKOS_LAMBDA(const size_t k, size_t& offset, bool is_final) { + if(is_final) { + const size_t dst_end = offset + numExportPacketsPerLID(j + k); + const size_t src_end = indicesOffsets(j + k) + numExportPacketsPerLID(j + k); + auto dst_sub = Kokkos::subview(sendArrayMirror, Kokkos::make_pair(offset, dst_end)); + auto src_sub = Kokkos::subview(exportsMirror, Kokkos::make_pair(indicesOffsets(j + k), 
src_end)); + Kokkos::Experimental::local_deep_copy(dst_sub, src_sub); + } + offset += numExportPacketsPerLID(j + k); + }, numPacketsTo_p); + Kokkos::deep_copy(sendArray, sendArrayMirror); + typename ExpView::execution_space().fence(); + + if (numPacketsTo_p > 0) { + ImpView tmpSend = + subview_offset(sendArray, size_t(0), numPacketsTo_p); + + send (tmpSend, + as (tmpSend.size ()), + plan.getProcsTo()[p], mpiTag_, *plan.getComm()); + } + } + else { // "Sending" the message to myself + selfNum = p; + selfIndex = plan.getStartsTo()[p]; + } + } + + if (plan.hasSelfMessage()) { + //mirror in case execspaces are different + auto importsMirror = Kokkos::create_mirror_view_and_copy(ExpExecSpace(), imports); + auto exportsMirror = Kokkos::create_mirror_view_and_copy(ExpExecSpace(), exports); + size_t temp; + Kokkos::parallel_scan(Kokkos::RangePolicy(0, plan.getLengthsTo()[selfNum]), KOKKOS_LAMBDA(const size_t k, size_t& offset, bool is_final) { + if(is_final) { + const size_t dst_end = selfReceiveOffset + offset + numExportPacketsPerLID(selfIndex + k); + const size_t src_end = indicesOffsets(selfIndex + k) + numExportPacketsPerLID(selfIndex + k); + auto dst_sub = Kokkos::subview(importsMirror, Kokkos::make_pair(selfReceiveOffset + offset, dst_end)); + auto src_sub = Kokkos::subview(exportsMirror, Kokkos::make_pair(indicesOffsets(selfIndex + k), src_end)); + Kokkos::Experimental::local_deep_copy(dst_sub, src_sub); + } + offset += numExportPacketsPerLID(selfIndex + k); + }, temp); + Kokkos::deep_copy(imports, importsMirror); + selfIndex += plan.getLengthsTo()[selfNum]; + selfReceiveOffset += temp; + } + } +} + } } diff --git a/packages/tpetra/core/src/Tpetra_Distributor.hpp b/packages/tpetra/core/src/Tpetra_Distributor.hpp index c0c31a0f8b54..a8beece8ee9d 100644 --- a/packages/tpetra/core/src/Tpetra_Distributor.hpp +++ b/packages/tpetra/core/src/Tpetra_Distributor.hpp @@ -23,6 +23,7 @@ #include "KokkosCompat_View.hpp" #include "Kokkos_Core.hpp" #include 
"Kokkos_TeuchosCommAdapters.hpp" +#include "Kokkos_StdAlgorithms.hpp" #include #include #include @@ -426,6 +427,13 @@ namespace Tpetra { const ImpView &imports, const Teuchos::ArrayView& numImportPacketsPerLID); + template + typename std::enable_if<(Kokkos::is_view::value && Kokkos::is_view::value)>::type + doPostsAndWaitsKokkos (const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID); + /// \brief Post the data for a forward plan, but do not execute the waits yet. /// /// Call this overload when you have the same number of Packets @@ -480,6 +488,13 @@ namespace Tpetra { const Teuchos::ArrayView& numExportPacketsPerLID, const ImpView &imports, const Teuchos::ArrayView& numImportPacketsPerLID); + + template + typename std::enable_if<(Kokkos::is_view::value && Kokkos::is_view::value)>::type + doPostsKokkos (const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID); /// \brief Execute the reverse communication plan. /// @@ -501,7 +516,14 @@ namespace Tpetra { const Teuchos::ArrayView& numExportPacketsPerLID, const ImpView &imports, const Teuchos::ArrayView& numImportPacketsPerLID); - + + template + typename std::enable_if<(Kokkos::is_view::value && Kokkos::is_view::value)>::type + doReversePostsAndWaitsKokkos (const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID); + /// \brief Post the data for a reverse plan, but do not execute the waits yet. 
/// /// This method takes the same arguments as the three-argument @@ -522,7 +544,14 @@ namespace Tpetra { const Teuchos::ArrayView& numExportPacketsPerLID, const ImpView &imports, const Teuchos::ArrayView& numImportPacketsPerLID); - + + template + typename std::enable_if<(Kokkos::is_view::value && Kokkos::is_view::value)>::type + doReversePostsKokkos (const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID); + //@} //! @name Implementation of Teuchos::Describable //@{ @@ -640,6 +669,16 @@ namespace Tpetra { actor_.doPostsAndWaits(plan_, exports, numExportPacketsPerLID, imports, numImportPacketsPerLID); } + template + typename std::enable_if<(Kokkos::is_view::value && Kokkos::is_view::value)>::type + Distributor:: + doPostsAndWaitsKokkos (const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID) + { + actor_.doPostsAndWaitsKokkos(plan_, exports, numExportPacketsPerLID, imports, numImportPacketsPerLID); + } template typename std::enable_if<(Kokkos::is_view::value && Kokkos::is_view::value)>::type @@ -661,6 +700,17 @@ namespace Tpetra { { actor_.doPosts(plan_, exports, numExportPacketsPerLID, imports, numImportPacketsPerLID); } + + template + typename std::enable_if<(Kokkos::is_view::value && Kokkos::is_view::value)>::type + Distributor:: + doPostsKokkos (const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID) + { + actor_.doPostsKokkos(plan_, exports, numExportPacketsPerLID, imports, numImportPacketsPerLID); + } template typename std::enable_if<(Kokkos::is_view::value && Kokkos::is_view::value)>::type @@ -685,6 +735,19 @@ namespace Tpetra { numImportPacketsPerLID); doReverseWaits (); } + + template + typename std::enable_if<(Kokkos::is_view::value && Kokkos::is_view::value)>::type + Distributor:: + 
doReversePostsAndWaitsKokkos (const ExpView& exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView& imports, + const ImpPacketsView &numImportPacketsPerLID) + { + doReversePostsKokkos (exports, numExportPacketsPerLID, imports, + numImportPacketsPerLID); + doReverseWaits (); + } template typename std::enable_if<(Kokkos::is_view::value && Kokkos::is_view::value)>::type @@ -723,7 +786,27 @@ namespace Tpetra { reverseDistributor_->doPosts (exports, numExportPacketsPerLID, imports, numImportPacketsPerLID); } - + + template + typename std::enable_if<(Kokkos::is_view::value && Kokkos::is_view::value)>::type + Distributor:: + doReversePostsKokkos (const ExpView &exports, + const ExpPacketsView &numExportPacketsPerLID, + const ImpView &imports, + const ImpPacketsView &numImportPacketsPerLID) + { + // FIXME (mfh 29 Mar 2012) WHY? + TEUCHOS_TEST_FOR_EXCEPTION( + ! plan_.getIndicesTo().is_null(), std::runtime_error, + "Tpetra::Distributor::doReversePosts(3 args): Can only do " + "reverse communication when original data are blocked by process."); + if (reverseDistributor_.is_null ()) { + createReverseDistributor (); + } + reverseDistributor_->doPostsKokkos (exports, numExportPacketsPerLID, + imports, numImportPacketsPerLID); + } + template void Distributor:: computeSends(const Teuchos::ArrayView& importGIDs, diff --git a/packages/tpetra/core/src/Tpetra_FECrsGraph_decl.hpp b/packages/tpetra/core/src/Tpetra_FECrsGraph_decl.hpp index d14f6b3da408..adb8c325d2f8 100644 --- a/packages/tpetra/core/src/Tpetra_FECrsGraph_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_FECrsGraph_decl.hpp @@ -13,6 +13,7 @@ /// \file Tpetra_FECrsGraph_decl.hpp /// \brief Declaration of the Tpetra::FECrsGraph class +#include "Tpetra_ConfigDefs.hpp" #include "Tpetra_FECrsGraph_fwd.hpp" #include "Tpetra_CrsGraph_decl.hpp" @@ -548,25 +549,13 @@ namespace Tpetra { // template // Teuchos::RCP makeOwnedColMap (ViewType ownedGraphIndices); - // Enum for activity - enum FEWhichActive - { - 
FE_ACTIVE_OWNED, - FE_ACTIVE_OWNED_PLUS_SHARED - }; - - enum class FillState - { - open, // matrix is "open". Values can freely inserted - closed - }; - Teuchos::RCP fillState_; + Teuchos::RCP fillState_; // This is whichever graph isn't currently active Teuchos::RCP > inactiveCrsGraph_; // This is in RCP to make shallow copies of the FECrsGraph work correctly - Teuchos::RCP activeCrsGraph_; + Teuchos::RCP activeCrsGraph_; // The importer between the rowmaps of the two graphs Teuchos::RCP ownedRowsImporter_; diff --git a/packages/tpetra/core/src/Tpetra_FECrsGraph_def.hpp b/packages/tpetra/core/src/Tpetra_FECrsGraph_def.hpp index c8eb4ab9dfb9..6ef09873bea7 100644 --- a/packages/tpetra/core/src/Tpetra_FECrsGraph_def.hpp +++ b/packages/tpetra/core/src/Tpetra_FECrsGraph_def.hpp @@ -182,8 +182,8 @@ setup(const Teuchos::RCP & ownedRowMap, if(ownedPlusSharedColMap.is_null()) this->allocateIndices(GlobalIndices); else this->allocateIndices(LocalIndices); - activeCrsGraph_ = Teuchos::rcp(new FEWhichActive(FE_ACTIVE_OWNED_PLUS_SHARED)); - fillState_ = Teuchos::rcp(new FillState(FillState::closed)); + activeCrsGraph_ = Teuchos::rcp(new FE::WhichActive(FE::ACTIVE_OWNED_PLUS_SHARED)); + fillState_ = Teuchos::rcp(new FE::FillState(FE::FillState::closed)); // Use a very strong map equivalence check bool maps_are_the_same = ownedRowMap->isSameAs(*ownedPlusSharedRowMap); @@ -221,7 +221,7 @@ setup(const Teuchos::RCP & ownedRowMap, template void FECrsGraph::doOwnedPlusSharedToOwned(const CombineMode CM) { const char tfecfFuncName[] = "FECrsGraph::doOwnedPlusSharedToOwned(CombineMode): "; - if(!ownedRowsImporter_.is_null() && *activeCrsGraph_ == FE_ACTIVE_OWNED_PLUS_SHARED) { + if(!ownedRowsImporter_.is_null() && *activeCrsGraph_ == FE::ACTIVE_OWNED_PLUS_SHARED) { Teuchos::RCP ownedRowMap = ownedRowsImporter_->getSourceMap(); // Do a self-export in "restricted mode" @@ -296,10 +296,10 @@ void FECrsGraph::doOwnedToOwnedPlusShared(con template void FECrsGraph::switchActiveCrsGraph() { - 
if(*activeCrsGraph_ == FE_ACTIVE_OWNED_PLUS_SHARED) - *activeCrsGraph_ = FE_ACTIVE_OWNED; + if(*activeCrsGraph_ == FE::ACTIVE_OWNED_PLUS_SHARED) + *activeCrsGraph_ = FE::ACTIVE_OWNED; else - *activeCrsGraph_ = FE_ACTIVE_OWNED_PLUS_SHARED; + *activeCrsGraph_ = FE::ACTIVE_OWNED_PLUS_SHARED; if(inactiveCrsGraph_.is_null()) return; @@ -318,10 +318,10 @@ void FECrsGraph::endFill( doing finite differences, things are easy --- just call fillComplete(). If, we are in the parallel FE case, then: - Precondition: FE_ACTIVE_OWNED_PLUS_SHARED mode + Precondition: FE::ACTIVE_OWNED_PLUS_SHARED mode Postconditions: - 1) FE_ACTIVE_OWNED mode + 1) FE::ACTIVE_OWNED mode 2) The OWNED graph has been fillCompleted with an Aztec-compatible column map 3) rowptr & (local) colinds are aliased between the two graphs 4) The OWNED_PLUS_SHARED graph has been fillCompleted with a column map whose first chunk @@ -333,7 +333,7 @@ void FECrsGraph::endFill( */ // Precondition const char tfecfFuncName[] = "FECrsGraph::endFill(domainMap, rangeMap): "; - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(*activeCrsGraph_ != FE_ACTIVE_OWNED_PLUS_SHARED,std::runtime_error, "must be in owned+shared mode."); + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(*activeCrsGraph_ != FE::ACTIVE_OWNED_PLUS_SHARED,std::runtime_error, "must be in owned+shared mode."); if(ownedRowsImporter_.is_null()) { // The easy case: One graph switchActiveCrsGraph(); @@ -365,7 +365,7 @@ void FECrsGraph::beginFill() { // Unlike FECrsMatrix and FEMultiVector, we do not allow you to call beginFill() after calling endFill() // So we throw an exception if you're in owned mode - TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(*activeCrsGraph_ == FE_ACTIVE_OWNED,std::runtime_error, "can only be called once."); + TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(*activeCrsGraph_ == FE::ACTIVE_OWNED,std::runtime_error, "can only be called once."); } @@ -373,11 +373,11 @@ template void FECrsGraph::beginAssembly() { const char tfecfFuncName[] = "FECrsGraph::beginAssembly: "; 
TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( - *fillState_ != FillState::closed, + *fillState_ != FE::FillState::closed, std::runtime_error, "Cannot beginAssembly, matrix is not in a closed state" ); - *fillState_ = FillState::open; + *fillState_ = FE::FillState::open; this->beginFill(); } @@ -385,11 +385,11 @@ template void FECrsGraph::endAssembly() { const char tfecfFuncName[] = "FECrsGraph::endAssembly: "; TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( - *fillState_ != FillState::open, + *fillState_ != FE::FillState::open, std::logic_error, "Cannot endAssembly, matrix is not open to fill but is closed." ); - *fillState_ = FillState::closed; + *fillState_ = FE::FillState::closed; this->endFill(); } @@ -400,11 +400,11 @@ void FECrsGraph::endAssembly( { const char tfecfFuncName[] = "FECrsGraph::endAssembly: "; TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( - *fillState_ != FillState::open, + *fillState_ != FE::FillState::open, std::logic_error, "Cannot endAssembly, matrix is not open to fill but is closed." ); - *fillState_ = FillState::closed; + *fillState_ = FE::FillState::closed; this->endFill(domainMap, rangeMap); } @@ -428,7 +428,7 @@ FECrsGraph::insertGlobalIndicesImpl ( ){ const char tfecfFuncName[] = "FECrsGraph::insertGlobalIndices: "; TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( - *fillState_ != FillState::open, + *fillState_ != FE::FillState::open, std::logic_error, "Cannot replace global values, matrix is not open to fill but is closed." ); @@ -445,7 +445,7 @@ FECrsGraph::insertGlobalIndicesImpl ( ){ const char tfecfFuncName[] = "FECrsGraph::insertGlobalIndices: "; TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( - *fillState_ != FillState::open, + *fillState_ != FE::FillState::open, std::logic_error, "Cannot replace global values, matrix is not open to fill but is closed." 
); @@ -461,7 +461,7 @@ FECrsGraph::insertLocalIndicesImpl ( ){ const char tfecfFuncName[] = "FECrsGraph::insertLocalIndices: "; TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( - *fillState_ != FillState::open, + *fillState_ != FE::FillState::open, std::logic_error, "Cannot replace global values, matrix is not open to fill but is closed." ); diff --git a/packages/tpetra/core/src/Tpetra_FECrsMatrix_decl.hpp b/packages/tpetra/core/src/Tpetra_FECrsMatrix_decl.hpp index 73d9db1d1b1c..2bf93ae03896 100644 --- a/packages/tpetra/core/src/Tpetra_FECrsMatrix_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_FECrsMatrix_decl.hpp @@ -14,6 +14,7 @@ /// \file Tpetra_FECrsMatrix_decl.hpp /// \brief Declaration of the Tpetra::FECrsMatrix class +#include "Tpetra_ConfigDefs.hpp" #include "Tpetra_CrsMatrix_decl.hpp" #include "Tpetra_FECrsGraph.hpp" @@ -292,12 +293,6 @@ class FECrsMatrix : //@} private: - // Enum for activity - enum FEWhichActive - { - FE_ACTIVE_OWNED, - FE_ACTIVE_OWNED_PLUS_SHARED - }; // The FECrsGraph from construction time Teuchos::RCP > feGraph_; @@ -305,15 +300,9 @@ class FECrsMatrix : // This is whichever multivector isn't currently active Teuchos::RCP > inactiveCrsMatrix_; // This is in RCP to make shallow copies of the FECrsMatrix work correctly - Teuchos::RCP activeCrsMatrix_; - - enum class FillState - { - open, // matrix is "open". Values can freely summed in to and replaced - modify, // matrix is open for modification. *local* values can be replaced - closed - }; - Teuchos::RCP fillState_; + Teuchos::RCP activeCrsMatrix_; + + Teuchos::RCP fillState_; }; // end class FECrsMatrix diff --git a/packages/tpetra/core/src/Tpetra_FECrsMatrix_def.hpp b/packages/tpetra/core/src/Tpetra_FECrsMatrix_def.hpp index 012c8ec6b6a5..892db94b2fa7 100644 --- a/packages/tpetra/core/src/Tpetra_FECrsMatrix_def.hpp +++ b/packages/tpetra/core/src/Tpetra_FECrsMatrix_def.hpp @@ -36,8 +36,8 @@ FECrsMatrix(const Teuchos::RCP& graph, "fillComplete. 
In that case, you must call fillComplete on the graph " "again."); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC - ( *graph->activeCrsGraph_!= fe_crs_graph_type::FE_ACTIVE_OWNED,std::runtime_error, - "Input graph must be in FE_ACTIVE_OWNED mode when this constructor is called."); + ( *graph->activeCrsGraph_!= FE::ACTIVE_OWNED,std::runtime_error, + "Input graph must be in FE::ACTIVE_OWNED mode when this constructor is called."); bool start_owned = false; if (! params.is_null ()) { @@ -46,9 +46,9 @@ FECrsMatrix(const Teuchos::RCP& graph, } } if(start_owned) { - activeCrsMatrix_ = Teuchos::rcp(new FEWhichActive(FE_ACTIVE_OWNED)); + activeCrsMatrix_ = Teuchos::rcp(new FE::WhichActive(FE::ACTIVE_OWNED)); } else { - activeCrsMatrix_ = Teuchos::rcp(new FEWhichActive(FE_ACTIVE_OWNED_PLUS_SHARED)); + activeCrsMatrix_ = Teuchos::rcp(new FE::WhichActive(FE::ACTIVE_OWNED_PLUS_SHARED)); } // Make an "inactive" matrix, if we need to @@ -58,14 +58,14 @@ FECrsMatrix(const Teuchos::RCP& graph, inactiveCrsMatrix_ = Teuchos::rcp(new crs_matrix_type(*this,graph)); } - fillState_ = Teuchos::rcp(new FillState(FillState::closed)); + fillState_ = Teuchos::rcp(new FE::FillState(FE::FillState::closed)); } template void FECrsMatrix::doOwnedPlusSharedToOwned(const CombineMode CM) { - if(!inactiveCrsMatrix_.is_null() && *activeCrsMatrix_ == FE_ACTIVE_OWNED_PLUS_SHARED) { + if(!inactiveCrsMatrix_.is_null() && *activeCrsMatrix_ == FE::ACTIVE_OWNED_PLUS_SHARED) { // Do a self-export in "restricted mode" this->doExport(*this,*feGraph_->ownedRowsImporter_,CM,true); inactiveCrsMatrix_->fillComplete(); @@ -81,10 +81,10 @@ void FECrsMatrix::doOwnedToOwnedPlusS template void FECrsMatrix::switchActiveCrsMatrix() { - if(*activeCrsMatrix_ == FE_ACTIVE_OWNED_PLUS_SHARED) - *activeCrsMatrix_ = FE_ACTIVE_OWNED; + if(*activeCrsMatrix_ == FE::ACTIVE_OWNED_PLUS_SHARED) + *activeCrsMatrix_ = FE::ACTIVE_OWNED; else - *activeCrsMatrix_ = FE_ACTIVE_OWNED_PLUS_SHARED; + *activeCrsMatrix_ = FE::ACTIVE_OWNED_PLUS_SHARED; 
if(inactiveCrsMatrix_.is_null()) return; @@ -95,7 +95,7 @@ void FECrsMatrix::switchActiveCrsMatr template void FECrsMatrix::endFill() { - if(*activeCrsMatrix_ == FE_ACTIVE_OWNED_PLUS_SHARED) { + if(*activeCrsMatrix_ == FE::ACTIVE_OWNED_PLUS_SHARED) { doOwnedPlusSharedToOwned(Tpetra::ADD); switchActiveCrsMatrix(); } @@ -107,7 +107,7 @@ template void FECrsMatrix::beginFill() { // Note: This does not throw an error since the on construction, the FECRS is in overlap mode. Ergo, calling beginFill(), // like one should expect to do in a rational universe, should not cause an error. - if(*activeCrsMatrix_ == FE_ACTIVE_OWNED) { + if(*activeCrsMatrix_ == FE::ACTIVE_OWNED) { this->resumeFill(); switchActiveCrsMatrix(); } @@ -117,59 +117,59 @@ void FECrsMatrix::beginFill() { template void FECrsMatrix::beginAssembly() { const char tfecfFuncName[] = "FECrsMatrix::beginAssembly: "; - if (*fillState_ != FillState::closed) + if (*fillState_ != FE::FillState::closed) { std::ostringstream errmsg; errmsg << "Cannot begin assembly, matrix is not in a closed state " << "but is currently open for " - << (*fillState_ == FillState::open ? "assembly" : "modification"); + << (*fillState_ == FE::FillState::open ? "assembly" : "modification"); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error, errmsg.str()); } - *fillState_ = FillState::open; + *fillState_ = FE::FillState::open; this->beginFill(); } template void FECrsMatrix::endAssembly() { const char tfecfFuncName[] = "FECrsMatrix::endAssembly: "; - if (*fillState_ != FillState::open) + if (*fillState_ != FE::FillState::open) { std::ostringstream errmsg; errmsg << "Cannot end assembly, matrix is not open for assembly " << "but is currently " - << (*fillState_ == FillState::closed ? "closed" : "open for modification"); + << (*fillState_ == FE::FillState::closed ? 
"closed" : "open for modification"); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error, errmsg.str()); } - *fillState_ = FillState::closed; + *fillState_ = FE::FillState::closed; this->endFill(); } template void FECrsMatrix::beginModify() { const char tfecfFuncName[] = "FECrsMatrix::beginModify: "; - if (*fillState_ != FillState::closed) + if (*fillState_ != FE::FillState::closed) { std::ostringstream errmsg; errmsg << "Cannot begin modifying, matrix is not in a closed state " << "but is currently open for " - << (*fillState_ == FillState::open ? "assembly" : "modification"); + << (*fillState_ == FE::FillState::open ? "assembly" : "modification"); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error, errmsg.str()); } - *fillState_ = FillState::modify; + *fillState_ = FE::FillState::modify; this->resumeFill(); } template void FECrsMatrix::endModify() { const char tfecfFuncName[] = "FECrsMatrix::endModify: "; - if (*fillState_ != FillState::modify) + if (*fillState_ != FE::FillState::modify) { std::ostringstream errmsg; errmsg << "Cannot end modifying, matrix is not open to modify but is currently " - << (*fillState_ == FillState::open ? "open for assembly" : "closed"); + << (*fillState_ == FE::FillState::open ? "open for assembly" : "closed"); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error, errmsg.str()); } - *fillState_ = FillState::closed; + *fillState_ = FE::FillState::closed; this->fillComplete(); } @@ -184,12 +184,12 @@ FECrsMatrix::replaceGlobalValuesImpl( const LocalOrdinal numElts) { const char tfecfFuncName[] = "FECrsMatrix::replaceGlobalValues: "; - if (*fillState_ != FillState::open) + if (*fillState_ != FE::FillState::open) { std::ostringstream errmsg; errmsg << "Cannot replace global values, matrix is not open for assembly " << "but is currently " - << (*fillState_ == FillState::modify ? "open for modification" : "closed"); + << (*fillState_ == FE::FillState::modify ? 
"open for modification" : "closed"); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error, errmsg.str()); } return crs_matrix_type::replaceGlobalValuesImpl(rowVals, graph, rowInfo, inds, newVals, numElts); @@ -206,7 +206,7 @@ FECrsMatrix::replaceLocalValuesImpl( const LocalOrdinal numElts) { const char tfecfFuncName[] = "FECrsMatrix::replaceLocalValues: "; - if (*fillState_ != FillState::open && *fillState_ != FillState::modify) + if (*fillState_ != FE::FillState::open && *fillState_ != FE::FillState::modify) { std::ostringstream errmsg; errmsg << "Cannot replace local values, matrix is not open to fill/modify. " @@ -228,12 +228,12 @@ FECrsMatrix::sumIntoGlobalValuesImpl( const bool atomic) { const char tfecfFuncName[] = "FECrsMatrix::sumIntoGlobalValues: "; - if (*fillState_ != FillState::open) + if (*fillState_ != FE::FillState::open) { std::ostringstream errmsg; errmsg << "Cannot sum in to global values, matrix is not open for assembly. " << "The matrix is currently " - << (*fillState_ == FillState::modify ? "open for modification" : "closed"); + << (*fillState_ == FE::FillState::modify ? "open for modification" : "closed"); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error, errmsg.str()); } return crs_matrix_type::sumIntoGlobalValuesImpl( @@ -253,12 +253,12 @@ FECrsMatrix::sumIntoLocalValuesImpl( const bool atomic) { const char tfecfFuncName[] = "FECrsMatrix::sumIntoLocalValues: "; - if (*fillState_ != FillState::open) + if (*fillState_ != FE::FillState::open) { std::ostringstream errmsg; errmsg << "Cannot sum in to local values, matrix is not open for assembly. " << "The matrix is currently " - << (*fillState_ == FillState::modify ? "open for modification" : "closed"); + << (*fillState_ == FE::FillState::modify ? 
"open for modification" : "closed"); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error, errmsg.str()); } return crs_matrix_type::sumIntoLocalValuesImpl( @@ -276,12 +276,12 @@ FECrsMatrix::insertGlobalValuesImpl( const size_t numInputEnt) { const char tfecfFuncName[] = "FECrsMatrix::insertGlobalValues: "; - if (*fillState_ != FillState::open) + if (*fillState_ != FE::FillState::open) { std::ostringstream errmsg; errmsg << "Cannot insert global values, matrix is not open for assembly. " << "The matrix is currently " - << (*fillState_ == FillState::modify ? "open for modification" : "closed"); + << (*fillState_ == FE::FillState::modify ? "open for modification" : "closed"); TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC(true, std::logic_error, errmsg.str()); } return crs_matrix_type::insertGlobalValuesImpl(graph, rowInfo, gblColInds, vals, numInputEnt); diff --git a/packages/tpetra/core/src/Tpetra_FEMultiVector_decl.hpp b/packages/tpetra/core/src/Tpetra_FEMultiVector_decl.hpp index 74ff5cbaadb2..c505c54d7f52 100644 --- a/packages/tpetra/core/src/Tpetra_FEMultiVector_decl.hpp +++ b/packages/tpetra/core/src/Tpetra_FEMultiVector_decl.hpp @@ -174,20 +174,7 @@ namespace Tpetra { /// you call this method. void replaceMap (const Teuchos::RCP& map); - //! Enum for activity - enum FEWhichActive - { - FE_ACTIVE_OWNED_PLUS_SHARED, - FE_ACTIVE_OWNED - }; - - enum class FillState - { - open, // matrix is "open". Values can freely summed in to and replaced - modify, // matrix is open for modification. *local* values can be replaced - closed - }; - Teuchos::RCP fillState_; + Teuchos::RCP fillState_; //! Whichever MultiVector is not currently active. Teuchos::RCP inactiveMultiVector_; @@ -197,7 +184,7 @@ namespace Tpetra { /// /// This is an RCP in order to make shallow copies of the /// FEMultiVector work correctly. - Teuchos::RCP activeMultiVector_; + Teuchos::RCP activeMultiVector_; //! Import object used for communication between the two MultiVectors. 
Teuchos::RCP> importer_; diff --git a/packages/tpetra/core/src/Tpetra_FEMultiVector_def.hpp b/packages/tpetra/core/src/Tpetra_FEMultiVector_def.hpp index 68e20b0517a6..a217f64711d5 100644 --- a/packages/tpetra/core/src/Tpetra_FEMultiVector_def.hpp +++ b/packages/tpetra/core/src/Tpetra_FEMultiVector_def.hpp @@ -29,7 +29,7 @@ FEMultiVector (const Teuchos::RCP& map, const bool zeroOut) : base_type (importer.is_null () ? map : importer->getTargetMap (), numVecs, zeroOut), - activeMultiVector_ (Teuchos::rcp (new FEWhichActive (FE_ACTIVE_OWNED_PLUS_SHARED))), + activeMultiVector_ (Teuchos::rcp (new FE::WhichActive (FE::ACTIVE_OWNED_PLUS_SHARED))), importer_ (importer) { const char tfecfFuncName[] = "FEMultiVector constructor: "; @@ -60,7 +60,7 @@ FEMultiVector (const Teuchos::RCP& map, inactiveMultiVector_ = Teuchos::rcp (new base_type (*this, importer_->getSourceMap(), 0)); } - fillState_ = Teuchos::rcp(new FillState(FillState::closed)); + fillState_ = Teuchos::rcp(new FE::FillState(FE::FillState::closed)); } template @@ -70,7 +70,7 @@ beginFill () { // The FEMultiVector is in owned+shared mode on construction, so we // do not throw in that case. 
- if (*activeMultiVector_ == FE_ACTIVE_OWNED) { + if (*activeMultiVector_ == FE::ACTIVE_OWNED) { switchActiveMultiVector (); } } @@ -82,7 +82,7 @@ endFill () { const char tfecfFuncName[] = "endFill: "; - if (*activeMultiVector_ == FE_ACTIVE_OWNED_PLUS_SHARED) { + if (*activeMultiVector_ == FE::ACTIVE_OWNED_PLUS_SHARED) { doOwnedPlusSharedToOwned (Tpetra::ADD); switchActiveMultiVector (); } @@ -97,11 +97,11 @@ template void FEMultiVector::beginAssembly() { const char tfecfFuncName[] = "FEMultiVector::beginAssembly: "; TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( - *fillState_ != FillState::closed, + *fillState_ != FE::FillState::closed, std::runtime_error, "Cannot beginAssembly, matrix is not in a closed state" ); - *fillState_ = FillState::open; + *fillState_ = FE::FillState::open; this->beginFill(); } @@ -109,11 +109,11 @@ template void FEMultiVector::endAssembly() { const char tfecfFuncName[] = "FEMultiVector::endAssembly: "; TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( - *fillState_ != FillState::open, + *fillState_ != FE::FillState::open, std::runtime_error, "Cannot endAssembly, matrix is not open to fill." ); - *fillState_ = FillState::closed; + *fillState_ = FE::FillState::closed; this->endFill(); } @@ -121,22 +121,22 @@ template void FEMultiVector::beginModify() { const char tfecfFuncName[] = "FEMultiVector::beginModify: "; TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( - *fillState_ != FillState::closed, + *fillState_ != FE::FillState::closed, std::runtime_error, "Cannot beginModify, matrix is not in a closed state" ); - *fillState_ = FillState::modify; + *fillState_ = FE::FillState::modify; } template void FEMultiVector::endModify() { const char tfecfFuncName[] = "FEMultiVector::endModify: "; TEUCHOS_TEST_FOR_EXCEPTION_CLASS_FUNC( - *fillState_ != FillState::modify, + *fillState_ != FE::FillState::modify, std::runtime_error, "Cannot endModify, matrix is not open to modify." 
); - *fillState_ = FillState::closed; + *fillState_ = FE::FillState::closed; } template @@ -164,7 +164,7 @@ FEMultiVector:: doOwnedPlusSharedToOwned (const CombineMode CM) { if (! importer_.is_null () && - *activeMultiVector_ == FE_ACTIVE_OWNED_PLUS_SHARED) { + *activeMultiVector_ == FE::ACTIVE_OWNED_PLUS_SHARED) { inactiveMultiVector_->doExport (*this, *importer_, CM); } } @@ -175,7 +175,7 @@ FEMultiVector:: doOwnedToOwnedPlusShared (const CombineMode CM) { if (! importer_.is_null () && - *activeMultiVector_ == FE_ACTIVE_OWNED) { + *activeMultiVector_ == FE::ACTIVE_OWNED) { inactiveMultiVector_->doImport (*this, *importer_, CM); } } @@ -185,11 +185,11 @@ void FEMultiVector:: switchActiveMultiVector () { - if (*activeMultiVector_ == FE_ACTIVE_OWNED_PLUS_SHARED) { - *activeMultiVector_ = FE_ACTIVE_OWNED; + if (*activeMultiVector_ == FE::ACTIVE_OWNED_PLUS_SHARED) { + *activeMultiVector_ = FE::ACTIVE_OWNED; } else { - *activeMultiVector_ = FE_ACTIVE_OWNED_PLUS_SHARED; + *activeMultiVector_ = FE::ACTIVE_OWNED_PLUS_SHARED; } if (importer_.is_null ()) { diff --git a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp index 068927e39ed2..9fafc1502c56 100644 --- a/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp +++ b/packages/tpetra/core/src/Tpetra_MultiVector_def.hpp @@ -1795,7 +1795,7 @@ void MultiVector::copyAndPermute( // - CombineMode needs to be INSERT. // - The number of vectors needs to be 1, otherwise we need to // reorder the received data. 
- if ((dual_view_type::impl_dualview_is_single_device::value || + if ((std::is_same_v || (Details::Behavior::assumeMpiIsGPUAware () && !this->need_sync_device()) || (!Details::Behavior::assumeMpiIsGPUAware () && !this->need_sync_host())) && areRemoteLIDsContiguous && diff --git a/packages/tpetra/core/test/ImportExport2/ImportExport2_UnitTests.cpp b/packages/tpetra/core/test/ImportExport2/ImportExport2_UnitTests.cpp index 1482f3132e3e..728df1dcd5b0 100644 --- a/packages/tpetra/core/test/ImportExport2/ImportExport2_UnitTests.cpp +++ b/packages/tpetra/core/test/ImportExport2/ImportExport2_UnitTests.cpp @@ -730,7 +730,7 @@ namespace { // MV::imports_ and MV::view_ have the same memory space, the // imports_ view is aliased to the data view of the target MV. if ((myImageID == collectRank) && (myImageID == 0)) { - if (mv_type::dual_view_type::impl_dualview_is_single_device::value) + if (std::is_same_v) TEUCHOS_ASSERT(tgt_mv->importsAreAliased()); // else { // We do not know if copyAndPermute was run on host or device. @@ -800,7 +800,7 @@ namespace { // MV::imports_ and MV::view_ have the same memory space, the // imports_ view is aliased to the data view of the target MV. if ((myImageID == collectRank) && (myImageID == 0)) { - if (mv_type::dual_view_type::impl_dualview_is_single_device::value) + if (std::is_same_v) TEUCHOS_ASSERT(tgt_mv->importsAreAliased()); // else { // We do not know if copyAndPermute was run on host or device.