Skip to content

Commit

Permalink
Merge Pull Request trilinos#13568 from trilinos/Trilinos/master_merge…
Browse files Browse the repository at this point in the history
…_20241101_175831

Automatically Merged using Trilinos Master Merge AutoTester
PR Title: b'Trilinos Master Merge PR Generator: Auto PR created to promote from master_merge_20241101_175831 branch to master'
PR Author: trilinos-autotester
  • Loading branch information
trilinos-autotester authored Nov 2, 2024
2 parents e7fd307 + 76e401d commit 017cae8
Show file tree
Hide file tree
Showing 265 changed files with 18,414 additions and 2,080 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/AT2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ on:
- opened
- synchronize
branches:
- master
- develop
- master
- develop
workflow_dispatch:

# Cancels any in progress 'workflows' associated with this PR
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/spack.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ on:
types:
- opened
- synchronize
branches:
- master
- develop
branches:
- master
- develop
workflow_dispatch:

# Cancels any in progress 'workflow' associated with this PR
Expand Down
14 changes: 11 additions & 3 deletions packages/PyTrilinos2/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ PYTRILINOS2_CMAKE_ERROR
TRIBITS_ADD_OPTION_AND_DEFINE(PyTrilinos2_BINDER_VERBOSE
PYTRILINOS2_B_VERBOSE
"Increase the verbosity of binder."
OFF )
OFF )

SET(PyTrilinos2_BINDER_NUM_FILES "100" CACHE STRING "Maxinum number of generated files by binder.")

Expand Down Expand Up @@ -184,7 +184,7 @@ FOREACH(line IN LISTS eti_files_without_dir)
ENDFOREACH(line)
file(WRITE ${all_ETI_files_list} ${CONTENTS})

SET(ETI_classes "Tpetra_CrsMatrix;Tpetra_Vector;Tpetra_MultiVector")
SET(ETI_classes "Tpetra_CrsMatrix;Tpetra_Vector;Tpetra_MultiVector;Tpetra_FEMultiVector;Tpetra_FECrsMatrix")
SET(CONTENTS "")
FOREACH(line IN LISTS ETI_classes)
SET(CONTENTS "${CONTENTS}${line}\n")
Expand Down Expand Up @@ -229,7 +229,7 @@ IF(PYTRILINOS2_B_VERBOSE)
ENDIF()
IF(PYTRILINOS2_SUPPRESS_ERRORS)
list(APPEND BINDER_OPTIONS --suppress-errors)
ENDIF()
ENDIF()
list(APPEND BINDER_OPTIONS --config ${CMAKE_CURRENT_SOURCE_DIR}/scripts/PyTrilinos2_config.cfg)
list(APPEND BINDER_OPTIONS --)
IF(TPL_ENABLE_CUDA)
Expand All @@ -241,6 +241,14 @@ if (NOT(MPI_BASE_DIR STREQUAL ""))
list(APPEND BINDER_OPTIONS -I${MPI_BASE_DIR}/include)
ENDIF()
list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/include_tmp)
list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/include_tmp/mdspan)
list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/include_tmp/View/MDSpan)
list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/include_tmp/experimental)
list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/include_tmp/experimental/__p0009_bits)
list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/include_tmp/experimental/__p1684_bits)
list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/include_tmp/experimental/__p2389_bits)
list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/include_tmp/experimental/__p2630_bits)
list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/include_tmp/experimental/__p2642_bits)
list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_BINARY_DIR}/src)
list(APPEND BINDER_OPTIONS -I${CMAKE_CURRENT_SOURCE_DIR}/src)
IF(NOT DEFINED PyTrilinos2_BINDER_GCC_TOOLCHAIN)
Expand Down
1 change: 1 addition & 0 deletions packages/framework/ini-files/config-specs.ini
Original file line number Diff line number Diff line change
Expand Up @@ -1248,6 +1248,7 @@ opt-set-cmake-var Tpetra_INST_SERIAL BOOL FORCE : ON
opt-set-cmake-var Zoltan_ENABLE_Scotch BOOL FORCE : OFF

[CUDA11-RUN-SERIAL-TESTS]
opt-set-cmake-var Kokkos_CoreUnitTest_Cuda1_SET_RUN_SERIAL BOOL FORCE : ON
opt-set-cmake-var KokkosKernels_sparse_cuda_MPI_1_SET_RUN_SERIAL BOOL FORCE : ON
opt-set-cmake-var KokkosKernels_batched_dla_cuda_MPI_1_SET_RUN_SERIAL BOOL FORCE : ON
opt-set-cmake-var Intrepid2_unit-test_MonolithicExecutable_Intrepid2_Tests_MPI_1_SET_RUN_SERIAL BOOL FORCE : ON
Expand Down
4 changes: 4 additions & 0 deletions packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,8 @@ class RBILUK : virtual public Ifpack2::RILUK< Tpetra::RowMatrix< typename Matrix
// <typename lno_row_view_t::non_const_value_type, typename lno_nonzero_view_t::non_const_value_type, typename scalar_nonzero_view_t::value_type,
// HandleExecSpace, TemporaryMemorySpace,PersistentMemorySpace > kk_handle_type;//test
Teuchos::RCP<kk_handle_type> KernelHandle_;
Teuchos::RCP<kk_handle_type> L_Sptrsv_KernelHandle_;
Teuchos::RCP<kk_handle_type> U_Sptrsv_KernelHandle_;

//@}

Expand Down Expand Up @@ -336,6 +338,8 @@ class RBILUK : virtual public Ifpack2::RILUK< Tpetra::RowMatrix< typename Matrix

//! The inverse of the diagonal
Teuchos::RCP<block_crs_matrix_type> D_block_inverse_;

Kokkos::View<impl_scalar_type*, typename values_device_view_type::device_type> tmp_;
};


Expand Down
88 changes: 54 additions & 34 deletions packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#include "Ifpack2_LocalFilter.hpp"
#include "Ifpack2_Utilities.hpp"
#include "Ifpack2_RILUK.hpp"
#include "KokkosSparse_trsv.hpp"
#include "KokkosSparse_sptrsv.hpp"

//#define IFPACK2_RBILUK_INITIAL
//#define IFPACK2_RBILUK_INITIAL_NOKK
Expand Down Expand Up @@ -194,6 +194,11 @@ void RBILUK<MatrixType>::allocate_L_and_U_blocks ()
U_block_->setAllToScalar (STM::zero ());
D_block_->setAllToScalar (STM::zero ());

// Allocate temp space for apply
if (this->isKokkosKernelsSpiluk_) {
const auto numRows = L_block_->getLocalNumRows();
tmp_ = decltype(tmp_)("RBILUK::tmp_", numRows * blockSize_);
}
}
this->isAllocated_ = true;
}
Expand Down Expand Up @@ -322,12 +327,21 @@ void RBILUK<MatrixType>::initialize ()

if (this->isKokkosKernelsSpiluk_) {
this->KernelHandle_ = Teuchos::rcp (new kk_handle_type ());
const auto numRows = this->A_local_->getLocalNumRows();
KernelHandle_->create_spiluk_handle( KokkosSparse::Experimental::SPILUKAlgorithm::SEQLVLSCHD_TP1,
this->A_local_->getLocalNumRows(),
numRows,
2*this->A_local_->getLocalNumEntries()*(this->LevelOfFill_+1),
2*this->A_local_->getLocalNumEntries()*(this->LevelOfFill_+1),
blockSize_);
this->Graph_->initialize(KernelHandle_); // this calls spiluk_symbolic

this->L_Sptrsv_KernelHandle_ = Teuchos::rcp (new kk_handle_type ());
this->U_Sptrsv_KernelHandle_ = Teuchos::rcp (new kk_handle_type ());

KokkosSparse::Experimental::SPTRSVAlgorithm alg = KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_TP1;

this->L_Sptrsv_KernelHandle_->create_sptrsv_handle(alg, numRows, true /*lower*/, blockSize_);
this->U_Sptrsv_KernelHandle_->create_sptrsv_handle(alg, numRows, false /*upper*/, blockSize_);
}
else {
this->Graph_->initialize ();
Expand Down Expand Up @@ -914,6 +928,10 @@ void RBILUK<MatrixType>::compute ()
KokkosSparse::Experimental::spiluk_numeric( KernelHandle_.getRawPtr(), this->LevelOfFill_,
A_local_rowmap, A_local_entries, A_local_values,
L_rowmap, L_entries, L_values, U_rowmap, U_entries, U_values );

// Now call symbolic for sptrsvs
KokkosSparse::Experimental::sptrsv_symbolic(L_Sptrsv_KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values);
KokkosSparse::Experimental::sptrsv_symbolic(U_Sptrsv_KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values);
}
} // Stop timing

Expand Down Expand Up @@ -1070,7 +1088,7 @@ apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_t
else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T).
TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
"Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm without KokkosKernels. ");
"Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm");
}
}
else { // alpha != 1 or beta != 0
Expand All @@ -1088,42 +1106,44 @@ apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_t
}
}
else {
// Kokkos kernels impl. For now, the only block trsv available is Sequential
// and must be done on host.
using row_map_type = typename local_matrix_host_type::row_map_type;
using index_type = typename local_matrix_host_type::index_type;
using values_type = typename local_matrix_host_type::values_type;

auto X_view = X.getLocalViewHost(Tpetra::Access::ReadOnly);
auto Y_view = Y.getLocalViewHost(Tpetra::Access::ReadWrite);

auto L_row_ptrs_host = L_block_->getCrsGraph().getLocalRowPtrsHost();
auto L_entries_host = L_block_->getCrsGraph().getLocalIndicesHost();
auto U_row_ptrs_host = U_block_->getCrsGraph().getLocalRowPtrsHost();
auto U_entries_host = U_block_->getCrsGraph().getLocalIndicesHost();
auto L_values_host = L_block_->getValuesHost();
auto U_values_host = U_block_->getValuesHost();

row_map_type* L_row_ptrs_host_ri = reinterpret_cast<row_map_type*>(&L_row_ptrs_host);
index_type* L_entries_host_ri = reinterpret_cast<index_type*>(&L_entries_host);
row_map_type* U_row_ptrs_host_ri = reinterpret_cast<row_map_type*>(&U_row_ptrs_host);
index_type* U_entries_host_ri = reinterpret_cast<index_type*>(&U_entries_host);
values_type* L_values_host_ri = reinterpret_cast<values_type*>(&L_values_host);
values_type* U_values_host_ri = reinterpret_cast<values_type*>(&U_values_host);
// Kokkos kernels impl.
auto X_views = X.getLocalViewDevice(Tpetra::Access::ReadOnly);
auto Y_views = Y.getLocalViewDevice(Tpetra::Access::ReadWrite);

const auto numRows = L_block_->getLocalNumRows();
local_matrix_host_type L_block_local_host("L_block_local_host", numRows, numRows, L_entries_host.size(), *L_values_host_ri, *L_row_ptrs_host_ri, *L_entries_host_ri, blockSize_);
local_matrix_host_type U_block_local_host("U_block_local_host", numRows, numRows, U_entries_host.size(), *U_values_host_ri, *U_row_ptrs_host_ri, *U_entries_host_ri, blockSize_);
auto lclL = L_block_->getLocalMatrixDevice();
auto L_rowmap = lclL.graph.row_map;
auto L_entries = lclL.graph.entries;
auto L_values = lclL.values;

auto lclU = U_block_->getLocalMatrixDevice();
auto U_rowmap = lclU.graph.row_map;
auto U_entries = lclU.graph.entries;
auto U_values = lclU.values;

if (mode == Teuchos::NO_TRANS) {
KokkosSparse::trsv("L", "N", "N", L_block_local_host, X_view, Y_view);
KokkosSparse::trsv("U", "N", "N", U_block_local_host, Y_view, Y_view);
KokkosBlas::axpby(alpha, Y_view, beta, Y_view);
{
const LO numVecs = X.getNumVectors();
for (LO vec = 0; vec < numVecs; ++vec) {
auto X_view = Kokkos::subview(X_views, Kokkos::ALL(), vec);
auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), vec);
KokkosSparse::Experimental::sptrsv_solve(L_Sptrsv_KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values, X_view, tmp_);
}
}

{
const LO numVecs = X.getNumVectors();
for (LO vec = 0; vec < numVecs; ++vec) {
auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), vec);
KokkosSparse::Experimental::sptrsv_solve(U_Sptrsv_KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values, tmp_, Y_view);
}
}

KokkosBlas::axpby(alpha, Y_views, beta, Y_views);
}
else {
KokkosSparse::trsv("U", "T", "N", U_block_local_host, X_view, Y_view);
KokkosSparse::trsv("L", "T", "N", L_block_local_host, Y_view, Y_view);
KokkosBlas::axpby(alpha, Y_view, beta, Y_view);
TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
"Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm");
}

//Y.getWrappedDualView().sync();
Expand Down
55 changes: 55 additions & 0 deletions packages/intrepid2/src/Discretization/Basis/Intrepid2_Basis.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,61 @@ using HostBasisPtr = BasisPtr<typename Kokkos::HostSpace::device_type, OutputTyp
}
}


/** \brief  Return the size of the scratch space, in bytes, needed for using the team-level implementation of getValues.

    Warning: <var>inputPoints</var> is only used to deduce the type of the points where to evaluate basis functions.
    The rank of <var>inputPoints</var> and its size are not relevant; however,
    when using DFAD types, <var>inputPoints</var> cannot be empty,
    otherwise the size of the scratch space needed won't be deduced correctly.

    \param  perTeamSpaceSize   [out] - size of the scratch space needed per team
    \param  perThreadSpaceSize [out] - size of the scratch space needed per thread
    \param  inputPoints        [in]  - view used only to deduce the point type (see the warning above)
    \param  operatorType       [in]  - the operator acting on the basis functions

    \remark This default implementation does not set either output; it only reports,
            through INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE, that the method is not supported.
            Derived classes that provide the team-level getValues are expected to override it.
*/
virtual
void getScratchSpaceSize(       ordinal_type& perTeamSpaceSize,
                                ordinal_type& perThreadSpaceSize,
                          const PointViewType inputPoints,
                          const EOperator operatorType = OPERATOR_VALUE) const {
  // The default implementation reads none of its arguments; the casts avoid unused-parameter warnings.
  (void) perTeamSpaceSize;
  (void) perThreadSpaceSize;
  (void) inputPoints;
  (void) operatorType;
  // The message names this method (it previously referred to a non-existent "getValuesScratchSpace").
  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE( true, std::logic_error,
                                ">>> ERROR (Basis::getScratchSpaceSize): this method is not supported or should be overridden accordingly by derived classes.");
}


/** \brief  Team-level evaluation of basis functions on a <strong>reference cell</strong>.

    Returns values of <var>operatorType</var> acting on basis functions for a set of
    points in the <strong>reference cell</strong> for which the basis is defined.

    The interface also allows selecting the basis functions associated with a particular entity.
    As an example, if <var>subcellDim==1</var> (edges) and <var>subcellOrdinal==0</var>, <var>outputValues</var> will contain all the basis functions associated with the first edge.
    <var>outputValues</var> will contain all the cell basis functions when the default value (-1) is used for <var>subcellDim</var> and <var>subcellOrdinal</var>.

    \param  outputValues   [out] - variable rank array with the basis values
    \param  inputPoints    [in]  - rank-2 array (P,D) with the evaluation points
    \param  operatorType   [in]  - the operator acting on the basis functions
    \param  teamMember     [in]  - team member of the Kokkos::TeamPolicy
    \param  scratchStorage [in]  - scratch space to use by each team
    \param  subcellDim     [in]  - the dimension of the subcells; the default value of -1 returns basis functions associated with subcells of all dimensions
    \param  subcellOrdinal [in]  - the ordinal of the subcell; the default value of -1 returns basis functions associated with subcells of all ordinals

    \remark This function is supposed to be called within a TeamPolicy kernel.
            The size of the required scratch space is determined by the getScratchSpaceSize function.
            This default implementation computes nothing: it only reports, through
            INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE, that the method is not supported.
*/
KOKKOS_INLINE_FUNCTION
virtual
void getValues(       OutputViewType /* outputValues */,
                const PointViewType  /* inputPoints */,
                const EOperator /* operatorType */,
                const typename Kokkos::TeamPolicy<ExecutionSpace>::member_type& teamMember,
                const typename ExecutionSpace::scratch_memory_space &scratchStorage,
                const ordinal_type subcellDim=-1,
                const ordinal_type subcellOrdinal=-1) const {
  // These arguments are named for the documentation above but are not read here;
  // the casts avoid unused-parameter warnings.
  (void) teamMember;
  (void) scratchStorage;
  (void) subcellDim;
  (void) subcellOrdinal;
  INTREPID2_TEST_FOR_EXCEPTION_DEVICE_SAFE( true, std::logic_error,
                                ">>> ERROR (Basis::getValues): this method is not supported or should be overridden accordingly by derived classes.");
}

/** \brief Evaluation of a FEM basis on a <strong>reference cell</strong>.
Returns values of <var>operatorType</var> acting on FEM basis functions for a set of
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,23 @@ namespace Intrepid2 {
operatorType );
}

/** \brief  Override of the scratch-space size query that accompanies the team-level getValues overload.

    \param  perTeamSpaceSize   [out] - size of the scratch space needed per team
    \param  perThreadSpaceSize [out] - size of the scratch space needed per thread
    \param  inputPoints        [in]  - points view passed by the caller
    \param  operatorType       [in]  - operator for which the size is requested (defaults to OPERATOR_VALUE)
*/
virtual void
getScratchSpaceSize(       ordinal_type& perTeamSpaceSize,
                           ordinal_type& perThreadSpaceSize,
                     const PointViewType inputPoints,
                     const EOperator operatorType = OPERATOR_VALUE) const override;

/** \brief  Team-level overload of getValues; declared here as an override and defined out of line.

    \param  outputValues   [out] - view receiving the basis values
    \param  inputPoints    [in]  - view with the evaluation points
    \param  operatorType   [in]  - the operator acting on the basis functions
    \param  team_member    [in]  - member handle of the Kokkos::TeamPolicy for DeviceType's execution space
    \param  scratchStorage [in]  - scratch memory space of DeviceType's execution space
    \param  subcellDim     [in]  - subcell dimension selector; defaults to -1
    \param  subcellOrdinal [in]  - subcell ordinal selector; defaults to -1
*/
KOKKOS_INLINE_FUNCTION
virtual void
getValues(
OutputViewType outputValues,
const PointViewType inputPoints,
const EOperator operatorType,
const typename Kokkos::TeamPolicy<typename DeviceType::execution_space>::member_type& team_member,
const typename DeviceType::execution_space::scratch_memory_space & scratchStorage,
const ordinal_type subcellDim = -1,
const ordinal_type subcellOrdinal = -1) const override;

virtual
void
getDofCoords( ScalarViewType dofCoords ) const override {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -330,6 +330,57 @@ namespace Intrepid2 {

}

/** \brief  Reports the scratch space needed by the team-level getValues of this basis.

    Both sizes are set to zero: the team-level getValues of this basis does not read its scratch storage.

    \param  perTeamSpaceSize   [out] - set to 0
    \param  perThreadSpaceSize [out] - set to 0
    \param  inputPoints        [in]  - not used by this basis
    \param  operatorType       [in]  - not used by this basis
*/
template<typename DT, typename OT, typename PT>
void
Basis_HCURL_HEX_I1_FEM<DT,OT,PT>::getScratchSpaceSize(
                                  ordinal_type& perTeamSpaceSize,
                                  ordinal_type& perThreadSpaceSize,
                            const PointViewType inputPoints,
                            const EOperator operatorType) const {
  // Neither argument influences the result; the casts avoid unused-parameter warnings.
  (void) inputPoints;
  (void) operatorType;

  perTeamSpaceSize = 0;
  perThreadSpaceSize = 0;
}

/** \brief  Team-level evaluation of this basis at a set of points.

    The points (first extent of <var>inputPoints</var>) are distributed over the threads of
    <var>team_member</var> with a Kokkos::TeamThreadRange; each point is evaluated with the
    serial kernel of the requested operator and written to the (ALL, point, ALL) slice of
    <var>outputValues</var>.

    \param  outputValues   [out] - view receiving the basis values
    \param  inputPoints    [in]  - view with the evaluation points, indexed (point, ...)
    \param  operatorType   [in]  - OPERATOR_VALUE or OPERATOR_CURL; any other value aborts
    \param  team_member    [in]  - team member of the enclosing Kokkos::TeamPolicy
    \param  scratchStorage [in]  - not used by this basis
    \param  subcellDim     [in]  - must be -1; selecting a subset of basis functions aborts
    \param  subcellOrdinal [in]  - must be -1; selecting a subset of basis functions aborts
*/
template<typename DT, typename OT, typename PT>
KOKKOS_INLINE_FUNCTION
void
Basis_HCURL_HEX_I1_FEM<DT,OT,PT>::getValues(
        OutputViewType outputValues,
  const PointViewType  inputPoints,
  const EOperator      operatorType,
  const typename Kokkos::TeamPolicy<typename DT::execution_space>::member_type& team_member,
  const typename DT::execution_space::scratch_memory_space & scratchStorage,
  const ordinal_type   subcellDim,
  const ordinal_type   subcellOrdinal) const {

  // Only the "all basis functions" selection (-1, -1) is available for this basis.
  INTREPID2_TEST_FOR_ABORT( !((subcellDim == -1) && (subcellOrdinal == -1)),
    ">>> ERROR: (Intrepid2::Basis_HCURL_HEX_I1_FEM::getValues), The capability of selecting subsets of basis functions has not been implemented yet.");

  (void) scratchStorage; //avoid unused variable warning

  const int pointCount = inputPoints.extent(0);

  if (operatorType == OPERATOR_VALUE) {
    // One point per team thread: basis values.
    Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, pointCount), [=] (ordinal_type& ip) {
      auto       valuesAtPoint = Kokkos::subview( outputValues, Kokkos::ALL(), ip, Kokkos::ALL() );
      const auto point         = Kokkos::subview( inputPoints,  ip, Kokkos::ALL() );
      Impl::Basis_HCURL_HEX_I1_FEM::Serial<OPERATOR_VALUE>::getValues( valuesAtPoint, point );
    });
  }
  else if (operatorType == OPERATOR_CURL) {
    // One point per team thread: curl of the basis functions.
    Kokkos::parallel_for (Kokkos::TeamThreadRange (team_member, pointCount), [=] (ordinal_type& ip) {
      auto       curlsAtPoint = Kokkos::subview( outputValues, Kokkos::ALL(), ip, Kokkos::ALL() );
      const auto point        = Kokkos::subview( inputPoints,  ip, Kokkos::ALL() );
      Impl::Basis_HCURL_HEX_I1_FEM::Serial<OPERATOR_CURL>::getValues( curlsAtPoint, point );
    });
  }
  else {
    // Every other operator is rejected.
    INTREPID2_TEST_FOR_ABORT( true, ">>> ERROR: (Intrepid2::Basis_HCURL_HEX_I1_FEM::getValues), Operator Type not supported.");
  }
}

}// namespace Intrepid2

#endif
Loading

0 comments on commit 017cae8

Please sign in to comment.