From d5570aff42af6b400907511ec7d6522b4c2fbd5c Mon Sep 17 00:00:00 2001 From: Yossi Itigin Date: Fri, 19 Mar 2021 18:44:27 +0200 Subject: [PATCH] pml/ucx: ignore request leak by default, override by mca param Signed-off-by: Yossi Itigin (cherry picked from commit 6672d07dcd) --- config/ompi_check_ucx.m4 | 3 ++- ompi/mca/pml/ucx/pml_ucx.c | 7 +++++++ ompi/mca/pml/ucx/pml_ucx.h | 1 + ompi/mca/pml/ucx/pml_ucx_component.c | 15 +++++++++++++++ 4 files changed, 25 insertions(+), 1 deletion(-) diff --git a/config/ompi_check_ucx.m4 b/config/ompi_check_ucx.m4 index 6901a46127e..3830e00be20 100644 --- a/config/ompi_check_ucx.m4 +++ b/config/ompi_check_ucx.m4 @@ -136,7 +136,8 @@ AC_DEFUN([OMPI_CHECK_UCX],[ UCP_ATOMIC_FETCH_OP_FAND, UCP_ATOMIC_FETCH_OP_FOR, UCP_ATOMIC_FETCH_OP_FXOR, - UCP_PARAM_FIELD_ESTIMATED_NUM_PPN], + UCP_PARAM_FIELD_ESTIMATED_NUM_PPN, + UCP_WORKER_FLAG_IGNORE_REQUEST_LEAK], [], [], [#include <ucp/api/ucp.h>]) AC_CHECK_DECLS([UCP_WORKER_ATTR_FIELD_ADDRESS_FLAGS], diff --git a/ompi/mca/pml/ucx/pml_ucx.c b/ompi/mca/pml/ucx/pml_ucx.c index fc46995d940..a2126ca41ea 100644 --- a/ompi/mca/pml/ucx/pml_ucx.c +++ b/ompi/mca/pml/ucx/pml_ucx.c @@ -296,6 +296,13 @@ int mca_pml_ucx_init(int enable_mpi_threads) params.thread_mode = UCS_THREAD_MODE_SINGLE; } +#if HAVE_DECL_UCP_WORKER_FLAG_IGNORE_REQUEST_LEAK + if (!ompi_pml_ucx.request_leak_check) { + params.field_mask |= UCP_WORKER_PARAM_FIELD_FLAGS; + params.flags |= UCP_WORKER_FLAG_IGNORE_REQUEST_LEAK; + } +#endif + status = ucp_worker_create(ompi_pml_ucx.ucp_context, &params, &ompi_pml_ucx.ucp_worker); if (UCS_OK != status) { diff --git a/ompi/mca/pml/ucx/pml_ucx.h b/ompi/mca/pml/ucx/pml_ucx.h index 39ab15e9d1e..ca81ce4cd0f 100644 --- a/ompi/mca/pml/ucx/pml_ucx.h +++ b/ompi/mca/pml/ucx/pml_ucx.h @@ -58,6 +58,7 @@ struct mca_pml_ucx_module { int priority; bool cuda_initialized; + bool request_leak_check; }; extern mca_pml_base_component_2_0_0_t mca_pml_ucx_component; diff --git a/ompi/mca/pml/ucx/pml_ucx_component.c 
b/ompi/mca/pml/ucx/pml_ucx_component.c index 91fd188907c..6aed6c41d11 100644 --- a/ompi/mca/pml/ucx/pml_ucx_component.c +++ b/ompi/mca/pml/ucx/pml_ucx_component.c @@ -64,6 +64,21 @@ static int mca_pml_ucx_component_register(void) OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_LOCAL, &ompi_pml_ucx.num_disconnect); + +#if HAVE_DECL_UCP_WORKER_FLAG_IGNORE_REQUEST_LEAK + ompi_pml_ucx.request_leak_check = false; + (void) mca_base_component_var_register(&mca_pml_ucx_component.pmlm_version, "request_leak_check", + "Enable showing a warning during MPI_Finalize if some " + "non-blocking MPI requests have not been released", + MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0, + OPAL_INFO_LVL_3, + MCA_BASE_VAR_SCOPE_LOCAL, + &ompi_pml_ucx.request_leak_check); +#else + /* If UCX does not support ignoring leak check, then it's always enabled */ + ompi_pml_ucx.request_leak_check = true; +#endif + opal_common_ucx_mca_var_register(&mca_pml_ucx_component.pmlm_version); return 0; }