From c3f1a510a6ff4ebd987eb8935f06b4b8215dcc1c Mon Sep 17 00:00:00 2001 From: Ralph Castain Date: Fri, 16 Jan 2026 17:55:14 -0700 Subject: [PATCH] Fix usage of PMIx_Fence_nb to conform with PMIx Standard The PMIx_Fence_nb function can return PMIX_OPERATION_SUCCEEDED to indicate that the function was executed atomically and the callback function will therefore not be called. The PMIx Standard lists a few reasons why this can happen, but the point here was to fix usage to properly handle that possibility. Signed-off-by: Ralph Castain --- ompi/instance/instance.c | 54 ++++++++++++++++++++--------- ompi/runtime/ompi_mpi_finalize.c | 20 ++++++++--- ompi/runtime/ompi_mpi_init.c | 59 ++++++++++++++++++++++---------- 3 files changed, 94 insertions(+), 39 deletions(-) diff --git a/ompi/instance/instance.c b/ompi/instance/instance.c index bd686d2bab2..6d50d32ffb2 100644 --- a/ompi/instance/instance.c +++ b/ompi/instance/instance.c @@ -8,6 +8,7 @@ * reserved. * Copyright (c) 2023 Jeffrey M. Squyres. All rights reserved. * Copyright (c) 2024 NVIDIA Corporation. All rights reserved. + * Copyright (c) 2026 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -586,11 +587,16 @@ static int ompi_mpi_instance_init_common (int argc, char **argv) active = true; OPAL_POST_OBJECT(&active); PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL); - if( PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, NULL, 0, - fence_release, - (void*)&active))) { - ret = opal_pmix_convert_status(rc); - return ompi_instance_print_error ("PMIx_Fence_nb() failed", ret); + rc = PMIx_Fence_nb(NULL, 0, NULL, 0, fence_release, (void*)&active); + if (PMIX_SUCCESS != rc) { + active = false; + if (PMIX_OPERATION_SUCCEEDED == rc) { + // can return operation_succeeded if atomically completed + ret = MPI_SUCCESS; + } else { + ret = opal_pmix_convert_status(rc); + return ompi_instance_print_error ("PMIx_Fence_nb() failed", ret); + } } } } else { @@ -602,12 +608,19 @@ static int ompi_mpi_instance_init_common (int argc, char **argv) OPAL_POST_OBJECT(&active); PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL); rc = PMIx_Fence_nb(NULL, 0, info, 1, fence_release, (void*)&active); - if( PMIX_SUCCESS != rc) { - ret = opal_pmix_convert_status(rc); - return ompi_instance_print_error ("PMIx_Fence() failed", ret); + if (PMIX_SUCCESS != rc) { + active = false; + if (PMIX_OPERATION_SUCCEEDED == rc) { + // can return operation_succeeded if atomically completed + ret = MPI_SUCCESS; + } else { + ret = opal_pmix_convert_status(rc); + return ompi_instance_print_error ("PMIx_Fence() failed", ret); + } + } else { + /* cannot just wait on thread as we need to call opal_progress */ + OMPI_LAZY_WAIT_FOR_COMPLETION(active); } - /* cannot just wait on thread as we need to call opal_progress */ - OMPI_LAZY_WAIT_FOR_COMPLETION(active); } } @@ -748,7 +761,9 @@ static int ompi_mpi_instance_init_common (int argc, char **argv) * we have to wait here for it to complete. However, there * is no reason to do two barriers! */ if (background_fence) { - OMPI_LAZY_WAIT_FOR_COMPLETION(active); + if (active) { + OMPI_LAZY_WAIT_FOR_COMPLETION(active); + } } else if (!ompi_async_mpi_init) { /* wait for everyone to reach this point - this is a hard * barrier requirement at this time, though we hope to relax @@ -757,12 +772,19 @@ static int ompi_mpi_instance_init_common (int argc, char **argv) active = true; OPAL_POST_OBJECT(&active); PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &flag, PMIX_BOOL); - if (PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, info, 1, - fence_release, (void*)&active))) { - ret = opal_pmix_convert_status(rc); - return ompi_instance_print_error ("PMIx_Fence_nb() failed", ret); + rc = PMIx_Fence_nb(NULL, 0, info, 1, fence_release, (void*)&active); + if (PMIX_SUCCESS != rc) { + active = false; + if (PMIX_OPERATION_SUCCEEDED == rc) { + // can return operation_succeeded if atomically completed + ret = MPI_SUCCESS; + } else { + ret = opal_pmix_convert_status(rc); + return ompi_instance_print_error ("PMIx_Fence() failed", ret); + } + } else { + OMPI_LAZY_WAIT_FOR_COMPLETION(active); } - OMPI_LAZY_WAIT_FOR_COMPLETION(active); } } diff --git a/ompi/runtime/ompi_mpi_finalize.c b/ompi/runtime/ompi_mpi_finalize.c index ad8a328dc55..08c6efaa616 100644 --- a/ompi/runtime/ompi_mpi_finalize.c +++ b/ompi/runtime/ompi_mpi_finalize.c @@ -24,6 +24,7 @@ * reserved. * Copyright (c) 2020 Amazon.com, Inc. or its affiliates. * All Rights reserved. + * Copyright (c) 2026 Nanook Consulting All rights reserved. * $COPYRIGHT$ * * Additional copyrights may follow @@ -281,14 +282,25 @@ int ompi_mpi_finalize(void) * communications/actions to complete. See * https://github.com/open-mpi/ompi/issues/1576 for the * original bug report. */ - if (PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, NULL, 0, fence_cbfunc, (void*)&active))) { - ret = opal_pmix_convert_status(rc); - OMPI_ERROR_LOG(ret); + rc = PMIx_Fence_nb(NULL, 0, NULL, 0, fence_cbfunc, (void*)&active); + if (PMIX_SUCCESS != rc) { /* Reset the active flag to false, to avoid waiting for * completion when the fence was failed. */ active = false; + // can return operation_succeeded if atomically completed + if (PMIX_OPERATION_SUCCEEDED == rc) { + ret = MPI_SUCCESS; + } else { + ret = opal_pmix_convert_status(rc); + OMPI_ERROR_LOG(ret); + } + } else { + OMPI_LAZY_WAIT_FOR_COMPLETION(active); + /* NOTE: we lose the fence return status here. This can be + * a problem as the fence CAN fail. Might consider retrieving + * the returned status so you can respond if it doesn't + * successfully complete? */ } - OMPI_LAZY_WAIT_FOR_COMPLETION(active); } ompi_mpi_instance_finalize (&ompi_mpi_instance_default); diff --git a/ompi/runtime/ompi_mpi_init.c b/ompi/runtime/ompi_mpi_init.c index c7e61c5bf94..deea53cb02e 100644 --- a/ompi/runtime/ompi_mpi_init.c +++ b/ompi/runtime/ompi_mpi_init.c @@ -26,7 +26,7 @@ * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. * Copyright (c) 2020 Amazon.com, Inc. or its affiliates. * All Rights reserved. - * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2021-2026 Nanook Consulting All rights reserved. * Copyright (c) 2021-2022 Triad National Security, LLC. All rights * reserved. * Copyright (c) 2025 Advanced Micro Devices, Inc. All rights reserved. @@ -464,12 +464,17 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, active = true; OPAL_POST_OBJECT(&active); PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL); - if( PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, NULL, 0, - fence_release, - (void*)&active))) { - ret = opal_pmix_convert_status(rc); - error = "PMIx_Fence_nb() failed"; - goto error; + rc = PMIx_Fence_nb(NULL, 0, NULL, 0, fence_release, (void*)&active); + if (PMIX_SUCCESS != rc) { + active = false; + if (PMIX_OPERATION_SUCCEEDED == rc) { + // can return operation_succeeded if atomically completed + ret = MPI_SUCCESS; + } else { + ret = opal_pmix_convert_status(rc); + error = "PMIx_Fence_nb() failed"; + goto error; + } } } } else { @@ -482,12 +487,19 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &opal_pmix_collect_all_data, PMIX_BOOL); rc = PMIx_Fence_nb(NULL, 0, info, 1, fence_release, (void*)&active); if( PMIX_SUCCESS != rc) { - ret = opal_pmix_convert_status(rc); - error = "PMIx_Fence() failed"; - goto error; + active = false; + if (PMIX_OPERATION_SUCCEEDED == rc) { + // can return operation_succeeded if atomically completed + ret = MPI_SUCCESS; + } else { + ret = opal_pmix_convert_status(rc); + error = "PMIx_Fence_nb() failed"; + goto error; + } + } else { + /* cannot just wait on thread as we need to call opal_progress */ + OMPI_LAZY_WAIT_FOR_COMPLETION(active); } - /* cannot just wait on thread as we need to call opal_progress */ - OMPI_LAZY_WAIT_FOR_COMPLETION(active); } } @@ -537,7 +549,9 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, * we have to wait here for it to complete. However, there * is no reason to do two barriers! */ if (background_fence) { - OMPI_LAZY_WAIT_FOR_COMPLETION(active); + if (active) { + OMPI_LAZY_WAIT_FOR_COMPLETION(active); + } } else if (!ompi_async_mpi_init) { /* wait for everyone to reach this point - this is a hard * barrier requirement at this time, though we hope to relax @@ -546,13 +560,20 @@ int ompi_mpi_init(int argc, char **argv, int requested, int *provided, active = true; OPAL_POST_OBJECT(&active); PMIX_INFO_LOAD(&info[0], PMIX_COLLECT_DATA, &flag, PMIX_BOOL); - if (PMIX_SUCCESS != (rc = PMIx_Fence_nb(NULL, 0, info, 1, - fence_release, (void*)&active))) { - ret = opal_pmix_convert_status(rc); - error = "PMIx_Fence_nb() failed"; - goto error; + rc = PMIx_Fence_nb(NULL, 0, info, 1, fence_release, (void*)&active); + if (PMIX_SUCCESS != rc) { + active = false; + if (PMIX_OPERATION_SUCCEEDED == rc) { + // can return operation_succeeded if atomically completed + ret = MPI_SUCCESS; + } else { + ret = opal_pmix_convert_status(rc); + error = "PMIx_Fence_nb() failed"; + goto error; + } + } else { + OMPI_LAZY_WAIT_FOR_COMPLETION(active); } - OMPI_LAZY_WAIT_FOR_COMPLETION(active); } }