Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
90 changes: 52 additions & 38 deletions config/ompi_check_ucx.m4
Original file line number Diff line number Diff line change
Expand Up @@ -104,44 +104,58 @@ AC_DEFUN([OMPI_CHECK_UCX],[
old_CPPFLAGS="$CPPFLAGS"
AS_IF([test -n "$ompi_check_ucx_dir"],
[CPPFLAGS="$CPPFLAGS -I$ompi_check_ucx_dir/include"])
AC_CHECK_DECLS([ucp_tag_send_nbr],
[AC_DEFINE([HAVE_UCP_TAG_SEND_NBR],[1],
[have ucp_tag_send_nbr()])], [],
[#include <ucp/api/ucp.h>])
AC_CHECK_DECLS([ucp_ep_flush_nb, ucp_worker_flush_nb,
ucp_request_check_status, ucp_put_nb, ucp_get_nb,
ucp_put_nbx, ucp_get_nbx, ucp_atomic_op_nbx],
[], [],
[#include <ucp/api/ucp.h>])
AC_CHECK_DECLS([ucm_test_events,
ucm_test_external_events],
[], [],
[#include <ucm/api/ucm.h>])
AC_CHECK_DECLS([UCP_ATOMIC_POST_OP_AND,
UCP_ATOMIC_POST_OP_OR,
UCP_ATOMIC_POST_OP_XOR,
UCP_ATOMIC_FETCH_OP_FAND,
UCP_ATOMIC_FETCH_OP_FOR,
UCP_ATOMIC_FETCH_OP_FXOR,
UCP_PARAM_FIELD_ESTIMATED_NUM_PPN],
[], [],
[#include <ucp/api/ucp.h>])
AC_CHECK_DECLS([UCP_WORKER_ATTR_FIELD_ADDRESS_FLAGS],
[AC_DEFINE([HAVE_UCP_WORKER_ADDRESS_FLAGS], [1],
[have worker address attribute])], [],
[#include <ucp/api/ucp.h>])
AC_CHECK_DECLS([UCP_ATTR_FIELD_MEMORY_TYPES],
[AC_DEFINE([HAVE_UCP_ATTR_MEMORY_TYPES], [1],
[have memory types attribute])], [],
[#include <ucp/api/ucp.h>])
AC_CHECK_DECLS([ucp_tag_send_nbx,
ucp_tag_send_sync_nbx,
ucp_tag_recv_nbx],
[], [],
[#include <ucp/api/ucp.h>])
AC_CHECK_TYPES([ucp_request_param_t],
[], [],
[[#include <ucp/api/ucp.h>]])
# Disable UCX support when the UCX headers report version 1.8.x, due to issue #8321
AC_MSG_CHECKING([UCX version])
AC_PREPROC_IFELSE([AC_LANG_PROGRAM([#include <ucp/api/ucp_version.h>
#if (UCP_API_MAJOR == 1) && (UCP_API_MINOR == 8)
#error "Invalid version"
#endif], [])],
[AC_MSG_RESULT([ok (not 1.8.x)])],
[AC_MSG_RESULT([bad (1.8.x)])
AC_MSG_WARN([UCX support skipped because version 1.8.x was found, which has a known catastrophic issue.])
AC_MSG_WARN([Please upgrade to UCX version 1.9 or higher.])
ompi_check_ucx_happy=no])
AS_IF([test "$ompi_check_ucx_happy" = yes],
[
AC_CHECK_DECLS([ucp_tag_send_nbr],
[AC_DEFINE([HAVE_UCP_TAG_SEND_NBR],[1],
[have ucp_tag_send_nbr()])], [],
[#include <ucp/api/ucp.h>])
AC_CHECK_DECLS([ucp_ep_flush_nb, ucp_worker_flush_nb,
ucp_request_check_status, ucp_put_nb, ucp_get_nb,
ucp_put_nbx, ucp_get_nbx, ucp_atomic_op_nbx],
[], [],
[#include <ucp/api/ucp.h>])
AC_CHECK_DECLS([ucm_test_events,
ucm_test_external_events],
[], [],
[#include <ucm/api/ucm.h>])
AC_CHECK_DECLS([UCP_ATOMIC_POST_OP_AND,
UCP_ATOMIC_POST_OP_OR,
UCP_ATOMIC_POST_OP_XOR,
UCP_ATOMIC_FETCH_OP_FAND,
UCP_ATOMIC_FETCH_OP_FOR,
UCP_ATOMIC_FETCH_OP_FXOR,
UCP_PARAM_FIELD_ESTIMATED_NUM_PPN],
[], [],
[#include <ucp/api/ucp.h>])
AC_CHECK_DECLS([UCP_WORKER_ATTR_FIELD_ADDRESS_FLAGS],
[AC_DEFINE([HAVE_UCP_WORKER_ADDRESS_FLAGS], [1],
[have worker address attribute])], [],
[#include <ucp/api/ucp.h>])
AC_CHECK_DECLS([UCP_ATTR_FIELD_MEMORY_TYPES],
[AC_DEFINE([HAVE_UCP_ATTR_MEMORY_TYPES], [1],
[have memory types attribute])], [],
[#include <ucp/api/ucp.h>])
AC_CHECK_DECLS([ucp_tag_send_nbx,
ucp_tag_send_sync_nbx,
ucp_tag_recv_nbx],
[], [],
[#include <ucp/api/ucp.h>])
AC_CHECK_TYPES([ucp_request_param_t],
[], [],
[[#include <ucp/api/ucp.h>]])
])
CPPFLAGS=$old_CPPFLAGS

OPAL_SUMMARY_ADD([[Transports]],[[Open UCX]],[$1],[$ompi_check_ucx_happy])])])
Expand Down
21 changes: 16 additions & 5 deletions ompi/mca/pml/ucx/pml_ucx.c
Original file line number Diff line number Diff line change
Expand Up @@ -186,12 +186,23 @@ static int mca_pml_ucx_recv_worker_address(ompi_proc_t *proc,

int mca_pml_ucx_open(void)
{
unsigned major_version, minor_version, release_number;
ucp_context_attr_t attr;
ucp_params_t params;
ucp_config_t *config;
ucs_status_t status;

PML_UCX_VERBOSE(1, "mca_pml_ucx_open");
/* Query the run-time UCX library version and log it */
ucp_get_version(&major_version, &minor_version, &release_number);
PML_UCX_VERBOSE(1, "mca_pml_ucx_open: UCX version %u.%u.%u",
major_version, minor_version, release_number);

if ((major_version == 1) && (minor_version == 8)) {
/* UCX PML is disabled when the run-time UCX version is 1.8.x, due to issue #8321 */
PML_UCX_VERBOSE(1, "UCX PML is disabled because the run-time UCX version "
"is 1.8, which has a known catastrophic issue");
return OMPI_ERROR;
}

/* Read options */
status = ucp_config_read("MPI", NULL, &config);
Expand Down Expand Up @@ -694,7 +705,7 @@ int mca_pml_ucx_isend_init(const void *buf, size_t count, ompi_datatype_t *datat
}

static ucs_status_ptr_t
mca_pml_ucx_bsend(ucp_ep_h ep, const void *buf, size_t count,
mca_pml_ucx_bsend(ucp_ep_h ep, const void *buf, size_t count,
ompi_datatype_t *datatype, uint64_t pml_tag)
{
ompi_request_t *req;
Expand All @@ -717,7 +728,7 @@ mca_pml_ucx_bsend(ucp_ep_h ep, const void *buf, size_t count,
PML_UCX_ERROR("bsend: failed to allocate buffer");
return UCS_STATUS_PTR(OMPI_ERROR);
}

iov_count = 1;
iov.iov_base = packed_data;
iov.iov_len = packed_length;
Expand Down Expand Up @@ -805,8 +816,8 @@ int mca_pml_ucx_isend(const void *buf, size_t count, ompi_datatype_t *datatype,
ompi_request_t *req;
ucp_ep_h ep;

PML_UCX_TRACE_SEND("i%ssend request *%p",
buf, count, datatype, dst, tag, mode, comm,
PML_UCX_TRACE_SEND("i%ssend request *%p",
buf, count, datatype, dst, tag, mode, comm,
mode == MCA_PML_BASE_SEND_BUFFERED ? "b" : "",
(void*)request)

Expand Down