From 2a53a9767b78b5d908d5193f914b63c0508938cf Mon Sep 17 00:00:00 2001 From: William Zhang Date: Mon, 16 Aug 2021 16:58:57 -0700 Subject: [PATCH 1/2] v4.1.x: common/ofi: Utilize new libfabric API to import memhooks monitor This patch attempts to open up libfabric resources in order to notify libfabric when our memhooks patcher intercepts free calls. Signed-off-by: William Zhang (cherry picked from commit 25811e2be8ce841066fa6f4c46c594791dd92d29) --- config/opal_check_ofi.m4 | 14 ++++ ompi/mca/mtl/ofi/mtl_ofi_component.c | 6 +- opal/mca/btl/ofi/btl_ofi_component.c | 5 +- opal/mca/common/ofi/common_ofi.c | 116 +++++++++++++++++++++++++++ opal/mca/common/ofi/common_ofi.h | 35 ++++++++ 5 files changed, 171 insertions(+), 5 deletions(-) diff --git a/config/opal_check_ofi.m4 b/config/opal_check_ofi.m4 index ae90ad1dc5a..f0dbf6c28b4 100644 --- a/config/opal_check_ofi.m4 +++ b/config/opal_check_ofi.m4 @@ -3,6 +3,8 @@ dnl dnl Copyright (c) 2015-2020 Cisco Systems, Inc. All rights reserved. dnl Copyright (c) 2016-2017 Los Alamos National Security, LLC. All rights dnl reserved. +dnl Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All rights +dnl reserved. 
dnl $COPYRIGHT$ dnl dnl Additional copyrights may follow @@ -155,6 +157,18 @@ AC_DEFUN([_OPAL_CHECK_OFI],[ [AC_MSG_WARN([OFI libfabric support requested (via --with-ofi or --with-libfabric), but not found.]) AC_MSG_ERROR([Cannot continue.])]) ]) + opal_ofi_import_monitor=no + AS_IF([test $opal_ofi_happy = "yes"], + [OPAL_CHECK_OFI_VERSION_GE([1,13], + [opal_ofi_import_monitor=yes], + [opal_ofi_import_monitor=no])]) + + +if test "$opal_ofi_import_monitor" = "yes"; then + AC_DEFINE_UNQUOTED([OPAL_OFI_IMPORT_MONITOR_SUPPORT],1, + [Whether libfabric supports monitor import]) +fi + ])dnl diff --git a/ompi/mca/mtl/ofi/mtl_ofi_component.c b/ompi/mca/mtl/ofi/mtl_ofi_component.c index 2dde406c154..5b34a4f2682 100644 --- a/ompi/mca/mtl/ofi/mtl_ofi_component.c +++ b/ompi/mca/mtl/ofi/mtl_ofi_component.c @@ -5,7 +5,7 @@ * Copyright (c) 2014-2021 Cisco Systems, Inc. All rights reserved * Copyright (c) 2015-2016 Los Alamos National Security, LLC. All rights * reserved. - * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2018-2021 Amazon.com, Inc. or its affiliates. All Rights reserved. * Copyright (c) 2020 Triad National Security, LLC. All rights * reserved. * $COPYRIGHT$ @@ -282,8 +282,7 @@ ompi_mtl_ofi_component_open(void) "provider_exclude")) { return OMPI_ERR_NOT_AVAILABLE; } - - return OMPI_SUCCESS; + return opal_common_ofi_init(); } static int @@ -298,6 +297,7 @@ static int ompi_mtl_ofi_component_close(void) { opal_common_ofi_mca_deregister(); + opal_common_ofi_fini(); return OMPI_SUCCESS; } diff --git a/opal/mca/btl/ofi/btl_ofi_component.c b/opal/mca/btl/ofi/btl_ofi_component.c index 36dca4634ff..b929aef02e1 100644 --- a/opal/mca/btl/ofi/btl_ofi_component.c +++ b/opal/mca/btl/ofi/btl_ofi_component.c @@ -14,7 +14,7 @@ * reserved. * Copyright (c) 2018 Intel, Inc, All rights reserved * - * Copyright (c) 2018 Amazon.com, Inc. or its affiliates. All Rights reserved. + * Copyright (c) 2018-2021 Amazon.com, Inc. or its affiliates. 
All Rights reserved. * Copyright (c) 2020 Triad National Security, LLC. All rights * reserved. * $COPYRIGHT$ @@ -219,7 +219,7 @@ static int mca_btl_ofi_component_register(void) static int mca_btl_ofi_component_open(void) { mca_btl_ofi_component.module_count = 0; - return OPAL_SUCCESS; + return opal_common_ofi_init(); } /* @@ -228,6 +228,7 @@ static int mca_btl_ofi_component_open(void) static int mca_btl_ofi_component_close(void) { opal_common_ofi_mca_deregister(); + opal_common_ofi_fini(); /* If we don't sleep, sockets provider freaks out. Ummm this is a scary comment */ sleep(1); return OPAL_SUCCESS; diff --git a/opal/mca/common/ofi/common_ofi.c b/opal/mca/common/ofi/common_ofi.c index 769ecb75239..da43e10e7a6 100644 --- a/opal/mca/common/ofi/common_ofi.c +++ b/opal/mca/common/ofi/common_ofi.c @@ -8,6 +8,8 @@ * and Technology (RIST). All rights reserved. * Copyright (c) 2020-2021 Cisco Systems, Inc. All rights reserved * Copyright (c) 2021 Nanook Consulting. All rights reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All rights + * reserved. 
* $COPYRIGHT$ * * Additional copyrights may follow @@ -38,6 +40,120 @@ OPAL_DECLSPEC opal_common_ofi_module_t opal_common_ofi = { }; static const char default_prov_exclude_list[] = "shm,sockets,tcp,udp,rstream,usnic"; +static bool opal_common_ofi_initialized = false; +static int opal_common_ofi_init_ref_cnt = 0; + +#if OPAL_OFI_IMPORT_MONITOR_SUPPORT + +static int opal_common_ofi_monitor_start(struct fid_mem_monitor *monitor) +{ + return 0; +} +static void opal_common_ofi_monitor_stop(struct fid_mem_monitor *monitor) +{ + return; +} +static int opal_common_ofi_monitor_subscribe(struct fid_mem_monitor *monitor, + const void *addr, size_t len) +{ + return 0; +} +static void opal_common_ofi_monitor_unsubscribe(struct fid_mem_monitor *monitor, + const void *addr, size_t len) +{ + return; +} +static bool opal_common_ofi_monitor_valid(struct fid_mem_monitor *monitor, + const void *addr, size_t len) +{ + return true; +} + +static struct fid_mem_monitor *opal_common_ofi_monitor; +static struct fid *opal_common_ofi_cache_fid; +static struct fi_ops_mem_monitor opal_common_ofi_export_ops = { + .size = sizeof(struct fi_ops_mem_monitor), + .start = opal_common_ofi_monitor_start, + .stop = opal_common_ofi_monitor_stop, + .subscribe = opal_common_ofi_monitor_subscribe, + .unsubscribe = opal_common_ofi_monitor_unsubscribe, + .valid = opal_common_ofi_monitor_valid, +}; + +OPAL_DECLSPEC void opal_common_ofi_mem_release_cb(void *buf, size_t length, + void *cbdata, bool from_alloc) +{ + opal_common_ofi_monitor->import_ops->notify(opal_common_ofi_monitor, + buf, length); +} +#endif /* OPAL_OFI_IMPORT_MONITOR_SUPPORT */ + +OPAL_DECLSPEC int opal_common_ofi_init(void) +{ + int ret; + + opal_common_ofi_init_ref_cnt++; + if (opal_common_ofi_initialized) { + return OPAL_SUCCESS; + } +#if OPAL_OFI_IMPORT_MONITOR_SUPPORT + + mca_base_framework_open(&opal_memory_base_framework, 0); + if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT) + != (((OPAL_MEMORY_FREE_SUPPORT | 
OPAL_MEMORY_MUNMAP_SUPPORT)) + & opal_mem_hooks_support_level())) { + return OPAL_SUCCESS; + } + + ret = fi_open(FI_VERSION(1,13), "mr_cache", NULL, 0, 0, &opal_common_ofi_cache_fid, NULL); + if (ret) { + goto err; + } + + opal_common_ofi_monitor = calloc(1, sizeof(*opal_common_ofi_monitor)); + if (!opal_common_ofi_monitor) { + goto err; + } + + opal_common_ofi_monitor->fid.fclass = FI_CLASS_MEM_MONITOR; + opal_common_ofi_monitor->export_ops = &opal_common_ofi_export_ops; + ret = fi_import_fid(opal_common_ofi_cache_fid, &opal_common_ofi_monitor->fid, 0); + if (ret) { + goto err; + } + opal_mem_hooks_register_release(opal_common_ofi_mem_release_cb, NULL); + opal_common_ofi_initialized = true; + + return OPAL_SUCCESS; +err: + if (opal_common_ofi_cache_fid) { + fi_close(opal_common_ofi_cache_fid); + } + if (opal_common_ofi_monitor) { + free(opal_common_ofi_monitor); + } + + return OPAL_ERROR; +#else + opal_common_ofi_initialized = true; + return OPAL_SUCCESS; +#endif +} + +OPAL_DECLSPEC int opal_common_ofi_fini(void) +{ + if (opal_common_ofi_initialized && !--opal_common_ofi_init_ref_cnt) { +#if OPAL_OFI_IMPORT_MONITOR_SUPPORT + opal_mem_hooks_unregister_release(opal_common_ofi_mem_release_cb); + fi_close(opal_common_ofi_cache_fid); + fi_close(&opal_common_ofi_monitor->fid); + free(opal_common_ofi_monitor); +#endif + opal_common_ofi_initialized = false; + } + + return OPAL_SUCCESS; +} OPAL_DECLSPEC int opal_common_ofi_is_in_list(char **list, char *item) { diff --git a/opal/mca/common/ofi/common_ofi.h b/opal/mca/common/ofi/common_ofi.h index d132a8d3402..b63d381886f 100644 --- a/opal/mca/common/ofi/common_ofi.h +++ b/opal/mca/common/ofi/common_ofi.h @@ -5,6 +5,8 @@ * reserved. * Copyright (c) 2020 Triad National Security, LLC. All rights * reserved. + * Copyright (c) 2021 Amazon.com, Inc. or its affiliates. All rights + * reserved. 
* * $COPYRIGHT$ * @@ -20,7 +22,11 @@ #include "opal/mca/base/mca_base_var.h" #include "opal/mca/base/mca_base_framework.h" #include "opal/util/proc.h" +#include "opal/memoryhooks/memory.h" #include +#if OPAL_OFI_IMPORT_MONITOR_SUPPORT +#include +#endif BEGIN_C_DECLS @@ -33,6 +39,7 @@ typedef struct opal_common_ofi_module { } opal_common_ofi_module_t; extern opal_common_ofi_module_t opal_common_ofi; +extern mca_base_framework_t opal_memory_base_framework; OPAL_DECLSPEC int opal_common_ofi_register_mca_variables(const mca_base_component_t *component); OPAL_DECLSPEC void opal_common_ofi_mca_register(void); @@ -54,6 +61,34 @@ OPAL_DECLSPEC void opal_common_ofi_mca_deregister(void); */ OPAL_DECLSPEC int opal_common_ofi_is_in_list(char **list, char *item); +#if OPAL_OFI_IMPORT_MONITOR_SUPPORT +/* + * @param buf (IN) Pointer to the start of the allocation + * @param length (IN) Length of the allocation + * @param cbdata (IN) Data passed to memory hooks when callback + * was registered + * @param from_alloc (IN) True if the callback is caused by a call to the + * general allocation routines (malloc, calloc, free, + * etc.) or directly from the user (mmap, munmap, etc.) + * + * Callback function triggered when memory is about to be freed. + * The callback will be triggered according to the note in + * opal_mem_hooks_register_release(). 
+ * + */ +OPAL_DECLSPEC void opal_common_ofi_mem_release_cb(void *buf, size_t length, void *cbdata, bool from_alloc); +#endif /* OPAL_OFI_IMPORT_MONITOR_SUPPORT */ + +/* + * Initializes common objects for libfabric + */ +OPAL_DECLSPEC int opal_common_ofi_init(void); + +/* + * Cleans up common objects for libfabric + */ +OPAL_DECLSPEC int opal_common_ofi_fini(void); + END_C_DECLS struct fi_info* opal_mca_common_ofi_select_provider(struct fi_info *provider_list, From 52ffd38a6dbcade40c0c327c1543e0b7cb8d425a Mon Sep 17 00:00:00 2001 From: William Zhang Date: Thu, 26 Aug 2021 14:05:35 -0700 Subject: [PATCH 2/2] v4.1.x: common/ofi: Disable new monitor API until libfabric 1.14.0 There are known issues with the API in libfabric 1.13.0 which will guarantee segfaults when used. These issues are fixed in libfabric 1.13.1, but we do not have a way to detect which patch version of libfabric is used. Thus, delay the usage of the API until the subsequent minor release. Signed-off-by: William Zhang (cherry picked from commit 190feba613fd6fb621550b7f097fb2b5672a5b98) --- config/opal_check_ofi.m4 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/opal_check_ofi.m4 b/config/opal_check_ofi.m4 index f0dbf6c28b4..459dead27ce 100644 --- a/config/opal_check_ofi.m4 +++ b/config/opal_check_ofi.m4 @@ -159,7 +159,7 @@ AC_DEFUN([_OPAL_CHECK_OFI],[ ]) opal_ofi_import_monitor=no AS_IF([test $opal_ofi_happy = "yes"], - [OPAL_CHECK_OFI_VERSION_GE([1,13], + [OPAL_CHECK_OFI_VERSION_GE([1,14], [opal_ofi_import_monitor=yes], [opal_ofi_import_monitor=no])])