3636#include "opal/util/show_help.h"
3737
3838opal_common_ofi_module_t opal_common_ofi = {.prov_include = NULL ,
39- .prov_exclude = NULL ,
40- .registered = 0 ,
41- .verbose = 0 };
42-
39+ .prov_exclude = NULL ,
40+ .output = -1 };
4341static const char default_prov_exclude_list [] = "shm,sockets,tcp,udp,rstream,usnic" ;
4442static opal_mutex_t opal_common_ofi_mutex = OPAL_MUTEX_STATIC_INIT ;
45- static bool opal_common_ofi_initialized = false;
4643static int opal_common_ofi_init_ref_cnt = 0 ;
44+ static bool opal_common_ofi_installed_memory_monitor = false;
4745
4846#ifdef HAVE_STRUCT_FI_OPS_MEM_MONITOR
4947
5048/*
51- * These no-op functions are necessary since libfabric does not allow null
52- * function pointers here.
49+ * Monitor object to inject into Libfabric to provide memory release
50+ * notifications using our own memory hooks framework. Monitors may
51+ * use the subscribe/unsubscribe notifications to reduce unnecessary
52+ * notifications, but are not required to do so. Because patcher
53+ * notifies about all releases, it is cheaper for us to not filter and
54+ * this monitor can safely ignore subscribe/unsubscribe notifications.
55+ *
56+ * Libfabric requires the object to be fully defined. Unlike most of
57+ * Open MPI, it does not have NULL function pointer checks in calling
58+ * code.
5359 */
5460static int opal_common_ofi_monitor_start (struct fid_mem_monitor * monitor )
5561{
@@ -80,7 +86,7 @@ static bool opal_common_ofi_monitor_valid(struct fid_mem_monitor *monitor,
8086}
8187
8288static struct fid_mem_monitor * opal_common_ofi_monitor ;
83- static struct fid * opal_common_ofi_cache_fid ;
89+ static struct fid * opal_common_ofi_cache_fid = NULL ;
8490static struct fi_ops_mem_monitor opal_common_ofi_export_ops = {
8591 .size = sizeof (struct fi_ops_mem_monitor ),
8692 .start = opal_common_ofi_monitor_start ,
@@ -90,6 +96,12 @@ static struct fi_ops_mem_monitor opal_common_ofi_export_ops = {
9096 .valid = opal_common_ofi_monitor_valid ,
9197};
9298
99+ /**
100+ * Callback function from Open MPI memory monitor
101+ *
102+ * Translation function between the callback function from Open MPI's
103+ * memory notifier to the Libfabric memory monitor.
104+ */
93105static void opal_common_ofi_mem_release_cb (void * buf , size_t length ,
94106 void * cbdata , bool from_alloc )
95107{
@@ -99,87 +111,131 @@ static void opal_common_ofi_mem_release_cb(void *buf, size_t length,
99111
100112#endif /* HAVE_STRUCT_FI_OPS_MEM_MONITOR */
101113
102- int opal_common_ofi_open (void )
114+ int opal_common_ofi_inject_memory_monitor (void )
103115{
104- int ret ;
116+ int ret = - FI_ENOSYS ;
105117
106- opal_common_ofi_init_ref_cnt ++ ;
107- if (opal_common_ofi_initialized ) {
108- return OPAL_SUCCESS ;
109- }
110118#ifdef HAVE_STRUCT_FI_OPS_MEM_MONITOR
119+ uint32_t version ;
111120
112- mca_base_framework_open (& opal_memory_base_framework , 0 );
121+ OPAL_THREAD_LOCK (& opal_common_ofi_mutex );
122+
123+ if (opal_common_ofi_cache_fid != NULL ) {
124+ return 0 ;
125+ }
126+
127+ /*
128+ * While the memory import functionality was introduced in 1.13,
129+ * some deadlock bugs exist in the 1.13 series. Require version
130+ * 1.14 before this code is activated. Not activating the code
131+ * should not break any functionality directly, but may lead to
132+ * sub-optimal memory monitors being used in Libfabric, as Open
133+ * MPI will almost certainly install a patcher first.
134+ */
135+ version = fi_version ();
136+ if (FI_VERSION_LT (version , FI_VERSION (1 , 14 ))) {
137+ ret = - FI_ENOSYS ;
138+ goto err ;
139+ }
140+
141+ ret = mca_base_framework_open (& opal_memory_base_framework , 0 );
142+ if (OPAL_SUCCESS != ret ) {
143+ ret = - FI_ENOSYS ;
144+ goto err ;
145+ }
113146 if ((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT )
114147 != (((OPAL_MEMORY_FREE_SUPPORT | OPAL_MEMORY_MUNMAP_SUPPORT ))
115148 & opal_mem_hooks_support_level ())) {
116- return OPAL_SUCCESS ;
149+ ret = - FI_ENOSYS ;
150+ goto err ;
117151 }
118152
119153 /*
120- * This cache object doesn't do much, but is necessary for the API to work.
121- * It is required to call the fi_import_fid API. This API was introduced in
122- * libfabric version 1.13.0 and "mr_cache" is a "well known" name (documented
123- * in libfabric) to indicate the type of object that we are trying to open.
154+ * The monitor inject object has the well known name "mr_cache"
155+ * and was introduced in Libfabric 1.13
124156 */
125- ret = fi_open (FI_VERSION (1 ,13 ), "mr_cache" , NULL , 0 , 0 , & opal_common_ofi_cache_fid , NULL );
126- if (ret ) {
157+ ret = fi_open (FI_VERSION (1 ,13 ), "mr_cache" , NULL , 0 , 0 ,
158+ & opal_common_ofi_cache_fid , NULL );
159+ if (0 != ret ) {
127160 goto err ;
128161 }
129162
130163 opal_common_ofi_monitor = calloc (1 , sizeof (* opal_common_ofi_monitor ));
131- if (!opal_common_ofi_monitor ) {
164+ if (NULL == opal_common_ofi_monitor ) {
165+ ret = - FI_ENOMEM ;
132166 goto err ;
133167 }
134168
135169 opal_common_ofi_monitor -> fid .fclass = FI_CLASS_MEM_MONITOR ;
136170 opal_common_ofi_monitor -> export_ops = & opal_common_ofi_export_ops ;
137- /*
138- * This import_fid call must occur before the libfabric provider creates
139- * its memory registration cache. This will typically occur during domain
140- * open as it is a domain level object. We put it early in initialization
141- * to guarantee this and share the import monitor between the ofi btl
142- * and ofi mtl.
143- */
144- ret = fi_import_fid (opal_common_ofi_cache_fid , & opal_common_ofi_monitor -> fid , 0 );
145- if (ret ) {
171+ ret = fi_import_fid (opal_common_ofi_cache_fid ,
172+ & opal_common_ofi_monitor -> fid , 0 );
173+ if (0 != ret ) {
146174 goto err ;
147175 }
148176 opal_mem_hooks_register_release (opal_common_ofi_mem_release_cb , NULL );
149- opal_common_ofi_initialized = true;
177+ opal_common_ofi_installed_memory_monitor = true;
178+
179+ ret = 0 ;
150180
151- return OPAL_SUCCESS ;
152181err :
153- if (opal_common_ofi_cache_fid ) {
154- fi_close (opal_common_ofi_cache_fid );
155- }
156- if (opal_common_ofi_monitor ) {
157- free (opal_common_ofi_monitor );
182+ if (0 != ret ) {
183+ if (opal_common_ofi_cache_fid ) {
184+ fi_close (opal_common_ofi_cache_fid );
185+ }
186+ if (opal_common_ofi_monitor ) {
187+ free (opal_common_ofi_monitor );
188+ }
158189 }
159190
160- return OPAL_ERROR ;
161- #else
162- opal_common_ofi_initialized = true;
163- return OPAL_SUCCESS ;
191+ OPAL_THREAD_UNLOCK (& opal_common_ofi_mutex );
164192#endif
193+
194+ return ret ;
165195}
166196
167- int opal_common_ofi_close (void )
197+ static int opal_common_ofi_remove_memory_monitor (void )
168198{
169- if (opal_common_ofi_initialized && !-- opal_common_ofi_init_ref_cnt ) {
170199#ifdef HAVE_STRUCT_FI_OPS_MEM_MONITOR
200+ if (opal_common_ofi_installed_memory_monitor ) {
171201 opal_mem_hooks_unregister_release (opal_common_ofi_mem_release_cb );
172202 fi_close (opal_common_ofi_cache_fid );
173203 fi_close (& opal_common_ofi_monitor -> fid );
174204 free (opal_common_ofi_monitor );
205+ opal_common_ofi_installed_memory_monitor = false;
206+ }
175207#endif
176- opal_common_ofi_initialized = false;
208+
209+ return OPAL_SUCCESS ;
210+ }
211+
212+ int opal_common_ofi_open (void )
213+ {
214+ if ((opal_common_ofi_init_ref_cnt ++ ) > 0 ) {
215+ return OPAL_SUCCESS ;
177216 }
178217
179- opal_common_ofi .registered -- ;
180- assert (opal_common_ofi .registered >= 0 );
181- if (opal_common_ofi .registered == 0 ) {
182- opal_output_close (opal_common_ofi .output );
218+ return OPAL_SUCCESS ;
219+ }
220+
221+ int opal_common_ofi_close (void )
222+ {
223+ int ret ;
224+
225+ if ((-- opal_common_ofi_init_ref_cnt ) > 0 ) {
226+ return OPAL_SUCCESS ;
227+ }
228+
229+ ret = opal_common_ofi_remove_memory_monitor ();
230+ if (ret != OPAL_SUCCESS ) {
231+ return ret ;
232+ }
233+
234+ if (opal_common_ofi .output != -1 ) {
235+ opal_output_close (opal_common_ofi .output );
236+ if (ret != OPAL_SUCCESS ) {
237+ return ret ;
238+ }
183239 }
184240
185241 return OPAL_SUCCESS ;
@@ -209,18 +265,13 @@ int opal_common_ofi_mca_register(const mca_base_component_t *component)
209265 static int include_index ;
210266 static int exclude_index ;
211267 static int verbose_index ;
268+ int verbose ;
212269 int param ;
213270
214271 if (fi_version () < FI_VERSION (1 , 0 )) {
215272 return OPAL_ERROR ;
216273 }
217274
218- opal_common_ofi .registered ++ ;
219- if (opal_common_ofi .registered == 1 ) {
220- opal_common_ofi .output = opal_output_open (NULL );
221- opal_output_set_verbosity (opal_common_ofi .output , opal_common_ofi .verbose );
222- }
223-
224275 OPAL_THREAD_LOCK (& opal_common_ofi_mutex );
225276
226277 param = mca_base_var_find ("opal" , "opal_common" , "ofi" , "provider_include" );
@@ -272,7 +323,7 @@ int opal_common_ofi_mca_register(const mca_base_component_t *component)
272323 MCA_BASE_VAR_TYPE_INT , NULL , 0 ,
273324 MCA_BASE_VAR_FLAG_SETTABLE , OPAL_INFO_LVL_3 ,
274325 MCA_BASE_VAR_SCOPE_LOCAL ,
275- & opal_common_ofi . verbose );
326+ & verbose );
276327 } else {
277328 verbose_index = param ;
278329 }
@@ -289,6 +340,11 @@ int opal_common_ofi_mca_register(const mca_base_component_t *component)
289340 "verbose" , 0 );
290341 }
291342
343+ if (opal_common_ofi .output == -1 ) {
344+ opal_common_ofi .output = opal_output_open (NULL );
345+ opal_output_set_verbosity (opal_common_ofi .output , verbose );
346+ }
347+
292348 OPAL_THREAD_UNLOCK (& opal_common_ofi_mutex );
293349
294350 return OPAL_SUCCESS ;
0 commit comments