From abf2cca7cbb2f141c8e32fd751d0d3b47a0fe8b2 Mon Sep 17 00:00:00 2001 From: Joshua Hursey Date: Fri, 16 Dec 2016 13:22:37 -0500 Subject: [PATCH 1/3] osc/pt2pt: Previous locks must complete in order * To prevent deadlock force all previously requested locks to complete before starting a new lock. Otherwise this can lead to deadlock if the locks are processes in arbitrary order. Signed-off-by: Mark Allen (cherry picked from commit 20b4618eeec594ee6328595e20174ca2bebaf663) --- ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c | 24 ++++++++++++++++++- 1 file changed, 23 insertions(+), 1 deletion(-) diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c b/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c index 091757511f3..e11eb8a795a 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_passive_target.c @@ -272,6 +272,9 @@ static int ompi_osc_pt2pt_lock_internal (int lock_type, int target, int assert, ompi_osc_pt2pt_module_t *module = GET_MODULE(win); ompi_osc_pt2pt_sync_t *lock; int ret = OMPI_SUCCESS; + ompi_osc_pt2pt_sync_t *otherlock = NULL; + int target_key; + void *iter_hash_node = NULL; /* Check if no_locks is set. TODO: we also need to track whether we are in an * active target epoch. Fence can make this tricky to track. */ @@ -345,6 +348,25 @@ static int ompi_osc_pt2pt_lock_internal (int lock_type, int target, int assert, return OMPI_ERR_RMA_CONFLICT; } + /* All previously requested locks must be complete before we can start a new + * lock, otherwise we deadlock from mis-ordering of locks. + */ + ret = opal_hash_table_get_first_key_uint32(&module->outstanding_locks, + (uint32_t *) &target_key, + (void **) &otherlock, + &iter_hash_node); + while( OPAL_SUCCESS == ret ) { + if( NULL != otherlock ) { + ompi_osc_pt2pt_sync_wait_expected (otherlock); + } + + ret = opal_hash_table_get_next_key_uint32(&module->outstanding_locks, + (uint32_t *) &target_key, + (void **) &otherlock, + iter_hash_node, &iter_hash_node); + } + ret = OPAL_SUCCESS; + ++module->passive_target_access_epoch; ompi_osc_pt2pt_module_lock_insert (module, lock); @@ -596,7 +618,7 @@ int ompi_osc_pt2pt_flush_all (struct ompi_win_t *win) } ret = opal_hash_table_get_next_key_uint32 (&module->outstanding_locks, (uint32_t *) &target, - (void **) lock, node, &node); + (void **) &lock, node, &node); if (OPAL_SUCCESS != ret) { ret = OPAL_SUCCESS; break; From 0aaf11418e4e6d6d0b4c07013f35f79759fa46e3 Mon Sep 17 00:00:00 2001 From: Joshua Hursey Date: Fri, 16 Dec 2016 15:40:32 -0500 Subject: [PATCH 2/3] osc/pt2pt: Accumulation lock, not module lock when appending * The `opal_list_remove_first` is protected by the accumulation lock not the module lock. So make sure to use the same lock to protect the `opal_list_append` to prevent current access, which can cause a segv and/or lost enties in the list. Signed-off-by: Joshua Hursey (cherry picked from commit 130d196024cabd672b15bba9bec636828ae7f6e6) --- ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c b/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c index 1342c01a695..67fe27d2b62 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_data_move.c @@ -753,7 +753,9 @@ static int ompi_osc_pt2pt_acc_op_queue (ompi_osc_pt2pt_module_t *module, ompi_os } /* add to the pending acc queue */ - OPAL_THREAD_SCOPED_LOCK(&module->pending_acc_lock, opal_list_append (&module->pending_acc, &pending_acc->super)); + ompi_osc_pt2pt_accumulate_lock(module); + opal_list_append (&module->pending_acc, &pending_acc->super); + ompi_osc_pt2pt_accumulate_unlock(module); return OMPI_SUCCESS; } From 334049dcdb319ce1630bc49ce96dde03bce7a971 Mon Sep 17 00:00:00 2001 From: Joshua Hursey Date: Fri, 16 Dec 2016 15:44:25 -0500 Subject: [PATCH 3/3] osc/pt2pt: Protect the sync structure during start * The `ompi_osc_pt2pt_sync_pscw_peer` call needs ot be protected when called, and the `sync` structure need to be protected in the `start` function. - This prevents the `ompi_osc_pt2pt_sync_pscw_peer` function from processing while `start` is also in progress on the same window in two different threads (e.g., `progress` and `main` thread) - This seems to happen when the 'main' thread is part way through the `start` function then the progress thread starts processing the `post` message received from another peer for this window. Both functions try to access the `peer_list` portion of the structure and a NULL is stepped on in the `ompi_osc_pt2pt_sync_pscw_peer` function. - This patch locks the `sync` structure for the duration of the `start` and ensures that whenever `ompi_osc_pt2pt_sync_pscw_peer` is called the caller is holding the `sync->lock`. This provides exclusivity to this structure ensuring that the latter function sees a fully updated structure. Signed-off-by: Joshua Hursey (cherry picked from commit ab49b8ac852b982c9de4ece45fad673e46715a94) --- ompi/mca/osc/pt2pt/osc_pt2pt.h | 3 +++ ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt.h b/ompi/mca/osc/pt2pt/osc_pt2pt.h index 4b1a423ded1..4034bf6ef39 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt.h +++ b/ompi/mca/osc/pt2pt/osc_pt2pt.h @@ -918,11 +918,14 @@ static inline ompi_osc_pt2pt_sync_t *ompi_osc_pt2pt_module_sync_lookup (ompi_osc return &module->all_sync; case OMPI_OSC_PT2PT_SYNC_TYPE_PSCW: + OPAL_THREAD_LOCK(&module->all_sync.lock); if (ompi_osc_pt2pt_sync_pscw_peer (module, target, peer)) { + OPAL_THREAD_UNLOCK(&module->all_sync.lock); OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output, "osc/pt2pt: found PSCW access epoch target for %d", target)); return &module->all_sync; } + OPAL_THREAD_UNLOCK(&module->all_sync.lock); } return NULL; diff --git a/ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c b/ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c index 33df9440a62..11db0df092d 100644 --- a/ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c +++ b/ompi/mca/osc/pt2pt/osc_pt2pt_active_target.c @@ -213,11 +213,13 @@ int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win) ompi_osc_pt2pt_module_t *module = GET_MODULE(win); ompi_osc_pt2pt_sync_t *sync = &module->all_sync; + OPAL_THREAD_LOCK(&module->lock); OPAL_THREAD_LOCK(&sync->lock); /* check if we are already in an access epoch */ if (ompi_osc_pt2pt_access_epoch_active (module)) { OPAL_THREAD_UNLOCK(&sync->lock); + OPAL_THREAD_UNLOCK(&module->lock); return OMPI_ERR_RMA_SYNC; } @@ -251,6 +253,7 @@ int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win) /* nothing more to do. this is an empty start epoch */ sync->eager_send_active = true; OPAL_THREAD_UNLOCK(&sync->lock); + OPAL_THREAD_UNLOCK(&module->lock); return OMPI_SUCCESS; } @@ -260,6 +263,7 @@ int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win) sync->peer_list.peers = ompi_osc_pt2pt_get_peers (module, group); if (NULL == sync->peer_list.peers) { OPAL_THREAD_UNLOCK(&sync->lock); + OPAL_THREAD_UNLOCK(&module->lock); return OMPI_ERR_OUT_OF_RESOURCE; } @@ -295,6 +299,7 @@ int ompi_osc_pt2pt_start (ompi_group_t *group, int assert, ompi_win_t *win) sync->eager_send_active)); OPAL_THREAD_UNLOCK(&sync->lock); + OPAL_THREAD_UNLOCK(&module->lock); return OMPI_SUCCESS; }