From e14f80dc6032a6dfa0b6307fc4238b79e174b0b2 Mon Sep 17 00:00:00 2001 From: Austen Lauria Date: Thu, 7 Jan 2021 16:11:01 -0500 Subject: [PATCH 1/2] Make a managed allocation filter a hostfile/hostlist. If the user asks for a hostfile/hostlist inside of a managed allocation, make sure that rmaps filters these and maps processes based on them. Otherwise, it can result in inconsistent mappings across root and compute nodes if the user orders their hostfile differently than the resource manager. Signed-off-by: Austen Lauria --- orte/mca/rmaps/base/rmaps_base_support_fns.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/orte/mca/rmaps/base/rmaps_base_support_fns.c b/orte/mca/rmaps/base/rmaps_base_support_fns.c index 43c4bbe37bd..d84eae77d47 100644 --- a/orte/mca/rmaps/base/rmaps_base_support_fns.c +++ b/orte/mca/rmaps/base/rmaps_base_support_fns.c @@ -160,9 +160,15 @@ int orte_rmaps_base_get_target_nodes(opal_list_t *allocated_nodes, orte_std_cntr /* if this is NOT a managed allocation, then we use the nodes * that were specified for this app - there is no need to collect - * all available nodes and "filter" them + * all available nodes and "filter" them. + * + * However, if it is a managed allocation AND the hostfile or the hostlist was + * provided, those take precedence, so process them and filter as we normally do. 
*/ - if (!orte_managed_allocation) { + if ( !orte_managed_allocation || + (orte_managed_allocation && + (orte_get_attribute(&app->attributes, ORTE_APP_DASH_HOST, (void**)&hosts, OPAL_STRING) || + orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, (void**)&hosts, OPAL_STRING)))) { OBJ_CONSTRUCT(&nodes, opal_list_t); /* if the app provided a dash-host, and we are not treating * them as requested or "soft" locations, then use those nodes From 35cf87a1d3245269cc0caa36aeb5c90e07f916a6 Mon Sep 17 00:00:00 2001 From: Austen Lauria Date: Thu, 7 Jan 2021 16:15:30 -0500 Subject: [PATCH 2/2] Fix bug where orte under a managed allocation does not honor -host. For example: $ bsub -n 40 -m "node1 node2" mpirun -np 6 -host node1:2,node2:4 hostname would not map two hostname processes to node1 and four to node2. Instead, it would still think that node1 and node2 each had (for example) 20 CPU resources, and map accordingly. Signed-off-by: Austen Lauria --- orte/util/dash_host/dash_host.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/orte/util/dash_host/dash_host.c b/orte/util/dash_host/dash_host.c index 4d899c95199..f2461a35a9d 100644 --- a/orte/util/dash_host/dash_host.c +++ b/orte/util/dash_host/dash_host.c @@ -305,7 +305,30 @@ int orte_util_add_dash_host_nodes(opal_list_t *nodes, } } - rc = ORTE_SUCCESS; + // Managed allocation: Update the node pool slots + // with what was asked for in the host list. 
+ if(orte_managed_allocation) { + orte_node_t *node_from_pool = NULL; + for (i = 0; i < orte_node_pool->size; i++) { + if (NULL == (node_from_pool = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) { + continue; + } + for (itm = opal_list_get_first(nodes); + itm != opal_list_get_end(nodes); + itm = opal_list_get_next(itm)) { + node = (orte_node_t*) itm; + if (0 == strcmp(node_from_pool->name, node->name)) { + if(node->slots < node_from_pool -> slots) { + node_from_pool->slots = node->slots; + } + break; + } + // There's no need to check that this host exists in the pool. That + // should have already been checked at this point. + } + } + } + rc = ORTE_SUCCESS; cleanup: if (NULL != mapped_nodes) {