Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions orte/mca/odls/default/odls_default.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
* University of Stuttgart. All rights reserved.
* Copyright (c) 2004-2005 The Regents of the University of California.
* All rights reserved.
* Copyright (c) 2022 Cisco Systems, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand All @@ -35,6 +36,7 @@ BEGIN_C_DECLS
*/
int orte_odls_default_component_open(void);
int orte_odls_default_component_close(void);
int orte_odls_default_component_register(void);
int orte_odls_default_component_query(mca_base_module_t **module, int *priority);

/*
Expand All @@ -43,6 +45,11 @@ int orte_odls_default_component_query(mca_base_module_t **module, int *priority)
extern orte_odls_base_module_t orte_odls_default_module;
ORTE_MODULE_DECLSPEC extern orte_odls_base_component_t mca_odls_default_component;

/* In non-Linux environments where we can't just see which fd's are
open (e.g., MacOS), use this value as the maximum number of FDs
to close when forking a new child process. */
extern int orte_odls_default_maxfd;

END_C_DECLS

#endif /* ORTE_ODLS_H */
17 changes: 17 additions & 0 deletions orte/mca/odls/default/odls_default_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
* Copyright (c) 2015 Los Alamos National Security, LLC. All rights
* reserved.
* Copyright (c) 2017 Intel, Inc. All rights reserved.
* Copyright (c) 2022 Cisco Systems, Inc. All rights reserved
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand Down Expand Up @@ -60,6 +61,7 @@ orte_odls_base_component_t mca_odls_default_component = {
/* Component open and close functions */
.mca_open_component = orte_odls_default_component_open,
.mca_close_component = orte_odls_default_component_close,
.mca_register_component_params = orte_odls_default_component_register,
.mca_query_component = orte_odls_default_component_query,
},
.base_data = {
Expand All @@ -68,13 +70,28 @@ orte_odls_base_component_t mca_odls_default_component = {
},
};

/* Upper bound applied to sysconf(_SC_OPEN_MAX) when do_child() falls
   back to closing file descriptors one by one: if sysconf() returns
   -1 or a value larger than this, this value is used as the loop
   limit instead.  Defaults to 1024 and is exposed to users as the
   "maxfd" MCA variable by orte_odls_default_component_register(). */
int orte_odls_default_maxfd = 1024;


/*
 * Open hook for the default odls component (installed as
 * .mca_open_component in mca_odls_default_component).
 *
 * Performs no work and always returns ORTE_SUCCESS.
 */
int orte_odls_default_component_open(void)
{
    /* Nothing to set up at open time. */
    return ORTE_SUCCESS;
}

/*
 * Register hook for the default odls component (installed as
 * .mca_register_component_params in mca_odls_default_component).
 *
 * Registers the integer MCA variable "maxfd", backed by the global
 * orte_odls_default_maxfd, at info level 6 with read-only scope.
 *
 * Always returns ORTE_SUCCESS; the result of the registration call
 * itself is not inspected.
 */
int orte_odls_default_component_register(void)
{
    /* Help text shown for the "maxfd" variable. */
    const char *maxfd_help =
        "In non-Linux environments, use this value as a maximum number of file descriptors to close when forking a new child process";

    (void) mca_base_component_var_register(&mca_odls_default_component.version,
                                           "maxfd",
                                           maxfd_help,
                                           MCA_BASE_VAR_TYPE_INT,
                                           NULL, 0, 0,
                                           OPAL_INFO_LVL_6,
                                           MCA_BASE_VAR_SCOPE_READONLY,
                                           &orte_odls_default_maxfd);

    return ORTE_SUCCESS;
}

int orte_odls_default_component_query(mca_base_module_t **module, int *priority)
{
/* the base open/select logic protects us against operation when
Expand Down
24 changes: 21 additions & 3 deletions orte/mca/odls/default/odls_default_module.c
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
* All rights reserved.
* Copyright (c) 2007-2010 Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2007 Evergrid, Inc. All rights reserved.
* Copyright (c) 2008-2017 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2008-2022 Cisco Systems, Inc. All rights reserved
* Copyright (c) 2010 IBM Corporation. All rights reserved.
* Copyright (c) 2011-2013 Los Alamos National Security, LLC. All rights
* reserved.
Expand Down Expand Up @@ -337,7 +337,6 @@ static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd)
{
int i;
sigset_t sigs;
long fd, fdmax = sysconf(_SC_OPEN_MAX);
char dir[MAXPATHLEN];

#if HAVE_SETPGID
Expand Down Expand Up @@ -400,8 +399,27 @@ static int do_child(orte_odls_spawn_caddy_t *cd, int write_fd)
the pipe used for the IOF INTERNAL messages, and the pipe up to
the parent. */
if (ORTE_SUCCESS != close_open_file_descriptors(write_fd, cd->opts)) {
/* On some versions of MacOS (e.g., 12.3.1), we have seen
sysconf(_SC_OPEN_MAX) -- and "ulimit -n" -- return very
large numbers, and sometimes return -1 (which means
"unlimited"). This can result in an unreasonably large
loop over closing all FDs (especially if -1 gets
interpreted as LONG_MAX).
https://github.com/open-mpi/ompi/issues/10358 has some
links to others who have seen this kind of behavior.

Protect against -1 and arbitrarily large values being
returned from sysconf(_SC_OPEN_MAX): use an MCA param to
cap the max value that we'll use, just in case there's an
actual reason for a user to change the built-in default
value that we're (somewhat arbitrarily) picking. */
long fd, fdmax = sysconf(_SC_OPEN_MAX);
if (-1 == fdmax || orte_odls_default_maxfd < fdmax) {
fdmax = orte_odls_default_maxfd;
}

// close *all* file descriptors -- slow
for(fd=3; fd<fdmax; fd++) {
for (fd = 3; fd < fdmax; fd++) {
if (
#if OPAL_PMIX_V1
fd != cd->opts.p_internal[1] &&
Expand Down