Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion ompi/mca/op/avx/op_avx.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@ typedef struct {
avxs; replace them with whatever is relevant for your
component. */

uint32_t flags; /* AVX capabilities supported by the processor */
uint32_t supported; /* AVX capabilities supported by the environment */
uint32_t flags; /* AVX capabilities requested by this process */
} ompi_op_avx_component_t;

/**
Expand Down
80 changes: 75 additions & 5 deletions ompi/mca/op/avx/op_avx_component.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
* reserved.
* Copyright (c) 2020 Research Organization for Information Science
* and Technology (RIST). All rights reserved.
* Copyright (c) 2021 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$
*
* Additional copyrights may follow
Expand All @@ -20,6 +21,7 @@
#include "ompi_config.h"

#include "opal/util/printf.h"
#include "ompi/include/mpi_portable_platform.h"

#include "ompi/constants.h"
#include "ompi/op/op.h"
Expand All @@ -35,6 +37,18 @@ static struct ompi_op_base_module_1_0_0_t *
avx_component_op_query(struct ompi_op_t *op, int *priority);
static int avx_component_register(void);

/*
 * Name table for the instruction-set capability bits handled by this
 * component.  avx_component_register() passes it to
 * mca_base_var_enum_create_flag() to build the "op_avx_support_flags"
 * enumerator, so that the capability bitmasks can be shown and set by name
 * rather than as raw integers.
 *
 * Bits 0x040 and 0x080 are not assigned in this table.
 *
 * NOTE(review): the numeric values presumably have to stay in sync with the
 * OMPI_OP_AVX_HAS_*_FLAG constants used on the same bitmasks -- confirm
 * against op_avx.h.
 */
static mca_base_var_enum_value_flag_t avx_support_flags[] = {
{ .flag = 0x001, .string = "SSE" },
{ .flag = 0x002, .string = "SSE2" },
{ .flag = 0x004, .string = "SSE3" },
{ .flag = 0x008, .string = "SSE4.1" },
{ .flag = 0x010, .string = "AVX" },
{ .flag = 0x020, .string = "AVX2" },
{ .flag = 0x100, .string = "AVX512F" },
{ .flag = 0x200, .string = "AVX512BW" },
/* All-zero entry: presumably the end-of-table marker, since the array is
 * handed to mca_base_var_enum_create_flag() without an element count. */
{ .flag = 0, .string = NULL },
};

/**
* A slightly modified code from
* https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family
Expand Down Expand Up @@ -177,15 +191,71 @@ static int avx_component_close(void)
/**
 * Register the MCA parameters of the op/avx component.
 *
 * Two parameters are registered:
 *  - "available": the capabilities reported by has_intel_AVX_features();
 *  - "support":   the capabilities this process is asked to use.
 *
 * Both are presented through a flag enumerator built from
 * avx_support_flags.  If that enumerator cannot be created, the parameters
 * are still registered, without an enumerator.  After registration the
 * requested set is capped by the detected set.
 *
 * @return OMPI_SUCCESS in all cases; failures of the individual
 *         registration calls are deliberately ignored.
 */
static int
avx_component_register(void)
{
    /* The detected capabilities serve both as the capability mask and as
     * the starting point for the default of the "support" parameter. */
    mca_op_avx_component.supported =
        mca_op_avx_component.flags = has_intel_AVX_features();

    // MCA var enum flag for conveniently seeing SSE/MMX/AVX support
    // values
    mca_base_var_enum_flag_t *new_enum_flag = NULL;
    if (OMPI_SUCCESS != mca_base_var_enum_create_flag("op_avx_support_flags",
                                                      avx_support_flags,
                                                      &new_enum_flag)) {
        /* Never use or release a pointer the failed call may have left
         * in an indeterminate state. */
        new_enum_flag = NULL;
    } else {
        (void) mca_base_var_enum_register("ompi", "op", "avx", "support_flags",
                                          &new_enum_flag);
    }

    /**
     * In January 2021, testing showed that using AVX512 with at least
     * one application (LAMMPS) when Open MPI was compiled with
     * non-Intel compilers (e.g., all versions of GCC up through
     * 10.x), frequency scaling issues on Intel cores resulted in
     * noticeably worse performance than using only AVX2, or not
     * using AVX at all.  This effect seemed to occur on several
     * flavors of Intel chips that we could test in January 2021
     * (i.e., AVX512 performance with icc = good, AVX512 performance
     * with gcc = bad).
     *
     * Being therefore conservative:
     *
     * 1. We're enabling all flavors of AVX by default (including
     *    AVX512) when Open MPI was compiled with the Intel compiler
     *    suite
     * 2. We're disabling AVX512 by default (but leaving other AVX
     *    flavors enabled) in all other cases.
     *
     * Users can still enable / disable whatever they want via the
     * op_avx_support MCA param; these changes only affect the
     * defaults.
     *
     * We should continue to test over time to understand what is
     * happening here, and see if we can get better defaults.
     */
    if (strcasecmp(_STRINGIFY(OPAL_BUILD_PLATFORM_COMPILER_FAMILYNAME),
                   "intel") != 0) {
        /* Trim the requested set only.  The value held in "flags" when the
         * "support" parameter is registered becomes its default, whereas
         * "supported" is the cap applied at the end of this function:
         * masking "supported" would make it impossible for a user to turn
         * AVX512 back on. */
        mca_op_avx_component.flags &=
            ~(OMPI_OP_AVX_HAS_AVX512F_FLAG |
              OMPI_OP_AVX_HAS_AVX512BW_FLAG);
    }

    (void) mca_base_component_var_register(&mca_op_avx_component.super.opc_version,
                                           "available",
                                           "Level of SSE/MMX/AVX support available",
                                           MCA_BASE_VAR_TYPE_INT,
                                           (NULL != new_enum_flag) ? &(new_enum_flag->super) : NULL,
                                           0, 0,
                                           OPAL_INFO_LVL_4,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_op_avx_component.supported);

    (void) mca_base_component_var_register(&mca_op_avx_component.super.opc_version,
                                           "support",
                                           "Level of SSE/MMX/AVX support to be used, capped by the local architecture capabilities",
                                           MCA_BASE_VAR_TYPE_INT,
                                           (NULL != new_enum_flag) ? &(new_enum_flag->super) : NULL,
                                           0,
                                           MCA_BASE_VAR_FLAG_SETTABLE,
                                           OPAL_INFO_LVL_4,
                                           MCA_BASE_VAR_SCOPE_LOCAL,
                                           &mca_op_avx_component.flags);

    /* Drop the local reference to the enumerator, if one was created. */
    if (NULL != new_enum_flag) {
        OBJ_RELEASE(new_enum_flag);
    }

    /* Whatever was requested, never go beyond what was detected. */
    mca_op_avx_component.flags &= mca_op_avx_component.supported;

    return OMPI_SUCCESS;
}

Expand Down