-
Notifications
You must be signed in to change notification settings - Fork 936
Closed
Labels
Milestone
Description
Background information
What version of Open MPI are you using?
4.1.4
Describe how Open MPI was installed (e.g., from a source/distribution tarball, from a git clone, from an operating system distribution package, etc.)
Installation via Homebrew on macOS and from tar.bz2 on RHEL 8.4 using the OS-supplied gcc/g++/gfortran
Please describe the system on which you are running
- Operating system/version: macOS 12 and RHEL 8.4
- Computer hardware: some Mac Mini and Atos Sequana with AMD EPYC 7763
- Network type: None needed (issue reproduces on single node)
Details of the problem
When testing for an issue originally discovered in MPICH 4.0.2 (pmodels/mpich#6083), we found that Open MPI 4.1.4 also gives incorrect results for minimum-value reductions (MPI_MIN) on unsigned integer types:
shell$ mpicc test_mpi_reduce_unsigned.c
shell$ mpirun -n 2 ./a.out
ERROR in MPI_Reduce (MPI_UNSIGNED_LONG): recv_data = [18446744073709551615; 18446744073709551615] (expected [0,0])
--------------------------------------------------------------------------
MPI_ABORT was invoked on rank 0 in communicator MPI_COMM_WORLD
with errorcode 1.
NOTE: invoking MPI_ABORT causes Open MPI to kill all MPI processes.
You may or may not see output from other processes, depending on
exactly when Open MPI kills them.
--------------------------------------------------------------------------
The test program is as follows:
#include <stdlib.h>
#include <stdio.h>
#include <limits.h>
#include <inttypes.h>
#include <mpi.h>
// acx_mpi_job_count=2
/*
 * Reproducer for MPI_Reduce with MPI_MIN on unsigned integer types.
 *
 * Must be launched with exactly two processes. Rank 0 (the reduction root)
 * contributes {0, MAX} and the other rank contributes {MAX, 0}, where MAX is
 * the largest value of the element type. The root then checks that both
 * reduced elements are 0; on a mismatch it prints the received values to
 * stderr and calls MPI_Abort.
 *
 * The same sequence is exercised for unsigned long, uint8_t, uint32_t and
 * uint64_t, in that order.
 */
int main(int argc, char **argv) {
    MPI_Init(&argc, &argv);

    int rank, nprocs;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
    if (nprocs != 2) {
        fputs("wrong number of processes\n", stderr);
        MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
    }

    /* Only the root (rank 0) validates the reduced values. */
    const int is_root = (rank == 0);
    /* The root puts its zero in slot 0, the other rank in slot 1. */
    const int zero_slot = is_root ? 0 : 1;
    const int max_slot = 1 - zero_slot;

    /* --- unsigned long --- */
    {
        unsigned long contribution[2];
        unsigned long result[2] = {13, 13};
        contribution[zero_slot] = 0;
        contribution[max_slot] = ULONG_MAX;

        MPI_Reduce(contribution, result, 2, MPI_UNSIGNED_LONG, MPI_MIN, 0,
                   MPI_COMM_WORLD);
        if (is_root && !(result[0] == 0 && result[1] == 0)) {
            fprintf(stderr,
                    "ERROR in MPI_Reduce (MPI_UNSIGNED_LONG): "
                    "recv_data = [%lu; %lu] (expected [0,0])\n",
                    result[0], result[1]);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        }
    }

    /* --- uint8_t --- */
    {
        uint8_t contribution[2];
        uint8_t result[2] = {13, 13};
        contribution[zero_slot] = 0;
        contribution[max_slot] = UINT8_MAX;

        MPI_Reduce(contribution, result, 2, MPI_UINT8_T, MPI_MIN, 0,
                   MPI_COMM_WORLD);
        if (is_root && !(result[0] == 0 && result[1] == 0)) {
            fprintf(stderr,
                    "ERROR in MPI_Reduce (MPI_UINT8_T): "
                    "recv_data = [%"PRIu8"; %"PRIu8"] (expected [0,0])\n",
                    result[0], result[1]);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        }
    }

    /* --- uint32_t --- */
    {
        uint32_t contribution[2];
        uint32_t result[2] = {13, 13};
        contribution[zero_slot] = 0;
        contribution[max_slot] = UINT32_MAX;

        MPI_Reduce(contribution, result, 2, MPI_UINT32_T, MPI_MIN, 0,
                   MPI_COMM_WORLD);
        if (is_root && !(result[0] == 0 && result[1] == 0)) {
            fprintf(stderr,
                    "ERROR in MPI_Reduce (MPI_UINT32_T): "
                    "recv_data = [%"PRIu32"; %"PRIu32"] (expected [0,0])\n",
                    result[0], result[1]);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        }
    }

    /* --- uint64_t --- */
    {
        uint64_t contribution[2];
        uint64_t result[2] = {13, 13};
        contribution[zero_slot] = 0;
        contribution[max_slot] = UINT64_MAX;

        MPI_Reduce(contribution, result, 2, MPI_UINT64_T, MPI_MIN, 0,
                   MPI_COMM_WORLD);
        if (is_root && !(result[0] == 0 && result[1] == 0)) {
            fprintf(stderr,
                    "ERROR in MPI_Reduce (MPI_UINT64_T): "
                    "recv_data = [%"PRIu64"; %"PRIu64"] (expected [0,0])\n",
                    result[0], result[1]);
            MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
        }
    }

    /* Synchronise both ranks before shutting MPI down. */
    MPI_Barrier(MPI_COMM_WORLD);
    MPI_Finalize();
    return EXIT_SUCCESS;
}