Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions config/m4/cuda.m4
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@ AS_IF([test "x$cuda_checked" != "xyes"],
[])],
[],
[[#include <nvml.h>]])])
AC_CHECK_SIZEOF(cuFloatComplex,,[#include <cuComplex.h>])
AC_CHECK_SIZEOF(cuDoubleComplex,,[#include <cuComplex.h>])

# Check for NVCC
AC_ARG_VAR(NVCC, [NVCC compiler command])
Expand Down
8 changes: 7 additions & 1 deletion configure.ac
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#
# Copyright (C) Mellanox Technologies Ltd. 2001-2021. ALL RIGHTS RESERVED.
# Copyright (C) Mellanox Technologies Ltd. 2001-2022. ALL RIGHTS RESERVED.
# This software product is a proprietary product of Mellanox Technologies Ltd.
# (the "Company") and all right, title, and interest and to the software product,
# including all associated intellectual property rights, are and shall
Expand Down Expand Up @@ -77,6 +77,12 @@ AC_PROG_LIBTOOL
AC_HEADER_STDC
CFLAGS="$CFLAGS_save"

AC_CHECK_SIZEOF(float)
AC_CHECK_SIZEOF(double)
AC_CHECK_SIZEOF(long double)
AC_CHECK_SIZEOF(float _Complex)
AC_CHECK_SIZEOF(double _Complex)
AC_CHECK_SIZEOF(long double _Complex)
#
# Check if 'ln' supports creating relative links
#
Expand Down
46 changes: 27 additions & 19 deletions src/components/mc/cpu/Makefile.am
Original file line number Diff line number Diff line change
@@ -1,25 +1,33 @@
#
# Copyright (C) Mellanox Technologies Ltd. 2020-2021. ALL RIGHTS RESERVED.
# Copyright (C) Mellanox Technologies Ltd. 2020-2022. ALL RIGHTS RESERVED.
#

sources = \
mc_cpu.h \
mc_cpu.c \
reduce/mc_cpu_reduce.h \
reduce/mc_cpu_reduce_int8.c \
reduce/mc_cpu_reduce_int16.c \
reduce/mc_cpu_reduce_int32.c \
reduce/mc_cpu_reduce_int64.c \
reduce/mc_cpu_reduce_uint8.c \
reduce/mc_cpu_reduce_uint16.c \
reduce/mc_cpu_reduce_uint32.c \
reduce/mc_cpu_reduce_uint64.c \
reduce/mc_cpu_reduce_float.c \
reduce/mc_cpu_reduce_bfloat16.c \
reduce/mc_cpu_reduce_double.c \
reduce_alpha/mc_cpu_reduce_alpha_float.c \
reduce_alpha/mc_cpu_reduce_alpha_bfloat16.c \
reduce_alpha/mc_cpu_reduce_alpha_double.c
# Files compiled into libucc_mc_cpu.la (consumed below through
# libucc_mc_cpu_la_SOURCES). The reduce/ entries hold one file per element
# type; the reduce_alpha/ entries hold the matching "alpha" variants.
# NOTE(review): the long double alpha files are named "..._alpha_long.c" and
# "..._alpha_long_complex.c", unlike the "long_double" spelling used under
# reduce/ -- confirm this is intentional.
sources = \
mc_cpu.h \
mc_cpu.c \
reduce/mc_cpu_reduce.h \
reduce/mc_cpu_reduce_int8.c \
reduce/mc_cpu_reduce_int16.c \
reduce/mc_cpu_reduce_int32.c \
reduce/mc_cpu_reduce_int64.c \
reduce/mc_cpu_reduce_uint8.c \
reduce/mc_cpu_reduce_uint16.c \
reduce/mc_cpu_reduce_uint32.c \
reduce/mc_cpu_reduce_uint64.c \
reduce/mc_cpu_reduce_float.c \
reduce/mc_cpu_reduce_bfloat16.c \
reduce/mc_cpu_reduce_double.c \
reduce/mc_cpu_reduce_long_double.c \
reduce/mc_cpu_reduce_float_complex.c \
reduce/mc_cpu_reduce_double_complex.c \
reduce/mc_cpu_reduce_long_double_complex.c \
reduce_alpha/mc_cpu_reduce_alpha_float.c \
reduce_alpha/mc_cpu_reduce_alpha_bfloat16.c \
reduce_alpha/mc_cpu_reduce_alpha_double.c \
reduce_alpha/mc_cpu_reduce_alpha_long.c \
reduce_alpha/mc_cpu_reduce_alpha_float_complex.c \
reduce_alpha/mc_cpu_reduce_alpha_double_complex.c \
reduce_alpha/mc_cpu_reduce_alpha_long_complex.c

module_LTLIBRARIES = libucc_mc_cpu.la
libucc_mc_cpu_la_SOURCES = $(sources)
Expand Down
86 changes: 79 additions & 7 deletions src/components/mc/cpu/mc_cpu.c
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (C) Mellanox Technologies Ltd. 2020-2021. ALL RIGHTS RESERVED.
* Copyright (C) Mellanox Technologies Ltd. 2020-2022. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/
Expand Down Expand Up @@ -189,16 +189,50 @@ static ucc_status_t ucc_mc_cpu_reduce_multi(const void *src1, const void *src2,
return ucc_mc_cpu_reduce_multi_uint64(src1, src2, dst, n_vectors,
count, stride, op);
case UCC_DT_FLOAT32:
ucc_assert(4 == sizeof(float));
return ucc_mc_cpu_reduce_multi_float(src1, src2, dst, n_vectors,
count, stride, op);
#if SIZEOF_FLOAT == 4
return ucc_mc_cpu_reduce_multi_float(src1, src2, dst, n_vectors, count,
stride, op);
#else
return UCC_ERR_NOT_SUPPORTED;
#endif
case UCC_DT_FLOAT64:
ucc_assert(8 == sizeof(double));
#if SIZEOF_DOUBLE == 8
return ucc_mc_cpu_reduce_multi_double(src1, src2, dst, n_vectors,
count, stride, op);
#else
return UCC_ERR_NOT_SUPPORTED;
#endif
case UCC_DT_FLOAT128:
#if SIZEOF_LONG_DOUBLE == 16
return ucc_mc_cpu_reduce_multi_long_double(src1, src2, dst, n_vectors,
count, stride, op);
#else
return UCC_ERR_NOT_SUPPORTED;
#endif
case UCC_DT_BFLOAT16:
return ucc_mc_cpu_reduce_multi_bfloat16(src1, src2, dst, n_vectors,
count, stride, op);
case UCC_DT_FLOAT32_COMPLEX:
#if SIZEOF_FLOAT__COMPLEX == 8
return ucc_mc_cpu_reduce_multi_float_complex(src1, src2, dst, n_vectors,
count, stride, op);
#else
return UCC_ERR_NOT_SUPPORTED;
#endif
case UCC_DT_FLOAT64_COMPLEX:
#if SIZEOF_DOUBLE__COMPLEX == 16
return ucc_mc_cpu_reduce_multi_double_complex(
src1, src2, dst, n_vectors, count, stride, op);
#else
return UCC_ERR_NOT_SUPPORTED;
#endif
case UCC_DT_FLOAT128_COMPLEX:
#if SIZEOF_LONG_DOUBLE__COMPLEX == 32
return ucc_mc_cpu_reduce_multi_long_double_complex(
src1, src2, dst, n_vectors, count, stride, op);
#else
return UCC_ERR_NOT_SUPPORTED;
#endif
default:
mc_error(&ucc_mc_cpu.super, "unsupported reduction type (%s)",
ucc_datatype_str(dt));
Expand All @@ -222,19 +256,57 @@ ucc_mc_cpu_reduce_multi_alpha(const void *src1, const void *src2, void *dst,
{
switch (dt) {
case UCC_DT_FLOAT32:
ucc_assert(4 == sizeof(float));
#if SIZEOF_FLOAT == 4
return ucc_mc_cpu_reduce_multi_alpha_float(src1, src2, dst, n_vectors,
count, stride, reduce_op,
vector_op, (float)alpha);
#else
return UCC_ERR_NOT_SUPPORTED;
#endif
case UCC_DT_FLOAT64:
ucc_assert(8 == sizeof(double));
#if SIZEOF_DOUBLE == 8
return ucc_mc_cpu_reduce_multi_alpha_double(src1, src2, dst, n_vectors,
count, stride, reduce_op,
vector_op, alpha);
#else
return UCC_ERR_NOT_SUPPORTED;
#endif
case UCC_DT_FLOAT128:
#if SIZEOF_LONG_DOUBLE == 16
return ucc_mc_cpu_reduce_multi_alpha_long(
src1, src2, dst, n_vectors, count, stride, reduce_op, vector_op,
(long double)alpha);
#else
return UCC_ERR_NOT_SUPPORTED;
#endif
case UCC_DT_BFLOAT16:
return ucc_mc_cpu_reduce_multi_alpha_bfloat16(src1, src2, dst, n_vectors,
count, stride, reduce_op,
vector_op, (float)alpha);
case UCC_DT_FLOAT32_COMPLEX:
#if SIZEOF_FLOAT__COMPLEX == 8
return ucc_mc_cpu_reduce_multi_alpha_float_complex(
src1, src2, dst, n_vectors, count, stride, reduce_op, vector_op,
(float)alpha);
#else
return UCC_ERR_NOT_SUPPORTED;
#endif
case UCC_DT_FLOAT64_COMPLEX:
#if SIZEOF_DOUBLE__COMPLEX == 16
return ucc_mc_cpu_reduce_multi_alpha_double_complex(
src1, src2, dst, n_vectors, count, stride, reduce_op, vector_op,
(double)alpha);
#else
return UCC_ERR_NOT_SUPPORTED;
#endif
case UCC_DT_FLOAT128_COMPLEX:
#if SIZEOF_LONG_DOUBLE__COMPLEX == 32
return ucc_mc_cpu_reduce_multi_alpha_long_complex(
src1, src2, dst, n_vectors, count, stride, reduce_op, vector_op,
(long double)alpha);
#else
return UCC_ERR_NOT_SUPPORTED;
#endif
default:
mc_error(&ucc_mc_cpu.super, "unsupported reduction type (%s)",
ucc_datatype_str(dt));
Expand Down
53 changes: 43 additions & 10 deletions src/components/mc/cpu/reduce/mc_cpu_reduce.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/**
* Copyright (C) Mellanox Technologies Ltd. 2020-2021. ALL RIGHTS RESERVED.
* Copyright (C) Mellanox Technologies Ltd. 2020-2022. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/
Expand All @@ -8,6 +8,8 @@
#define UCC_MC_CPU_REDUCE_H_

#include "utils/ucc_math.h"
#include <complex.h>

#define OP_1(_s1, _s2, _i, _sc, _OP) _OP(_s1[_i], _s2[_i])
#define OP_2(_s1, _s2, _i, _sc, _OP) \
_OP((OP_1(_s1, _s2, _i, _sc, _OP)), _s2[_i + 1 * _sc])
Expand Down Expand Up @@ -163,6 +165,32 @@
} \
} while (0)

/*
 * DO_DT_REDUCE_FLOAT_COMPLEX - reduction body for complex element types.
 *
 * type      element type the three buffers are reinterpreted as
 * reduce_op reduction operation selecting the branch below
 * src1_p    first source buffer, viewed as `const type *restrict`
 * src2_p    second source buffer, viewed as `const type *restrict`
 * dest_p    destination buffer, viewed as `type *restrict`
 * size, count, stride
 *           forwarded unchanged to DO_DT_REDUCE_WITH_OP (defined elsewhere)
 *
 * Before reducing, it asserts that the destination either starts at or
 * before src2_p, or starts past
 * src2_p + (size - 1) * stride + count * sizeof(type).
 *
 * Dispatch on reduce_op:
 *   UCC_OP_SUM, UCC_OP_AVG -> DO_DT_REDUCE_WITH_OP(..., DO_OP_SUM)
 *                             (AVG shares the SUM branch; no scaling is
 *                             applied inside this macro)
 *   UCC_OP_PROD            -> DO_DT_REDUCE_WITH_OP(..., DO_OP_PROD)
 *   anything else          -> logs through mc_error() and executes
 *                             `return UCC_ERR_NOT_SUPPORTED;`
 *
 * Because of that embedded `return`, the macro may only be expanded inside a
 * function whose return type accepts UCC_ERR_NOT_SUPPORTED. It also relies on
 * `ucc_mc_cpu` being visible at the expansion site.
 *
 * NOTE(review): size, count and stride are pasted without parentheses, so
 * callers should pass plain identifiers rather than compound expressions.
 */
#define DO_DT_REDUCE_FLOAT_COMPLEX(type, reduce_op, src1_p, src2_p, dest_p, \
size, count, stride) \
do { \
const type *restrict s1 = (const type *restrict)src1_p; \
const type *restrict s2 = (const type *restrict)src2_p; \
type *restrict d = (type * restrict) dest_p; \
ucc_assert((ptrdiff_t)d <= (ptrdiff_t)src2_p || \
(ptrdiff_t)d > (ptrdiff_t)src2_p + (size - 1) * stride + \
count * sizeof(type)); \
switch (reduce_op) { \
case UCC_OP_SUM: \
case UCC_OP_AVG: \
DO_DT_REDUCE_WITH_OP(s1, s2, d, size, count, stride, DO_OP_SUM); \
break; \
case UCC_OP_PROD: \
DO_DT_REDUCE_WITH_OP(s1, s2, d, size, count, stride, DO_OP_PROD); \
break; \
default: \
mc_error(&ucc_mc_cpu.super, \
"float complex dtype does not support " \
"requested reduce op: %s", \
ucc_reduction_op_str(reduce_op)); \
return UCC_ERR_NOT_SUPPORTED; \
} \
} while (0)

#define VEC_OP(_d, OP) \
do { \
size_t _i; \
Expand Down Expand Up @@ -201,18 +229,23 @@ REDUCE_FN_DECLARE(uint32);
REDUCE_FN_DECLARE(uint64);
REDUCE_FN_DECLARE(float);
REDUCE_FN_DECLARE(double);
REDUCE_FN_DECLARE(long_double);
REDUCE_FN_DECLARE(bfloat16);
REDUCE_FN_DECLARE(float_complex);
REDUCE_FN_DECLARE(double_complex);
REDUCE_FN_DECLARE(long_double_complex);

#define REDUCE_ALPHA_FN_DECLARE(_type) \
#define REDUCE_ALPHA_FN_DECLARE(_type, alpha_dt) \
ucc_status_t ucc_mc_cpu_reduce_multi_alpha_##_type( \
const void *src1, const void *src2, void *dst, size_t n_vectors, \
size_t count, size_t stride, ucc_reduction_op_t reduce_op, \
ucc_reduction_op_t vector_op, _type alpha)
REDUCE_ALPHA_FN_DECLARE(float);
REDUCE_ALPHA_FN_DECLARE(double);
ucc_status_t
ucc_mc_cpu_reduce_multi_alpha_bfloat16(const void *src1, const void *src2,
void *dst, size_t n_vectors, size_t count,
size_t stride, ucc_reduction_op_t reduce_op,
ucc_reduction_op_t vector_op, float alpha);
ucc_reduction_op_t vector_op, alpha_dt alpha)
REDUCE_ALPHA_FN_DECLARE(float, float);
REDUCE_ALPHA_FN_DECLARE(double, double);
REDUCE_ALPHA_FN_DECLARE(long, long double);
REDUCE_ALPHA_FN_DECLARE(bfloat16, float);
REDUCE_ALPHA_FN_DECLARE(float_complex, float);
REDUCE_ALPHA_FN_DECLARE(double_complex, double);
REDUCE_ALPHA_FN_DECLARE(long_complex, long double);

#endif
19 changes: 19 additions & 0 deletions src/components/mc/cpu/reduce/mc_cpu_reduce_double_complex.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/**
* Copyright (C) Mellanox Technologies Ltd. 2022. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/

#include "mc_cpu.h"
#include "reduce/mc_cpu_reduce.h"

/**
 * CPU reduction entry point for `double complex` elements.
 *
 * All arguments are handed to DO_DT_REDUCE_FLOAT_COMPLEX unchanged. That
 * macro accepts UCC_OP_SUM, UCC_OP_AVG (same branch as SUM) and UCC_OP_PROD.
 *
 * @param src1      first source buffer
 * @param src2      second source buffer
 * @param dst       destination buffer
 * @param n_vectors passed to the macro as its `size` argument
 * @param count     passed to the macro as its `count` argument
 * @param stride    passed to the macro as its `stride` argument
 * @param op        requested reduction operation
 *
 * @return UCC_OK on success; UCC_ERR_NOT_SUPPORTED (returned from inside the
 *         macro) when @p op is not one of the operations listed above.
 */
ucc_status_t
ucc_mc_cpu_reduce_multi_double_complex(const void *src1, const void *src2,
                                       void *dst, size_t n_vectors,
                                       size_t count, size_t stride,
                                       ucc_reduction_op_t op)
{
    /* May leave the function early: the macro contains its own `return`. */
    DO_DT_REDUCE_FLOAT_COMPLEX(double complex, op, src1, src2, dst,
                               n_vectors, count, stride);

    return UCC_OK;
}
19 changes: 19 additions & 0 deletions src/components/mc/cpu/reduce/mc_cpu_reduce_float_complex.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/**
* Copyright (C) Mellanox Technologies Ltd. 2022. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/

#include "mc_cpu.h"
#include "reduce/mc_cpu_reduce.h"

/**
 * CPU reduction entry point for `float complex` elements.
 *
 * All arguments are handed to DO_DT_REDUCE_FLOAT_COMPLEX unchanged. That
 * macro accepts UCC_OP_SUM, UCC_OP_AVG (same branch as SUM) and UCC_OP_PROD.
 *
 * @param src1      first source buffer
 * @param src2      second source buffer
 * @param dst       destination buffer
 * @param n_vectors passed to the macro as its `size` argument
 * @param count     passed to the macro as its `count` argument
 * @param stride    passed to the macro as its `stride` argument
 * @param op        requested reduction operation
 *
 * @return UCC_OK on success; UCC_ERR_NOT_SUPPORTED (returned from inside the
 *         macro) when @p op is not one of the operations listed above.
 */
ucc_status_t
ucc_mc_cpu_reduce_multi_float_complex(const void *src1, const void *src2,
                                      void *dst, size_t n_vectors,
                                      size_t count, size_t stride,
                                      ucc_reduction_op_t op)
{
    /* May leave the function early: the macro contains its own `return`. */
    DO_DT_REDUCE_FLOAT_COMPLEX(float complex, op, src1, src2, dst,
                               n_vectors, count, stride);

    return UCC_OK;
}
19 changes: 19 additions & 0 deletions src/components/mc/cpu/reduce/mc_cpu_reduce_long_double.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/**
* Copyright (C) Mellanox Technologies Ltd. 2022. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/

#include "mc_cpu.h"
#include "reduce/mc_cpu_reduce.h"

/**
 * CPU reduction entry point for `long double` elements.
 *
 * All arguments are handed to DO_DT_REDUCE_FLOAT unchanged. That macro is
 * defined outside the portion of mc_cpu_reduce.h visible here, so the set of
 * operations it accepts should be checked there.
 *
 * @param src1      first source buffer
 * @param src2      second source buffer
 * @param dst       destination buffer
 * @param n_vectors forwarded to DO_DT_REDUCE_FLOAT
 * @param count     forwarded to DO_DT_REDUCE_FLOAT
 * @param stride    forwarded to DO_DT_REDUCE_FLOAT
 * @param op        requested reduction operation
 *
 * @return UCC_OK when the macro falls through to the end of the function.
 */
ucc_status_t
ucc_mc_cpu_reduce_multi_long_double(const void *src1, const void *src2,
                                    void *dst, size_t n_vectors, size_t count,
                                    size_t stride, ucc_reduction_op_t op)
{
    /* NOTE(review): presumably returns early on unsupported ops, like the
     * complex variant of this macro does -- confirm in mc_cpu_reduce.h. */
    DO_DT_REDUCE_FLOAT(long double, op, src1, src2, dst, n_vectors, count,
                       stride);

    return UCC_OK;
}
17 changes: 17 additions & 0 deletions src/components/mc/cpu/reduce/mc_cpu_reduce_long_double_complex.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/**
* Copyright (C) Mellanox Technologies Ltd. 2022. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/

#include "mc_cpu.h"
#include "reduce/mc_cpu_reduce.h"

/**
 * CPU reduction entry point for `long double complex` elements.
 *
 * All arguments are handed to DO_DT_REDUCE_FLOAT_COMPLEX unchanged. That
 * macro accepts UCC_OP_SUM, UCC_OP_AVG (same branch as SUM) and UCC_OP_PROD.
 *
 * @param src1      first source buffer
 * @param src2      second source buffer
 * @param dst       destination buffer
 * @param n_vectors passed to the macro as its `size` argument
 * @param count     passed to the macro as its `count` argument
 * @param stride    passed to the macro as its `stride` argument
 * @param op        requested reduction operation
 *
 * @return UCC_OK on success; UCC_ERR_NOT_SUPPORTED (returned from inside the
 *         macro) when @p op is not one of the operations listed above.
 */
ucc_status_t
ucc_mc_cpu_reduce_multi_long_double_complex(const void *src1,
                                            const void *src2, void *dst,
                                            size_t n_vectors, size_t count,
                                            size_t stride,
                                            ucc_reduction_op_t op)
{
    /* May leave the function early: the macro contains its own `return`. */
    DO_DT_REDUCE_FLOAT_COMPLEX(long double complex, op, src1, src2, dst,
                               n_vectors, count, stride);

    return UCC_OK;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/**
* Copyright (C) Mellanox Technologies Ltd. 2020-2022. ALL RIGHTS RESERVED.
*
* See file LICENSE for terms.
*/

#include "mc_cpu.h"
#include "reduce/mc_cpu_reduce.h"

/**
 * CPU reduction for `double complex` elements followed by a vector
 * operation on the destination.
 *
 * Step 1 reduces through DO_DT_REDUCE_FLOAT_COMPLEX, which accepts
 * UCC_OP_SUM, UCC_OP_AVG (same branch as SUM) and UCC_OP_PROD as
 * @p reduce_op. Step 2 runs DO_VEC_OP over @p dst.
 *
 * @param src1      first source buffer
 * @param src2      second source buffer
 * @param dst       destination buffer; also the operand of DO_VEC_OP
 * @param n_vectors passed to the reduction macro as its `size` argument
 * @param count     passed to the reduction macro as its `count` argument
 * @param stride    passed to the reduction macro as its `stride` argument
 * @param reduce_op operation used for the reduction step
 * @param vector_op operation for the post-reduction step
 * @param alpha     real-valued scalar for the post-reduction step
 *
 * @return UCC_OK on success; UCC_ERR_NOT_SUPPORTED (returned from inside the
 *         reduction macro) when @p reduce_op is not supported.
 */
ucc_status_t
ucc_mc_cpu_reduce_multi_alpha_double_complex(const void *src1,
                                             const void *src2, void *dst,
                                             size_t n_vectors, size_t count,
                                             size_t stride,
                                             ucc_reduction_op_t reduce_op,
                                             ucc_reduction_op_t vector_op,
                                             double alpha)
{
    /* May leave the function early: the macro contains its own `return`. */
    DO_DT_REDUCE_FLOAT_COMPLEX(double complex, reduce_op, src1, src2, dst,
                               n_vectors, count, stride);

    /* NOTE(review): vector_op, alpha and count are not passed explicitly;
     * DO_VEC_OP presumably picks them up from this scope by name -- confirm
     * in mc_cpu_reduce.h before renaming any parameter. */
    DO_VEC_OP(double complex, dst);

    return UCC_OK;
}
Loading