Skip to content

Commit 8afd34a

Browse files
authored
API: add float128 and float32(64,128)_complex dt (#492)
* API: add float128 and float32(64,128)_complex dt
* TEST: update mpi_tests with new dt
* TEST: update Gtest with new dt
* BUILD: check dt size during preprocessing
1 parent acd0f61 commit 8afd34a

32 files changed

+908
-187
lines changed

config/m4/cuda.m4

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,8 @@ AS_IF([test "x$cuda_checked" != "xyes"],
8484
[])],
8585
[],
8686
[[#include <nvml.h>]])])
87+
AC_CHECK_SIZEOF(cuFloatComplex,,[#include <cuComplex.h>])
88+
AC_CHECK_SIZEOF(cuDoubleComplex,,[#include <cuComplex.h>])
8789
8890
# Check for NVCC
8991
AC_ARG_VAR(NVCC, [NVCC compiler command])

configure.ac

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#
2-
# Copyright (C) Mellanox Technologies Ltd. 2001-2021. ALL RIGHTS RESERVED.
2+
# Copyright (C) Mellanox Technologies Ltd. 2001-2022. ALL RIGHTS RESERVED.
33
# This software product is a proprietary product of Mellanox Technologies Ltd.
44
# (the "Company") and all right, title, and interest and to the software product,
55
# including all associated intellectual property rights, are and shall
@@ -77,6 +77,12 @@ AC_PROG_LIBTOOL
7777
AC_HEADER_STDC
7878
CFLAGS="$CFLAGS_save"
7979

80+
AC_CHECK_SIZEOF(float)
81+
AC_CHECK_SIZEOF(double)
82+
AC_CHECK_SIZEOF(long double)
83+
AC_CHECK_SIZEOF(float _Complex)
84+
AC_CHECK_SIZEOF(double _Complex)
85+
AC_CHECK_SIZEOF(long double _Complex)
8086
#
8187
# Check if 'ln' supports creating relative links
8288
#

src/components/mc/cpu/Makefile.am

Lines changed: 27 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,33 @@
11
#
2-
# Copyright (C) Mellanox Technologies Ltd. 2020-2021. ALL RIGHTS RESERVED.
2+
# Copyright (C) Mellanox Technologies Ltd. 2020-2022. ALL RIGHTS RESERVED.
33
#
44

5-
sources = \
6-
mc_cpu.h \
7-
mc_cpu.c \
8-
reduce/mc_cpu_reduce.h \
9-
reduce/mc_cpu_reduce_int8.c \
10-
reduce/mc_cpu_reduce_int16.c \
11-
reduce/mc_cpu_reduce_int32.c \
12-
reduce/mc_cpu_reduce_int64.c \
13-
reduce/mc_cpu_reduce_uint8.c \
14-
reduce/mc_cpu_reduce_uint16.c \
15-
reduce/mc_cpu_reduce_uint32.c \
16-
reduce/mc_cpu_reduce_uint64.c \
17-
reduce/mc_cpu_reduce_float.c \
18-
reduce/mc_cpu_reduce_bfloat16.c \
19-
reduce/mc_cpu_reduce_double.c \
20-
reduce_alpha/mc_cpu_reduce_alpha_float.c \
21-
reduce_alpha/mc_cpu_reduce_alpha_bfloat16.c \
22-
reduce_alpha/mc_cpu_reduce_alpha_double.c
5+
sources = \
6+
mc_cpu.h \
7+
mc_cpu.c \
8+
reduce/mc_cpu_reduce.h \
9+
reduce/mc_cpu_reduce_int8.c \
10+
reduce/mc_cpu_reduce_int16.c \
11+
reduce/mc_cpu_reduce_int32.c \
12+
reduce/mc_cpu_reduce_int64.c \
13+
reduce/mc_cpu_reduce_uint8.c \
14+
reduce/mc_cpu_reduce_uint16.c \
15+
reduce/mc_cpu_reduce_uint32.c \
16+
reduce/mc_cpu_reduce_uint64.c \
17+
reduce/mc_cpu_reduce_float.c \
18+
reduce/mc_cpu_reduce_bfloat16.c \
19+
reduce/mc_cpu_reduce_double.c \
20+
reduce/mc_cpu_reduce_long_double.c \
21+
reduce/mc_cpu_reduce_float_complex.c \
22+
reduce/mc_cpu_reduce_double_complex.c \
23+
reduce/mc_cpu_reduce_long_double_complex.c \
24+
reduce_alpha/mc_cpu_reduce_alpha_float.c \
25+
reduce_alpha/mc_cpu_reduce_alpha_bfloat16.c \
26+
reduce_alpha/mc_cpu_reduce_alpha_double.c \
27+
reduce_alpha/mc_cpu_reduce_alpha_long.c \
28+
reduce_alpha/mc_cpu_reduce_alpha_float_complex.c \
29+
reduce_alpha/mc_cpu_reduce_alpha_double_complex.c \
30+
reduce_alpha/mc_cpu_reduce_alpha_long_complex.c
2331

2432
module_LTLIBRARIES = libucc_mc_cpu.la
2533
libucc_mc_cpu_la_SOURCES = $(sources)

src/components/mc/cpu/mc_cpu.c

Lines changed: 79 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/**
2-
* Copyright (C) Mellanox Technologies Ltd. 2020-2021. ALL RIGHTS RESERVED.
2+
* Copyright (C) Mellanox Technologies Ltd. 2020-2022. ALL RIGHTS RESERVED.
33
*
44
* See file LICENSE for terms.
55
*/
@@ -189,16 +189,50 @@ static ucc_status_t ucc_mc_cpu_reduce_multi(const void *src1, const void *src2,
189189
return ucc_mc_cpu_reduce_multi_uint64(src1, src2, dst, n_vectors,
190190
count, stride, op);
191191
case UCC_DT_FLOAT32:
192-
ucc_assert(4 == sizeof(float));
193-
return ucc_mc_cpu_reduce_multi_float(src1, src2, dst, n_vectors,
194-
count, stride, op);
192+
#if SIZEOF_FLOAT == 4
193+
return ucc_mc_cpu_reduce_multi_float(src1, src2, dst, n_vectors, count,
194+
stride, op);
195+
#else
196+
return UCC_ERR_NOT_SUPPORTED;
197+
#endif
195198
case UCC_DT_FLOAT64:
196-
ucc_assert(8 == sizeof(double));
199+
#if SIZEOF_DOUBLE == 8
197200
return ucc_mc_cpu_reduce_multi_double(src1, src2, dst, n_vectors,
198201
count, stride, op);
202+
#else
203+
return UCC_ERR_NOT_SUPPORTED;
204+
#endif
205+
case UCC_DT_FLOAT128:
206+
#if SIZEOF_LONG_DOUBLE == 16
207+
return ucc_mc_cpu_reduce_multi_long_double(src1, src2, dst, n_vectors,
208+
count, stride, op);
209+
#else
210+
return UCC_ERR_NOT_SUPPORTED;
211+
#endif
199212
case UCC_DT_BFLOAT16:
200213
return ucc_mc_cpu_reduce_multi_bfloat16(src1, src2, dst, n_vectors,
201214
count, stride, op);
215+
case UCC_DT_FLOAT32_COMPLEX:
216+
#if SIZEOF_FLOAT__COMPLEX == 8
217+
return ucc_mc_cpu_reduce_multi_float_complex(src1, src2, dst, n_vectors,
218+
count, stride, op);
219+
#else
220+
return UCC_ERR_NOT_SUPPORTED;
221+
#endif
222+
case UCC_DT_FLOAT64_COMPLEX:
223+
#if SIZEOF_DOUBLE__COMPLEX == 16
224+
return ucc_mc_cpu_reduce_multi_double_complex(
225+
src1, src2, dst, n_vectors, count, stride, op);
226+
#else
227+
return UCC_ERR_NOT_SUPPORTED;
228+
#endif
229+
case UCC_DT_FLOAT128_COMPLEX:
230+
#if SIZEOF_LONG_DOUBLE__COMPLEX == 32
231+
return ucc_mc_cpu_reduce_multi_long_double_complex(
232+
src1, src2, dst, n_vectors, count, stride, op);
233+
#else
234+
return UCC_ERR_NOT_SUPPORTED;
235+
#endif
202236
default:
203237
mc_error(&ucc_mc_cpu.super, "unsupported reduction type (%s)",
204238
ucc_datatype_str(dt));
@@ -222,19 +256,57 @@ ucc_mc_cpu_reduce_multi_alpha(const void *src1, const void *src2, void *dst,
222256
{
223257
switch (dt) {
224258
case UCC_DT_FLOAT32:
225-
ucc_assert(4 == sizeof(float));
259+
#if SIZEOF_FLOAT == 4
226260
return ucc_mc_cpu_reduce_multi_alpha_float(src1, src2, dst, n_vectors,
227261
count, stride, reduce_op,
228262
vector_op, (float)alpha);
263+
#else
264+
return UCC_ERR_NOT_SUPPORTED;
265+
#endif
229266
case UCC_DT_FLOAT64:
230-
ucc_assert(8 == sizeof(double));
267+
#if SIZEOF_DOUBLE == 8
231268
return ucc_mc_cpu_reduce_multi_alpha_double(src1, src2, dst, n_vectors,
232269
count, stride, reduce_op,
233270
vector_op, alpha);
271+
#else
272+
return UCC_ERR_NOT_SUPPORTED;
273+
#endif
274+
case UCC_DT_FLOAT128:
275+
#if SIZEOF_LONG_DOUBLE == 16
276+
return ucc_mc_cpu_reduce_multi_alpha_long(
277+
src1, src2, dst, n_vectors, count, stride, reduce_op, vector_op,
278+
(long double)alpha);
279+
#else
280+
return UCC_ERR_NOT_SUPPORTED;
281+
#endif
234282
case UCC_DT_BFLOAT16:
235283
return ucc_mc_cpu_reduce_multi_alpha_bfloat16(src1, src2, dst, n_vectors,
236284
count, stride, reduce_op,
237285
vector_op, (float)alpha);
286+
case UCC_DT_FLOAT32_COMPLEX:
287+
#if SIZEOF_FLOAT__COMPLEX == 8
288+
return ucc_mc_cpu_reduce_multi_alpha_float_complex(
289+
src1, src2, dst, n_vectors, count, stride, reduce_op, vector_op,
290+
(float)alpha);
291+
#else
292+
return UCC_ERR_NOT_SUPPORTED;
293+
#endif
294+
case UCC_DT_FLOAT64_COMPLEX:
295+
#if SIZEOF_DOUBLE__COMPLEX == 16
296+
return ucc_mc_cpu_reduce_multi_alpha_double_complex(
297+
src1, src2, dst, n_vectors, count, stride, reduce_op, vector_op,
298+
(double)alpha);
299+
#else
300+
return UCC_ERR_NOT_SUPPORTED;
301+
#endif
302+
case UCC_DT_FLOAT128_COMPLEX:
303+
#if SIZEOF_LONG_DOUBLE__COMPLEX == 32
304+
return ucc_mc_cpu_reduce_multi_alpha_long_complex(
305+
src1, src2, dst, n_vectors, count, stride, reduce_op, vector_op,
306+
(long double)alpha);
307+
#else
308+
return UCC_ERR_NOT_SUPPORTED;
309+
#endif
238310
default:
239311
mc_error(&ucc_mc_cpu.super, "unsupported reduction type (%s)",
240312
ucc_datatype_str(dt));

src/components/mc/cpu/reduce/mc_cpu_reduce.h

Lines changed: 43 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/**
2-
* Copyright (C) Mellanox Technologies Ltd. 2020-2021. ALL RIGHTS RESERVED.
2+
* Copyright (C) Mellanox Technologies Ltd. 2020-2022. ALL RIGHTS RESERVED.
33
*
44
* See file LICENSE for terms.
55
*/
@@ -8,6 +8,8 @@
88
#define UCC_MC_CPU_REDUCE_H_
99

1010
#include "utils/ucc_math.h"
11+
#include <complex.h>
12+
1113
#define OP_1(_s1, _s2, _i, _sc, _OP) _OP(_s1[_i], _s2[_i])
1214
#define OP_2(_s1, _s2, _i, _sc, _OP) \
1315
_OP((OP_1(_s1, _s2, _i, _sc, _OP)), _s2[_i + 1 * _sc])
@@ -163,6 +165,32 @@
163165
} \
164166
} while (0)
165167

168+
/*
 * Reduction dispatcher for complex floating-point element types.
 *
 * Views src1_p, src2_p and dest_p as restrict-qualified arrays of `type`
 * (s1, s2, d), asserts that d is either at/below src2_p or beyond
 * src2_p + (size - 1) * stride + count * sizeof(type), and then switches
 * on reduce_op:
 *   - UCC_OP_SUM and UCC_OP_AVG expand DO_DT_REDUCE_WITH_OP with DO_OP_SUM;
 *   - UCC_OP_PROD expands DO_DT_REDUCE_WITH_OP with DO_OP_PROD;
 *   - any other op is reported through mc_error and the macro executes
 *     `return UCC_ERR_NOT_SUPPORTED`, i.e. it returns from the function in
 *     which it is expanded. It must therefore only be used inside functions
 *     returning ucc_status_t.
 *
 * Only SUM, AVG and PROD are accepted for complex types by this macro.
 * NOTE(review): UCC_OP_AVG takes the same path as UCC_OP_SUM here; any
 * averaging step is presumably applied elsewhere - confirm against callers.
 */
#define DO_DT_REDUCE_FLOAT_COMPLEX(type, reduce_op, src1_p, src2_p, dest_p, \
169+
size, count, stride) \
170+
do { \
171+
const type *restrict s1 = (const type *restrict)src1_p; \
172+
const type *restrict s2 = (const type *restrict)src2_p; \
173+
type *restrict d = (type * restrict) dest_p; \
174+
ucc_assert((ptrdiff_t)d <= (ptrdiff_t)src2_p || \
175+
(ptrdiff_t)d > (ptrdiff_t)src2_p + (size - 1) * stride + \
176+
count * sizeof(type)); \
177+
switch (reduce_op) { \
178+
case UCC_OP_SUM: \
179+
case UCC_OP_AVG: \
180+
DO_DT_REDUCE_WITH_OP(s1, s2, d, size, count, stride, DO_OP_SUM); \
181+
break; \
182+
case UCC_OP_PROD: \
183+
DO_DT_REDUCE_WITH_OP(s1, s2, d, size, count, stride, DO_OP_PROD); \
184+
break; \
185+
default: \
186+
mc_error(&ucc_mc_cpu.super, \
187+
"float complex dtype does not support " \
188+
"requested reduce op: %s", \
189+
ucc_reduction_op_str(reduce_op)); \
190+
return UCC_ERR_NOT_SUPPORTED; \
191+
} \
192+
} while (0)
193+
166194
#define VEC_OP(_d, OP) \
167195
do { \
168196
size_t _i; \
@@ -201,18 +229,23 @@ REDUCE_FN_DECLARE(uint32);
201229
REDUCE_FN_DECLARE(uint64);
202230
REDUCE_FN_DECLARE(float);
203231
REDUCE_FN_DECLARE(double);
232+
REDUCE_FN_DECLARE(long_double);
204233
REDUCE_FN_DECLARE(bfloat16);
234+
REDUCE_FN_DECLARE(float_complex);
235+
REDUCE_FN_DECLARE(double_complex);
236+
REDUCE_FN_DECLARE(long_double_complex);
205237

206-
#define REDUCE_ALPHA_FN_DECLARE(_type) \
238+
#define REDUCE_ALPHA_FN_DECLARE(_type, alpha_dt) \
207239
ucc_status_t ucc_mc_cpu_reduce_multi_alpha_##_type( \
208240
const void *src1, const void *src2, void *dst, size_t n_vectors, \
209241
size_t count, size_t stride, ucc_reduction_op_t reduce_op, \
210-
ucc_reduction_op_t vector_op, _type alpha)
211-
REDUCE_ALPHA_FN_DECLARE(float);
212-
REDUCE_ALPHA_FN_DECLARE(double);
213-
ucc_status_t
214-
ucc_mc_cpu_reduce_multi_alpha_bfloat16(const void *src1, const void *src2,
215-
void *dst, size_t n_vectors, size_t count,
216-
size_t stride, ucc_reduction_op_t reduce_op,
217-
ucc_reduction_op_t vector_op, float alpha);
242+
ucc_reduction_op_t vector_op, alpha_dt alpha)
243+
REDUCE_ALPHA_FN_DECLARE(float, float);
244+
REDUCE_ALPHA_FN_DECLARE(double, double);
245+
REDUCE_ALPHA_FN_DECLARE(long, long double);
246+
REDUCE_ALPHA_FN_DECLARE(bfloat16, float);
247+
REDUCE_ALPHA_FN_DECLARE(float_complex, float);
248+
REDUCE_ALPHA_FN_DECLARE(double_complex, double);
249+
REDUCE_ALPHA_FN_DECLARE(long_complex, long double);
250+
218251
#endif
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/**
2+
* Copyright (C) Mellanox Technologies Ltd. 2022. ALL RIGHTS RESERVED.
3+
*
4+
* See file LICENSE for terms.
5+
*/
6+
7+
#include "mc_cpu.h"
8+
#include "reduce/mc_cpu_reduce.h"
9+
10+
/**
 * CPU reduction entry point for `double complex` elements.
 *
 * Expands DO_DT_REDUCE_FLOAT_COMPLEX with element type `double complex`,
 * passing src1, src2, dst, n_vectors, count, stride and op through unchanged.
 *
 * @return UCC_OK when the macro falls through. The macro itself returns
 *         UCC_ERR_NOT_SUPPORTED from this function for any op other than
 *         UCC_OP_SUM, UCC_OP_AVG or UCC_OP_PROD.
 */
ucc_status_t ucc_mc_cpu_reduce_multi_double_complex(const void *src1,
11+
const void *src2, void *dst,
12+
size_t n_vectors,
13+
size_t count, size_t stride,
14+
ucc_reduction_op_t op)
15+
{
16+
DO_DT_REDUCE_FLOAT_COMPLEX(double complex, op, src1, src2, dst, n_vectors,
17+
count, stride);
18+
return UCC_OK;
19+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/**
2+
* Copyright (C) Mellanox Technologies Ltd. 2022. ALL RIGHTS RESERVED.
3+
*
4+
* See file LICENSE for terms.
5+
*/
6+
7+
#include "mc_cpu.h"
8+
#include "reduce/mc_cpu_reduce.h"
9+
10+
/**
 * CPU reduction entry point for `float complex` elements.
 *
 * Expands DO_DT_REDUCE_FLOAT_COMPLEX with element type `float complex`,
 * passing src1, src2, dst, n_vectors, count, stride and op through unchanged.
 *
 * @return UCC_OK when the macro falls through. The macro itself returns
 *         UCC_ERR_NOT_SUPPORTED from this function for any op other than
 *         UCC_OP_SUM, UCC_OP_AVG or UCC_OP_PROD.
 */
ucc_status_t ucc_mc_cpu_reduce_multi_float_complex(const void *src1,
11+
const void *src2, void *dst,
12+
size_t n_vectors,
13+
size_t count, size_t stride,
14+
ucc_reduction_op_t op)
15+
{
16+
DO_DT_REDUCE_FLOAT_COMPLEX(float complex, op, src1, src2, dst, n_vectors,
17+
count, stride);
18+
return UCC_OK;
19+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/**
2+
* Copyright (C) Mellanox Technologies Ltd. 2022. ALL RIGHTS RESERVED.
3+
*
4+
* See file LICENSE for terms.
5+
*/
6+
7+
#include "mc_cpu.h"
8+
#include "reduce/mc_cpu_reduce.h"
9+
10+
/**
 * CPU reduction entry point for `long double` elements.
 *
 * Expands DO_DT_REDUCE_FLOAT with element type `long double`, passing src1,
 * src2, dst, n_vectors, count, stride and op through unchanged.
 *
 * NOTE(review): DO_DT_REDUCE_FLOAT is defined outside this view; it
 * presumably returns an error status from this function for unsupported
 * ops, as the complex variant does - confirm.
 *
 * @return UCC_OK when the macro falls through.
 */
ucc_status_t ucc_mc_cpu_reduce_multi_long_double(const void *src1,
11+
const void *src2, void *dst,
12+
size_t n_vectors, size_t count,
13+
size_t stride,
14+
ucc_reduction_op_t op)
15+
{
16+
DO_DT_REDUCE_FLOAT(long double, op, src1, src2, dst, n_vectors, count,
17+
stride);
18+
return UCC_OK;
19+
}
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
/**
2+
* Copyright (C) Mellanox Technologies Ltd. 2022. ALL RIGHTS RESERVED.
3+
*
4+
* See file LICENSE for terms.
5+
*/
6+
7+
#include "mc_cpu.h"
8+
#include "reduce/mc_cpu_reduce.h"
9+
10+
/**
 * CPU reduction entry point for `long double complex` elements.
 *
 * Expands DO_DT_REDUCE_FLOAT_COMPLEX with element type `long double complex`,
 * passing src1, src2, dst, n_vectors, count, stride and op through unchanged.
 *
 * @return UCC_OK when the macro falls through. The macro itself returns
 *         UCC_ERR_NOT_SUPPORTED from this function for any op other than
 *         UCC_OP_SUM, UCC_OP_AVG or UCC_OP_PROD.
 */
ucc_status_t ucc_mc_cpu_reduce_multi_long_double_complex(
11+
const void *src1, const void *src2, void *dst, size_t n_vectors,
12+
size_t count, size_t stride, ucc_reduction_op_t op)
13+
{
14+
DO_DT_REDUCE_FLOAT_COMPLEX(long double complex, op, src1, src2, dst,
15+
n_vectors, count, stride);
16+
return UCC_OK;
17+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
/**
2+
* Copyright (C) Mellanox Technologies Ltd. 2020-2022. ALL RIGHTS RESERVED.
3+
*
4+
* See file LICENSE for terms.
5+
*/
6+
7+
#include "mc_cpu.h"
8+
#include "reduce/mc_cpu_reduce.h"
9+
10+
/**
 * CPU reduce-then-vector-op entry point for `double complex` elements.
 *
 * First expands DO_DT_REDUCE_FLOAT_COMPLEX with element type
 * `double complex` and reduce_op, which returns UCC_ERR_NOT_SUPPORTED from
 * this function for any reduce_op other than UCC_OP_SUM, UCC_OP_AVG or
 * UCC_OP_PROD. It then expands DO_VEC_OP(double complex, dst) on the result.
 *
 * NOTE(review): DO_VEC_OP is defined outside this view; it presumably picks
 * up `vector_op`, `alpha` and `count` from this scope to post-process dst -
 * confirm against its definition.
 *
 * @return UCC_OK when both macros fall through.
 */
ucc_status_t ucc_mc_cpu_reduce_multi_alpha_double_complex(
11+
const void *src1, const void *src2, void *dst, size_t n_vectors,
12+
size_t count, size_t stride, ucc_reduction_op_t reduce_op,
13+
ucc_reduction_op_t vector_op, double alpha)
14+
{
15+
DO_DT_REDUCE_FLOAT_COMPLEX(double complex, reduce_op, src1, src2, dst,
16+
n_vectors, count, stride);
17+
DO_VEC_OP(double complex, dst);
18+
return UCC_OK;
19+
}

0 commit comments

Comments
 (0)