Skip to content

Commit d6b25c4

Browse files
authored
Merge pull request #5542 from abhishek-iitmadras/abhishek_new_tt_a64fx
[A64FX]: add tt for a64fx dot
2 parents 5b79d01 + a14caf4 commit d6b25c4

File tree

3 files changed

+36
-39
lines changed

3 files changed

+36
-39
lines changed

kernel/arm64/KERNEL.A64FX

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ DGEMVNKERNEL = gemv_n_sve_v4x3.c
55
SGEMVTKERNEL = gemv_t_sve_v4x3.c
66
DGEMVTKERNEL = gemv_t_sve_v4x3.c
77

8-
DDOTKERNEL = dot_sve_v8.c
9-
SDOTKERNEL = dot_sve_v8.c
8+
DDOTKERNEL = dot.c
9+
SDOTKERNEL = dot.c
1010

1111
SAXPYKERNEL = axpy_sve.c
1212
DAXPYKERNEL = axpy_sve.c

kernel/arm64/dot.c

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
4242
#ifdef USE_SVE
4343
#ifdef DOT_KERNEL_SVE
4444
#include DOT_KERNEL_SVE
45+
#elif defined(A64FX)
46+
#include "dot_kernel_sve_v8.c"
4547
#else
4648
#include "dot_kernel_sve.c"
4749
#endif
@@ -82,14 +84,43 @@ static inline int get_dot_optimal_nthreads_neoversev1(BLASLONG N, int ncpu) {
8284
}
8385
#endif
8486

87+
#if defined(DYNAMIC_ARCH) || defined(A64FX)
88+
/*
 * Choose how many threads to use for a dot product of length N on A64FX.
 *
 * N    - problem size compared against the threshold tables below.
 * ncpu - upper bound on the thread count; every tier is clamped to it
 *        via MIN(ncpu, k).
 *
 * Returns 1 for the smallest sizes, a progressively larger clamped count
 * for each following size tier, and ncpu once N exceeds the last threshold.
 *
 * Two tables exist: one compiled when DOUBLE is defined, one otherwise.
 * NOTE(review): the cut-off values are presumably empirically tuned on
 * A64FX hardware -- confirm against the benchmark data from the PR.
 */
static inline int get_dot_optimal_nthreads_a64fx(BLASLONG N, int ncpu) {
89+
#ifdef DOUBLE
/* Tiers used when DOUBLE is defined: 1, 2, 4, 6, 8, 16, 24, then ncpu. */
90+
return (N <= 11000L) ? 1
91+
: (N <= 20000L) ? MIN(ncpu, 2)
92+
: (N <= 35000L) ? MIN(ncpu, 4)
93+
: (N <= 50000L) ? MIN(ncpu, 6)
94+
: (N <= 440000L) ? MIN(ncpu, 8)
95+
: (N <= 880000L) ? MIN(ncpu, 16)
96+
: (N <= 1020000L) ? MIN(ncpu, 24)
97+
: ncpu;
98+
#else
/* Tiers used when DOUBLE is not defined: 1, 2, 4, 6, 8, then ncpu
 * (no intermediate 16/24 steps in this table). */
99+
return (N <= 22000L) ? 1
100+
: (N <= 39000L) ? MIN(ncpu, 2)
101+
: (N <= 79000L) ? MIN(ncpu, 4)
102+
: (N <= 120000L) ? MIN(ncpu, 6)
103+
: (N <= 1020000L) ? MIN(ncpu, 8)
104+
: ncpu;
105+
#endif
106+
}
107+
#endif /* DYNAMIC_ARCH || A64FX */
108+
85109
static inline int get_dot_optimal_nthreads(BLASLONG n) {
86110
int ncpu = num_cpu_avail(1);
87111

88-
#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(BFLOAT16)
112+
#if defined(A64FX) && !defined(COMPLEX) && !defined(BFLOAT16)
113+
return get_dot_optimal_nthreads_a64fx(n, ncpu);
114+
#elif defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(BFLOAT16)
89115
return get_dot_optimal_nthreads_neoversev1(n, ncpu);
90116
#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(BFLOAT16)
91-
if (strcmp(gotoblas_corename(), "neoversev1") == 0) {
92-
return get_dot_optimal_nthreads_neoversev1(n, ncpu);
117+
{
118+
const char *core = gotoblas_corename();
119+
if (strcmp(core, "a64fx") == 0) {
120+
return get_dot_optimal_nthreads_a64fx(n, ncpu);
121+
} else if (strcmp(core, "neoversev1") == 0) {
122+
return get_dot_optimal_nthreads_neoversev1(n, ncpu);
123+
}
93124
}
94125
#endif
95126

kernel/arm64/dot_sve_v8.c

Lines changed: 0 additions & 34 deletions
This file was deleted.

0 commit comments

Comments
 (0)