Skip to content

Commit

Permalink
Merge pull request #4108 from martin-frbg/c910-iamax
Browse files Browse the repository at this point in the history
Fix RISCV-C910V IDAMAX
  • Loading branch information
martin-frbg authored Jun 27, 2023
2 parents dc24391 + ceaee7d commit b4f233e
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 15 deletions.
2 changes: 1 addition & 1 deletion Makefile.riscv64
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
ifeq ($(CORE), C910V)
-CCOMMON_OPT += -march=rv64imafdcv0p7_zfh_xtheadc -mabi=lp64d -mtune=c920 -O1
+CCOMMON_OPT += -march=rv64imafdcv0p7_zfh_xtheadc -mabi=lp64d -mtune=c920
FCOMMON_OPT += -march=rv64imafdcv0p7_zfh_xtheadc -mabi=lp64d -mtune=c920 -static
endif
2 changes: 1 addition & 1 deletion kernel/riscv64/dot_vector.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
BLASLONG i=0, j=0;
double dot = 0.0 ;

-if ( n < 0 ) return(dot);
+if ( n < 1 ) return(dot);

FLOAT_V_T vr, vx, vy;
unsigned int gvl = 0;
Expand Down
40 changes: 27 additions & 13 deletions kernel/riscv64/iamax_vector.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <math.h>

#if defined(DOUBLE)

-#define ABS fabs
+#define VFMVFS_FLOAT vfmv_f_s_f64m1_f64
#define VSETVL(n) vsetvl_e64m8(n)
#define VSETVL_MAX vsetvlmax_e64m1()
#define FLOAT_V_T vfloat64m8_t
Expand All @@ -54,8 +53,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define VADDVX_UINT vadd_vx_u64m8
#define VMVVX_UINT vmv_v_x_u64m8
#else

-#define ABS fabsf
+#define VFMVFS_FLOAT vfmv_f_s_f32m1_f32
#define VSETVL(n) vsetvl_e32m8(n)
#define VSETVL_MAX vsetvlmax_e32m1()
#define FLOAT_V_T vfloat32m8_t
Expand Down Expand Up @@ -85,7 +83,11 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
{
BLASLONG i=0, j=0;
FLOAT maxf=0.0;
+#ifdef DOUBLE
+BLASLONG max_index = 0;
+#else
unsigned int max_index = 0;
+#endif
if (n <= 0 || inc_x <= 0) return(max_index);

FLOAT_V_T vx, v_max;
Expand Down Expand Up @@ -117,11 +119,14 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
j += gvl;
}
v_res = VFREDMAXVS_FLOAT(v_res, v_max, v_z0, gvl);
-maxf = *((FLOAT*)&v_res);
+maxf = VFMVFS_FLOAT(v_res);
mask = VMFGEVF_FLOAT(v_max, maxf, gvl);
max_index = VMFIRSTM(mask,gvl);
-max_index = *((unsigned int*)&v_max_index+max_index);
-
+#ifdef DOUBLE
+max_index = *((BLASLONG *)&v_max_index+max_index);
+#else
+max_index = *((unsigned int *)&v_max_index+max_index);
+#endif
if(j < n){
gvl = VSETVL(n-j);
vx = VLEV_FLOAT(&x[j], gvl);
Expand All @@ -130,15 +135,19 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
v_max = VFRSUBVF_MASK_FLOAT(mask, vx, vx, 0, gvl);

v_res = VFREDMAXVS_FLOAT(v_res, v_max, v_z0, gvl);
-FLOAT cur_maxf = *((FLOAT*)&v_res);
+FLOAT cur_maxf = VFMVFS_FLOAT(v_res);
if(cur_maxf > maxf){
//tail index
v_max_index = VIDV_UINT(gvl);
v_max_index = VADDVX_UINT(v_max_index, j, gvl);

mask = VMFGEVF_FLOAT(v_max, cur_maxf, gvl);
max_index = VMFIRSTM(mask,gvl);
+#ifdef DOUBLE
+max_index = *((BLASLONG*)&v_max_index+max_index);
+#else
max_index = *((unsigned int*)&v_max_index+max_index);
+#endif
}
}
}else{
Expand All @@ -165,11 +174,14 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
idx += inc_v;
}
v_res = VFREDMAXVS_FLOAT(v_res, v_max, v_z0, gvl);
-maxf = *((FLOAT*)&v_res);
+maxf = VFMVFS_FLOAT(v_res);
mask = VMFGEVF_FLOAT(v_max, maxf, gvl);
max_index = VMFIRSTM(mask,gvl);
+#ifdef DOUBLE
+max_index = *((BLASLONG*)&v_max_index+max_index);
+#else
max_index = *((unsigned int*)&v_max_index+max_index);
-
+#endif
if(j < n){
gvl = VSETVL(n-j);
vx = VLSEV_FLOAT(&x[idx], stride_x, gvl);
Expand All @@ -178,19 +190,21 @@ BLASLONG CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x)
v_max = VFRSUBVF_MASK_FLOAT(mask, vx, vx, 0, gvl);

v_res = VFREDMAXVS_FLOAT(v_res, v_max, v_z0, gvl);
-FLOAT cur_maxf = *((FLOAT*)&v_res);
+FLOAT cur_maxf = VFMVFS_FLOAT(v_res);
if(cur_maxf > maxf){
//tail index
v_max_index = VIDV_UINT(gvl);
v_max_index = VADDVX_UINT(v_max_index, j, gvl);

mask = VMFGEVF_FLOAT(v_max, cur_maxf, gvl);
max_index = VMFIRSTM(mask,gvl);
+#ifdef DOUBLE
+max_index = *((BLASLONG*)&v_max_index+max_index);
+#else
max_index = *((unsigned int*)&v_max_index+max_index);
+#endif
}
}
}
return(max_index+1);
}


0 comments on commit b4f233e

Please sign in to comment.