Skip to content

Commit

Permalink
target/riscv: Add new Nuclei custom DSP N2 instructions
Browse files Browse the repository at this point in the history
  • Loading branch information
lxx committed Nov 21, 2023
1 parent 4fa8d55 commit ff7a9ec
Show file tree
Hide file tree
Showing 4 changed files with 203 additions and 1 deletion.
11 changes: 11 additions & 0 deletions target/riscv/NucleiCustom.decode
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,7 @@ dsmtt32_sra14 0111101 ..... ..... 000 ..... 1111011 @r
dsmtt32_sra32 0111110 ..... ..... 000 ..... 1111011 @r
dpkbb32 0111111 ..... ..... 000 ..... 1111011 @r
dpkbt32 1000000 ..... ..... 000 ..... 1111011 @r
dpack32 1100110 ..... ..... 000 ..... 1111011 @r
dpktt32 1000001 ..... ..... 000 ..... 1111011 @r
dpktb32 1000010 ..... ..... 000 ..... 1111011 @r
dpktb16 1000011 ..... ..... 000 ..... 1111011 @r
Expand Down Expand Up @@ -166,6 +167,16 @@ dksms32_u 1100100 ..... ..... 000 ..... 1111011 @r
dmada32 1100101 ..... ..... 000 ..... 1111011 @r
dredas16 0000110 00010 ..... 000 ..... 1111011 @r2
dredsa16 0000110 00011 ..... 000 ..... 1111011 @r2
dsunpkd810 0000110 00100 ..... 000 ..... 1111011 @r2
dsunpkd820 0000110 00101 ..... 000 ..... 1111011 @r2
dsunpkd830 0000110 00110 ..... 000 ..... 1111011 @r2
dsunpkd831 0000110 00111 ..... 000 ..... 1111011 @r2
dsunpkd832 0000110 01000 ..... 000 ..... 1111011 @r2
dzunpkd810 0000110 01001 ..... 000 ..... 1111011 @r2
dzunpkd820 0000110 01010 ..... 000 ..... 1111011 @r2
dzunpkd830 0000110 01011 ..... 000 ..... 1111011 @r2
dzunpkd831 0000110 01100 ..... 000 ..... 1111011 @r2
dzunpkd832 0000110 01101 ..... 000 ..... 1111011 @r2
dsma32_u 1101000 ..... ..... 000 ..... 1111011 @r
dsmxs32_u 1101001 ..... ..... 000 ..... 1111011 @r
dsmxa32_u 1101010 ..... ..... 000 ..... 1111011 @r
Expand Down
11 changes: 11 additions & 0 deletions target/riscv/helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -1607,6 +1607,17 @@ DEF_HELPER_3(dmsr16, i64, env, i64, i64)
DEF_HELPER_3(dmsr17, i64, env, i64, i64)
DEF_HELPER_3(dmsr33, i64, env, i64, i64)
DEF_HELPER_3(dmxsr33, i64, env, i64, i64)
/*
 * Nuclei N2 pack/unpack helpers.
 * dpack32 takes two 32-bit sources; the d[sz]unpkd8xy helpers take one
 * 64-bit source.  All of them return a 64-bit result.
 */
DEF_HELPER_3(dpack32, i64, env, i32, i32)
DEF_HELPER_2(dsunpkd810, i64, env, i64)
DEF_HELPER_2(dsunpkd820, i64, env, i64)
DEF_HELPER_2(dsunpkd830, i64, env, i64)
DEF_HELPER_2(dsunpkd831, i64, env, i64)
DEF_HELPER_2(dsunpkd832, i64, env, i64)
DEF_HELPER_2(dzunpkd810, i64, env, i64)
DEF_HELPER_2(dzunpkd820, i64, env, i64)
DEF_HELPER_2(dzunpkd830, i64, env, i64)
DEF_HELPER_2(dzunpkd831, i64, env, i64)
DEF_HELPER_2(dzunpkd832, i64, env, i64)

/* Nuclei N3 SIMD DSP Additional Instruction */
DEF_HELPER_4(dkmmac, i64, env, i64, i64, i64)
Expand Down
50 changes: 50 additions & 0 deletions target/riscv/insn_trans/trans_rvp.c.inc
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,38 @@ static inline bool r_ool_d64_s64_s32_d(DisasContext *ctx, arg_r *a,
#endif
}

/*
 * Translate an R-type instruction whose out-of-line helper consumes two
 * 32-bit operands (rs1, rs2) and produces a 64-bit result.
 *
 * On 32-bit targets the result is split across a register pair: the low
 * 32 bits are written to rd and the high 32 bits to rd + 1.
 * On TARGET_RISCV64 builds no code is emitted and the instruction is
 * simply reported as handled.
 *
 * @ctx: disassembly context
 * @a:   decoded R-type operands (rd, rs1, rs2)
 * @fn:  helper generator invoked as fn(dst64, cpu_env, src1_32, src2_32)
 *
 * Always returns true.
 */
static inline bool r_ool_d64_s32_s32_d(DisasContext *ctx, arg_r *a,
                                       void (* fn)(TCGv_i64, TCGv_ptr, TCGv_i32, TCGv_i32))
{
#ifdef TARGET_RISCV64
    return true;
#else
    TCGv rs1_val = get_gpr(ctx, a->rs1, EXT_ZERO);
    TCGv rs2_val = get_gpr(ctx, a->rs2, EXT_ZERO);
    TCGv_i32 op1 = tcg_temp_new_i32();
    TCGv_i32 op2 = tcg_temp_new_i32();
    TCGv_i64 wide = tcg_temp_new_i64();
    TCGv rd_lo, rd_hi;

    /* Narrow the target-long register values to the helper's i32 inputs. */
    tcg_gen_trunc_tl_i32(op1, rs1_val);
    tcg_gen_trunc_tl_i32(op2, rs2_val);

    fn(wide, cpu_env, op1, op2);

    /* Split the 64-bit helper result over the rd / rd + 1 pair. */
    rd_lo = dest_gpr(ctx, a->rd);
    rd_hi = dest_gpr(ctx, a->rd + 1);
    tcg_gen_extrl_i64_i32(rd_lo, wide);
    tcg_gen_extrh_i64_i32(rd_hi, wide);
    gen_set_gpr(ctx, a->rd, rd_lo);
    gen_set_gpr(ctx, a->rd + 1, rd_hi);
    return true;
#endif
}

static inline bool r_ool_d32_s64_s64_d(DisasContext *ctx, arg_r *a,
void (* fn)(TCGv_i32, TCGv_ptr, TCGv_i64, TCGv_i64))
{
Expand Down Expand Up @@ -237,6 +269,13 @@ static bool trans_##NAME(DisasContext *s, arg_r *a) \
return r_ool_d64_s64_s32_d(s, a, gen_helper_##NAME);\
}

/*
 * Define trans_<NAME>() for an R-type instruction handled by
 * r_ool_d64_s32_s32_d() with gen_helper_<NAME> as the helper generator.
 * REQUIRE_XXLDSPN2X(s) runs first; presumably it rejects the instruction
 * when the Nuclei N2 DSP extension is not enabled -- confirm against its
 * definition.
 */
#define GEN_RVP_R_OOL_D64_S32_S32_D_N2(NAME) \
static bool trans_##NAME(DisasContext *s, arg_r *a) \
{ \
REQUIRE_XXLDSPN2X(s); \
return r_ool_d64_s32_s32_d(s, a, gen_helper_##NAME);\
}

#define GEN_RVP_R_OOL_D32_S64_S64_D_N3(NAME) \
static bool trans_##NAME(DisasContext *s, arg_r *a) \
{ \
Expand Down Expand Up @@ -715,6 +754,17 @@ GEN_RVP_R2_OOL_D32_S64_D_N2(dkclip64);
GEN_RVP_SHIFTI_D_N2(dsclip8);
GEN_RVP_SHIFTI_D_N2(dsclip16);
GEN_RVP_SHIFTI_D_N2(dsclip32);
/* N2 32-bit pack and 8-bit to 16-bit unpack (signed and zero-extended) */
GEN_RVP_R_OOL_D64_S32_S32_D_N2(dpack32);
GEN_RVP_R2_OOL_D_N2(dsunpkd810);
GEN_RVP_R2_OOL_D_N2(dsunpkd820);
GEN_RVP_R2_OOL_D_N2(dsunpkd830);
GEN_RVP_R2_OOL_D_N2(dsunpkd831);
GEN_RVP_R2_OOL_D_N2(dsunpkd832);
GEN_RVP_R2_OOL_D_N2(dzunpkd810);
GEN_RVP_R2_OOL_D_N2(dzunpkd820);
GEN_RVP_R2_OOL_D_N2(dzunpkd830);
GEN_RVP_R2_OOL_D_N2(dzunpkd831);
GEN_RVP_R2_OOL_D_N2(dzunpkd832);

/* 8-bit Unpacking Instructions */
GEN_RVP_R2_OOL(sunpkd810);
Expand Down
132 changes: 131 additions & 1 deletion target/riscv/packed_helper.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,19 @@ rvprd_d64_s64_s32(CPURISCVState *env, uint64_t a, uint32_t b,
return result;
}

/*
 * Drive a packed element callback that builds a 64-bit result from two
 * 32-bit sources.
 *
 * The callback is invoked for i = 0, step, 2 * step, ... while
 * i < sizeof(uint64_t) / size.  It receives pointers to the zero-initialised
 * result and to local copies of @a and @b.
 *
 * @env:  CPU state, forwarded to @fn
 * @a:    first 32-bit source
 * @b:    second 32-bit source
 * @step: increment applied to the element index after each call
 * @size: element size in bytes used to derive the number of passes
 * @fn:   per-element operation
 *
 * Returns the accumulated 64-bit result.
 */
static inline uint64_t
rvprd_d64_s32_s32(CPURISCVState *env, uint32_t a, uint32_t b,
                  uint8_t step, uint8_t size, PackedFn3i *fn)
{
    uint64_t result = 0;
    int passes = sizeof(uint64_t) / size;
    int i = 0;

    while (i < passes) {
        fn(env, &result, &a, &b, i);
        i += step;
    }
    return result;
}

static inline uint32_t
rvprd_d32_s64_s64(CPURISCVState *env, uint64_t a, uint64_t b,
uint8_t step, uint8_t size, PackedFn3i *fn)
Expand Down Expand Up @@ -98,13 +111,20 @@ uint64_t HELPER(NAME)(CPURISCVState *env, uint64_t a, \
return rvprd(env, a, b, STEP, SIZE, (PackedFn3i *)do_##NAME);\
}

/*
 * Define HELPER(NAME) taking a 64-bit and a 32-bit source and returning a
 * 64-bit result.  The body forwards to rvprd_d64_s64_s32() with do_<NAME>
 * as the per-element operation and the given STEP and SIZE.
 *
 * The macro header must appear exactly once: a second "#define" line after
 * the continuation backslash would become part of the replacement list and
 * is rejected by the preprocessor.
 */
#define RVPRD_D64_S64_S32(NAME, STEP, SIZE)                     \
uint64_t HELPER(NAME)(CPURISCVState *env, uint64_t a,           \
                      uint32_t b)                               \
{                                                               \
    return rvprd_d64_s64_s32(env, a, b, STEP, SIZE, (PackedFn3i *)do_##NAME);\
}

/*
 * Define HELPER(NAME) taking two 32-bit sources and returning a 64-bit
 * result.  The body forwards to rvprd_d64_s32_s32() with do_<NAME> as the
 * per-element operation and the given STEP and SIZE.
 */
#define RVPRD_D64_S32_S32(NAME, STEP, SIZE) \
uint64_t HELPER(NAME)(CPURISCVState *env, uint32_t a, \
uint32_t b) \
{ \
return rvprd_d64_s32_s32(env, a, b, STEP, SIZE, (PackedFn3i *)do_##NAME);\
}

#define RVPRD_D32_S64_S64(NAME, STEP, SIZE) \
uint32_t HELPER(NAME)(CPURISCVState *env, uint64_t a, \
uint64_t b) \
Expand Down Expand Up @@ -558,6 +578,16 @@ static inline void do_dpkbt32(CPURISCVState *env, void *vd, void *va,

RVPRD(dpkbt32, 1, 8);

/*
 * DPACK32 element operation: copy 32-bit element i of @vb into destination
 * element i and 32-bit element i of @va into destination element i + 1
 * (elements are indexed in host memory order).
 *
 * It is instantiated below with STEP 1 and SIZE 8, so rvprd_d64_s32_s32()
 * calls it once with i == 0.
 */
static inline void do_dpack32(CPURISCVState *env, void *vd, void *va,
                              void *vb, uint8_t i)
{
    int32_t *dst = (int32_t *)vd + i;
    const int32_t *src_a = va;
    const int32_t *src_b = vb;

    dst[0] = src_b[i];
    dst[1] = src_a[i];
}

RVPRD_D64_S32_S32(dpack32, 1, 8);

static inline void do_dpktt32(CPURISCVState *env, void *vd, void *va,
void *vb, uint8_t i)
{
Expand Down Expand Up @@ -2232,6 +2262,106 @@ static inline void do_dredsa16(CPURISCVState *env, void *vd, void *va, uint8_t i

RVPR2D_D32_S64(dredsa16, 1, 8);

/*
 * DSUNPKD810 element operation.  For the i-th group of four source bytes,
 * sign-extend byte 0 and byte 1 into the first and second 16-bit element
 * of the i-th destination pair (elements indexed in host memory order).
 */
static inline void do_dsunpkd810(CPURISCVState *env, void *vd, void *va, uint8_t i)
{
    int16_t *dst = (int16_t *)vd + i * 2;
    const int8_t *src = (const int8_t *)va + i * 4;

    dst[0] = src[0];
    dst[1] = src[1];
}

RVPR2D(dsunpkd810, 1, 4);

/*
 * DSUNPKD820 element operation.  For the i-th group of four source bytes,
 * sign-extend byte 0 and byte 2 into the first and second 16-bit element
 * of the i-th destination pair (elements indexed in host memory order).
 */
static inline void do_dsunpkd820(CPURISCVState *env, void *vd, void *va, uint8_t i)
{
    int16_t *dst = (int16_t *)vd + i * 2;
    const int8_t *src = (const int8_t *)va + i * 4;

    dst[0] = src[0];
    dst[1] = src[2];
}

RVPR2D(dsunpkd820, 1, 4);

/*
 * DSUNPKD830 element operation.  For the i-th group of four source bytes,
 * sign-extend byte 0 and byte 3 into the first and second 16-bit element
 * of the i-th destination pair (elements indexed in host memory order).
 */
static inline void do_dsunpkd830(CPURISCVState *env, void *vd, void *va, uint8_t i)
{
    int16_t *dst = (int16_t *)vd + i * 2;
    const int8_t *src = (const int8_t *)va + i * 4;

    dst[0] = src[0];
    dst[1] = src[3];
}

RVPR2D(dsunpkd830, 1, 4);

/*
 * DSUNPKD831 element operation.  For the i-th group of four source bytes,
 * sign-extend byte 1 and byte 3 into the first and second 16-bit element
 * of the i-th destination pair (elements indexed in host memory order).
 */
static inline void do_dsunpkd831(CPURISCVState *env, void *vd, void *va, uint8_t i)
{
    int16_t *dst = (int16_t *)vd + i * 2;
    const int8_t *src = (const int8_t *)va + i * 4;

    dst[0] = src[1];
    dst[1] = src[3];
}

RVPR2D(dsunpkd831, 1, 4);

/*
 * DSUNPKD832 element operation.  For the i-th group of four source bytes,
 * sign-extend byte 2 and byte 3 into the first and second 16-bit element
 * of the i-th destination pair (elements indexed in host memory order).
 */
static inline void do_dsunpkd832(CPURISCVState *env, void *vd, void *va, uint8_t i)
{
    int16_t *dst = (int16_t *)vd + i * 2;
    const int8_t *src = (const int8_t *)va + i * 4;

    dst[0] = src[2];
    dst[1] = src[3];
}

RVPR2D(dsunpkd832, 1, 4);

/*
 * DZUNPKD810 element operation.  For the i-th group of four source bytes,
 * zero-extend byte 0 and byte 1 into the first and second 16-bit element
 * of the i-th destination pair (elements indexed in host memory order).
 */
static inline void do_dzunpkd810(CPURISCVState *env, void *vd, void *va, uint8_t i)
{
    uint16_t *dst = (uint16_t *)vd + i * 2;
    const uint8_t *src = (const uint8_t *)va + i * 4;

    dst[0] = src[0];
    dst[1] = src[1];
}

RVPR2D(dzunpkd810, 1, 4);

/*
 * DZUNPKD820 element operation.  For the i-th group of four source bytes,
 * zero-extend byte 0 and byte 2 into the first and second 16-bit element
 * of the i-th destination pair (elements indexed in host memory order).
 */
static inline void do_dzunpkd820(CPURISCVState *env, void *vd, void *va, uint8_t i)
{
    uint16_t *dst = (uint16_t *)vd + i * 2;
    const uint8_t *src = (const uint8_t *)va + i * 4;

    dst[0] = src[0];
    dst[1] = src[2];
}

RVPR2D(dzunpkd820, 1, 4);

/*
 * DZUNPKD830 element operation.  For the i-th group of four source bytes,
 * zero-extend byte 0 and byte 3 into the first and second 16-bit element
 * of the i-th destination pair (elements indexed in host memory order).
 */
static inline void do_dzunpkd830(CPURISCVState *env, void *vd, void *va, uint8_t i)
{
    uint16_t *dst = (uint16_t *)vd + i * 2;
    const uint8_t *src = (const uint8_t *)va + i * 4;

    dst[0] = src[0];
    dst[1] = src[3];
}

RVPR2D(dzunpkd830, 1, 4);

/*
 * DZUNPKD831 element operation.  For the i-th group of four source bytes,
 * zero-extend byte 1 and byte 3 into the first and second 16-bit element
 * of the i-th destination pair (elements indexed in host memory order).
 */
static inline void do_dzunpkd831(CPURISCVState *env, void *vd, void *va, uint8_t i)
{
    uint16_t *dst = (uint16_t *)vd + i * 2;
    const uint8_t *src = (const uint8_t *)va + i * 4;

    dst[0] = src[1];
    dst[1] = src[3];
}

RVPR2D(dzunpkd831, 1, 4);

/*
 * DZUNPKD832 element operation.  For the i-th group of four source bytes,
 * zero-extend byte 2 and byte 3 into the first and second 16-bit element
 * of the i-th destination pair (elements indexed in host memory order).
 */
static inline void do_dzunpkd832(CPURISCVState *env, void *vd, void *va, uint8_t i)
{
    uint16_t *dst = (uint16_t *)vd + i * 2;
    const uint8_t *src = (const uint8_t *)va + i * 4;

    dst[0] = src[2];
    dst[1] = src[3];
}

RVPR2D(dzunpkd832, 1, 4);

static inline void do_clrs8(CPURISCVState *env, void *vd, void *va, uint8_t i)
{
int8_t *d = vd, *a = va;
Expand Down

0 comments on commit ff7a9ec

Please sign in to comment.