@@ -131,7 +131,7 @@ source %{
131131 // These operations are not profitable to be vectorized on NEON, because no direct
132132 // NEON instructions support them. But the match rule support for them is profitable for
133133 // Vector API intrinsics.
134- if ((opcode == Op_VectorCastD2X && bt == T_INT) ||
134+ if ((opcode == Op_VectorCastD2X && ( bt == T_INT || bt == T_SHORT) ) ||
135135 (opcode == Op_VectorCastL2X && bt == T_FLOAT) ||
136136 (opcode == Op_CountLeadingZerosV && bt == T_LONG) ||
137137 (opcode == Op_CountTrailingZerosV && bt == T_LONG) ||
@@ -189,6 +189,18 @@ source %{
189189 return false;
190190 }
191191 break;
192+ case Op_AddReductionVI:
193+ case Op_AndReductionV:
194+ case Op_OrReductionV:
195+ case Op_XorReductionV:
196+ case Op_MinReductionV:
197+ case Op_MaxReductionV:
198+ // Reductions with less than 8 bytes vector length are
199+ // not supported.
200+ if (length_in_bytes < 8) {
201+ return false;
202+ }
203+ break;
192204 case Op_MulReductionVD:
193205 case Op_MulReductionVF:
194206 case Op_MulReductionVI:
@@ -4244,8 +4256,8 @@ instruct vzeroExtStoX(vReg dst, vReg src) %{
42444256 assert(bt == T_INT || bt == T_LONG, "must be");
42454257 uint length_in_bytes = Matcher::vector_length_in_bytes(this);
42464258 if (VM_Version::use_neon_for_vector(length_in_bytes)) {
4247- // 4S to 4I
4248- __ neon_vector_extend($dst$$FloatRegister, T_INT , length_in_bytes,
4259+ // 2S to 2I/2L, 4S to 4I
4260+ __ neon_vector_extend($dst$$FloatRegister, bt , length_in_bytes,
42494261 $src$$FloatRegister, T_SHORT, /* is_unsigned */ true);
42504262 } else {
42514263 assert(UseSVE > 0, "must be sve");
@@ -4265,11 +4277,11 @@ instruct vzeroExtItoX(vReg dst, vReg src) %{
42654277 uint length_in_bytes = Matcher::vector_length_in_bytes(this);
42664278 if (VM_Version::use_neon_for_vector(length_in_bytes)) {
42674279 // 2I to 2L
4268- __ neon_vector_extend($dst$$FloatRegister, T_LONG , length_in_bytes,
4280+ __ neon_vector_extend($dst$$FloatRegister, bt , length_in_bytes,
42694281 $src$$FloatRegister, T_INT, /* is_unsigned */ true);
42704282 } else {
42714283 assert(UseSVE > 0, "must be sve");
4272- __ sve_vector_extend($dst$$FloatRegister, __ D ,
4284+ __ sve_vector_extend($dst$$FloatRegister, __ elemType_to_regVariant(bt) ,
42734285 $src$$FloatRegister, __ S, /* is_unsigned */ true);
42744286 }
42754287 %}
@@ -4343,11 +4355,15 @@ instruct vcvtStoX_extend(vReg dst, vReg src) %{
43434355 BasicType bt = Matcher::vector_element_basic_type(this);
43444356 uint length_in_bytes = Matcher::vector_length_in_bytes(this);
43454357 if (VM_Version::use_neon_for_vector(length_in_bytes)) {
4346- // 4S to 4I/4F
4347- __ neon_vector_extend($dst$$FloatRegister, T_INT, length_in_bytes,
4348- $src$$FloatRegister, T_SHORT);
4349- if (bt == T_FLOAT) {
4350- __ scvtfv(__ T4S, $dst$$FloatRegister, $dst$$FloatRegister);
4358+ if (is_floating_point_type(bt)) {
4359+ // 2S to 2F/2D, 4S to 4F
4360+ __ neon_vector_extend($dst$$FloatRegister, bt == T_FLOAT ? T_INT : T_LONG,
4361+ length_in_bytes, $src$$FloatRegister, T_SHORT);
4362+ __ scvtfv(get_arrangement(this), $dst$$FloatRegister, $dst$$FloatRegister);
4363+ } else {
4364+ // 2S to 2I/2L, 4S to 4I
4365+ __ neon_vector_extend($dst$$FloatRegister, bt, length_in_bytes,
4366+ $src$$FloatRegister, T_SHORT);
43514367 }
43524368 } else {
43534369 assert(UseSVE > 0, "must be sve");
@@ -4371,7 +4387,7 @@ instruct vcvtItoX_narrow_neon(vReg dst, vReg src) %{
43714387 effect(TEMP_DEF dst);
43724388 format %{ "vcvtItoX_narrow_neon $dst, $src" %}
43734389 ins_encode %{
4374- // 4I to 4B/4S
4390+ // 2I to 2S, 4I to 4B/4S
43754391 BasicType bt = Matcher::vector_element_basic_type(this);
43764392 uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
43774393 __ neon_vector_narrow($dst$$FloatRegister, bt,
@@ -4434,28 +4450,29 @@ instruct vcvtItoX(vReg dst, vReg src) %{
44344450
44354451// VectorCastL2X
44364452
4437- instruct vcvtLtoI_neon(vReg dst, vReg src) %{
4438- predicate(Matcher::vector_element_basic_type(n) == T_INT &&
4453+ instruct vcvtLtoX_narrow_neon(vReg dst, vReg src) %{
4454+ predicate((Matcher::vector_element_basic_type(n) == T_INT ||
4455+ Matcher::vector_element_basic_type(n) == T_SHORT) &&
44394456 VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1))));
44404457 match(Set dst (VectorCastL2X src));
4441- format %{ "vcvtLtoI_neon $dst, $src" %}
4458+ format %{ "vcvtLtoX_narrow_neon $dst, $src" %}
44424459 ins_encode %{
4443- // 2L to 2I
4460+ // 2L to 2S/2I
4461+ BasicType bt = Matcher::vector_element_basic_type(this);
44444462 uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
4445- __ neon_vector_narrow($dst$$FloatRegister, T_INT ,
4463+ __ neon_vector_narrow($dst$$FloatRegister, bt ,
44464464 $src$$FloatRegister, T_LONG, length_in_bytes);
44474465 %}
44484466 ins_pipe(pipe_slow);
44494467%}
44504468
4451- instruct vcvtLtoI_sve(vReg dst, vReg src, vReg tmp) %{
4452- predicate((Matcher::vector_element_basic_type(n) == T_INT &&
4453- !VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1)))) ||
4454- Matcher::vector_element_basic_type(n) == T_BYTE ||
4455- Matcher::vector_element_basic_type(n) == T_SHORT);
4469+ instruct vcvtLtoX_narrow_sve(vReg dst, vReg src, vReg tmp) %{
4470+ predicate(!VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1))) &&
4471+ !is_floating_point_type(Matcher::vector_element_basic_type(n)) &&
4472+ type2aelembytes(Matcher::vector_element_basic_type(n)) <= 4);
44564473 match(Set dst (VectorCastL2X src));
44574474 effect(TEMP_DEF dst, TEMP tmp);
4458- format %{ "vcvtLtoI_sve $dst, $src\t# KILL $tmp" %}
4475+ format %{ "vcvtLtoX_narrow_sve $dst, $src\t# KILL $tmp" %}
44594476 ins_encode %{
44604477 assert(UseSVE > 0, "must be sve");
44614478 BasicType bt = Matcher::vector_element_basic_type(this);
@@ -4521,10 +4538,11 @@ instruct vcvtFtoX_narrow_neon(vReg dst, vReg src) %{
45214538 effect(TEMP_DEF dst);
45224539 format %{ "vcvtFtoX_narrow_neon $dst, $src" %}
45234540 ins_encode %{
4524- // 4F to 4B/4S
4541+ // 2F to 2S, 4F to 4B/4S
45254542 BasicType bt = Matcher::vector_element_basic_type(this);
45264543 uint length_in_bytes = Matcher::vector_length_in_bytes(this, $src);
4527- __ fcvtzs($dst$$FloatRegister, __ T4S, $src$$FloatRegister);
4544+ __ fcvtzs($dst$$FloatRegister, length_in_bytes == 16 ? __ T4S : __ T2S,
4545+ $src$$FloatRegister);
45284546 __ neon_vector_narrow($dst$$FloatRegister, bt,
45294547 $dst$$FloatRegister, T_INT, length_in_bytes);
45304548 %}
@@ -4590,19 +4608,25 @@ instruct vcvtFtoX(vReg dst, vReg src) %{
45904608// VectorCastD2X
45914609
45924610instruct vcvtDtoI_neon(vReg dst, vReg src) %{
4593- predicate(UseSVE == 0 && Matcher::vector_element_basic_type(n) == T_INT);
4611+ predicate(UseSVE == 0 &&
4612+ (Matcher::vector_element_basic_type(n) == T_INT ||
4613+ Matcher::vector_element_basic_type(n) == T_SHORT));
45944614 match(Set dst (VectorCastD2X src));
45954615 effect(TEMP_DEF dst);
4596- format %{ "vcvtDtoI_neon $dst, $src\t# 2D to 2I" %}
4616+ format %{ "vcvtDtoI_neon $dst, $src\t# 2D to 2S/ 2I" %}
45974617 ins_encode %{
4598- // 2D to 2I
4618+ // 2D to 2S/ 2I
45994619 __ ins($dst$$FloatRegister, __ D, $src$$FloatRegister, 0, 1);
46004620 // We can't use fcvtzs(vector, integer) instruction here because we need
46014621 // saturation arithmetic. See JDK-8276151.
46024622 __ fcvtzdw(rscratch1, $src$$FloatRegister);
46034623 __ fcvtzdw(rscratch2, $dst$$FloatRegister);
46044624 __ fmovs($dst$$FloatRegister, rscratch1);
46054625 __ mov($dst$$FloatRegister, __ S, 1, rscratch2);
4626+ if (Matcher::vector_element_basic_type(this) == T_SHORT) {
4627+ __ neon_vector_narrow($dst$$FloatRegister, T_SHORT,
4628+ $dst$$FloatRegister, T_INT, 8);
4629+ }
46064630 %}
46074631 ins_pipe(pipe_slow);
46084632%}
@@ -4676,7 +4700,7 @@ instruct vcvtHFtoF(vReg dst, vReg src) %{
46764700 ins_encode %{
46774701 uint length_in_bytes = Matcher::vector_length_in_bytes(this);
46784702 if (VM_Version::use_neon_for_vector(length_in_bytes)) {
4679- // 4HF to 4F
4703+ // 2HF to 2F, 4HF to 4F
46804704 __ fcvtl($dst$$FloatRegister, __ T4S, $src$$FloatRegister, __ T4H);
46814705 } else {
46824706 assert(UseSVE > 0, "must be sve");
@@ -4692,9 +4716,9 @@ instruct vcvtHFtoF(vReg dst, vReg src) %{
46924716instruct vcvtFtoHF_neon(vReg dst, vReg src) %{
46934717 predicate(VM_Version::use_neon_for_vector(Matcher::vector_length_in_bytes(n->in(1))));
46944718 match(Set dst (VectorCastF2HF src));
4695- format %{ "vcvtFtoHF_neon $dst, $src\t# 4F to 4HF" %}
4719+ format %{ "vcvtFtoHF_neon $dst, $src\t# 2F/ 4F to 2HF/ 4HF" %}
46964720 ins_encode %{
4697- // 4F to 4HF
4721+ // 2F to 2HF, 4F to 4HF
46984722 __ fcvtn($dst$$FloatRegister, __ T4H, $src$$FloatRegister, __ T4S);
46994723 %}
47004724 ins_pipe(pipe_slow);
@@ -6396,14 +6420,12 @@ instruct vpopcountI(vReg dst, vReg src) %{
63966420 } else {
63976421 assert(bt == T_SHORT || bt == T_INT, "unsupported");
63986422 if (UseSVE == 0) {
6399- assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported");
6400- __ cnt($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
6401- $src$$FloatRegister);
6402- __ uaddlp($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
6403- $dst$$FloatRegister);
6423+ assert(length_in_bytes <= 16, "unsupported");
6424+ bool isQ = length_in_bytes == 16;
6425+ __ cnt($dst$$FloatRegister, isQ ? __ T16B : __ T8B, $src$$FloatRegister);
6426+ __ uaddlp($dst$$FloatRegister, isQ ? __ T16B : __ T8B, $dst$$FloatRegister);
64046427 if (bt == T_INT) {
6405- __ uaddlp($dst$$FloatRegister, length_in_bytes == 16 ? __ T8H : __ T4H,
6406- $dst$$FloatRegister);
6428+ __ uaddlp($dst$$FloatRegister, isQ ? __ T8H : __ T4H, $dst$$FloatRegister);
64076429 }
64086430 } else {
64096431 __ sve_cnt($dst$$FloatRegister, __ elemType_to_regVariant(bt),
@@ -6465,7 +6487,7 @@ instruct vblend_neon(vReg dst, vReg src1, vReg src2) %{
64656487 format %{ "vblend_neon $dst, $src1, $src2" %}
64666488 ins_encode %{
64676489 uint length_in_bytes = Matcher::vector_length_in_bytes(this);
6468- assert(length_in_bytes == 8 || length_in_bytes == 16, "must be");
6490+ assert(length_in_bytes <= 16, "must be");
64696491 __ bsl($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
64706492 $src2$$FloatRegister, $src1$$FloatRegister);
64716493 %}
@@ -6852,7 +6874,7 @@ instruct vcountTrailingZeros(vReg dst, vReg src) %{
68526874 } else {
68536875 assert(bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported type");
68546876 if (UseSVE == 0) {
6855- assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported");
6877+ assert(length_in_bytes <= 16, "unsupported");
68566878 __ neon_reverse_bits($dst$$FloatRegister, $src$$FloatRegister,
68576879 bt, /* isQ */ length_in_bytes == 16);
68586880 if (bt != T_LONG) {
@@ -6911,7 +6933,7 @@ instruct vreverse(vReg dst, vReg src) %{
69116933 } else {
69126934 assert(bt == T_SHORT || bt == T_INT || bt == T_LONG, "unsupported type");
69136935 if (UseSVE == 0) {
6914- assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported");
6936+ assert(length_in_bytes <= 16, "unsupported");
69156937 __ neon_reverse_bits($dst$$FloatRegister, $src$$FloatRegister,
69166938 bt, /* isQ */ length_in_bytes == 16);
69176939 } else {
@@ -6947,7 +6969,7 @@ instruct vreverseBytes(vReg dst, vReg src) %{
69476969 BasicType bt = Matcher::vector_element_basic_type(this);
69486970 uint length_in_bytes = Matcher::vector_length_in_bytes(this);
69496971 if (VM_Version::use_neon_for_vector(length_in_bytes)) {
6950- assert(length_in_bytes == 8 || length_in_bytes == 16, "unsupported");
6972+ assert(length_in_bytes <= 16, "unsupported");
69516973 if (bt == T_BYTE) {
69526974 if ($dst$$FloatRegister != $src$$FloatRegister) {
69536975 __ orr($dst$$FloatRegister, length_in_bytes == 16 ? __ T16B : __ T8B,
0 commit comments