Skip to content

Commit 26fdcb4

Browse files
committed
Merge remote-tracking branch 'upstream/master' into bufSizeClamp
2 parents 5e839d4 + ec13c28 commit 26fdcb4

File tree

183 files changed

+4863
-2240
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

183 files changed

+4863
-2240
lines changed

src/hotspot/cpu/aarch64/aarch64.ad

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1267,38 +1267,38 @@ source %{
12671267
// registers conditionally reserved.
12681268

12691269
_ANY_REG32_mask = _ALL_REG32_mask;
1270-
_ANY_REG32_mask.Remove(OptoReg::as_OptoReg(r31_sp->as_VMReg()));
1270+
_ANY_REG32_mask.remove(OptoReg::as_OptoReg(r31_sp->as_VMReg()));
12711271

12721272
_ANY_REG_mask = _ALL_REG_mask;
12731273

12741274
_PTR_REG_mask = _ALL_REG_mask;
12751275

12761276
_NO_SPECIAL_REG32_mask = _ALL_REG32_mask;
1277-
_NO_SPECIAL_REG32_mask.SUBTRACT(_NON_ALLOCATABLE_REG32_mask);
1277+
_NO_SPECIAL_REG32_mask.subtract(_NON_ALLOCATABLE_REG32_mask);
12781278

12791279
_NO_SPECIAL_REG_mask = _ALL_REG_mask;
1280-
_NO_SPECIAL_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);
1280+
_NO_SPECIAL_REG_mask.subtract(_NON_ALLOCATABLE_REG_mask);
12811281

12821282
_NO_SPECIAL_PTR_REG_mask = _ALL_REG_mask;
1283-
_NO_SPECIAL_PTR_REG_mask.SUBTRACT(_NON_ALLOCATABLE_REG_mask);
1283+
_NO_SPECIAL_PTR_REG_mask.subtract(_NON_ALLOCATABLE_REG_mask);
12841284

12851285
// r27 is not allocatable when compressed oops is on and heapbase is not
12861286
// zero; compressed klass pointers don't use r27 after JDK-8234794
12871287
if (UseCompressedOops && (CompressedOops::base() != nullptr)) {
1288-
_NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
1289-
_NO_SPECIAL_REG_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
1290-
_NO_SPECIAL_PTR_REG_mask.Remove(OptoReg::as_OptoReg(r27->as_VMReg()));
1288+
_NO_SPECIAL_REG32_mask.remove(OptoReg::as_OptoReg(r27->as_VMReg()));
1289+
_NO_SPECIAL_REG_mask.remove(OptoReg::as_OptoReg(r27->as_VMReg()));
1290+
_NO_SPECIAL_PTR_REG_mask.remove(OptoReg::as_OptoReg(r27->as_VMReg()));
12911291
}
12921292

12931293
// r29 is not allocatable when PreserveFramePointer is on
12941294
if (PreserveFramePointer) {
1295-
_NO_SPECIAL_REG32_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
1296-
_NO_SPECIAL_REG_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
1297-
_NO_SPECIAL_PTR_REG_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
1295+
_NO_SPECIAL_REG32_mask.remove(OptoReg::as_OptoReg(r29->as_VMReg()));
1296+
_NO_SPECIAL_REG_mask.remove(OptoReg::as_OptoReg(r29->as_VMReg()));
1297+
_NO_SPECIAL_PTR_REG_mask.remove(OptoReg::as_OptoReg(r29->as_VMReg()));
12981298
}
12991299

13001300
_NO_SPECIAL_NO_RFP_PTR_REG_mask = _NO_SPECIAL_PTR_REG_mask;
1301-
_NO_SPECIAL_NO_RFP_PTR_REG_mask.Remove(OptoReg::as_OptoReg(r29->as_VMReg()));
1301+
_NO_SPECIAL_NO_RFP_PTR_REG_mask.remove(OptoReg::as_OptoReg(r29->as_VMReg()));
13021302
}
13031303

13041304
// Optimization of volatile gets and puts
@@ -1734,7 +1734,7 @@ uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
17341734
}
17351735

17361736
//=============================================================================
1737-
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
1737+
const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::EMPTY;
17381738

17391739
int ConstantTable::calculate_table_base_offset() const {
17401740
return 0; // absolute addressing, no offset
@@ -2520,10 +2520,10 @@ uint Matcher::int_pressure_limit()
25202520
// as a spilled LRG. Spilling heuristics(Spill-USE) explicitly skip
25212521
// derived pointers and lastly fail to spill after reaching maximum
25222522
// number of iterations. Lowering the default pressure threshold to
2523-
// (_NO_SPECIAL_REG32_mask.Size() minus 1) forces CallNode to become
2523+
// (_NO_SPECIAL_REG32_mask.size() minus 1) forces CallNode to become
25242524
// a high register pressure area of the code so that split_DEF can
25252525
// generate DefinitionSpillCopy for the derived pointer.
2526-
uint default_int_pressure_threshold = _NO_SPECIAL_REG32_mask.Size() - 1;
2526+
uint default_int_pressure_threshold = _NO_SPECIAL_REG32_mask.size() - 1;
25272527
if (!PreserveFramePointer) {
25282528
// When PreserveFramePointer is off, frame pointer is allocatable,
25292529
// but different from other SOC registers, it is excluded from
@@ -2538,7 +2538,7 @@ uint Matcher::int_pressure_limit()
25382538
uint Matcher::float_pressure_limit()
25392539
{
25402540
// _FLOAT_REG_mask is generated by adlc from the float_reg register class.
2541-
return (FLOATPRESSURE == -1) ? _FLOAT_REG_mask.Size() : FLOATPRESSURE;
2541+
return (FLOATPRESSURE == -1) ? _FLOAT_REG_mask.size() : FLOATPRESSURE;
25422542
}
25432543

25442544
bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) {

src/hotspot/cpu/aarch64/aarch64_vector.ad

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7081,29 +7081,31 @@ instruct vcompress(vReg dst, vReg src, pRegGov pg) %{
70817081
%}
70827082

70837083
instruct vcompressB(vReg dst, vReg src, pReg pg, vReg tmp1, vReg tmp2,
7084-
vReg tmp3, vReg tmp4, pReg ptmp, pRegGov pgtmp) %{
7084+
vReg tmp3, pReg ptmp, pRegGov pgtmp) %{
70857085
predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_BYTE);
7086-
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ptmp, TEMP pgtmp);
7086+
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP ptmp, TEMP pgtmp);
70877087
match(Set dst (CompressV src pg));
7088-
format %{ "vcompressB $dst, $src, $pg\t# KILL $tmp1, $tmp2, $tmp3, tmp4, $ptmp, $pgtmp" %}
7088+
format %{ "vcompressB $dst, $src, $pg\t# KILL $tmp1, $tmp2, $tmp3, $ptmp, $pgtmp" %}
70897089
ins_encode %{
7090+
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
70907091
__ sve_compress_byte($dst$$FloatRegister, $src$$FloatRegister, $pg$$PRegister,
7091-
$tmp1$$FloatRegister,$tmp2$$FloatRegister,
7092-
$tmp3$$FloatRegister,$tmp4$$FloatRegister,
7093-
$ptmp$$PRegister, $pgtmp$$PRegister);
7092+
$tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister,
7093+
$ptmp$$PRegister, $pgtmp$$PRegister, length_in_bytes);
70947094
%}
70957095
ins_pipe(pipe_slow);
70967096
%}
70977097

7098-
instruct vcompressS(vReg dst, vReg src, pReg pg,
7099-
vReg tmp1, vReg tmp2, pRegGov pgtmp) %{
7098+
instruct vcompressS(vReg dst, vReg src, pReg pg, vReg tmp1, vReg tmp2, pRegGov pgtmp) %{
71007099
predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_SHORT);
71017100
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP pgtmp);
71027101
match(Set dst (CompressV src pg));
71037102
format %{ "vcompressS $dst, $src, $pg\t# KILL $tmp1, $tmp2, $pgtmp" %}
71047103
ins_encode %{
7104+
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
7105+
__ sve_dup($tmp1$$FloatRegister, __ H, 0);
71057106
__ sve_compress_short($dst$$FloatRegister, $src$$FloatRegister, $pg$$PRegister,
7106-
$tmp1$$FloatRegister,$tmp2$$FloatRegister, $pgtmp$$PRegister);
7107+
$tmp1$$FloatRegister, $tmp2$$FloatRegister, $pgtmp$$PRegister,
7108+
length_in_bytes);
71077109
%}
71087110
ins_pipe(pipe_slow);
71097111
%}

src/hotspot/cpu/aarch64/aarch64_vector_ad.m4

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5069,29 +5069,31 @@ instruct vcompress(vReg dst, vReg src, pRegGov pg) %{
50695069
%}
50705070

50715071
instruct vcompressB(vReg dst, vReg src, pReg pg, vReg tmp1, vReg tmp2,
5072-
vReg tmp3, vReg tmp4, pReg ptmp, pRegGov pgtmp) %{
5072+
vReg tmp3, pReg ptmp, pRegGov pgtmp) %{
50735073
predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_BYTE);
5074-
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ptmp, TEMP pgtmp);
5074+
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP ptmp, TEMP pgtmp);
50755075
match(Set dst (CompressV src pg));
5076-
format %{ "vcompressB $dst, $src, $pg\t# KILL $tmp1, $tmp2, $tmp3, tmp4, $ptmp, $pgtmp" %}
5076+
format %{ "vcompressB $dst, $src, $pg\t# KILL $tmp1, $tmp2, $tmp3, $ptmp, $pgtmp" %}
50775077
ins_encode %{
5078+
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
50785079
__ sve_compress_byte($dst$$FloatRegister, $src$$FloatRegister, $pg$$PRegister,
5079-
$tmp1$$FloatRegister,$tmp2$$FloatRegister,
5080-
$tmp3$$FloatRegister,$tmp4$$FloatRegister,
5081-
$ptmp$$PRegister, $pgtmp$$PRegister);
5080+
$tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister,
5081+
$ptmp$$PRegister, $pgtmp$$PRegister, length_in_bytes);
50825082
%}
50835083
ins_pipe(pipe_slow);
50845084
%}
50855085

5086-
instruct vcompressS(vReg dst, vReg src, pReg pg,
5087-
vReg tmp1, vReg tmp2, pRegGov pgtmp) %{
5086+
instruct vcompressS(vReg dst, vReg src, pReg pg, vReg tmp1, vReg tmp2, pRegGov pgtmp) %{
50885087
predicate(UseSVE > 0 && Matcher::vector_element_basic_type(n) == T_SHORT);
50895088
effect(TEMP_DEF dst, TEMP tmp1, TEMP tmp2, TEMP pgtmp);
50905089
match(Set dst (CompressV src pg));
50915090
format %{ "vcompressS $dst, $src, $pg\t# KILL $tmp1, $tmp2, $pgtmp" %}
50925091
ins_encode %{
5092+
uint length_in_bytes = Matcher::vector_length_in_bytes(this);
5093+
__ sve_dup($tmp1$$FloatRegister, __ H, 0);
50935094
__ sve_compress_short($dst$$FloatRegister, $src$$FloatRegister, $pg$$PRegister,
5094-
$tmp1$$FloatRegister,$tmp2$$FloatRegister, $pgtmp$$PRegister);
5095+
$tmp1$$FloatRegister, $tmp2$$FloatRegister, $pgtmp$$PRegister,
5096+
length_in_bytes);
50955097
%}
50965098
ins_pipe(pipe_slow);
50975099
%}

src/hotspot/cpu/aarch64/assembler_aarch64.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3486,6 +3486,7 @@ template<typename R, typename... Rx>
34863486
INSN(sve_smaxv, 0b00000100, 0b001000001); // signed maximum reduction to scalar
34873487
INSN(sve_smin, 0b00000100, 0b001010000); // signed minimum vectors
34883488
INSN(sve_sminv, 0b00000100, 0b001010001); // signed minimum reduction to scalar
3489+
INSN(sve_splice,0b00000101, 0b101100100); // splice two vectors under predicate control, destructive
34893490
INSN(sve_sub, 0b00000100, 0b000001000); // vector sub
34903491
INSN(sve_uaddv, 0b00000100, 0b000001001); // unsigned add reduction to scalar
34913492
INSN(sve_umax, 0b00000100, 0b001001000); // unsigned maximum vectors

src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.cpp

Lines changed: 76 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -2203,114 +2203,117 @@ void C2_MacroAssembler::sve_gen_mask_imm(PRegister dst, BasicType bt, uint32_t l
22032203
// Pack active elements of src, under the control of mask, into the lowest-numbered elements of dst.
22042204
// Any remaining elements of dst will be filled with zero.
22052205
// Clobbers: rscratch1
2206-
// Preserves: src, mask
2206+
// Preserves: mask, vzr
22072207
void C2_MacroAssembler::sve_compress_short(FloatRegister dst, FloatRegister src, PRegister mask,
2208-
FloatRegister vtmp1, FloatRegister vtmp2,
2209-
PRegister pgtmp) {
2208+
FloatRegister vzr, FloatRegister vtmp,
2209+
PRegister pgtmp, unsigned vector_length_in_bytes) {
22102210
assert(pgtmp->is_governing(), "This register has to be a governing predicate register");
2211-
assert_different_registers(dst, src, vtmp1, vtmp2);
2211+
// When called by sve_compress_byte, src and vtmp may be the same register.
2212+
assert_different_registers(dst, src, vzr);
2213+
assert_different_registers(dst, vtmp, vzr);
22122214
assert_different_registers(mask, pgtmp);
2213-
2214-
// Example input: src = 8888 7777 6666 5555 4444 3333 2222 1111
2215-
// mask = 0001 0000 0000 0001 0001 0000 0001 0001
2216-
// Expected result: dst = 0000 0000 0000 8888 5555 4444 2222 1111
2217-
sve_dup(vtmp2, H, 0);
2215+
// high <-- low
2216+
// Example input: src = hh gg ff ee dd cc bb aa, one character is 8 bits.
2217+
// mask = 01 00 00 01 01 00 01 01, one character is 1 bit.
2218+
// Expected result: dst = 00 00 00 hh ee dd bb aa
22182219

22192220
// Extend lowest half to type INT.
2220-
// dst = 00004444 00003333 00002222 00001111
2221+
// dst = 00dd 00cc 00bb 00aa
22212222
sve_uunpklo(dst, S, src);
2222-
// pgtmp = 00000001 00000000 00000001 00000001
2223+
// pgtmp = 0001 0000 0001 0001
22232224
sve_punpklo(pgtmp, mask);
22242225
// Pack the active elements in size of type INT to the right,
22252226
// and fill the remaining elements with zero.
2226-
// dst = 00000000 00004444 00002222 00001111
2227+
// dst = 0000 00dd 00bb 00aa
22272228
sve_compact(dst, S, dst, pgtmp);
22282229
// Narrow the result back to type SHORT.
2229-
// dst = 0000 0000 0000 0000 0000 4444 2222 1111
2230-
sve_uzp1(dst, H, dst, vtmp2);
2230+
// dst = 00 00 00 00 00 dd bb aa
2231+
sve_uzp1(dst, H, dst, vzr);
2232+
2233+
// Return if the vector length is no more than MaxVectorSize/2, since the
2234+
// highest half is invalid.
2235+
if (vector_length_in_bytes <= (MaxVectorSize >> 1)) {
2236+
return;
2237+
}
2238+
22312239
// Count the active elements of lowest half.
22322240
// rscratch1 = 3
22332241
sve_cntp(rscratch1, S, ptrue, pgtmp);
22342242

22352243
// Repeat to the highest half.
2236-
// pgtmp = 00000001 00000000 00000000 00000001
2244+
// pgtmp = 0001 0000 0000 0001
22372245
sve_punpkhi(pgtmp, mask);
2238-
// vtmp1 = 00008888 00007777 00006666 00005555
2239-
sve_uunpkhi(vtmp1, S, src);
2240-
// vtmp1 = 00000000 00000000 00008888 00005555
2241-
sve_compact(vtmp1, S, vtmp1, pgtmp);
2242-
// vtmp1 = 0000 0000 0000 0000 0000 0000 8888 5555
2243-
sve_uzp1(vtmp1, H, vtmp1, vtmp2);
2244-
2245-
// Compressed low: dst = 0000 0000 0000 0000 0000 4444 2222 1111
2246-
// Compressed high: vtmp1 = 0000 0000 0000 0000 0000 0000 8888 5555
2247-
// Left shift(cross lane) compressed high with TRUE_CNT lanes,
2248-
// TRUE_CNT is the number of active elements in the compressed low.
2249-
neg(rscratch1, rscratch1);
2250-
// vtmp2 = {4 3 2 1 0 -1 -2 -3}
2251-
sve_index(vtmp2, H, rscratch1, 1);
2252-
// vtmp1 = 0000 0000 0000 8888 5555 0000 0000 0000
2253-
sve_tbl(vtmp1, H, vtmp1, vtmp2);
2254-
2255-
// Combine the compressed high(after shifted) with the compressed low.
2256-
// dst = 0000 0000 0000 8888 5555 4444 2222 1111
2257-
sve_orr(dst, dst, vtmp1);
2246+
// vtmp = 00hh 00gg 00ff 00ee
2247+
sve_uunpkhi(vtmp, S, src);
2248+
// vtmp = 0000 0000 00hh 00ee
2249+
sve_compact(vtmp, S, vtmp, pgtmp);
2250+
// vtmp = 00 00 00 00 00 00 hh ee
2251+
sve_uzp1(vtmp, H, vtmp, vzr);
2252+
2253+
// pgtmp = 00 00 00 00 00 01 01 01
2254+
sve_whilelt(pgtmp, H, zr, rscratch1);
2255+
// Compressed low: dst = 00 00 00 00 00 dd bb aa
2256+
// Compressed high: vtmp = 00 00 00 00 00 00 hh ee
2257+
// Combine the compressed low with the compressed high:
2258+
// dst = 00 00 00 hh ee dd bb aa
2259+
sve_splice(dst, H, pgtmp, vtmp);
22582260
}
22592261

22602262
// Clobbers: rscratch1, rscratch2
22612263
// Preserves: src, mask
22622264
void C2_MacroAssembler::sve_compress_byte(FloatRegister dst, FloatRegister src, PRegister mask,
2263-
FloatRegister vtmp1, FloatRegister vtmp2,
2264-
FloatRegister vtmp3, FloatRegister vtmp4,
2265-
PRegister ptmp, PRegister pgtmp) {
2265+
FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
2266+
PRegister ptmp, PRegister pgtmp, unsigned vector_length_in_bytes) {
22662267
assert(pgtmp->is_governing(), "This register has to be a governing predicate register");
2267-
assert_different_registers(dst, src, vtmp1, vtmp2, vtmp3, vtmp4);
2268+
assert_different_registers(dst, src, vtmp1, vtmp2, vtmp3);
22682269
assert_different_registers(mask, ptmp, pgtmp);
2269-
// Example input: src = 88 77 66 55 44 33 22 11
2270-
// mask = 01 00 00 01 01 00 01 01
2271-
// Expected result: dst = 00 00 00 88 55 44 22 11
2270+
// high <-- low
2271+
// Example input: src = q p n m l k j i h g f e d c b a, one character is 8 bits.
2272+
// mask = 0 1 0 0 0 0 0 1 0 1 0 0 0 1 0 1, one character is 1 bit.
2273+
// Expected result: dst = 0 0 0 0 0 0 0 0 0 0 0 p i g c a
2274+
FloatRegister vzr = vtmp3;
2275+
sve_dup(vzr, B, 0);
22722276

2273-
sve_dup(vtmp4, B, 0);
22742277
// Extend lowest half to type SHORT.
2275-
// vtmp1 = 0044 0033 0022 0011
2278+
// vtmp1 = 0h 0g 0f 0e 0d 0c 0b 0a
22762279
sve_uunpklo(vtmp1, H, src);
2277-
// ptmp = 0001 0000 0001 0001
2280+
// ptmp = 00 01 00 00 00 01 00 01
22782281
sve_punpklo(ptmp, mask);
2279-
// Count the active elements of lowest half.
2280-
// rscratch2 = 3
2281-
sve_cntp(rscratch2, H, ptrue, ptmp);
22822282
// Pack the active elements in size of type SHORT to the right,
22832283
// and fill the remaining elements with zero.
2284-
// dst = 0000 0044 0022 0011
2285-
sve_compress_short(dst, vtmp1, ptmp, vtmp2, vtmp3, pgtmp);
2284+
// dst = 00 00 00 00 00 0g 0c 0a
2285+
unsigned extended_size = vector_length_in_bytes << 1;
2286+
sve_compress_short(dst, vtmp1, ptmp, vzr, vtmp2, pgtmp, extended_size > MaxVectorSize ? MaxVectorSize : extended_size);
22862287
// Narrow the result back to type BYTE.
2287-
// dst = 00 00 00 00 00 44 22 11
2288-
sve_uzp1(dst, B, dst, vtmp4);
2288+
// dst = 0 0 0 0 0 0 0 0 0 0 0 0 0 g c a
2289+
sve_uzp1(dst, B, dst, vzr);
2290+
2291+
// Return if the vector length is no more than MaxVectorSize/2, since the
2292+
// highest half is invalid.
2293+
if (vector_length_in_bytes <= (MaxVectorSize >> 1)) {
2294+
return;
2295+
}
2296+
// Count the active elements of lowest half.
2297+
// rscratch2 = 3
2298+
sve_cntp(rscratch2, H, ptrue, ptmp);
22892299

22902300
// Repeat to the highest half.
2291-
// ptmp = 0001 0000 0000 0001
2301+
// ptmp = 00 01 00 00 00 00 00 01
22922302
sve_punpkhi(ptmp, mask);
2293-
// vtmp1 = 0088 0077 0066 0055
2303+
// vtmp2 = 0q 0p 0n 0m 0l 0k 0j 0i
22942304
sve_uunpkhi(vtmp2, H, src);
2295-
// vtmp1 = 0000 0000 0088 0055
2296-
sve_compress_short(vtmp1, vtmp2, ptmp, vtmp3, vtmp4, pgtmp);
2297-
2298-
sve_dup(vtmp4, B, 0);
2299-
// vtmp1 = 00 00 00 00 00 00 88 55
2300-
sve_uzp1(vtmp1, B, vtmp1, vtmp4);
2301-
2302-
// Compressed low: dst = 00 00 00 00 00 44 22 11
2303-
// Compressed high: vtmp1 = 00 00 00 00 00 00 88 55
2304-
// Left shift(cross lane) compressed high with TRUE_CNT lanes,
2305-
// TRUE_CNT is the number of active elements in the compressed low.
2306-
neg(rscratch2, rscratch2);
2307-
// vtmp2 = {4 3 2 1 0 -1 -2 -3}
2308-
sve_index(vtmp2, B, rscratch2, 1);
2309-
// vtmp1 = 00 00 00 88 55 00 00 00
2310-
sve_tbl(vtmp1, B, vtmp1, vtmp2);
2311-
// Combine the compressed high(after shifted) with the compressed low.
2312-
// dst = 00 00 00 88 55 44 22 11
2313-
sve_orr(dst, dst, vtmp1);
2305+
// vtmp1 = 00 00 00 00 00 00 0p 0i
2306+
sve_compress_short(vtmp1, vtmp2, ptmp, vzr, vtmp2, pgtmp, extended_size - MaxVectorSize);
2307+
// vtmp1 = 0 0 0 0 0 0 0 0 0 0 0 0 0 0 p i
2308+
sve_uzp1(vtmp1, B, vtmp1, vzr);
2309+
2310+
// ptmp = 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1
2311+
sve_whilelt(ptmp, B, zr, rscratch2);
2312+
// Compressed low: dst = 0 0 0 0 0 0 0 0 0 0 0 0 0 g c a
2313+
// Compressed high: vtmp1 = 0 0 0 0 0 0 0 0 0 0 0 0 0 0 p i
2314+
// Combine the compressed low with the compressed high:
2315+
// dst = 0 0 0 0 0 0 0 0 0 0 0 p i g c a
2316+
sve_splice(dst, B, ptmp, vtmp1);
23142317
}
23152318

23162319
void C2_MacroAssembler::neon_reverse_bits(FloatRegister dst, FloatRegister src, BasicType bt, bool isQ) {

src/hotspot/cpu/aarch64/c2_MacroAssembler_aarch64.hpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -173,13 +173,12 @@
173173
// lowest-numbered elements of dst. Any remaining elements of dst will
174174
// be filled with zero.
175175
void sve_compress_byte(FloatRegister dst, FloatRegister src, PRegister mask,
176-
FloatRegister vtmp1, FloatRegister vtmp2,
177-
FloatRegister vtmp3, FloatRegister vtmp4,
178-
PRegister ptmp, PRegister pgtmp);
176+
FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3,
177+
PRegister ptmp, PRegister pgtmp, unsigned vector_length_in_bytes);
179178

180179
void sve_compress_short(FloatRegister dst, FloatRegister src, PRegister mask,
181-
FloatRegister vtmp1, FloatRegister vtmp2,
182-
PRegister pgtmp);
180+
FloatRegister vzr, FloatRegister vtmp,
181+
PRegister pgtmp, unsigned vector_length_in_bytes);
183182

184183
void neon_reverse_bits(FloatRegister dst, FloatRegister src, BasicType bt, bool isQ);
185184

src/hotspot/cpu/riscv/interp_masm_riscv.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1841,6 +1841,15 @@ void InterpreterMacroAssembler::load_method_entry(Register cache, Register index
18411841
}
18421842

18431843
#ifdef ASSERT
1844+
void InterpreterMacroAssembler::verify_field_offset(Register reg) {
1845+
// Verify the field offset is not in the header, implicitly checks for 0
1846+
Label L;
1847+
mv(t0, oopDesc::base_offset_in_bytes());
1848+
bge(reg, t0, L);
1849+
stop("bad field offset");
1850+
bind(L);
1851+
}
1852+
18441853
void InterpreterMacroAssembler::verify_access_flags(Register access_flags, uint32_t flag,
18451854
const char* msg, bool stop_by_hit) {
18461855
Label L;

0 commit comments

Comments
 (0)