Skip to content

Commit 207fe55

Browse files
eme64mhaessig
andcommitted
8369902: C2 SuperWord: wrong result because filterin NaN instead of zero in MemPointerParser::canonicalize_raw_summands
Co-authored-by: Manuel Hässig <[email protected]> Reviewed-by: mhaessig, kvn
1 parent eee2908 commit 207fe55

File tree

4 files changed

+252
-2
lines changed

4 files changed

+252
-2
lines changed

src/hotspot/share/opto/mempointer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ void MemPointerParser::canonicalize_raw_summands() {
112112
}
113113
}
114114
// Keep summands with non-zero scale.
115-
if (!scaleI.is_zero() && !scaleL.is_NaN()) {
115+
if (!scaleI.is_zero() && !scaleL.is_zero()) {
116116
_raw_summands.at_put(pos_put++, MemPointerRawSummand(variable, scaleI, scaleL, int_group));
117117
}
118118
}

test/hotspot/jtreg/compiler/loopopts/superword/TestAliasingFuzzer.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,10 @@
112112
* memory and split ranges. But we could alternate between same memory
113113
* and split ranges, and then different memory but overlapping ranges.
114114
* This would also be never aliasing.
115-
*
115+
* - Generate cases that would catch bugs like JDK-8369902:
116+
* - Large long constants, or scales. Probably only possible for MemorySegment.
117+
* - Large number of invar, and reuse of invar so that they could cancle
118+
* to zero, and need to be filtered out.
116119
*/
117120
public class TestAliasingFuzzer {
118121
private static final Random RANDOM = Utils.getRandomInstance();
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
/*
2+
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
24+
/*
25+
* @test
26+
* @bug 8369902
27+
* @summary Bug in MemPointerParser::canonicalize_raw_summands let to wrong result, because a
28+
* NaN summand was filtered out, instead of making the MemPointer / VPointer invalid.
29+
* @run main/othervm
30+
* -XX:+IgnoreUnrecognizedVMOptions
31+
* -XX:CompileCommand=compileonly,*TestDoNotFilterNaNSummands::test
32+
* -Xbatch
33+
* compiler.loopopts.superword.TestDoNotFilterNaNSummands
34+
* @run main compiler.loopopts.superword.TestDoNotFilterNaNSummands
35+
*/
36+
37+
package compiler.loopopts.superword;
38+
39+
// This was the test found by the fuzzer. If you are looking for a simpler example with the same issue,
40+
// please look at TestMemorySegmentFilterSummands::test2.
41+
public class TestDoNotFilterNaNSummands {
42+
static final int N = 100;
43+
static int zero = 0;
44+
45+
static int[] test() {
46+
int x = -4;
47+
int aI[] = new int[N];
48+
for (int k = 0; k < N; k++) {
49+
// Note that x is always "-4", and N is a compile time constant. The modulo "%"
50+
// gets optimized with magic numbers and shift/mul/sub trick, in the long domain,
51+
// which somehow creates some large long constant that cannot be represented
52+
// as an int.
53+
int idx = (x >>> 1) % N;
54+
// This is the CountedLoop that we may try to auto vectorize.
55+
// We have a linear access (i) and a constant index access (idx), which eventually
56+
// cross, so there is aliasing. If there is vectorization with an aliasing runtime
57+
// check, this check must fail.
58+
for (int i = 1; i < 63; i++) {
59+
aI[i] = 2;
60+
// The MemPointer / VPointer for the accesses below contain a large constant
61+
// long constant offset that cannot be represented as an int, so the scaleL
62+
// NoOverflowInt becomes NaN. In MemPointerParser::canonicalize_raw_summands
63+
// we are supposed to filter out zero summands, but since we WRONGLY filtered
64+
// out NaNs instead, this summand got filtered out, and later we did not detect
65+
// that the MemPointer contains a NaN. Instead, we just get a "valid" looking
66+
// VPointer, and generate runtime checks that are missing the long constant
67+
// offset, leading to wrong decisions, and hence vectorization even though
68+
// we have aliasing. This means that the accesses from above and below get
69+
// reordered in an illegal way, leading to wrong results.
70+
aI[idx] += 1;
71+
}
72+
for (int i = 0; i < 100; i++) {
73+
// It is a no-op, but the compiler can't know statically that zero=0.
74+
// Seems to be required in the graph, no idea why.
75+
x >>= zero;
76+
}
77+
}
78+
return aI;
79+
}
80+
81+
// Use the sum as an easy way to compare the results.
82+
public static int sum(int[] aI) {
83+
int sum = 0;
84+
for (int i = 0; i < aI.length; i++) { sum += aI[i]; }
85+
return sum;
86+
}
87+
88+
public static void main(String[] args) {
89+
// Run once, hopefully before compilation, so get interpreter results.
90+
int[] aIG = test();
91+
int gold = sum(aIG);
92+
93+
// Repeat execution, until eventually compilation happens, compare
94+
// compiler results to interpreter results.
95+
for (int k = 0; k < 1000; k++) {
96+
int[] aI = test();
97+
int val = sum(aI);
98+
if (gold != val) {
99+
System.out.println("Detected wrong result, printing values of arrays:");
100+
for (int i = 0; i < aI.length; i++) {
101+
System.out.println("at " + i + ": " + aIG[i] + " vs " + aI[i]);
102+
}
103+
throw new RuntimeException("wrong result: " + gold + " " + val);
104+
}
105+
}
106+
}
107+
}
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
/*
2+
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*/
23+
24+
package compiler.loopopts.superword;
25+
26+
import java.lang.foreign.*;
27+
import java.util.Set;
28+
29+
import compiler.lib.ir_framework.*;
30+
import compiler.lib.verify.*;
31+
32+
/*
33+
* @test
34+
* @bug 8369902
35+
* @summary Bug in MemPointerParser::canonicalize_raw_summands let to wrong results or assert.
36+
* @library /test/lib /
37+
* @run driver compiler.loopopts.superword.TestMemorySegmentFilterSummands
38+
*/
39+
40+
public class TestMemorySegmentFilterSummands {
41+
42+
static long init = 1000;
43+
static long limit = 9000;
44+
45+
static long invar0 = 0;
46+
static long invar1 = 0;
47+
static long invar2 = 0;
48+
static long invar3 = 0;
49+
static long invar4 = 0;
50+
static long invarX = 0;
51+
52+
public static final long BIG = 0x200000000L;
53+
public static long big = -BIG;
54+
55+
static MemorySegment a1 = Arena.ofAuto().allocate(10_000);
56+
static MemorySegment b1 = Arena.ofAuto().allocate(10_000);
57+
static {
58+
for (long i = init; i < limit; i++) {
59+
a1.set(ValueLayout.JAVA_BYTE, i, (byte)((i & 0xf) + 1));
60+
}
61+
}
62+
63+
static MemorySegment a2 = MemorySegment.ofArray(new byte[40_000]);
64+
static MemorySegment b2 = a2;
65+
66+
public static void main(String[] args) {
67+
TestFramework f = new TestFramework();
68+
f.addFlags("-XX:+IgnoreUnrecognizedVMOptions");
69+
f.addCrossProductScenarios(Set.of("-XX:-AlignVector", "-XX:+AlignVector"),
70+
Set.of("-XX:-ShortRunningLongLoop", "-XX:+ShortRunningLoop"));
71+
f.start();
72+
}
73+
74+
@Test
75+
@IR(counts = {IRNode.STORE_VECTOR, "> 0",
76+
IRNode.LOAD_VECTOR_B, "> 0",
77+
".*multiversion.*", "= 0"}, // AutoVectorization Predicate SUFFICES, there is no aliasing
78+
phase = CompilePhase.PRINT_IDEAL,
79+
applyIfPlatform = {"64-bit", "true"},
80+
applyIf = {"AlignVector", "false"},
81+
applyIfCPUFeatureOr = {"avx", "true", "asimd", "true"})
82+
public static void test1() {
83+
long invar = 0;
84+
invar += invarX; // cancles out with above
85+
invar += invar0;
86+
invar += invar1;
87+
invar += invar2;
88+
invar += invar3;
89+
invar += invar4;
90+
invar -= invarX; // cancles out with above
91+
// invar contains a raw summand for invarX, which has a scaleL=0. It needs to be filtered out.
92+
// The two occurances of invarX are conveniently put in a long chain, so that IGVN cannot see
93+
// that they cancle out, so that they are not optimized out before loop-opts.
94+
for (long i = init; i < limit; i++) {
95+
byte v = a1.get(ValueLayout.JAVA_BYTE, i + invar);
96+
b1.set(ValueLayout.JAVA_BYTE, i + invar, v);
97+
}
98+
}
99+
100+
@Check(test = "test1")
101+
static void check1() {
102+
Verify.checkEQ(a1, b1);
103+
}
104+
105+
@Test
106+
@IR(failOn = {IRNode.STORE_VECTOR})
107+
// This test could in principle show vectorization, but it would probably need to do some special
108+
// tricks to only vectorize around the overlap. Still, it could happen that at some point we end
109+
// up multiversioning, and having a vectorized loop that is never entered.
110+
//
111+
// For now, the long constant BIG leads to an invalid VPointer, which means we do not vectorize.
112+
static void test2() {
113+
// At runtime, "BIG + big" is zero. But BIG is a long constant that cannot be represented as
114+
// an int, and so the scaleL NoOverflowInt is a NaN. We should not filter it out from the summands,
115+
// but instead make the MemPointer / VPointer invalid, which prevents vectorization.
116+
long adr = 4L * 5000 + BIG + big;
117+
118+
for (long i = init; i < limit; i++) {
119+
// The reference to a2 iterates linearly, while the reference to "b2" stays at the same adr.
120+
// But the two alias: in the middle of the "a2" range it crosses over "b2" adr, so the
121+
// aliasing runtime check (if we generate one) should fail. But if "BIG" is just filtered
122+
// out from the summands, we instead just create a runtime check without it, which leads
123+
// to a wrong answer, and the check does not fail, and we get wrong results.
124+
a2.set(ValueLayout.JAVA_INT_UNALIGNED, 4L * i, 0);
125+
int v = b2.get(ValueLayout.JAVA_INT_UNALIGNED, adr);
126+
b2.set(ValueLayout.JAVA_INT_UNALIGNED, adr, v + 1);
127+
}
128+
}
129+
130+
@Check(test = "test2")
131+
static void check2() {
132+
int s = 0;
133+
for (long i = init; i < limit; i++) {
134+
s += a2.get(ValueLayout.JAVA_INT_UNALIGNED, 4L * i);
135+
}
136+
if (s != 4000) {
137+
throw new RuntimeException("wrong value");
138+
}
139+
}
140+
}

0 commit comments

Comments
 (0)