Skip to content

Commit 5b28e7e

Browse files
committed
[Fix][Arith] Analyzer simplification starts with canonical
This PR changes the order of arithmetic analyzer simplification by adding a canonical simplification stage at the very beginning, so that every simplification starts with a canonical round. The reason is that rewrite simplification may destroy structural properties of a PrimExpr that canonical simplification could otherwise make use of. Running the canonical round first therefore maximizes the benefit of canonical simplification.
1 parent 803207c commit 5b28e7e

File tree

7 files changed

+61
-17
lines changed

7 files changed

+61
-17
lines changed

src/arith/analyzer.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,10 @@ bool Analyzer::CanProve(const PrimExpr& expr) {
129129
PrimExpr Analyzer::Simplify(const PrimExpr& expr, int steps) {
130130
PrimExpr res = expr;
131131

132+
// Always start with a canonical simplification, as some structural property
133+
// of an expression might be destroyed by rewrite simplification.
134+
res = this->canonical_simplify(res);
135+
132136
for (int i = 0; i < steps; ++i) {
133137
if (tir::is_const_int(res)) {
134138
return res;

src/tir/op/op.cc

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,11 +218,20 @@ PrimExpr min_value(const DataType& dtype, Span span) {
218218
// floatimm min bug)
219219
return (*f)(dtype.bits());
220220
} else if (dtype.is_int()) {
221+
// Here we use the actual min value + 1.
222+
// This is because in the integer system, the actual min value and
223+
// max value are not symmetric. In arithmetic analyzer and integer
224+
// expression simplification methods, it is very common to take
225+
// the negative value of integers. If the actual min value of an
226+
// integer dtype is negated, the result will be
227+
// out of the integer dtype range and lead to many other issues.
228+
// So here we use the actual min value + 1 to avoid the issues of
229+
// "taking the negative of the min value".
221230
if (dtype.bits() == 64) {
222-
return IntImm(dtype, std::numeric_limits<int64_t>::lowest(), span);
231+
return IntImm(dtype, std::numeric_limits<int64_t>::lowest() + 1, span);
223232
} else if (dtype.bits() < 64) {
224233
int64_t val = 1;
225-
val = -(val << (dtype.bits() - 1));
234+
val = -(val << (dtype.bits() - 1)) + 1;
226235
return IntImm(dtype, val, span);
227236
}
228237
} else if (dtype.is_uint()) {

tests/python/unittest/test_arith_intset.py

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,6 @@ def check_region_bound(expect_region, var_dom, mode, predicate=None):
182182
expect_begin, expect_end = expect_desc[binding]
183183
result_begin = analyzer.simplify(intset.min_value, 3)
184184
result_end = analyzer.simplify(intset.max_value + 1, 3)
185-
print(result_end)
186185
assert analyzer.can_prove_equal(
187186
result_begin - expect_begin, 0
188187
), f"{result_begin} vs {expect_begin}"
@@ -306,10 +305,7 @@ def test_region_lower_bound_for_non_perfect_tile():
306305
+ h2: {
307306
(): (
308307
tvm.tir.max(h3 * 8, 1),
309-
tvm.tir.max(h3 * 8, 1)
310-
- tvm.tir.max(h3 * 8, 214)
311-
- tvm.tir.max(1 - h3 * 8, 0)
312-
+ 224,
308+
tvm.tir.min(0, h3 * 8 - 214) + 224,
313309
),
314310
((h3, 0),): (1, 10), # h3 == 0: region is [1, 10)
315311
((h3, 10),): (h3 * 8, h3 * 8 + 10), # 0 < h3 <= 26: region is [h3 * 8, h3 * 8 + 10)
@@ -333,10 +329,7 @@ def test_region_lower_bound_for_non_perfect_tile():
333329
+ h1: {
334330
(): (
335331
tvm.tir.max(h3 * 8, 1),
336-
tvm.tir.max(h3 * 8, 1)
337-
- tvm.tir.max(h3 * 8, 214)
338-
- tvm.tir.max(1 - h3 * 8, 0)
339-
+ 224,
332+
tvm.tir.min(0, h3 * 8 - 214) + 224,
340333
),
341334
((h3, 0),): (1, 10),
342335
((h3, 10),): (h3 * 8, h3 * 8 + 10),
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
import tvm
18+
import tvm.testing
19+
from tvm import tir
20+
21+
22+
def test_simplify_reshape_flattened_index():
23+
ana = tvm.arith.Analyzer()
24+
25+
i0 = tir.Var("i0", "int64")
26+
i1 = tir.Var("i1", "int64")
27+
ana.bind(i0, tvm.ir.Range(0, 8))
28+
ana.bind(i1, tvm.ir.Range(0, 3))
29+
30+
i_flattened = i0 * 3 + i1
31+
assert tvm.ir.structural_equal(
32+
ana.simplify((i_flattened) // 12 * 12 + (i_flattened) % 12 // 4 * 4 + (i_flattened) % 4),
33+
i_flattened,
34+
)
35+
36+
37+
if __name__ == "__main__":
38+
tvm.testing.main()

tests/python/unittest/test_tir_buffer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ def assert_simplified_equal(index_simplified, index_direct):
150150
index_simplified = A.offset_of(
151151
(idxd(idxm(k0, idxd(k1, s)), n), idxm(idxm(k0, idxd(k1, s)), n) + idxm(k0, k1))
152152
)
153-
index_direct = A.offset_of((0, idxm(k0, k1) + idxm(k0, idxd(k1, s))))
153+
index_direct = A.offset_of((0, idxm(k0, idxd(k1, s)) + idxm(k0, k1)))
154154
assert_simplified_equal(index_simplified, index_direct)
155155
# Test Case3
156156
index_simplified = A.offset_of(

tests/python/unittest/test_tir_schedule_analysis.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ def test_suggest_index_map_winograd():
126126
floordiv(i0, 2),
127127
floordiv(i1, 2),
128128
floormod(i0, 2),
129-
floormod(((i1 * 4) + floordiv(i2, 32)), 8),
129+
floormod(i1, 2) * 4 + floordiv(i2, 32),
130130
floormod(i2, 32),
131131
floordiv(i3, 32),
132132
floormod(i3, 32),
@@ -137,8 +137,8 @@ def test_suggest_index_map_winograd():
137137
expected_inverse_index_map = IndexMap.from_func(
138138
lambda i0, i1, i2, i3, i4, i5, i6: (
139139
((i0 * 2) + i2),
140-
((i1 * 2) + floordiv(((i3 * 32) + i4), 128)),
141-
floormod(((i3 * 32) + i4), 128),
140+
i1 * 2 + floordiv(i3, 4),
141+
floormod(i3, 4) * 32 + i4,
142142
((i5 * 32) + i6),
143143
)
144144
)

tests/python/unittest/test_tir_schedule_rfactor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1147,7 +1147,7 @@ def argmax_topi_rfactor(
11471147
T.writes(placeholder_red_temp_v0_rf[ax0, vi1_1], placeholder_red_temp_v1_rf[ax0, vi1_1])
11481148
with T.init():
11491149
placeholder_red_temp_v0_rf[ax0, vi1_1] = -1
1150-
placeholder_red_temp_v1_rf[ax0, vi1_1] = -2147483648
1150+
placeholder_red_temp_v1_rf[ax0, vi1_1] = T.min_value("int32")
11511151
v_placeholder_red_temp_v0_rf: T.int32 = T.Select(
11521152
placeholder_red_temp_v1_rf[ax0, vi1_1] > placeholder[ax0, vi1_0 * 8 + vi1_1]
11531153
or placeholder_red_temp_v1_rf[ax0, vi1_1] == placeholder[ax0, vi1_0 * 8 + vi1_1]
@@ -1169,7 +1169,7 @@ def argmax_topi_rfactor(
11691169
T.writes(placeholder_red_temp_v0[ax0], placeholder_red_temp_v1[ax0])
11701170
with T.init():
11711171
placeholder_red_temp_v0[ax0] = -1
1172-
placeholder_red_temp_v1[ax0] = -2147483648
1172+
placeholder_red_temp_v1[ax0] = T.min_value("int32")
11731173
v_placeholder_red_temp_v0: T.int32 = T.Select(
11741174
placeholder_red_temp_v1[ax0] > placeholder_red_temp_v1_rf[ax0, vi1_1]
11751175
or placeholder_red_temp_v1[ax0] == placeholder_red_temp_v1_rf[ax0, vi1_1]

0 commit comments

Comments
 (0)