Skip to content

Commit fb2315a

Browse files
authored
[Fix][Arith] Analyzer simplification starts with canonical (#13875)
This PR changes the order of simplification in the arithmetic analyzer by adding a canonical simplification stage at the very beginning, so that every call to `Analyzer::Simplify` starts with a canonical round. The reason is that rewrite simplification may destroy structural properties of a PrimExpr that canonical simplification could otherwise make use of. Running the canonical stage first therefore maximizes the benefit of canonical simplification.
1 parent 62a69a6 commit fb2315a

File tree

7 files changed

+70
-17
lines changed

7 files changed

+70
-17
lines changed

src/arith/analyzer.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,10 @@ bool Analyzer::CanProve(const PrimExpr& expr) {
129129
PrimExpr Analyzer::Simplify(const PrimExpr& expr, int steps) {
130130
PrimExpr res = expr;
131131

132+
// Always starts with a canonical simplification, as some structural property
133+
// of an expression might be destroyed by rewrite simplification.
134+
res = this->canonical_simplify(res);
135+
132136
for (int i = 0; i < steps; ++i) {
133137
if (tir::is_const_int(res)) {
134138
return res;

src/arith/canonical_simplify.cc

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -335,6 +335,8 @@ class SumExprNode : public CanonicalExprNode {
335335
* \return whether the cast can be safely pushed to children
336336
*/
337337
bool CanPushCastToChildren(DataType dtype, Analyzer* analyzer) const {
338+
bool is_min_value = dtype.bits() == 64 ? base == std::numeric_limits<int64_t>::lowest()
339+
: base == -(1LL << (dtype.bits() - 1));
338340
// cast(dtype, arg_1 + arg_2 + ... arg_n) ==
339341
// cast(dtype, arg_1) + ... + cast(dtype, arg_n)
340342
// iff it is an upcast (dtype.bits >= self.dtype.bits) or all of
@@ -351,7 +353,7 @@ class SumExprNode : public CanonicalExprNode {
351353
}
352354
}
353355
}
354-
if (base > 0) {
356+
if (base > 0 || is_min_value) {
355357
res = res + make_const(dtype, base);
356358
if (!CastIsSafe(dtype, res, analyzer)) {
357359
return false;
@@ -366,7 +368,7 @@ class SumExprNode : public CanonicalExprNode {
366368
}
367369
}
368370
}
369-
if (base < 0) {
371+
if (base < 0 && !is_min_value) {
370372
res = res - make_const(dtype, -base);
371373
if (!CastIsSafe(dtype, res, analyzer)) {
372374
return false;
@@ -497,14 +499,16 @@ class SumExprNode : public CanonicalExprNode {
497499
return args;
498500
}
499501
static PrimExpr Normalize_(DataType dtype, const std::vector<SplitExpr>& args, int64_t base) {
502+
bool is_min_value = dtype.bits() == 64 ? base == std::numeric_limits<int64_t>::lowest()
503+
: base == -(1LL << (dtype.bits() - 1));
500504
// Positive scales first
501505
PrimExpr res = make_const(dtype, 0);
502506
for (size_t i = 0; i < args.size(); ++i) {
503507
if (args[i]->scale > 0) {
504508
res = res + args[i]->Normalize();
505509
}
506510
}
507-
if (base > 0) {
511+
if (base > 0 || is_min_value) {
508512
res = res + make_const(dtype, base);
509513
}
510514
// negative scales follows using sub.
@@ -513,7 +517,7 @@ class SumExprNode : public CanonicalExprNode {
513517
res = res - args[i]->NormalizeWithScale(-1);
514518
}
515519
}
516-
if (base < 0) {
520+
if (base < 0 && !is_min_value) {
517521
res = res - make_const(dtype, -base);
518522
}
519523
return res;

tests/python/unittest/test_arith_canonical_simplify.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -372,5 +372,19 @@ def test_simplify_cast():
372372
ck.verify(res, 2)
373373

374374

375+
def test_simplify_normalize_min_value_expr():
376+
ck = CanonicalChecker()
377+
x = te.var("x", "int32")
378+
379+
ck.verify(te.min_value("int32") - x == 0, x == te.min_value("int32"))
380+
ck.verify(te.min_value("int32") + x == 0, False)
381+
ck.verify(0 == te.min_value("int32") - x, x == te.min_value("int32"))
382+
ck.verify(0 == te.min_value("int32") + x, False)
383+
ck.verify(-x + te.min_value("int32") == 0, x == te.min_value("int32"))
384+
ck.verify(x + te.min_value("int32") == 0, False)
385+
ck.verify(0 == -x + te.min_value("int32"), x == te.min_value("int32"))
386+
ck.verify(0 == x + te.min_value("int32"), False)
387+
388+
375389
if __name__ == "__main__":
376390
tvm.testing.main()

tests/python/unittest/test_arith_intset.py

Lines changed: 2 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,6 @@ def check_region_bound(expect_region, var_dom, mode, predicate=None):
182182
expect_begin, expect_end = expect_desc[binding]
183183
result_begin = analyzer.simplify(intset.min_value, 3)
184184
result_end = analyzer.simplify(intset.max_value + 1, 3)
185-
print(result_end)
186185
assert analyzer.can_prove_equal(
187186
result_begin - expect_begin, 0
188187
), f"{result_begin} vs {expect_begin}"
@@ -306,10 +305,7 @@ def test_region_lower_bound_for_non_perfect_tile():
306305
+ h2: {
307306
(): (
308307
tvm.tir.max(h3 * 8, 1),
309-
tvm.tir.max(h3 * 8, 1)
310-
- tvm.tir.max(h3 * 8, 214)
311-
- tvm.tir.max(1 - h3 * 8, 0)
312-
+ 224,
308+
tvm.tir.min(0, h3 * 8 - 214) + 224,
313309
),
314310
((h3, 0),): (1, 10), # h3 == 0: region is [1, 10)
315311
((h3, 10),): (h3 * 8, h3 * 8 + 10), # 0 < h3 <= 26: region is [h3 * 8, h3 * 8 + 10)
@@ -333,10 +329,7 @@ def test_region_lower_bound_for_non_perfect_tile():
333329
+ h1: {
334330
(): (
335331
tvm.tir.max(h3 * 8, 1),
336-
tvm.tir.max(h3 * 8, 1)
337-
- tvm.tir.max(h3 * 8, 214)
338-
- tvm.tir.max(1 - h3 * 8, 0)
339-
+ 224,
332+
tvm.tir.min(0, h3 * 8 - 214) + 224,
340333
),
341334
((h3, 0),): (1, 10),
342335
((h3, 10),): (h3 * 8, h3 * 8 + 10),
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
#
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
#
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
import tvm
18+
import tvm.testing
19+
from tvm import tir
20+
21+
22+
def test_simplify_reshape_flattened_index():
23+
ana = tvm.arith.Analyzer()
24+
25+
i0 = tir.Var("i0", "int64")
26+
i1 = tir.Var("i1", "int64")
27+
ana.bind(i0, tvm.ir.Range(0, 8))
28+
ana.bind(i1, tvm.ir.Range(0, 3))
29+
30+
i_flattened = i0 * 3 + i1
31+
assert tvm.ir.structural_equal(
32+
ana.simplify((i_flattened) // 12 * 12 + (i_flattened) % 12 // 4 * 4 + (i_flattened) % 4),
33+
i_flattened,
34+
)
35+
36+
37+
if __name__ == "__main__":
38+
tvm.testing.main()

tests/python/unittest/test_tir_buffer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ def assert_simplified_equal(index_simplified, index_direct):
150150
index_simplified = A.offset_of(
151151
(idxd(idxm(k0, idxd(k1, s)), n), idxm(idxm(k0, idxd(k1, s)), n) + idxm(k0, k1))
152152
)
153-
index_direct = A.offset_of((0, idxm(k0, k1) + idxm(k0, idxd(k1, s))))
153+
index_direct = A.offset_of((0, idxm(k0, idxd(k1, s)) + idxm(k0, k1)))
154154
assert_simplified_equal(index_simplified, index_direct)
155155
# Test Case3
156156
index_simplified = A.offset_of(

tests/python/unittest/test_tir_schedule_analysis.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ def test_suggest_index_map_winograd():
126126
floordiv(i0, 2),
127127
floordiv(i1, 2),
128128
floormod(i0, 2),
129-
floormod(((i1 * 4) + floordiv(i2, 32)), 8),
129+
floormod(i1, 2) * 4 + floordiv(i2, 32),
130130
floormod(i2, 32),
131131
floordiv(i3, 32),
132132
floormod(i3, 32),
@@ -137,8 +137,8 @@ def test_suggest_index_map_winograd():
137137
expected_inverse_index_map = IndexMap.from_func(
138138
lambda i0, i1, i2, i3, i4, i5, i6: (
139139
((i0 * 2) + i2),
140-
((i1 * 2) + floordiv(((i3 * 32) + i4), 128)),
141-
floormod(((i3 * 32) + i4), 128),
140+
i1 * 2 + floordiv(i3, 4),
141+
floormod(i3, 4) * 32 + i4,
142142
((i5 * 32) + i6),
143143
)
144144
)

0 commit comments

Comments
 (0)