[ARITH] Canonicalize mul-coefficient to rhs

tqchen · Ghosts381937 · tqchen · commit 492fc5b7ea20 · 2025-06-02T11:02:05.000-04:00
This PR updates the rewrite simplify logic to canonicalize mul-coefficient to rhs.
This change is consistent with rest of the code base and allows better simplification
of more cases. A test case of floormod with linear offset is added.

Co-authored-by: Ghosts381937 &lt;she.xing88@gmail.com&gt;
diff --git a/src/arith/rewrite_simplify.cc b/src/arith/rewrite_simplify.cc
@@ -446,10 +446,10 @@ PrimExpr RewriteSimplifier::Impl::VisitExpr_(const AddNode* op) {
     // mul co-efficient folding
     TVM_TRY_REWRITE(x + x, x * 2);
 
-    TVM_TRY_REWRITE(matches_one_of(x * y + x, y * x + x, x + y * x, x + x * y), x * (y + 1));
+    TVM_TRY_REWRITE(matches_one_of(x * y + x, y * x + x, x + y * x, x + x * y), (y + 1) * x);
 
     TVM_TRY_REWRITE(matches_one_of(x * y + x * z, y * x + x * z, x * y + z * x, y * x + z * x),
-                    x * (y + z));
+                    (y + z) * x);
 
     // DivMod rules
     // truc div
@@ -563,12 +563,12 @@ PrimExpr RewriteSimplifier::Impl::VisitExpr_(const SubNode* op) {
     TVM_TRY_REWRITE(matches_one_of(max(x, y) - y, x - min(y, x)), max(x - y, 0));
     TVM_TRY_REWRITE(matches_one_of(x - min(x, y), max(y, x) - y), max(0, x - y));
 
-    // mul co-efficient folding
+    // mul co-efficient folding: pefer co-effiicent to stay at rhs
     TVM_TRY_REWRITE(x - x, ZeroWithTypeLike(x));
-    TVM_TRY_REWRITE(matches_one_of(x * y - x, y * x - x), x * (y - 1));
-    TVM_TRY_REWRITE(matches_one_of(x - y * x, x - x * y), x * (1 - y));
+    TVM_TRY_REWRITE(matches_one_of(x * y - x, y * x - x), (y - 1) * x);
+    TVM_TRY_REWRITE(matches_one_of(x - y * x, x - x * y), (1 - y) * x);
     TVM_TRY_REWRITE(matches_one_of(x * y - x * z, y * x - x * z, x * y - z * x, y * x - z * x),
-                    x * (y - z));
+                    (y - z) * x);
 
     // constant cancelation
     TVM_TRY_REWRITE((x + c1) - c2, x + (c1 - c2));
diff --git a/tests/python/arith/test_arith_rewrite_simplify.py b/tests/python/arith/test_arith_rewrite_simplify.py
@@ -391,28 +391,28 @@ class TestAddIndex(BaseCompare):
         TestCase(tvm.te.max(2 - x * 4, 0) + x * 4, tvm.te.max(x * 4, 2)),
         TestCase(tvm.te.min(0, 1 - x * 4) + x * 4, tvm.te.min(x * 4, 1)),
         TestCase(tvm.te.min(2 - x * 4, 0) + x * 4, tvm.te.min(x * 4, 2)),
-        TestCase(x * y + x * 10, x * (y + 10)),
-        TestCase(y * x + x * 10, x * (y + 10)),
-        TestCase(y * x + 10 * x, x * (y + 10)),
-        TestCase(x * y + 10 * x, x * (y + 10)),
+        TestCase(x * y + x * 10, (y + 10) * x),
+        TestCase(y * x + x * 10, (y + 10) * x),
+        TestCase(y * x + 10 * x, (y + 10) * x),
+        TestCase(x * y + 10 * x, (y + 10) * x),
         TestCase((2 * z) + tvm.te.min(x, y - (2 * z)), tvm.te.min(x + (z * 2), y)),
-        TestCase(y * x + x, x * (y + 1)),
-        TestCase(x * y + x, x * (y + 1)),
+        TestCase(y * x + x, (y + 1) * x),
+        TestCase(x * y + x, (y + 1) * x),
         TestCase((x + 10) + 13, x + 23),
         TestCase((x + 10) + (13 + z), x + z + 23),
-        TestCase(x * y + 10 * x, x * (y + 10)),
-        TestCase(y * x + x * 3, x * (y + 3)),
+        TestCase(x * y + 10 * x, (y + 10) * x),
+        TestCase(y * x + x * 3, (y + 3) * x),
         TestCase(x + 3 + y, x + y + 3),
         TestCase((3 - y) + x, x - y + 3),
         # canonicalization
         TestCase(x + 2 + 3 + 4 + x, x * 2 + 9),
         TestCase(x + 2 + 3 + 4 + x * 3, x * 4 + 9),
         # DivMod rules
         # trunc div
-        TestCase(y * tmod(x, 8) + 10 * tmod(x, 8), tmod(x, 8) * (y + 10)),
+        TestCase(y * tmod(x, 8) + 10 * tmod(x, 8), (y + 10) * tmod(x, 8)),
         TestCase(tdiv(x, 8) * 8 + tmod(x, 8), x),
         # floor div
-        TestCase(y * flm(x, 8) + 10 * flm(x, 8), flm(x, 8) * (y + 10)),
+        TestCase(y * flm(x, 8) + 10 * flm(x, 8), (y + 10) * flm(x, 8)),
         TestCase(fld(x, 8) * 8 + flm(x, 8), x),
         TestCase(fld(flm(x, 2) + 7, 2) + fld(x, 2), fld(x + 7, 2)),
     )
@@ -436,10 +436,10 @@ class TestSubIndex(BaseCompare):
         TestCase(y - tvm.te.max(x, y), tvm.te.min(y - x, 0)),
         # mul co-efficient foldng
         TestCase(x - x, 0),
-        TestCase(x * y - x, x * (y + (-1))),
-        TestCase(x * y - 10 * x, x * (y + (-10))),
-        TestCase(y * x - x * z, x * (y - z)),
-        TestCase(y * x - z * x, x * (y - z)),
+        TestCase(x * y - x, (y + (-1)) * x),
+        TestCase(x * y - 10 * x, (y + (-10)) * x),
+        TestCase(y * x - x * z, (y - z) * x),
+        TestCase(y * x - z * x, (y - z) * x),
         TestCase(x + 10 - 20, x + (-10)),
         # 4-operands pattern
         TestCase((x + y) - (x + z), y - z),
diff --git a/tests/python/arith/test_arith_simplify.py b/tests/python/arith/test_arith_simplify.py
@@ -131,5 +131,18 @@ def test_regression_simplify_inf_recursion():
     ana.rewrite_simplify(res)
 
 
+def test_simplify_floor_mod_with_linear_offset():
+    """
+    Test that the floor_mod is simplified correctly when the offset is linear.
+    """
+    ana = tvm.arith.Analyzer()
+    past_decoder_sequence_length = tir.Var("past_decoder_sequence_length", "int64")
+    expr1 = (past_decoder_sequence_length + 1) * 64
+    divisor1 = (past_decoder_sequence_length + 1) * 32
+    assert ana.can_prove_equal(tvm.tir.floormod(expr1, divisor1), 0)
+    divisor2 = 32 * (past_decoder_sequence_length + 1)
+    assert ana.can_prove_equal(tvm.tir.floormod(expr1, divisor2), 0)
+
+
 if __name__ == "__main__":
     tvm.testing.main()
diff --git a/tests/python/tir-transform/test_tir_transform_common_subexpr_elim.py b/tests/python/tir-transform/test_tir_transform_common_subexpr_elim.py
@@ -352,7 +352,7 @@ def test_no_normalization_without_commoning():
 def func_distributivity(
     B: T.Buffer((50,), "int32"), i1: T.int32, i2: T.int32, x: T.int32, y: T.int32, z: T.int32
 ) -> None:
-    B[i1] = x * (y + z)
+    B[i1] = (y + z) * x
     B[i2] = x * y + x * z