Commit ffd47e6
[SLM][Bugfix] Output debug functions as impure
Prior to this commit, debug functions were generated with `relax.call_pure_packed`. This resulted in unexpected behavior, as `nn.op.print_` could be optimized away as a pure function. This commit updates debug functions to be generated as impure functions. This requires removing the `with bb.dataflow()` blocks in the SLM-to-Relax conversions, as impure functions may not be used in a dataflow block. To restore dataflow blocks where legal, the `ConvertToDataflow` pass is applied.
1 parent 254e90a commit ffd47e6
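
To see why the old behavior was a bug, here is a minimal sketch (not part of the commit; names, shapes, and arguments are illustrative) of how a debug hook emitted via `relax.call_pure_packed` can legally be discarded:

# Sketch: why a *pure* debug call can vanish. Assumes the tvm.relax
# Python API at this revision; names below are illustrative.
import tvm
from tvm import relax as rx

bb = rx.BlockBuilder()
x = rx.Var("x", rx.TensorStructInfo((2, 2), "float32"))
with bb.function("main", params=[x]):
    # Pre-fix emission style: the call is wrapped as pure and its result
    # is never used, so optimization passes are free to delete it -- and
    # the print silently disappears.
    unused = bb.emit(
        rx.call_pure_packed(
            "vm.builtin.invoke_debug_func",
            x,
            sinfo_args=[rx.ObjectStructInfo()],
        )
    )
    bb.emit_func_output(x)
mod = bb.finalize()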

11 files changed: +113 −62 lines changed

include/tvm/relax/expr.h

Lines changed: 11 additions & 6 deletions
@@ -983,16 +983,21 @@ class FunctionNode : public BaseFuncNode {
 class Function : public BaseFunc {
  public:
   TVM_DLL explicit Function(Array<Var> params, Expr body, Optional<StructInfo> ret_struct_info,
-                            bool is_pure = true, DictAttrs attrs = NullValue<DictAttrs>(),
-                            Span span = Span());
+                            bool is_pure, DictAttrs attrs = NullValue<DictAttrs>(),
+                            Span span = Span())
+      : Function(params, body, ret_struct_info, Optional<Bool>(Bool(is_pure)), attrs, span) {}
+
+  TVM_DLL explicit Function(Array<Var> params, Expr body,
+                            Optional<StructInfo> ret_struct_info = NullOpt,
+                            Optional<Bool> is_pure = NullOpt,
+                            DictAttrs attrs = NullValue<DictAttrs>(), Span span = Span());
 
   /*!
    * \brief Mimics the constructor but without body Expr.
-   * \note ret_struct_info is required, since it can not deduced by the body.
+   * \note `ret_struct_info` and `is_pure` are required, since it can not deduced by the body.
    */
-  TVM_DLL static Function CreateEmpty(Array<Var> params, StructInfo ret_struct_info,
-                                      bool is_pure = true, DictAttrs attrs = NullValue<DictAttrs>(),
-                                      Span span = Span());
+  TVM_DLL static Function CreateEmpty(Array<Var> params, StructInfo ret_struct_info, bool is_pure,
+                                      DictAttrs attrs = NullValue<DictAttrs>(), Span span = Span());
 
   TVM_DEFINE_OBJECT_REF_METHODS(Function, BaseFunc, FunctionNode);
   TVM_DEFINE_OBJECT_REF_COW_METHOD(FunctionNode);

python/tvm/relax/block_builder.py

Lines changed: 2 additions & 2 deletions
@@ -638,8 +638,8 @@ def emit_func_output(
         finally:
             self.end_scope()
 
-        # do not specify ret_struct_info and let constructor deduce
-        # from seqe.struct_info
+        # Do not specify ret_struct_info or purity, and let the
+        # constructor deduce from seqe.struct_info.
         func = rx.Function(self._func._params, seqe)
         for key, value in self._func._attrs.items():
             func = func.with_attr(key, value)

python/tvm/relax/expr.py

Lines changed: 1 addition & 1 deletion
@@ -887,7 +887,7 @@ def __init__(
         params: List[Var],
         body: Expr,
         ret_struct_info: Optional[StructInfo] = None,
-        is_pure: Optional[bool] = True,
+        is_pure: Optional[bool] = None,
        attrs: Optional[tvm.ir.DictAttrs] = None,
        span: Optional[Span] = None,
    ) -> None:
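
A hedged sketch of what the new default means in practice (assumed API; reading the deduced flag off the function's struct info is an assumption on my part, not shown in the commit):

# Sketch: with is_pure left as None, rx.Function deduces purity from
# the body. Assumes the tvm.relax Python API; names are illustrative.
import tvm
from tvm import relax as rx

bb = rx.BlockBuilder()
x = rx.Var("x", rx.TensorStructInfo((4,), "float32"))
with bb.function("f", params=[x]):
    y = bb.emit(rx.op.add(x, x))  # a pure operation
    bb.emit_func_output(y)
func = bb.finalize()["f"]

# emit_func_output (see block_builder.py above) now omits the purity
# flag, so the constructor should deduce a pure function here.
assert func.struct_info.purity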

python/tvm/relax/frontend/nn/exporter.py

Lines changed: 6 additions & 7 deletions
@@ -117,8 +117,7 @@ def _effects() -> typing.List[typing.Tuple[str, core.Effect]]:
         with self:
             if effects:
                 with self.builder.function("_initialize_effect"):
-                    with self.builder.dataflow():
-                        outputs = _emit_effect_init(self.builder, effects)
+                    outputs = _emit_effect_init(self.builder, effects)
                     self.builder.emit_func_output(outputs, params=[])
             for method_name, method_spec in zip(spec.method_names, spec.method_specs):
                 params = _params()  # Re-initialize so symbolic shapes not shared across methods
@@ -132,12 +131,12 @@ def _effects() -> typing.List[typing.Tuple[str, core.Effect]]:
                     method_name,
                     attrs={"num_input": len_args + len_effects},  # type: ignore
                 ):
-                    with self.builder.dataflow():
-                        outputs, inputs = _emit_method(self.builder, method_spec, params, effects)
+                    outputs, inputs = _emit_method(self.builder, method_spec, params, effects)
                     self.builder.emit_func_output(outputs, inputs)
         mod = self.builder.finalize()
         assert rx.analysis.well_formed(mod)
 
+        mod = rx.transform.ConvertToDataflow(min_size=1)(mod)
         return mod, params, ext_mods
 
 
@@ -150,7 +149,7 @@ def _emit_effect_init(
         inits = effect.emit_init(prefix, builder)
         assert isinstance(inits, list)
         outputs.extend(inits)
-    outputs = builder.emit_output(builder.emit(rx.Tuple(outputs)))
+    outputs = builder.emit(rx.Tuple(outputs))
     return outputs
 
 
@@ -281,9 +280,9 @@ def _detuple(arg, var: rx.Var, builder: BlockBuilder):
     for _, effect in effects:
         effect_outputs.extend(effect.finalize())
     if effect_outputs and spec.effect_mode != "none":
-        outputs = builder.emit_output(rx.Tuple([_unwrap_ret(outputs), rx.Tuple(effect_outputs)]))
+        outputs = builder.emit(rx.Tuple([_unwrap_ret(outputs), rx.Tuple(effect_outputs)]))
     else:
-        outputs = builder.emit_output(_unwrap_ret(outputs))
+        outputs = builder.emit(_unwrap_ret(outputs))
     return outputs, inputs
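
For reference, `ConvertToDataflow` is an existing `relax.transform` pass; a brief sketch (illustrative module, assumed API) of what the exporter now relies on:

# Sketch: bindings are emitted at function scope, then ConvertToDataflow
# re-wraps maximal runs of pure bindings in dataflow blocks.
import tvm
from tvm import relax as rx

bb = rx.BlockBuilder()
x = rx.Var("x", rx.TensorStructInfo((2, 2), "float32"))
with bb.function("main", params=[x]):
    y = bb.emit(rx.op.add(x, x))       # pure, at function scope
    z = bb.emit(rx.op.multiply(y, y))  # pure, at function scope
    bb.emit_func_output(z)
mod = bb.finalize()

# min_size=1 forms a block around even a single pure binding; impure
# calls (such as the debug hook) are left outside the new blocks.
mod = rx.transform.ConvertToDataflow(min_size=1)(mod)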

python/tvm/relax/frontend/nn/op.py

Lines changed: 7 additions & 8 deletions
@@ -1897,15 +1897,14 @@ def debug_func(lineno: str, arg_0, arg_1, ...) -> None:
         else:
             raise TypeError(f"Unsupported type {type(arg)}")
 
+    func = rx.ExternFunc("vm.builtin.invoke_debug_func")
+    call = rx.Call(
+        func,
+        [io.effect, rx.StringImm(name), rx.StringImm(_line_info), *converted_args],
+        sinfo_args=[rx.ObjectStructInfo()],
+    )
     io.effect = BlockBuilder.current().emit(
-        rx.call_pure_packed(
-            "vm.builtin.invoke_debug_func",
-            io.effect,
-            rx.StringImm(name),
-            rx.StringImm(_line_info),
-            *converted_args,
-            sinfo_args=[rx.ObjectStructInfo()],
-        ),
+        call,
         name_hint=io.effect.name_hint,
     )
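
Side by side, the two emission styles look like this; a hedged sketch with placeholder arguments (the real call sites pass the io effect, function name, and line info shown in the diff above):

# Sketch: the same packed call constructed as impure vs. pure.
# Placeholder strings; assumes the tvm.relax Python API.
import tvm
from tvm import relax as rx

effect = rx.Var("io_effect", rx.ObjectStructInfo())

# New style: a direct Call to an ExternFunc is not assumed pure, so it
# survives optimization but may not sit inside a dataflow block.
impure_call = rx.Call(
    rx.ExternFunc("vm.builtin.invoke_debug_func"),
    [effect, rx.StringImm("my_debug_func"), rx.StringImm("model.py:42")],
    sinfo_args=[rx.ObjectStructInfo()],
)

# Old style: identical arguments, but the wrapper asserts purity, which
# licenses passes to drop the call when its result is unused.
pure_call = rx.call_pure_packed(
    "vm.builtin.invoke_debug_func",
    effect,
    rx.StringImm("my_debug_func"),
    rx.StringImm("model.py:42"),
    sinfo_args=[rx.ObjectStructInfo()],
)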

src/relax/ir/expr.cc

Lines changed: 10 additions & 3 deletions
@@ -441,8 +441,8 @@ TVM_REGISTER_GLOBAL("relax.SeqExpr")
 
 TVM_REGISTER_NODE_TYPE(FunctionNode);
 
-Function::Function(Array<Var> params, Expr body, Optional<StructInfo> ret_struct_info, bool is_pure,
-                   DictAttrs attrs, Span span) {
+Function::Function(Array<Var> params, Expr body, Optional<StructInfo> ret_struct_info,
+                   Optional<Bool> is_pure_override, DictAttrs attrs, Span span) {
   // Set the function type.
   // For function, we take a conservative approach and require the function type
   // to be known at construction time.
@@ -473,6 +473,13 @@ Function::Function(Array<Var> params, Expr body, Optional<StructInfo> ret_struct
     ret_struct_info = body_sinfo;
   }
 
+  bool is_pure;
+  if (is_pure_override.defined()) {
+    is_pure = is_pure_override.value()->value;
+  } else {
+    is_pure = !ContainsImpureCall(body);
+  }
+
   FuncStructInfo func_sinfo(param_sinfo, ret_struct_info.value(), is_pure);
 
   // set the fields
@@ -490,7 +497,7 @@ Function::Function(Array<Var> params, Expr body, Optional<StructInfo> ret_struct
 
 TVM_REGISTER_GLOBAL("relax.Function")
     .set_body_typed([](Array<Var> params, Expr body, Optional<StructInfo> ret_struct_info,
-                       bool is_pure, DictAttrs attrs, Span span) {
+                       Optional<Bool> is_pure, DictAttrs attrs, Span span) {
       return Function(params, body, ret_struct_info, is_pure, attrs, span);
     });

tests/python/relax/test_frontend_nn_debug.py

Lines changed: 59 additions & 2 deletions
@@ -24,6 +24,8 @@
 from tvm.relax.frontend.nn import op, spec
 from tvm.runtime import NDArray
 
+from tvm.script import ir as I, relax as R
+
 
 def test_debug_print():
     class Layer(nn.Module):
@@ -42,6 +44,62 @@ def forward(self, x: nn.Tensor):  # pylint: disable=invalid-name
     assert isinstance(y, torch.Tensor)
 
 
+def test_debug_print_well_formed():
+    class Layer(nn.Module):
+        def forward(self, state: nn.Tensor):
+            state = state * 2.0
+            op.print_(state)
+            state = state * 2.0
+            return state
+
+    forward_code = Layer.forward.__wrapped__.__code__
+    debug_location = f"{forward_code.co_filename}:{forward_code.co_firstlineno+2}"
+
+    model, _ = Layer().export_tvm(
+        spec={
+            "forward": {"state": spec.Tensor([10, 5], dtype="float32")},
+        },
+        debug=True,
+    )
+
+    @I.ir_module
+    class Expected:
+        @R.function
+        def _initialize_effect() -> R.Tuple(R.Object):
+            with R.dataflow():
+                _io = R.null_value()
+                gv = (_io,)
+                R.output(gv)
+            return gv
+
+        @R.function(pure=False)
+        def forward(
+            state: R.Tensor((10, 5), dtype="float32"), _io: R.Object
+        ) -> R.Tuple(R.Tensor((10, 5), dtype="float32"), R.Tuple(R.Object)):
+            R.func_attr({"num_input": 2})
+            with R.dataflow():
+                mul = R.multiply(state, R.const(2, "float32"))
+                R.output(mul)
+
+            _io1 = R.call_packed(
+                "vm.builtin.invoke_debug_func",
+                _io,
+                R.str("vm.builtin.debug_print"),
+                R.str(debug_location),
+                mul,
+                sinfo_args=(R.Object,),
+            )
+
+            with R.dataflow():
+                mul1 = R.multiply(mul, R.const(2, "float32"))
+                gv1 = mul1, (_io1,)
+                R.output(gv1)
+
+            return gv1
+
+    tvm.ir.assert_structural_equal(Expected, model)
+
+
 def test_debug_func():
     @tvm.register_func("testing.relax.frontend.nn.test_debug_func")
     def _debug(  # pylint: disable=too-many-arguments
@@ -79,5 +137,4 @@ def forward(self, x: nn.Tensor, v: tir.Var):  # pylint: disable=invalid-name
 
 
 if __name__ == "__main__":
-    test_debug_print()
-    test_debug_func()
+    tvm.testing.main()

tests/python/relax/test_frontend_nn_extern_module.py

Lines changed: 2 additions & 4 deletions
@@ -91,10 +91,9 @@ def scalar_add(
     ) -> R.Tensor((), dtype="float32"):
         R.func_attr({"num_input": 2})
         with R.dataflow():
-            ext_scalar_add = R.call_dps_packed(
+            gv = R.call_dps_packed(
                 "ext_scalar_add", (a, b), out_sinfo=R.Tensor((), dtype="float32")
             )
-            gv: R.Tensor((), dtype="float32") = ext_scalar_add
             R.output(gv)
         return gv
 
@@ -107,10 +106,9 @@ def test_sym(
         z = T.int64()
         R.func_attr({"num_input": 2})
         with R.dataflow():
-            ext_test_sym = R.call_dps_packed(
+            gv1 = R.call_dps_packed(
                 "ext_test_sym", (a, b), out_sinfo=R.Tensor((x, y, z, 9), dtype="float32")
             )
-            gv1: R.Tensor((x, y, z, 9), dtype="float32") = ext_test_sym
             R.output(gv1)
         return gv1

tests/python/relax/test_frontend_nn_op.py

Lines changed: 9 additions & 19 deletions
@@ -532,8 +532,7 @@ def add_one(A: T.Buffer((T.int64(10), T.int64(10)), "float32"), T_add: T.Buffer(
     def _initialize_effect() -> R.Tuple(R.Object):
         with R.dataflow():
             _io: R.Object = R.null_value()
-            lv: R.Tuple(R.Object) = (_io,)
-            gv: R.Tuple(R.Object) = lv
+            gv = (_io,)
             R.output(gv)
         return gv
 
@@ -605,8 +604,7 @@ def llama_fused_rope(var_qkv: T.handle, offset: T.int64, var_q: T.handle, var_k:
     def _initialize_effect() -> R.Tuple(R.Object):
         with R.dataflow():
             _io: R.Object = R.null_value()
-            lv: R.Tuple(R.Object) = (_io,)
-            gv: R.Tuple(R.Object) = lv
+            gv = (_io,)
             R.output(gv)
         return gv
 
@@ -693,8 +691,7 @@ def inplace_take(
     def _initialize_effect() -> R.Tuple(R.Object):
         with R.dataflow():
             _io: R.Object = R.null_value()
-            lv: R.Tuple(R.Object) = (_io,)
-            gv: R.Tuple(R.Object) = lv
+            gv = (_io,)
             R.output(gv)
         return gv
 
@@ -711,13 +708,12 @@ def test(
         R.func_attr({"num_input": 4})
         cls = Expected
         with R.dataflow():
-            lv1 = R.call_tir(
+            gv1 = R.call_tir(
                 cls.inplace_take,
                 (embedding_table, input_ids, embedding_dst),
                 out_sinfo=R.Tensor((total_seq_len, hidden_size), dtype),
                 tir_vars=R.shape([offset_1]),
             )
-            gv1: R.Tensor((total_seq_len, hidden_size), dtype) = lv1
             R.output(gv1)
         return gv1
 
@@ -766,8 +762,7 @@ def test(A: R.Tensor((16, 16), dtype="float32")) -> R.Tensor((16, 16), dtype="fl
         R.func_attr({"num_input": 1})
         cls = Expected
         with R.dataflow():
-            lv = R.call_tir(cls.tir_func, (A,), out_sinfo=R.Tensor((16, 16), dtype="float32"))
-            gv: R.Tensor((16, 16), dtype="float32") = lv
+            gv = R.call_tir(cls.tir_func, (A,), out_sinfo=R.Tensor((16, 16), dtype="float32"))
             R.output(gv)
         return gv
 
@@ -794,8 +789,7 @@ class Expected:
     def _initialize_effect() -> R.Tuple(R.Object):
         with R.dataflow():
             _io: R.Object = R.null_value()
-            lv: R.Tuple(R.Object) = (_io,)
-            gv: R.Tuple(R.Object) = lv
+            gv = (_io,)
             R.output(gv)
         return gv
 
@@ -845,7 +839,6 @@ def test(self):
 
 @tvm.testing.requires_gpu
 def test_multinomial_from_uniform():
-
     prob_shape = (3, 5)
     sample_shape = (6, 1)
 
@@ -882,8 +875,7 @@ def get_sample_index(A: T.handle, B: T.handle, C: T.handle, D: T.handle):
     def _initialize_effect() -> R.Tuple(R.Object):
         with R.dataflow():
             _io: R.Object = R.null_value()
-            lv: R.Tuple(R.Object) = (_io,)
-            gv: R.Tuple(R.Object) = lv
+            gv = (_io,)
             R.output(gv)
         return gv
 
@@ -1009,8 +1001,7 @@ def get_renorm_prob(A: T.handle, B: T.handle, C: T.handle, D: T.handle):
     def _initialize_effect() -> R.Tuple(R.Object):
         with R.dataflow():
             _io: R.Object = R.null_value()
-            lv: R.Tuple(R.Object) = (_io,)
-            gv: R.Tuple(R.Object) = lv
+            gv = (_io,)
             R.output(gv)
         return gv
 
@@ -1124,8 +1115,7 @@ def get_renorm_cutoff(A: T.handle, B: T.handle, C: T.handle, D: T.handle, E: T.h
     def _initialize_effect() -> R.Tuple(R.Object):
         with R.dataflow():
             _io: R.Object = R.null_value()
-            lv: R.Tuple(R.Object) = (_io,)
-            gv: R.Tuple(R.Object) = lv
+            gv = (_io,)
             R.output(gv)
         return gv

tests/python/relax/test_frontend_nn_packing.py

Lines changed: 1 addition & 2 deletions
@@ -59,8 +59,7 @@ def forward(
             matmul = R.matmul(x, matmul_1_weight)
             matmul_2_weight = R.permute_dims(linear_2_weight)
             matmul1 = R.matmul(x, matmul_2_weight)
-            add = R.add(matmul, matmul1)
-            gv = add
+            gv = R.add(matmul, matmul1)
             R.output(gv)
         return gv
