Skip to content

Commit a9ac14b

Browse files
committed
[TVMC][TRANSFORMS] ToMixedPrecision transform support with custom options enabled
Adds new command line options: --mixed-precision, --mixed-precision-ops, --mixed-precision-input and --mixed-precision-output. This PR also enhances the python interface by replacing alter_layout with transform_args. transform_args is a dict with all transform related options, including the existing alter_layout option.
1 parent e7ad4bc commit a9ac14b

File tree

4 files changed

+238
-47
lines changed

4 files changed

+238
-47
lines changed

python/tvm/driver/tvmc/autotuner.py

Lines changed: 18 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
from .model import TVMCModel
4040
from .target import target_from_cli, generate_target_args, reconstruct_target_args
4141
from .shape_parser import parse_shape_string
42-
from .transform import convert_graph_layout
42+
from .transform import generate_transform_args, parse_graph_transform_args, apply_graph_transforms
4343

4444

4545
# pylint: disable=invalid-name
@@ -127,12 +127,7 @@ def add_tune_parser(subparsers, _, json_params):
127127
metavar="PATH",
128128
help="path to an auto-tuning log file by AutoTVM.",
129129
)
130-
parser.add_argument(
131-
"--desired-layout",
132-
choices=["NCHW", "NHWC"],
133-
default=None,
134-
help="change the data layout of the whole graph",
135-
)
130+
generate_transform_args(parser)
136131
parser.add_argument(
137132
"--enable-autoscheduler",
138133
help="enable tuning the graph through the AutoScheduler tuner",
@@ -269,6 +264,8 @@ def drive_tune(args):
269264
rpc_hostname = None
270265
rpc_port = None
271266

267+
transform_args = parse_graph_transform_args(args)
268+
272269
tune_model(
273270
tvmc_model,
274271
args.target,
@@ -283,7 +280,7 @@ def drive_tune(args):
283280
tuner=args.tuner,
284281
min_repeat_ms=args.min_repeat_ms,
285282
early_stopping=args.early_stopping,
286-
desired_layout=args.desired_layout,
283+
transform_args=transform_args,
287284
timeout=args.timeout,
288285
repeat=args.repeat,
289286
number=args.number,
@@ -309,7 +306,7 @@ def tune_model(
309306
tuner: str = "xgb",
310307
min_repeat_ms: Optional[int] = None,
311308
early_stopping: Optional[int] = None,
312-
desired_layout: Optional[str] = None,
309+
transform_args: Optional[Dict[str, Any]] = None,
313310
timeout: int = 10,
314311
repeat: int = 1,
315312
number: int = 10,
@@ -354,10 +351,8 @@ def tune_model(
354351
Minimum time to run each trial. Defaults to 0 on x86 and 1000 on other targets.
355352
early_stopping : int, optional
356353
When specified, stop tuning after this number of trials if results aren't improving.
357-
desired_layout : str, optional
358-
Can be one of "NCHW" or "NHWC". When specified, compatible operations in the graph
359-
will have their layout set to this format. Tasks will then be tuned using this
360-
specified layout.
354+
transform_args: dict, optional
355+
Graph transformation arguments that are applied to the relay module.
361356
timeout : int, optional,
362357
If a kernel trial lasts longer than this duration in seconds, it will be
363358
considered a failure.
@@ -453,7 +448,7 @@ def tune_model(
453448
mod=mod,
454449
params=params,
455450
target=target,
456-
alter_layout=desired_layout,
451+
transform_args=transform_args,
457452
hardware_params=hardware_params,
458453
include_simple_tasks=include_simple_tasks,
459454
)
@@ -475,7 +470,7 @@ def tune_model(
475470
mod=mod,
476471
params=params,
477472
target=target,
478-
alter_layout=desired_layout,
473+
transform_args=transform_args,
479474
)
480475

481476
# In autotvm, trials is specified per task. We can convert the per-model input
@@ -504,7 +499,7 @@ def autotvm_get_tuning_tasks(
504499
params: Dict[str, tvm.nd.NDArray],
505500
target: str,
506501
target_host: Optional[str] = None,
507-
alter_layout: Optional[str] = None,
502+
transform_args: Optional[Dict[str, Any]] = None,
508503
):
509504
"""Get the autotvm tuning tasks for a given relay module.
510505
@@ -518,10 +513,8 @@ def autotvm_get_tuning_tasks(
518513
The compilation target.
519514
target_host : str, optional
520515
The compilation target for the host.
521-
alter_layout : str, optional
522-
The layout to convert the graph to. Note, the convert layout
523-
pass doesn't currently guarantee the whole of the graph will
524-
be converted to the chosen layout.
516+
transform_args: dict, optional
517+
Graph transformation arguments that are applied to the relay module.
525518
526519
Returns
527520
-------
@@ -530,8 +523,7 @@ def autotvm_get_tuning_tasks(
530523
"""
531524
target, target_host = Target.canon_target_and_host(target, target_host)
532525

533-
if alter_layout:
534-
mod = convert_graph_layout(mod, alter_layout)
526+
mod = apply_graph_transforms(mod, transform_args)
535527

536528
tasks = autotvm.task.extract_from_program(
537529
mod["main"],
@@ -547,7 +539,7 @@ def autoscheduler_get_tuning_tasks(
547539
params: Dict[str, tvm.nd.NDArray],
548540
target: str,
549541
target_host: Optional[str] = None,
550-
alter_layout: Optional[str] = None,
542+
transform_args: Optional[Dict[str, Any]] = None,
551543
hardware_params: Optional[HardwareParams] = None,
552544
include_simple_tasks: bool = False,
553545
):
@@ -563,10 +555,8 @@ def autoscheduler_get_tuning_tasks(
563555
The compilation target.
564556
target_host : str, optional
565557
The compilation target for the host.
566-
alter_layout : str, optional
567-
The layout to convert the graph to. Note, the convert layout
568-
pass doesn't currently guarantee the whole of the graph will
569-
be converted to the chosen layout.
558+
transform_args: dict, optional
559+
Graph transformation arguments that are applied to the relay module.
570560
hardware_params : Optional[HardwareParams]
571561
Hardware parameters used for the search tasks
572562
@@ -579,8 +569,7 @@ def autoscheduler_get_tuning_tasks(
579569
"""
580570
target, target_host = Target.canon_target_and_host(target, target_host)
581571

582-
if alter_layout:
583-
mod = convert_graph_layout(mod, alter_layout)
572+
mod = apply_graph_transforms(mod, transform_args)
584573

585574
# Extract the tasks
586575
tasks, task_weights = auto_scheduler.extract_tasks(

python/tvm/driver/tvmc/compiler.py

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@
3737
from .target import target_from_cli, generate_target_args, reconstruct_target_args
3838
from .pass_config import parse_configs
3939
from .pass_list import parse_pass_list_str
40-
from .transform import convert_graph_layout
40+
from .transform import generate_transform_args, parse_graph_transform_args, apply_graph_transforms
4141
from .shape_parser import parse_shape_string
4242
from .workspace_pools import generate_workspace_pools_args, workspace_pools_recombobulate
4343

@@ -61,12 +61,7 @@ def add_compile_parser(subparsers, _, json_params):
6161
default="",
6262
help="the cross compiler options to generate target libraries, e.g. '-mfpu=neon-vfpv4'.",
6363
)
64-
parser.add_argument(
65-
"--desired-layout",
66-
choices=["NCHW", "NHWC"],
67-
default=None,
68-
help="change the data layout of the whole graph.",
69-
)
64+
generate_transform_args(parser)
7065
parser.add_argument(
7166
"--dump-code",
7267
metavar="FORMAT",
@@ -177,6 +172,7 @@ def drive_compile(args):
177172

178173
additional_targets = reconstruct_target_args(args)
179174
workspace_pools_target, extra_targets = target_from_cli(args.target, additional_targets)
175+
transform_args = parse_graph_transform_args(args)
180176

181177
compile_model(
182178
tvmc_model,
@@ -191,7 +187,7 @@ def drive_compile(args):
191187
output_format=args.output_format,
192188
dump_code=dump_code,
193189
target_host=None,
194-
desired_layout=args.desired_layout,
190+
transform_args=transform_args,
195191
disabled_pass=args.disabled_pass,
196192
pass_context_configs=args.pass_config,
197193
mod_name=args.module_name,
@@ -217,7 +213,7 @@ def compile_model(
217213
output_format: str = "so",
218214
dump_code: Optional[List[str]] = None,
219215
target_host: Optional[str] = None,
220-
desired_layout: Optional[str] = None,
216+
transform_args: Optional[Dict[str, Any]] = None,
221217
disabled_pass: Optional[str] = None,
222218
pass_context_configs: Optional[List[str]] = None,
223219
additional_target_options: Optional[Dict[str, Dict[str, Any]]] = None,
@@ -260,10 +256,8 @@ def compile_model(
260256
target_host : str, optional
261257
The target of the host machine if host-side code
262258
needs to be generated.
263-
desired_layout: str, optional
264-
The layout to convert the graph to. Note, the convert layout
265-
pass doesn't currently guarantee the whole of the graph will
266-
be converted to the chosen layout.
259+
transform_args: dict, optional
260+
Graph transformation arguments that are applied to the relay module.
267261
disabled_pass: str, optional
268262
Comma-separated list of passes which needs to be disabled
269263
during compilation
@@ -310,8 +304,7 @@ def compile_model(
310304
disabled_pass=disabled_pass,
311305
instruments=instruments,
312306
):
313-
if desired_layout:
314-
mod = convert_graph_layout(mod, desired_layout)
307+
mod = apply_graph_transforms(mod, transform_args)
315308

316309
for partition_function, opts in zip(partition_functions, partition_opts):
317310
mod = partition_function(mod, params, mod_name=mod_name, **opts)

python/tvm/driver/tvmc/transform.py

Lines changed: 159 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,76 @@
1313
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
1414
# KIND, either express or implied. See the License for the
1515
# specific language
16+
# pylint: disable=unused-argument
1617
"""
1718
TVMC Graph Transforms
1819
"""
1920

2021
from tvm import relay, transform
2122
from tvm.driver.tvmc import TVMCException
2223

24+
# ToMixedPrecision
25+
ACC_DTYPE = "float32"
26+
27+
28+
def mixed_precision_rule(call_node: "relay.Call", mixed_precision_type: str):
    """Custom conversion rule installed on selected ops for ToMixedPrecision.

    Parameters
    ----------
    call_node : relay.Call
        The call node being inspected (unused; the parameter is required by
        the FTVMMixedPrecisionConversionType attribute signature).
    mixed_precision_type : str
        The low-precision dtype the pass is converting to (e.g. "float16").

    Returns
    -------
    list
        [conversion category, accumulation dtype, output dtype] as expected
        by the ToMixedPrecision pass.
    """
    # ACC_DTYPE is only read here, so no ``global`` declaration is needed.
    # It is published by convert_to_mixed_precision before the pass runs.
    return [
        relay.transform.mixed_precision.MIXED_PRECISION_ALWAYS,
        ACC_DTYPE,
        mixed_precision_type,
    ]
35+
36+
37+
class MixedPrecision(object):
    """Context manager that temporarily swaps in the custom mixed precision
    conversion rule on a set of operators, restoring the previous attribute
    on exit."""

    def __init__(self, ops):
        """Record the operators whose conversion attribute will be patched.

        Parameters
        ----------
        ops : list
            Names of the operators to patch.
        """
        self.older_attr = {}
        self.ops = ops
        self.attr_key = "FTVMMixedPrecisionConversionType"

    def __enter__(self):
        # Save each operator's current rule, then install the custom one.
        for name in self.ops:
            operator = relay.op.get(name)
            self.older_attr[name] = operator.get_attr(self.attr_key)
            operator.reset_attr(self.attr_key)
            operator.set_attr(self.attr_key, mixed_precision_rule)
        return self

    def __exit__(self, ptype, value, trace):
        # Remove the custom rule and put back whatever was there before.
        for name in self.ops:
            operator = relay.op.get(name)
            operator.reset_attr(self.attr_key)
            previous = self.older_attr[name]
            if previous:
                operator.set_attr(self.attr_key, previous)
66+
67+
68+
def convert_to_mixed_precision(mod, ops, input_type, out_type):
    """Convert the given operators of the module to mixed precision.

    Parameters
    ----------
    mod : tvm.IRModule
        The relay module to convert.
    ops : str
        Comma separated operator names to convert (e.g. "nn.conv2d,nn.dense").
    input_type : str
        Input precision type ("float16" or "float32").
    out_type : str
        Output / accumulator precision type ("float16" or "float32").

    Returns
    -------
    mod : tvm.IRModule
        The converted module.

    Raises
    ------
    TVMCException
        If the ToMixedPrecision pass fails on the module.
    """

    # mixed_precision_rule reads ACC_DTYPE while the pass runs, so publish
    # the requested accumulator/output dtype before executing the pass.
    global ACC_DTYPE
    ACC_DTYPE = out_type

    with MixedPrecision(ops.split(",")):
        # Pass input_type through to the pass; previously it was silently
        # ignored, so --mixed-precision-input had no effect.
        seq = transform.Sequential(
            [
                relay.transform.InferType(),
                relay.transform.ToMixedPrecision(mixed_precision_type=input_type),
            ]
        )
        with transform.PassContext(
            config={"relay.ToMixedPrecision.keep_orig_output_dtype": True}, opt_level=3
        ):
            try:
                return seq(mod)
            except Exception as err:
                # Chain the original error so the root cause is preserved.
                raise TVMCException(
                    "Error converting mixed precision : {0}".format(str(err))
                ) from err
85+
2386

2487
def convert_graph_layout(mod, desired_layout):
2588
"""Alter the layout of the input graph.
@@ -58,3 +121,99 @@ def convert_graph_layout(mod, desired_layout):
58121
return seq(mod)
59122
except Exception as err:
60123
raise TVMCException("Error converting layout to {0}: {1}".format(desired_layout, str(err)))
124+
125+
126+
def apply_graph_transforms(mod, args):
    """Apply the requested graph transformations to the relay module.

    Currently supported transforms are layout conversion (ConvertLayout)
    and mixed precision conversion (ToMixedPrecision).

    Parameters
    ----------
    mod : tvm.IRModule
        The relay module to transform.
    args : dict
        The transform arguments (as produced by parse_graph_transform_args).
        May be None or empty, in which case the module is returned unchanged.

    Returns
    -------
    mod : tvm.IRModule
        The transformed module.
    """
    # Nothing requested: return the module untouched.
    if not args:
        return mod

    # AlterLayout
    if args.get("desired_layout"):
        mod = convert_graph_layout(mod, args["desired_layout"])

    # ToMixedPrecision
    if args.get("mixed_precision"):
        mod = convert_to_mixed_precision(
            mod,
            args.get("mixed_precision_ops", "nn.conv2d,nn.dense"),
            args.get("mixed_precision_input", "float16"),
            args.get("mixed_precision_output", "float16"),
        )
    return mod
157+
158+
159+
def parse_graph_transform_args(args):
    """Parse incoming options for graph transform arguments.

    Parameters
    ----------
    args : argparse.Namespace
        Arguments from command line parser.

    Returns
    -------
    transform_args : dict
        Graph transform arguments keyed by option name. Options that the
        parser did not register are mapped to None so downstream code can
        treat them as disabled.
    """

    args_dict = vars(args)

    keys = (
        "desired_layout",
        "mixed_precision",
        "mixed_precision_ops",
        "mixed_precision_input",
        "mixed_precision_output",
    )
    # Use .get() so a subcommand that did not register the transform options
    # (via generate_transform_args) does not raise KeyError here.
    return {key: args_dict.get(key) for key in keys}
184+
185+
186+
def generate_transform_args(parser):
    """Register all graph transform related command line options on *parser*."""

    # (flag, keyword arguments) for every transform option, in the order in
    # which they should appear in --help output.
    options = [
        # AlterLayout
        (
            "--desired-layout",
            dict(
                choices=["NCHW", "NHWC"],
                default=None,
                help="Change the data layout of the whole graph.",
            ),
        ),
        # ToMixedPrecision
        (
            "--mixed-precision",
            dict(
                help="Enable mixed precision conversion",
                action="store_true",
            ),
        ),
        (
            "--mixed-precision-ops",
            dict(
                default="nn.conv2d,nn.dense",
                help="List of operators to be converted to mixed precision",
            ),
        ),
        (
            "--mixed-precision-input",
            dict(
                choices=["float16", "float32"],
                default="float16",
                help="Input precision type",
            ),
        ),
        (
            "--mixed-precision-output",
            dict(
                choices=["float16", "float32"],
                default="float16",
                help="Output or accumulator precision type",
            ),
        ),
    ]
    for flag, kwargs in options:
        parser.add_argument(flag, **kwargs)

0 commit comments

Comments
 (0)