# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""
.. _ir_module:

IRModule
========
This tutorial presents the core abstraction of Apache TVM Unity, the IRModule.
The IRModule encompasses the **entirety** of an ML model, incorporating the
computational graph, tensor programs, and potential calls to external libraries.

.. contents:: Table of Contents
   :local:
   :depth: 1
"""

import numpy as np
import tvm
from tvm import relax

######################################################################
# Create IRModule
# ---------------
# IRModules can be initialized in various ways. We demonstrate a few of them
# below.

import torch
from torch import fx, nn
from tvm.relax.frontend.torch import from_fx

######################################################################
# Import from existing models
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~
# The most common way to initialize an IRModule is to import from an existing
# model. Apache TVM Unity accommodates imports from a range of frameworks,
# such as PyTorch and ONNX. This tutorial demonstrates the import process from
# PyTorch; a brief sketch of the ONNX path follows the PyTorch example.


# Create a dummy model
class TorchModel(nn.Module):
    def __init__(self):
        super(TorchModel, self).__init__()
        self.fc1 = nn.Linear(784, 256)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        return x


# Give the input shape and data type
input_info = [((1, 784), "float32")]

# Convert the model to IRModule
with torch.no_grad():
    torch_fx_model = fx.symbolic_trace(TorchModel())
    mod_from_torch = from_fx(torch_fx_model, input_info, keep_params_as_input=True)

mod_from_torch, params_from_torch = relax.frontend.detach_params(mod_from_torch)
# Print the IRModule
mod_from_torch.show()
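
######################################################################
# For reference, the ONNX path looks similar. The sketch below is only
# illustrative and is left commented out: it assumes the ``onnx`` package is
# installed, that a file named ``model.onnx`` exists (a hypothetical path),
# and that ``from_onnx`` accepts a ``shape_dict`` argument.

# import onnx
# from tvm.relax.frontend.onnx import from_onnx
#
# onnx_model = onnx.load("model.onnx")  # hypothetical model file
# mod_from_onnx = from_onnx(onnx_model, shape_dict={"input": (1, 784)})
# mod_from_onnx.show()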

######################################################################
# Write with Relax NN Module
# ~~~~~~~~~~~~~~~~~~~~~~~~~~
# Apache TVM Unity also provides a set of PyTorch-like APIs to help users
# write the IRModule directly.

from tvm.relax.frontend import nn


class RelaxModel(nn.Module):
    def __init__(self):
        super(RelaxModel, self).__init__()
        self.fc1 = nn.Linear(784, 256)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(256, 10)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        return x


mod_from_relax, params_from_relax = RelaxModel().export_tvm(
    {"forward": {"x": nn.spec.Tensor((1, 784), "float32")}}
)
mod_from_relax.show()

######################################################################
# Create via TVMScript
# ~~~~~~~~~~~~~~~~~~~~
# TVMScript is a Python-based DSL for IRModules. We can directly output an
# IRModule in TVMScript syntax, or alternatively, parse TVMScript to obtain
# an IRModule.

from tvm.script import ir as I
from tvm.script import relax as R


@I.ir_module
class TVMScriptModule:
    @R.function
    def main(
        x: R.Tensor((1, 784), dtype="float32"),
        fc1_weight: R.Tensor((256, 784), dtype="float32"),
        fc1_bias: R.Tensor((256,), dtype="float32"),
        fc2_weight: R.Tensor((10, 256), dtype="float32"),
        fc2_bias: R.Tensor((10,), dtype="float32"),
    ) -> R.Tensor((1, 10), dtype="float32"):
        R.func_attr({"num_input": 1})
        with R.dataflow():
            permute_dims = R.permute_dims(fc1_weight, axes=None)
            matmul = R.matmul(x, permute_dims, out_dtype="void")
            add = R.add(matmul, fc1_bias)
            relu = R.nn.relu(add)
            permute_dims1 = R.permute_dims(fc2_weight, axes=None)
            matmul1 = R.matmul(relu, permute_dims1, out_dtype="void")
            add1 = R.add(matmul1, fc2_bias)
            gv = add1
            R.output(gv)
        return gv


mod_from_script = TVMScriptModule
mod_from_script.show()
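
######################################################################
# Going the other direction, any IRModule can be printed back out as TVMScript
# text. A minimal sketch: ``IRModule.script()`` returns the TVMScript source as
# a plain Python string (``.show()`` above is essentially a syntax-highlighted
# print of the same text).

script_text = mod_from_script.script()
print(script_text.splitlines()[0])  # print only the first line of the script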

######################################################################
# Attributes of an IRModule
# -------------------------
# An IRModule is a collection of functions, indexed by GlobalVars.

mod = mod_from_torch
print(mod.get_global_vars())

######################################################################
# We can access the functions in the IRModule by indexing with the GlobalVars
# or their names.

# index by global var name
print(mod["main"])
# index by global var, and check that it is the same function
(gv,) = mod.get_global_vars()
assert mod[gv] == mod["main"]
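
######################################################################
# Indexing returns the function object itself, so its parts can be inspected
# directly. A small illustrative sketch, assuming the standard ``params`` and
# ``name_hint`` attributes on Relax functions and variables:

main_func = mod["main"]
print(type(main_func))  # a Relax Function
print([param.name_hint for param in main_func.params])  # parameter names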

######################################################################
# Transformations on IRModules
# ----------------------------
# Transformations are an important component of Apache TVM Unity. A transformation
# takes in an IRModule and outputs another IRModule. Applying a sequence of
# transformations to an IRModule to obtain a new IRModule is the common way to
# optimize a model.
#
# In this getting started tutorial, we only demonstrate how to apply transformations
# to an IRModule. For details of each transformation, please refer to the
# :ref:`Transformation API Reference <api-relax-transformation>`.

######################################################################
# We first apply the **LegalizeOps** transformation to the IRModule. This transformation
# converts the Relax module into a mixed stage, with both Relax and TensorIR functions
# within the same module. Meanwhile, the Relax operators are converted into ``call_tir``.

mod = mod_from_torch
mod = relax.transform.LegalizeOps()(mod)
mod.show()

######################################################################
# After the transformation, there are many more functions inside the module. Let's print
# the global vars again.

print(mod.get_global_vars())
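
######################################################################
# To make the mix of function kinds concrete, we can count how many functions
# of each kind the module now holds. This is a small illustrative sketch using
# the ``tir.PrimFunc`` and ``relax.Function`` classes.

from tvm import tir

num_tir = sum(isinstance(func, tir.PrimFunc) for _, func in mod.functions.items())
num_relax = sum(isinstance(func, relax.Function) for _, func in mod.functions.items())
print(f"TensorIR functions: {num_tir}, Relax functions: {num_relax}")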

######################################################################
# Next, Apache TVM Unity provides a set of default transformation pipelines to
# simplify the transformation process; we can apply the default pipeline to the module.
# The default **zero** pipeline contains very fundamental transformations, including:
#
# - **LegalizeOps**: This transform converts the Relax operators into ``call_tir`` functions
#   with the corresponding TensorIR functions. After this transform, the IRModule will
#   contain both Relax functions and TensorIR functions.
# - **AnnotateTIROpPattern**: This transform annotates the pattern of the TensorIR functions,
#   preparing them for subsequent operator fusion.
# - **FoldConstant**: This pass performs constant folding, optimizing operations
#   involving constants.
# - **FuseOps and FuseTIR**: These two passes work together to fuse operators based on the
#   patterns annotated in the previous step (AnnotateTIROpPattern). These passes transform
#   both Relax functions and TensorIR functions.
#
# .. note::
#
#    Here, we have applied **LegalizeOps** twice in the flow. The second application is
#    redundant but harmless.
#
#    Every pass can be duplicated in the flow, since we ensure each pass can handle all
#    legal IRModule inputs. This design helps users construct their own pipelines, as
#    sketched after the following code cell.

mod = relax.get_pipeline("zero")(mod)
mod.show()
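
######################################################################
# For illustration, a hand-built pipeline with roughly the passes listed above
# might look like the sketch below. This is only a sketch, assuming these pass
# constructors exist under ``relax.transform``; the prebuilt ``zero`` pipeline
# remains the recommended entry point.

custom_pipeline = tvm.transform.Sequential(
    [
        relax.transform.LegalizeOps(),
        relax.transform.AnnotateTIROpPattern(),
        relax.transform.FoldConstant(),
        relax.transform.FuseOps(),
        relax.transform.FuseTIR(),
    ]
)
# Applying it would look like: mod = custom_pipeline(mod)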

######################################################################
# Deploy the IRModule Universally
# -------------------------------
# After the optimization, we can compile the model into a TVM runtime module.
# Notably, Apache TVM Unity provides universal deployment, which means we can
# deploy the same IRModule on different backends, including CPU, GPU, and other
# emerging backends.
#
# Deploy on CPU
# ~~~~~~~~~~~~~
# We can deploy the IRModule on CPU by specifying the target as ``llvm``.

exec = relax.build(mod, target="llvm")
dev = tvm.cpu()
vm = relax.VirtualMachine(exec, dev)

raw_data = np.random.rand(1, 784).astype("float32")
data = tvm.nd.array(raw_data, dev)
cpu_out = vm["main"](data, *params_from_torch["main"]).numpy()
print(cpu_out)

######################################################################
# Deploy on GPU
# ~~~~~~~~~~~~~
# Besides the CPU backend, we can also deploy the IRModule on GPU. GPU programs
# require extra information, such as the thread bindings and shared memory
# allocations, so a further transformation is needed to generate them.
#
# We use ``DLight`` to generate the GPU programs. In this tutorial, we won't go into
# the details of ``DLight``.

from tvm import dlight as dl

with tvm.target.Target("cuda"):
    gpu_mod = dl.ApplyDefaultSchedule(
        dl.gpu.Matmul(),
        dl.gpu.Fallback(),
    )(mod)

######################################################################
# Now we can compile the IRModule for GPU, in a similar way as we did on the CPU.

exec = relax.build(gpu_mod, target="cuda")
dev = tvm.device("cuda", 0)
vm = relax.VirtualMachine(exec, dev)
# Need to allocate data and params on GPU device
data = tvm.nd.array(raw_data, dev)
gpu_params = [tvm.nd.array(p, dev) for p in params_from_torch["main"]]
gpu_out = vm["main"](data, *gpu_params).numpy()
print(gpu_out)

# Check the correctness of the results
assert np.allclose(cpu_out, gpu_out, atol=1e-3)

######################################################################
# Deploy on Other Backends
# ~~~~~~~~~~~~~~~~~~~~~~~~
# Apache TVM Unity also supports other backends, such as different kinds of GPUs
# (Metal, ROCm, Vulkan and OpenCL), different kinds of CPUs (x86, ARM), and other
# emerging backends (e.g., WebAssembly). The deployment process is similar to the
# GPU backend, as sketched below.
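
######################################################################
# For example, a Metal build would follow the same steps as the CUDA example.
# The sketch below is left commented out and assumes a Metal-capable macOS
# device; the ``"metal"`` target string and ``tvm.metal()`` device follow TVM's
# usual naming.

# with tvm.target.Target("metal"):
#     metal_mod = dl.ApplyDefaultSchedule(
#         dl.gpu.Matmul(),
#         dl.gpu.Fallback(),
#     )(mod)
# metal_exec = relax.build(metal_mod, target="metal")
# metal_dev = tvm.metal()
# metal_vm = relax.VirtualMachine(metal_exec, metal_dev)
# metal_params = [tvm.nd.array(p, metal_dev) for p in params_from_torch["main"]]
# metal_out = metal_vm["main"](tvm.nd.array(raw_data, metal_dev), *metal_params).numpy()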