Merged
2 changes: 2 additions & 0 deletions docs/conf.py
@@ -423,6 +423,7 @@ def jupyter_notebook(script_blocks, gallery_conf, target_dir, real_func):
tvm_path.joinpath("vta", "tutorials"),
# New tutorial structure under docs folder
tvm_path.joinpath("docs", "get_started", "tutorials"),
tvm_path.joinpath("docs", "how_to", "tutorials"),
]

gallery_dirs = [
@@ -440,6 +441,7 @@ def jupyter_notebook(script_blocks, gallery_conf, target_dir, real_func):
"topic/vta/tutorials",
# New tutorial structure under docs folder
"get_started/tutorials/",
"how_to/tutorials/",
]


2 changes: 0 additions & 2 deletions docs/dev/how_to/how_to.rst
@@ -29,5 +29,3 @@ various areas of the TVM stack.
relay_add_op
relay_add_pass
relay_bring_your_own_codegen
pytest_target_parametrization
setup_rpc_system
28 changes: 28 additions & 0 deletions docs/how_to/dev/index.rst
@@ -0,0 +1,28 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

.. http://www.apache.org/licenses/LICENSE-2.0

.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.

Develop Apache TVM
===================
This section contains a collection of tips about how to work on
various areas of the TVM stack.

.. toctree::
:maxdepth: 1

pytest_target_parametrization
setup_rpc_system
../../errors
@@ -76,7 +76,7 @@ In our community, there are multiple RPC server implementations, e.g., ``apps/and

The RPC server needs to run on the device machine, and it usually depends on the xPU driver, the enhanced TVM runtime with xPU support, and other libraries, so please set up the dependent components first, e.g., install the KMD driver and ensure the required dynamic libraries can be found through the environment variable ``LD_LIBRARY_PATH``.
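For instance, making a vendor SDK's shared libraries visible to the TVM runtime might look like the following sketch. The path ``/opt/xpu/lib`` is purely illustrative -- substitute wherever your xPU SDK installs its user-space libraries:

```shell
# Hypothetical library location -- adjust to your vendor SDK's install prefix.
XPU_LIB_DIR=/opt/xpu/lib

# Prepend the driver's shared libraries to the runtime search path.
export LD_LIBRARY_PATH=${XPU_LIB_DIR}:${LD_LIBRARY_PATH}
echo "${LD_LIBRARY_PATH}"
```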

If the required compilation environment can be setup on your device machine, i.e., you needn't to do the cross compilation, then just follow the instruction of `<https://tvm.apache.org/docs/install/from_source.html>`_ to compile the TVM runtime and directly jump to the step :ref:`luanch-rpc-server`.
If the required compilation environment can be set up on your device machine, i.e., you do not need to cross compile, then just follow the instructions at `<https://tvm.apache.org/docs/install/from_source.html>`_ to compile the TVM runtime and jump directly to the step :ref:`launch-rpc-server`.

1. Cross Compile TVM Runtime
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -134,9 +134,9 @@ Then copy the compressed package ``tvm_runtime.tar.gz`` to your concrete device ma
$ export PYTHONPATH=`pwd`/python:${PYTHONPATH}


.. _luanch-rpc-server:
.. _launch-rpc-server:

3. Luanch RPC Server
3. Launch RPC Server
^^^^^^^^^^^^^^^^^^^^

The RPC server can be launched on your device machine with commands like the ones below; please modify the *RPC_TRACKER_IP*, *RPC_TRACKER_PORT*, *RPC_PROXY_IP*, *RPC_PROXY_PORT*, and *RPC_KEY* according to your concrete environment.
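As a sketch under assumed placeholder values (the IP, port, and key below are illustrative, not defaults), a typical launch through TVM's stock RPC server entry point ``tvm.exec.rpc_server`` looks like:

```shell
# Placeholder values -- substitute your concrete environment.
RPC_TRACKER_IP=192.168.1.10
RPC_TRACKER_PORT=9190
RPC_KEY=my_device

# Register the device with the tracker under the given key.
CMD="python3 -m tvm.exec.rpc_server --tracker ${RPC_TRACKER_IP}:${RPC_TRACKER_PORT} --key ${RPC_KEY}"
echo "${CMD}"
```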
22 changes: 3 additions & 19 deletions docs/how_to/index.rst
@@ -15,25 +15,9 @@
specific language governing permissions and limitations
under the License.

How To Guides
=============

These user-focused "how to" guides are designed to help you find answers to
specific questions, like "How do I compile a model?" or "How do I optimize a
schedule with tensor expressions?"

.. toctree::
:maxdepth: 1

compile_models/index
deploy/index
work_with_relay/index
work_with_schedules/index
optimize_operators/index
tune_with_autotvm/index
tune_with_autoscheduler/index
work_with_microtvm/index
extend_tvm/index
profile/index
../errors
../faq
tutorials/e2e_opt_model
tutorials/cross_compilation_and_rpc
dev/index
38 changes: 38 additions & 0 deletions docs/how_to/legacy_index.rst
@@ -0,0 +1,38 @@
.. Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at

.. http://www.apache.org/licenses/LICENSE-2.0

.. Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.

How To Guides
=============

These user-focused "how to" guides are designed to help you find answers to
specific questions, like "How do I compile a model?" or "How do I optimize a
schedule with tensor expressions?"

.. toctree::
:maxdepth: 1

compile_models/index
deploy/index
work_with_relay/index
work_with_schedules/index
optimize_operators/index
tune_with_autotvm/index
tune_with_autoscheduler/index
work_with_microtvm/index
extend_tvm/index
profile/index
../faq
2 changes: 2 additions & 0 deletions docs/how_to/tutorials/README.txt
@@ -0,0 +1,2 @@
HOW TO
------
139 changes: 139 additions & 0 deletions docs/how_to/tutorials/e2e_opt_model.py
@@ -0,0 +1,139 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""
.. _optimize_model:

End-to-End Optimize Model
=========================
This tutorial demonstrates how to optimize a machine learning model using Apache TVM. We will
use a pre-trained ResNet-18 model from PyTorch and optimize it end-to-end using TVM's Relax API.
Please note that the default end-to-end optimization may not suit complex models.
"""

######################################################################
# Preparation
# -----------
# First, we prepare the model and input information. We use a pre-trained ResNet-18 model from
# PyTorch.

import os
import sys
import numpy as np
import torch
from torch import fx
from torchvision.models.resnet import ResNet18_Weights, resnet18

torch_model = resnet18(weights=ResNet18_Weights.DEFAULT)

######################################################################
# Review Overall Flow
# -------------------
# .. figure:: https://raw.githubusercontent.com/tlc-pack/web-data/main/images/design/tvm_overall_flow.svg
#    :align: center
#    :width: 80%
#
# The overall flow consists of the following steps:
#
# - **Construct or Import a Model**: Construct a neural network model or import a pre-trained
#   model from another framework (e.g. PyTorch, ONNX), and create the TVM IRModule, which contains
#   all the information needed for compilation, including high-level Relax functions for the
#   computational graph and low-level TensorIR functions for tensor programs.
# - **Perform Composable Optimizations**: Perform a series of optimization transformations,
#   such as graph optimizations, tensor program optimizations, and library dispatching.
# - **Build and Universal Deployment**: Build the optimized model into a deployable module for the
#   universal runtime, and execute it on different devices, such as CPU, GPU, or other accelerators.
#


######################################################################
# Convert the model to IRModule
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Next, we convert the model to an IRModule using the Relax frontend for PyTorch for further
# optimization. Besides the model, we also need to provide the input shape and data type.

import tvm
from tvm import relax
from tvm.relax.frontend.torch import from_fx

torch_model = resnet18(weights=ResNet18_Weights.DEFAULT)

# Give the input shape and data type
input_info = [((1, 3, 224, 224), "float32")]

# Convert the model to IRModule
with torch.no_grad():
    torch_fx_model = fx.symbolic_trace(torch_model)
    mod = from_fx(torch_fx_model, input_info, keep_params_as_input=True)

mod, params = relax.frontend.detach_params(mod)
mod.show()
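######################################################################
# As a quick sanity check before compiling, one can build a dummy input that
# matches ``input_info``. This is a numpy-only sketch; the (shape, dtype) pair
# mirrors the declaration above.

```python
import numpy as np

# The same (shape, dtype) declaration used for the conversion above.
input_info = [((1, 3, 224, 224), "float32")]

shape, dtype = input_info[0]
dummy_input = np.random.rand(*shape).astype(dtype)

print(dummy_input.shape, dummy_input.dtype)  # → (1, 3, 224, 224) float32
```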

######################################################################
# IRModule Optimization
# ---------------------
# Apache TVM Unity provides a flexible way to optimize the IRModule. Everything centered
# around IRModule optimization can be composed into existing pipelines. Note that individual
# transformations can be combined into an optimization pipeline via ``tvm.ir.transform.Sequential``.
#
# In this tutorial, we focus on the end-to-end optimization of the model via auto-tuning. We
# leverage MetaSchedule to tune the model and store the tuning logs in a database. We then
# apply the database to the model to get the best performance.
#

TOTAL_TRIALS = 8000 # Change to 20000 for better performance if needed
target = tvm.target.Target("nvidia/geforce-rtx-3090-ti") # Change to your target device
work_dir = "tuning_logs"

# Skip running in CI environment
IS_IN_CI = os.getenv("CI", "") == "true"
if IS_IN_CI:
    sys.exit(0)

with target:
    mod = tvm.ir.transform.Sequential(
        [
            # Convert BatchNorm into a sequence of simpler ops for fusion
            relax.transform.DecomposeOpsForInference(),
            # Canonicalize the bindings
            relax.transform.CanonicalizeBindings(),
            # Run default optimization pipeline
            relax.get_pipeline("zero"),
            # Tune the model and store the log to database
            relax.transform.MetaScheduleTuneIRMod({}, work_dir, TOTAL_TRIALS),
            # Apply the database
            relax.transform.MetaScheduleApplyDatabase(work_dir),
        ]
    )(mod)

# Only show the main function
mod["main"].show()

######################################################################
# Build and Deploy
# ----------------
# Finally, we build the optimized model and deploy it to the target device.

ex = relax.build(mod, target="cuda")
dev = tvm.device("cuda", 0)
vm = relax.VirtualMachine(ex, dev)
# Need to allocate data and params on GPU device
gpu_data = tvm.nd.array(np.random.rand(1, 3, 224, 224).astype("float32"), dev)
gpu_params = [tvm.nd.array(p, dev) for p in params["main"]]
gpu_out = vm["main"](gpu_data, *gpu_params).numpy()

print(gpu_out.shape)
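######################################################################
# For a classification model like ResNet-18, the ``(1, 1000)`` output can be
# reduced to a predicted ImageNet class index with ``argmax``. The sketch below
# uses a synthetic logits array standing in for ``gpu_out``; class index 281 is
# an arbitrary stand-in, not a real prediction.

```python
import numpy as np

# Synthetic logits standing in for `gpu_out`; the real output shape is (1, 1000).
logits = np.zeros((1, 1000), dtype="float32")
logits[0, 281] = 5.0  # pretend class 281 scores highest

predicted_class = int(np.argmax(logits, axis=1)[0])
print(predicted_class)  # → 281
```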
16 changes: 6 additions & 10 deletions docs/index.rst
@@ -36,22 +36,13 @@ driving its costs down.
install/index
get_started/tutorials/quick_start
get_started/tutorials/ir_module
contribute/index

.. toctree::
:maxdepth: 1
:caption: User Guide
:caption: How To

tutorial/index
how_to/index

.. toctree::
:maxdepth: 1
:caption: Developer Guide

dev/tutorial/index
dev/how_to/how_to.rst

.. toctree::
:maxdepth: 1
:caption: API Reference
@@ -63,6 +54,10 @@ driving its costs down.
:maxdepth: 1
:caption: Legacy

tutorial/index
how_to/legacy_index
dev/tutorial/index
dev/how_to/how_to.rst
reference/langref/index
arch/index
topic/microtvm/index
@@ -72,6 +67,7 @@ driving its costs down.
:maxdepth: 1
:caption: About

contribute/index
reference/publications
reference/security

50 changes: 0 additions & 50 deletions gallery/tutorial/install.py

This file was deleted.

2 changes: 0 additions & 2 deletions gallery/tutorial/introduction.py
@@ -35,13 +35,11 @@
--------

#. :doc:`Introduction <introduction>`
#. :doc:`Installing TVM <install>`
#. :doc:`Compiling and Optimizing a Model with the Command Line Interface <tvmc_command_line_driver>`
#. :doc:`Compiling and Optimizing a Model with the Python Interface <autotvm_relay_x86>`
#. :doc:`Working with Operators Using Tensor Expression <tensor_expr_get_started>`
#. :doc:`Optimizing Operators with Templates and AutoTVM <autotvm_matmul_x86>`
#. :doc:`Optimizing Operators with Template-free AutoScheduler <auto_scheduler_matmul_x86>`
#. :doc:`Cross Compilation and Remote Procedure Calls (RPC) <cross_compilation_and_rpc>`
#. :doc:`Compiling Deep Learning Models for GPUs <relay_quick_start>`
"""

Expand Down