# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

"""
.. _optimize_model:

End-to-End Optimize Model
=========================
This tutorial demonstrates how to optimize a machine learning model using Apache TVM. We use a
pre-trained ResNet-18 model from PyTorch and optimize it end-to-end with TVM's Relax API.
Please note that the default end-to-end optimization may not be suitable for complex models.
"""

######################################################################
# Preparation
# -----------
# First, we prepare the model and input information. We use a pre-trained ResNet-18 model from
# PyTorch.

import numpy as np
import torch
from torch import fx
from torchvision.models.resnet import ResNet18_Weights, resnet18

torch_model = resnet18(weights=ResNet18_Weights.DEFAULT)
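
######################################################################
# The pretrained weights also ship with their own preprocessing recipe. As an
# illustrative aside (not required by the rest of this tutorial, and relying on
# torchvision's weights-enum API), printing it confirms the 224x224 input
# resolution that the IRModule below is built for.

print(ResNet18_Weights.DEFAULT.transforms())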

######################################################################
# Review Overall Flow
# -------------------
# .. figure:: https://raw.githubusercontent.com/tlc-pack/web-data/main/images/design/tvm_overall_flow.svg
#    :align: center
#    :width: 80%
#
# The overall flow consists of the following steps:
#
# - **Construct or Import a Model**: Construct a neural network model or import a pre-trained
#   model from other frameworks (e.g. PyTorch, ONNX), and create the TVM IRModule, which contains
#   all the information needed for compilation, including high-level Relax functions for the
#   computational graph and low-level TensorIR functions for the tensor programs.
# - **Perform Composable Optimizations**: Perform a series of optimization transformations,
#   such as graph optimizations, tensor program optimizations, and library dispatching.
# - **Build and Universal Deployment**: Build the optimized model into a deployable module for the
#   universal runtime, and execute it on different devices, such as CPU, GPU, or other accelerators.
#

######################################################################
# Convert the model to IRModule
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Next, we convert the model to an IRModule using the Relax frontend for PyTorch for further
# optimization. Besides the model, we also need to provide the input shape and data type.

import tvm
from tvm import relax
from tvm.relax.frontend.torch import from_fx

# Give the input shape and data type
input_info = [((1, 3, 224, 224), "float32")]

# Convert the model to IRModule
with torch.no_grad():
    torch_fx_model = fx.symbolic_trace(torch_model)
    mod = from_fx(torch_fx_model, input_info, keep_params_as_input=True)

mod, params = relax.frontend.detach_params(mod)
mod.show()
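
######################################################################
# As a quick, optional sanity check (illustrative, not part of the original
# flow): ``detach_params`` returns a dictionary mapping each Relax function
# name to its list of weight NDArrays, so we can count the parameters and
# inspect a shape.

print("number of parameters:", len(params["main"]))
print("first parameter shape:", params["main"][0].shape)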

######################################################################
# IRModule Optimization
# ---------------------
# Apache TVM Unity provides a flexible way to optimize the IRModule. All the optimizations are
# centered around the IRModule and can be composed with existing pipelines. Note that individual
# transformations can be combined into an optimization pipeline via ``tvm.ir.transform.Sequential``.
#
# In this tutorial, we focus on end-to-end optimization of the model via auto-tuning. We
# leverage MetaSchedule to tune the model and store the tuning logs in a database, then
# apply the database back to the model to get the best performance.
#
# .. note::
#
#    To save CI time, we disable tuning by default and only load the pre-tuned log.
#    You can enable tuning by turning on the ``enable_tune`` flag.
#

TOTAL_TRIALS = 8000  # Change to 20000 for better performance if needed
enable_tune = False  # Enable this flag to tune the model
target = tvm.target.Target("nvidia/geforce-rtx-3090-ti")  # Change to your target device
work_dir = "tuning_logs"

with target:
    mod = tvm.ir.transform.Sequential(
        [
            # Convert BatchNorm into a sequence of simpler ops for fusion
            relax.transform.DecomposeOpsForInference(),
            # Canonicalize the bindings
            relax.transform.CanonicalizeBindings(),
            # Run the default optimization pipeline
            relax.get_pipeline("zero"),
            # Tune the model and store the log to the database
            relax.transform.MetaScheduleTuneIRMod({}, work_dir, TOTAL_TRIALS)
            if enable_tune
            else tvm.transform.Sequential([]),
            # Apply the database
            relax.transform.MetaScheduleApplyDatabase(work_dir),
        ]
    )(mod)

# Only show the main function
mod["main"].show()
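
######################################################################
# As an illustrative check (not required by the flow above), we can enumerate
# the functions in the transformed IRModule: besides the high-level ``main``
# Relax function, the pipeline has generated low-level TensorIR functions,
# which are the kernels that get scheduled and tuned.

for gvar, func in mod.functions.items():
    print(gvar.name_hint, ":", type(func).__name__)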

######################################################################
# Build and Deploy
# ----------------
# Finally, we build the optimized model and deploy it to the target device.

ex = relax.build(mod, target="cuda")
dev = tvm.device("cuda", 0)
vm = relax.VirtualMachine(ex, dev)
# We need to allocate the input data and parameters on the GPU device
gpu_data = tvm.nd.array(np.random.rand(1, 3, 224, 224).astype("float32"), dev)
gpu_params = [tvm.nd.array(p, dev) for p in params["main"]]
gpu_out = vm["main"](gpu_data, *gpu_params).numpy()

print(gpu_out.shape)
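
######################################################################
# As an optional sanity check (a sketch, not part of the original tutorial), we
# can compare the TVM output against PyTorch on the same input. We switch the
# PyTorch model to ``eval`` mode so that BatchNorm uses running statistics,
# matching the inference-mode decomposition applied earlier; small numerical
# differences are expected.

with torch.no_grad():
    torch_out = torch_model.eval()(torch.from_numpy(gpu_data.numpy())).numpy()
print("max abs difference vs PyTorch:", float(np.max(np.abs(gpu_out - torch_out))))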