diff --git a/about.html b/about.html index 6a3ce251d38f8..9c3734fa2394b 100644 --- a/about.html +++ b/about.html @@ -88,7 +88,7 @@

About

Optimization and acceleration

- Run any ONNX model using a single set of inference APIs that provide access to the best hardware acceleration available. Built-in optimization features trim and consolidate nodes without impacting model accuracy. Additionally, full backwards compatibility for ONNX and ONNX-ML ensures all ONNX models can be inferenced. + Run any ONNX model using a single set of inference APIs that provide access to the best hardware acceleration available. Built-in optimization features trim and consolidate nodes without impacting model accuracy. Additionally, full backwards compatibility for ONNX and ONNX-ML ensures all ONNX models can be inferenced.

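The two claims in this blurb — a single set of inference APIs and built-in graph optimization — correspond to the `SessionOptions`/`InferenceSession` pattern exposed by every language binding. A minimal Python sketch (the model path and the CUDA/CPU provider list are illustrative assumptions, not taken from the page above):

```python
import onnxruntime as ort

# Built-in graph optimizations (node trimming/fusion) are applied at session creation.
sess_options = ort.SessionOptions()
sess_options.graph_optimization_level = ort.GraphOptimizationLevel.ORT_ENABLE_ALL

# The same API runs the model on whichever execution providers are available;
# nodes an accelerator cannot handle fall back to the CPU provider.
session = ort.InferenceSession(
    "model.onnx",  # hypothetical model file
    sess_options,
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)
print(session.get_providers())  # shows the providers actually in use
```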
diff --git a/docs/api/csharp-api.md b/docs/api/csharp-api.md index 17c14ae3da0ae..29fd50750398a 100644 --- a/docs/api/csharp-api.md +++ b/docs/api/csharp-api.md @@ -1,7 +1,6 @@ --- title: C# API -parent: API Docs -nav_order: 2 +nav_exclude: true --- # C# API Reference diff --git a/docs/api/index.md b/docs/api/index.md index fa430d756df7f..dd13b4f437562 100644 --- a/docs/api/index.md +++ b/docs/api/index.md @@ -1,16 +1,16 @@ --- title: API Docs -has_children: true nav_order: 5 --- # ORT API docs {: .no_toc } |:----------------------------------------------------------------------------------| -| [Python API Docs](./python/api_summary.html){: .btn } | -| [Java API Docs](./java/index.html){: .btn} | -| [Objective-C Docs](./objectivec/index.html){: .btn} | -| [WinRT API Docs](https://docs.microsoft.com/windows/ai/windows-ml/api-reference){: .btn} | -| [C# API Docs](./csharp-api){: .btn} | -| [JavaScript API Docs](./js/index.html){: .btn} | -| [Other API Docs](./other-apis){: .btn} | \ No newline at end of file +| [Python API Docs](https://onnxruntime.ai/docs/api/python/api_summary.html){: .btn target="_blank"} | +| [Java API Docs](https://onnxruntime.ai/docs/api/java/index.html){: .btn target="_blank"} | +| [C# API Docs](./csharp-api){: .btn target="_blank"} | +| [C/C++ API Docs](https://onnxruntime.ai/docs/api/c/){: .btn target="_blank"} | +| [WinRT API Docs](https://docs.microsoft.com/en-us/windows/ai/windows-ml/api-reference){: .btn target="_blank"} | +| [Objective-C Docs](https://onnxruntime.ai/docs/api/objectivec/index.html){: .btn target="_blank"} | +| [JavaScript API Docs](https://onnxruntime.ai/docs/api/js/index.html){: .btn target="_blank"} | +| [Other API Docs](./other-apis){: .btn target="_blank"} | \ No newline at end of file diff --git a/docs/api/java-api.md b/docs/api/java-api.md deleted file mode 100644 index ddbc83a4a03fd..0000000000000 --- a/docs/api/java-api.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -title: Java API -parent: API Docs -nav_order: 3 ---- - -# ORT Java Docs -{: .no_toc } - - [Go to the Java Docs](./java/index.html){: .btn .mr-4 target="_blank"} diff --git a/docs/api/js-api.md b/docs/api/js-api.md deleted file mode 100644 index 438de4ebdbb0d..0000000000000 --- a/docs/api/js-api.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -title: JavaScript API -parent: API Docs -nav_order: 6 ---- - -# ONNX Runtime JavaScript API -{: .no_toc } - - [Go to the JavaScript API Docs](./js/index.html){: .btn .mr-4 target="_blank"} diff --git a/docs/api/objectivec-api.md b/docs/api/objectivec-api.md deleted file mode 100644 index a8322f6be3492..0000000000000 --- a/docs/api/objectivec-api.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -title: Objective-C API -parent: API Docs -nav_order: 4 ---- - -# ORT Objective-C Docs -{: .no_toc } - - [Go to the Objective-C Docs](./objectivec/index.html){: .btn .mr-4 target="_blank"} diff --git a/docs/api/other-apis.md b/docs/api/other-apis.md index bc0ec994c464f..aa3443f1fc668 100644 --- a/docs/api/other-apis.md +++ b/docs/api/other-apis.md @@ -1,7 +1,6 @@ --- title: Other Inference APIs -parent: API Docs -nav_order: 7 +nav_exclude: true --- # Other APIs diff --git a/docs/api/python-api.md b/docs/api/python-api.md deleted file mode 100644 index 9b822ea424a29..0000000000000 --- a/docs/api/python-api.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -title: Python API -parent: API Docs -nav_order: 1 ---- - -# ORT Python API -{: .no_toc } - - - [Go to the ORT Python API Docs](./python/api_summary.html){: .btn .mr-4 target="_blank"} \ No newline at end of file diff --git 
a/docs/api/winrt-api.md b/docs/api/winrt-api.md deleted file mode 100644 index 4cb951becec41..0000000000000 --- a/docs/api/winrt-api.md +++ /dev/null @@ -1,10 +0,0 @@ ---- -title: WinRT API -parent: API Docs -nav_order: 5 ---- -# ORT WinRT API on Microsoft Docs -{: .no_toc } - - - [Go to the WinRT API on Microsoft Docs](https://docs.microsoft.com/windows/ai/windows-ml/api-reference){: .btn .mr-4 target="_blank"} diff --git a/docs/build/eps.md b/docs/build/eps.md index eaeabd22eea66..274eb018428a7 100644 --- a/docs/build/eps.md +++ b/docs/build/eps.md @@ -188,7 +188,7 @@ These instructions are for JetPack SDK 4.6. ## oneDNN -See more information on oneDNN (formerly DNNL) [here](../execution-providers/DNNL-ExecutionProvider.md). +See more information on oneDNN (formerly DNNL) [here](../execution-providers/oneDNN-ExecutionProvider.md). ### Build Instructions {: .no_toc } diff --git a/docs/build/inferencing.md b/docs/build/inferencing.md index f3d955d099e97..8d57f1d118775 100644 --- a/docs/build/inferencing.md +++ b/docs/build/inferencing.md @@ -58,7 +58,7 @@ Also, if you want to cross-compile for Apple Silicon in an Intel-based MacOS mac #### Notes * Please note that these instructions build the debug build, which may have performance tradeoffs -* To build the version from each release (which include Windows, Linux, and Mac variants), see these .yml files for reference: [CPU](https://github.com/microsoft/onnxruntime/blob/master/tools/ci_build/github/azure-pipelines/nuget/cpu-esrp-pipeline.yml), [GPU](https://github.com/microsoft/onnxruntime/blob/master/tools/ci_build/github/azure-pipelines/nuget/gpu-esrp-pipeline.yml) +* To build the version from each release (which includes Windows, Linux, and Mac variants), see these [.yml files](https://github.com/microsoft/onnxruntime/tree/master/tools/ci_build/github/azure-pipelines/nuget) for reference * The build script runs all unit tests by default for native builds and skips tests by default for cross-compiled builds. To skip the tests, run with `--build` or `--update --build`. * If you need to install protobuf 3.6.1 from source code (cmake/external/protobuf), please note: diff --git a/docs/build/reduced.md b/docs/build/reduced.md index 8956b9a659cf8..0c7ad9c5c6a8e 100644 --- a/docs/build/reduced.md +++ b/docs/build/reduced.md @@ -11,7 +11,7 @@ For applications where package binary size is important, ONNX Runtime provides o To reduce the compiled binary size of ONNX Runtime, the operator kernels included in the build can be reduced to just the kernels required by your model/s. -For deployment on mobile devices specifically, please read more detailed guidance on [How to: Build for mobile](./mobile.md). +For deployment on mobile devices specifically, please read more detailed guidance on [Deploy ONNX Runtime Mobile](../tutorials/mobile/).
## Contents {: .no_toc } diff --git a/docs/ecosystem/index.md b/docs/ecosystem/index.md index 7d309a2f6b547..9d3da18692358 100644 --- a/docs/ecosystem/index.md +++ b/docs/ecosystem/index.md @@ -19,7 +19,7 @@ ONNX Runtime functions as part of an ecosystem of tools and platforms to deliver * [Azure Container Instance: Facial Expression Recognition](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/onnx/onnx-inference-facial-expression-recognition-deploy.ipynb){:target="_blank"} * [Azure Container Instance: MNIST](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/onnx/onnx-inference-mnist-deploy.ipynb){:target="_blank"} * [Azure Container Instance: Image classification (Resnet)](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/deployment/onnx/onnx-modelzoo-aml-deploy-resnet50.ipynb){:target="_blank"} -* [Azure Kubernetes Services: FER+](https://github.com/microsoft/onnxruntime/tree/master/docs/python/notebooks/onnx-inference-byoc-gpu-cpu-aks.ipynb){:target="_blank"} +* [Azure Kubernetes Services: FER+](https://github.com/microsoft/onnxruntime/blob/master/docs/python/inference/notebooks/onnx-inference-byoc-gpu-cpu-aks.ipynb){:target="_blank"} * [Azure IoT Sedge (Intel UP2 device with OpenVINO)](https://github.com/Azure-Samples/onnxruntime-iot-edge/blob/master/AzureML-OpenVINO/README.md){:target="_blank"} * [Automated Machine Learning](https://github.com/Azure/MachineLearningNotebooks/blob/master/how-to-use-azureml/automated-machine-learning/classification-bank-marketing-all-features/auto-ml-classification-bank-marketing-all-features.ipynb){:target="_blank"} @@ -31,7 +31,7 @@ ONNX Runtime functions as part of an ecosystem of tools and platforms to deliver * [Azure Video Analytics: YOLOv3 and TinyYOLOv3](https://github.com/Azure/live-video-analytics/tree/master/utilities/video-analysis/yolov3-onnx){:target="_blank"} ## Azure SQL Edge -* [ML predictions in Azure SQL Edge and Azure SQL Managed Instance](https://docs.microsoft.com/en-us/azure/azure-sql-edge/deploy-onnxJ){:target="_blank"} +* [ML predictions in Azure SQL Edge and Azure SQL Managed Instance](https://docs.microsoft.com/en-us/azure/azure-sql-edge/deploy-onnx){:target="_blank"} ## Azure Synapse Analytics * [ML predictions in Synapse SQL](https://docs.microsoft.com/en-us/azure/synapse-analytics/sql-data-warehouse/sql-data-warehouse-predict){:target="_blank"} diff --git a/docs/execution-providers/ArmNN-ExecutionProvider.md b/docs/execution-providers/ArmNN-ExecutionProvider.md index 71bdd192c67c2..9c234215a749c 100644 --- a/docs/execution-providers/ArmNN-ExecutionProvider.md +++ b/docs/execution-providers/ArmNN-ExecutionProvider.md @@ -16,7 +16,7 @@ nav_order: 3 [ArmNN](https://github.com/ARM-software/armnn) is an open source inference engine maintained by Arm and Linaro companies. The integration of ArmNN as an execution provider (EP) into ONNX Runtime accelerates performance of ONNX model workloads across Armv8 cores. ## Build -For build instructions, please see the [BUILD page](./build/eps.md#armnn). +For build instructions, please see the [BUILD page](../build/eps.md#armnn). ## Usage ### C/C++ @@ -27,9 +27,9 @@ Ort::SessionOptions so; bool enable_cpu_mem_arena = true; Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ArmNN(so, enable_cpu_mem_arena)); ``` -The C API details are [here](./get-started/with-c.html.md). +The C API details are [here](../get-started/with-c.md). 
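For completeness, the ArmNN selection shown in the C/C++ snippet above can also be sketched from Python, assuming a wheel built with the ArmNN EP and the provider registered under the name `ArmNNExecutionProvider` (both assumptions, not stated on the page above):

```python
import onnxruntime as ort

# Mirrors the C/C++ registration above: try ArmNN first, and fall back to the
# CPU provider for any operators the ArmNN EP cannot handle.
session = ort.InferenceSession(
    "model.onnx",  # hypothetical model file
    providers=["ArmNNExecutionProvider", "CPUExecutionProvider"],
)
```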
## Performance Tuning -For performance tuning, please see guidance on this page: [ONNX Runtime Perf Tuning](./performance/tune-performance.md) +For performance tuning, please see guidance on this page: [ONNX Runtime Perf Tuning](../performance/tune-performance.md) When/if using [onnxruntime_perf_test](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/test/perftest), use the flag -e armnn diff --git a/docs/execution-providers/CUDA-ExecutionProvider.md b/docs/execution-providers/CUDA-ExecutionProvider.md index de142ca917efd..8658dce1c13be 100644 --- a/docs/execution-providers/CUDA-ExecutionProvider.md +++ b/docs/execution-providers/CUDA-ExecutionProvider.md @@ -17,7 +17,7 @@ The CUDA Execution Provider enables hardware accelerated computation on Nvidia C {:toc} ## Install -Pre-built binaries of ONNX Runtime with CUDA EP are published for most language bindings. Please reference [How to - Install ORT](../install.html#inference). +Pre-built binaries of ONNX Runtime with CUDA EP are published for most language bindings. Please reference [Install ORT](../install). ## Requirements @@ -25,6 +25,7 @@ Please reference table below for official GPU packages dependencies for the ONNX
|ONNX Runtime|CUDA|cuDNN|Notes|
|---|---|---|---|
+|1.9|11.4|8.2.4 (Linux)<br/>8.2.2.26 (Windows)|libcudart 11.4.43<br/>libcufft 10.5.2.100<br/>libcurand 10.2.5.120<br/>libcublasLt 11.6.1.51<br/>libcublas 11.6.1.51<br/>libcudnn 8.2.4<br/>libcupti.so 2021.2.2|
|1.8|11.0.3|8.0.4 (Linux)<br/>8.0.2.39 (Windows)|libcudart 11.0.221<br/>libcufft 10.2.1.245<br/>libcurand 10.2.1.245<br/>libcublasLt 11.2.0.252<br/>libcublas 11.2.0.252<br/>libcudnn 8.0.4<br/>libcupti.so 2020.1.1|
|1.7|11.0.3|8.0.4 (Linux)<br/>8.0.2.39 (Windows)|libcudart 11.0.221<br/>libcufft 10.2.1.245<br/>libcurand 10.2.1.245<br/>libcublasLt 11.2.0.252<br/>libcublas 11.2.0.252<br/>libcudnn 8.0.4|
|1.5-1.6|10.2|8.0.3|CUDA 11 can be built from source|
diff --git a/docs/execution-providers/DirectML-ExecutionProvider.md b/docs/execution-providers/DirectML-ExecutionProvider.md index ffdec5a124eaf..2253816b6c14c 100644 --- a/docs/execution-providers/DirectML-ExecutionProvider.md +++ b/docs/execution-providers/DirectML-ExecutionProvider.md @@ -22,7 +22,7 @@ The DirectML Execution Provider currently uses DirectML version 1.4.2. {:toc} ## Install -Pre-built packages of ORT with the DirectML EP is published on Nuget.org. See [How to: Install ORT](./install). +Pre-built packages of ORT with the DirectML EP are published on Nuget.org. See: [Install ORT](../install). ## Requirements diff --git a/docs/execution-providers/MIGraphX-ExecutionProvider.md b/docs/execution-providers/MIGraphX-ExecutionProvider.md index 41618a165ccf5..311840bdd543e 100644 --- a/docs/execution-providers/MIGraphX-ExecutionProvider.md +++ b/docs/execution-providers/MIGraphX-ExecutionProvider.md @@ -31,12 +31,12 @@ Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_MiGraphX(sf, device_i You can check [here](https://github.com/scxiao/ort_test/tree/master/char_rnn) for a specific c/c++ program. -The C API details are [here](../get-started/with-c.html.md). +The C API details are [here](../get-started/with-c.md). ### Python When using the Python wheel from the ONNX Runtime build with MIGraphX execution provider, it will be automatically prioritized over the default GPU or CPU execution providers. There is no need to separately register the execution -provider. Python APIs details are [here](/python/api_summary). +provider. Python APIs details are [here](https://onnxruntime.ai/docs/api/python/api_summary.html). *Note that the next release (ORT 1.10) will require explicitly setting the providers parameter if you want to use an execution provider other than the default CPU provider when instantiating InferenceSession.* You can check [here](https://github.com/scxiao/ort_test/tree/master/python/run_onnx) for a python script to run an diff --git a/docs/execution-providers/TensorRT-ExecutionProvider.md b/docs/execution-providers/TensorRT-ExecutionProvider.md index 57c08d5faaeb2..66982d999ee99 100644 --- a/docs/execution-providers/TensorRT-ExecutionProvider.md +++ b/docs/execution-providers/TensorRT-ExecutionProvider.md @@ -20,6 +20,17 @@ With the TensorRT execution provider, the ONNX Runtime delivers better inferenci ## Install Pre-built packages and Docker images are available for Jetpack in the [Jetson Zoo](https://elinux.org/Jetson_Zoo#ONNX_Runtime). +## Requirements + +|ONNX Runtime|TensorRT|CUDA| +|---|---|---| +|1.9|8.0|11.4| +|1.7-1.8|7.2|11.0.3| +|1.5-1.6|7.1|10.2| +|1.2-1.4|7.0|10.1| +|1.0-1.1|6.0|10.0| + +For more details on CUDA/cuDNN versions, please see [CUDA EP requirements](./CUDA-ExecutionProvider.md#requirements). ## Build @@ -38,7 +49,7 @@ Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(sf, device_id)); Ort::Session session(env, model_path, sf); ``` -The C API details are [here](../get-started/with-c.html.md). +The C API details are [here](../get-started/with-c.md). #### Shape Inference for TensorRT Subgraphs If some operators in the model are not supported by TensorRT, ONNX Runtime will partition the graph and only send supported subgraphs to TensorRT execution provider. Because TensorRT requires that all inputs of the subgraphs have shape specified, ONNX Runtime will throw an error if there is no input shape info.
In this case please run shape inference for the entire model first by running script [here](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/symbolic_shape_infer.py). @@ -175,7 +186,7 @@ sess.set_providers(["TensorrtExecutionProvider"],[{'device_id': '1', 'trt_max_wo ``` ## Performance Tuning -For performance tuning, please see guidance on this page: [ONNX Runtime Perf Tuning](../tutorials/mobile/tune-performance.md) +For performance tuning, please see guidance on this page: [ONNX Runtime Perf Tuning](../performance/tune-performance.md) When/if using [onnxruntime_perf_test](https://github.com/microsoft/onnxruntime/tree/master/onnxruntime/test/perftest#onnxruntime-performance-test), use the flag `-e tensorrt` diff --git a/docs/execution-providers/index.md b/docs/execution-providers/index.md index 07d6dae560faa..33debbf8d5222 100644 --- a/docs/execution-providers/index.md +++ b/docs/execution-providers/index.md @@ -35,7 +35,7 @@ ONNX Runtime supports many different execution providers today. Some of the EPs ### Add an Execution Provider -Developers of specialized HW acceleration solutions can integrate with ONNX Runtime to execute ONNX models on their stack. To create an EP to interface with ONNX Runtime you must first identify a unique name for the EP. Follow the steps outlined [here](../execution-provider/add-execution-provider.md) to integrate your code in the repo. +Developers of specialized HW acceleration solutions can integrate with ONNX Runtime to execute ONNX models on their stack. To create an EP to interface with ONNX Runtime you must first identify a unique name for the EP. See: [Add a new execution provider](add-execution-provider.md) for detailed instructions. ### Build ONNX Runtime package with EPs diff --git a/docs/get-started/training-pytorch.md b/docs/get-started/training-pytorch.md index 0aafeed4905cb..37f1acac9c2f2 100644 --- a/docs/get-started/training-pytorch.md +++ b/docs/get-started/training-pytorch.md @@ -1,7 +1,7 @@ --- title: ORT Training with PyTorch parent: Get Started -nav_order: 9 +nav_order: 11 --- # Get started with ORT for Training API (PyTorch) diff --git a/docs/get-started/with-c.md b/docs/get-started/with-c.md index caf8687546c8d..11411a024352b 100644 --- a/docs/get-started/with-c.md +++ b/docs/get-started/with-c.md @@ -1,7 +1,7 @@ --- title: C parent: Get Started -nav_exclude: true +nav_order: 4 --- # Get started with ORT for C @@ -47,7 +47,7 @@ Refer to [onnxruntime_c_api.h](https://github.com/microsoft/onnxruntime/blob/mas * Converting an in-memory ONNX Tensor encoded in protobuf format to a pointer that can be used as model input. * Setting the thread pool size for each session. * Setting graph optimization level for each session. -* Dynamically loading custom ops. [Instructions](../tutorials/mobile/add-custom-op.md) +* Dynamically loading custom ops. [Instructions](../reference/operators/add-custom-op.md) * Ability to load a model from a byte array. See ```OrtCreateSessionFromArray``` in [onnxruntime_c_api.h](https://github.com/microsoft/onnxruntime/blob/master/include/onnxruntime/core/session/onnxruntime_c_api.h). * **Global/shared threadpools:** By default each session creates its own set of threadpools. 
In situations where multiple sessions need to be created (to infer different models) in the same process, you end up with several threadpools created diff --git a/docs/get-started/with-cpp.md b/docs/get-started/with-cpp.md index 169c25769601a..b815fabedd10d 100644 --- a/docs/get-started/with-cpp.md +++ b/docs/get-started/with-cpp.md @@ -17,15 +17,15 @@ nav_order: 2 | Artifact | Description | Supported Platforms | |-----------|-------------|---------------------| -| [Microsoft.ML.OnnxRuntime](https://www.nuget.org/packages/Microsoft.ML.OnnxRuntime) | CPU (Release) |Windows, Linux, Mac, X64, X86 (Windows-only), ARM64 (Windows-only)...more details: [compatibility](../references/compatibility) | -| [Microsoft.ML.OnnxRuntime.Gpu](https://www.nuget.org/packages/Microsoft.ML.OnnxRuntime.gpu) | GPU - CUDA (Release) | Windows, Linux, Mac, X64...more details: [compatibility](../references/compatibility) | +| [Microsoft.ML.OnnxRuntime](https://www.nuget.org/packages/Microsoft.ML.OnnxRuntime) | CPU (Release) |Windows, Linux, Mac, X64, X86 (Windows-only), ARM64 (Windows-only)...more details: [compatibility](../reference/compatibility.md) | +| [Microsoft.ML.OnnxRuntime.Gpu](https://www.nuget.org/packages/Microsoft.ML.OnnxRuntime.gpu) | GPU - CUDA (Release) | Windows, Linux, Mac, X64...more details: [compatibility](../reference/compatibility.md) | | [Microsoft.ML.OnnxRuntime.DirectML](https://www.nuget.org/packages/Microsoft.ML.OnnxRuntime.directml) | GPU - DirectML (Release) | Windows 10 1709+ | | [ort-nightly](https://aiinfra.visualstudio.com/PublicPackages/_packaging?_a=feed&feed=ORT-Nightly) | CPU, GPU (Dev) | Same as Release versions | .zip and .tgz files are also included as assets in each [Github release](https://github.com/microsoft/onnxruntime/releases). ## API Reference -The C++ API is a thin wrapper of the C API. Please refer to [C API](./with-c.html) for more details. +The C++ API is a thin wrapper of the C API. Please refer to [C API](./with-c.md) for more details. 
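The session-level features listed for the C API above (loading a model from a byte array, dynamically loading custom-op libraries, per-session thread-pool and optimization settings) have direct counterparts in the higher-level bindings. A minimal Python sketch — the file names are placeholders and the custom-op shared library is assumed to exist:

```python
import onnxruntime as ort

sess_options = ort.SessionOptions()
sess_options.intra_op_num_threads = 2  # per-session thread pool size (assumed value)
sess_options.register_custom_ops_library("./libcustom_ops.so")  # hypothetical custom-op library

# Load the model from an in-memory byte array instead of a file path.
with open("model_with_custom_op.onnx", "rb") as f:  # hypothetical model
    model_bytes = f.read()

session = ort.InferenceSession(model_bytes, sess_options)
```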
## Samples See [Tutorials: API Basics - C++](../tutorials/api-basics) \ No newline at end of file diff --git a/docs/get-started/with-iot.md b/docs/get-started/with-iot.md index 530c112442265..28c15ac4ab537 100644 --- a/docs/get-started/with-iot.md +++ b/docs/get-started/with-iot.md @@ -2,7 +2,7 @@ title: IoT and Edge parent: Get Started toc: true -nav_order: 7 +nav_order: 10 --- # Get Started with ORT for IoT {: .no_toc } diff --git a/docs/get-started/with-javascript.md b/docs/get-started/with-javascript.md index e1a87f689c00c..19558e33764a6 100644 --- a/docs/get-started/with-javascript.md +++ b/docs/get-started/with-javascript.md @@ -2,7 +2,7 @@ title: JavaScript parent: Get Started toc: true -nav_order: 4 +nav_order: 6 --- # Get started with ORT for JavaScript diff --git a/docs/get-started/with-mobile.md b/docs/get-started/with-mobile.md index 8c8ef4e1e447c..432b4573a2196 100644 --- a/docs/get-started/with-mobile.md +++ b/docs/get-started/with-mobile.md @@ -2,7 +2,7 @@ title: Mobile parent: Get Started toc: true -nav_order: 6 +nav_order: 7 --- # Get Started with ORT for Mobile {: .no_toc } @@ -46,4 +46,4 @@ OrtSession session = env.createSession(<path to model>, session_options); ``` ## Learn More -- [Mobile Tutorial](./Tutorials/Mobile/) \ No newline at end of file +- [Mobile Tutorial](../tutorials/mobile) \ No newline at end of file diff --git a/docs/get-started/with-obj-c.md b/docs/get-started/with-obj-c.md index e991697fbadd7..03a4498d8266d 100644 --- a/docs/get-started/with-obj-c.md +++ b/docs/get-started/with-obj-c.md @@ -1,7 +1,7 @@ --- title: Objective-C parent: Get Started -nav_exclude: true +nav_order: 8 --- # Get started with ORT for Objective-C {: .no_toc } diff --git a/docs/get-started/with-python.md b/docs/get-started/with-python.md index a9d201ba3fc51..81ee912478d4e 100644 --- a/docs/get-started/with-python.md +++ b/docs/get-started/with-python.md @@ -238,7 +238,7 @@ If using pip, run `pip install --upgrade pip` prior to downloading.
|[onnxruntime](https://pypi.org/project/onnxruntime)|CPU (Release)| Windows (x64), Linux (x64, ARM64), Mac (X64), | |[ort-nightly](https://test.pypi.org/project/ort-nightly)|CPU (Dev) | Same as above | |[onnxruntime-gpu](https://pypi.org/project/onnxruntime-gpu)|GPU (Release)| Windows (x64), Linux (x64, ARM64) | -|[ort-gpu-nightly](https://test.pypi.org/project/ort-gpu-nightly)|GPU (Dev) | Same as above | +|[ort-nightly-gpu](https://test.pypi.org/project/ort-nightly-gpu)|GPU (Dev) | Same as above | For Python compiler version notes, see [this page](https://github.com/microsoft/onnxruntime/tree/master/docs/Python_Dev_Notes.md) @@ -251,5 +251,5 @@ Python 3.6 - 3.9 - [Python Tutorials](../tutorials/api-basics) * [TensorFlow with ONNX Runtime](../tutorials/tf-get-started.md) * [PyTorch with ONNX Runtime](https://pytorch.org/tutorials/advanced/super_resolution_with_onnxruntime.html) -* [scikit-learn with ONNX Runtime](https://www.onnxruntime.ai/python/tutorial.html) +* [scikit-learn with ONNX Runtime](http://onnx.ai/sklearn-onnx/index_tutorial.html) \ No newline at end of file diff --git a/docs/get-started/with-winrt.md b/docs/get-started/with-winrt.md index b2d92789e28e9..06ba9b631dcee 100644 --- a/docs/get-started/with-winrt.md +++ b/docs/get-started/with-winrt.md @@ -2,7 +2,7 @@ title: WinRT parent: Get Started toc: true -nav_order: 8 +nav_order: 9 --- diff --git a/docs/performance/tune-performance.md b/docs/performance/tune-performance.md index 25589d34863fd..6030a6e038f6b 100644 --- a/docs/performance/tune-performance.md +++ b/docs/performance/tune-performance.md @@ -170,7 +170,7 @@ sess_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_ALL * When `sess_options.execution_mode = rt.ExecutionMode.ORT_PARALLEL`, you can set `sess_options.inter_op_num_threads` to control the number of threads used to parallelize the execution of the graph (across nodes). -* sess_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_ALL. Default is already ORT_ENABLE_ALL(99). Please see [onnxruntime_c_api.h](https://github.com/microsoft/onnxruntime/tree/master/include/onnxruntime/core/session/onnxruntime_c_api.h#L241) (enum GraphOptimizationLevel) for the full list of all optimization levels. For details regarding available optimizations and usage please refer to the [Graph Optimizations Doc](../resources/graph-optimizations.md). +* sess_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_ALL. Default is already ORT_ENABLE_ALL(99). Please see [onnxruntime_c_api.h](https://github.com/microsoft/onnxruntime/tree/master/include/onnxruntime/core/session/onnxruntime_c_api.h#L241) (enum GraphOptimizationLevel) for the full list of all optimization levels. For details regarding available optimizations and usage please refer to the [Graph Optimizations Doc](graph-optimizations.md). 
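Putting the tuning knobs from this section together, a minimal sketch (the thread counts and model path are illustrative assumptions; reasonable values depend on your workload):

```python
import onnxruntime as rt

sess_options = rt.SessionOptions()
# Run independent branches of the graph in parallel and bound the thread pools.
sess_options.execution_mode = rt.ExecutionMode.ORT_PARALLEL
sess_options.inter_op_num_threads = 4   # threads across nodes (assumed value)
sess_options.intra_op_num_threads = 4   # threads within a node (assumed value)
# ORT_ENABLE_ALL is already the default; set it explicitly here for clarity.
sess_options.graph_optimization_level = rt.GraphOptimizationLevel.ORT_ENABLE_ALL

session = rt.InferenceSession("model.onnx", sess_options)  # hypothetical model
```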
### MKL_DNN/nGraph Execution Provider diff --git a/docs/reference/compatibility.md b/docs/reference/compatibility.md index 324003ab78003..899bafcac4fd4 100644 --- a/docs/reference/compatibility.md +++ b/docs/reference/compatibility.md @@ -27,7 +27,7 @@ ONNX Runtime is not explicitly tested with every variation/combination of enviro * Tested with Windows 10 and Windows Server 2019 * May be compatible with Windows 7+ - * Windows Machine Learning ([WinRT](https://www.onnxruntime.ai/docs/reference/api/winrt-api.html)) + * Windows Machine Learning ([WinRT](../get-started/with-winrt.md)) * CPU: Windows 8.1+ * GPU: Windows 10 1709+ @@ -53,7 +53,7 @@ ONNX Runtime is not explicitly tested with every variation/combination of enviro ### Dependent Libraries * [Submodules](https://github.com/microsoft/onnxruntime/tree/master/cgmanifests) -* See the [Execution Provider page](https://www.onnxruntime.ai/docs/reference/execution-providers/) for details on specific hardware libary version requirements +* See the [Execution Provider page](../execution-providers) for details on specific hardware libary version requirements ## ONNX opset support @@ -64,7 +64,7 @@ ONNX Runtime supports all opsets from the latest released version of the [ONNX]( * [Supported Operator Data Types](https://github.com/microsoft/onnxruntime/blob/master/docs/OperatorKernels.md) * *Operators not supported in the current ONNX spec may be available as a [Contrib Operator](https://github.com/microsoft/onnxruntime/blob/master/docs/ContribOperators.md)* - * [How to add a custom operator/kernel](tutorials/mobile/add-custom-op.md) + * [How to add a custom operator/kernel](operators/add-custom-op.md) | ONNX Runtime version | [ONNX version](https://github.com/onnx/onnx/blob/master/docs/Versioning.md) | ONNX opset version | ONNX ML opset version | ONNX IR version | [Windows ML Availability](https://docs.microsoft.com/en-us/windows/ai/windows-ml/release-notes/)| |------------------------------|--------------------|--------------------|----------------------|------------------|------------------| diff --git a/docs/reference/high-level-design.md b/docs/reference/high-level-design.md index 413d8095be57d..ddf717b0a4b62 100644 --- a/docs/reference/high-level-design.md +++ b/docs/reference/high-level-design.md @@ -58,7 +58,7 @@ Conceptually, each partition is reduced to a single fused operator. It is create ## Key design decisions * Multiple threads can invoke the Run() method on the same -inference session object. See [API doc](../get-started/with-c.html.md) for more details. +inference session object. See [API doc](../get-started/with-c.md) for more details. * To facilitate this, the Compute() function of all kernels is const implying the kernels are stateless. 
* Implementations of the operators by execution providers are called @@ -72,9 +72,9 @@ different representation if they choose to, but it is their responsibility to co ## Extensibility Options -* [Add a custom operator/kernel](tutorials/mobile/add-custom-op.md) -* [Add an execution provider](tutorials/mobile/add-execution-provider.md) -* [Add a new graphtransform](https://github.com/microsoft/onnxruntime/tree/master/include//onnxruntime/core/optimizer/graph_transformer.h) +* [Add a custom operator/kernel](operators/add-custom-op.md) +* [Add an execution provider](../execution-providers/add-execution-provider.md) +* [Add a new graph transform](https://github.com/microsoft/onnxruntime/tree/master/include//onnxruntime/core/optimizer/graph_transformer.h) * [Add a new rewrite rule](https://github.com/microsoft/onnxruntime/tree/master/include//onnxruntime/core/optimizer/rewrite_rule.h) ## The ONNX Runtime and Windows OS integration diff --git a/docs/reference/operators/add-custom-op.md b/docs/reference/operators/add-custom-op.md index cdb00099ca066..d41882502edd4 100644 --- a/docs/reference/operators/add-custom-op.md +++ b/docs/reference/operators/add-custom-op.md @@ -32,7 +32,7 @@ A new op can be registered with ONNX Runtime using the Custom Operator API in [o * [Custom op shared library with Python API](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/test/python/onnxruntime_test_python.py): `testRegisterCustomOpsLibrary` uses the Python API to register a shared library with custom op kernels. Currently, the only supported Execution Providers (EPs) for custom ops registered via this approach are CUDA and CPU. -* [E2E example: Export PyTorch model with custom ONNX operators](../tutorials/tutorials/export-pytorch-model.html). +* [E2E example: Export PyTorch model with custom ONNX operators](../../tutorials/export-pytorch-model.md). ## CUDA custom ops When a model is run on a GPU, ONNX Runtime will insert a `MemcpyToHost` op before a CPU custom op and append a `MemcpyFromHost` after it to make sure tensors are accessible throughout calling. diff --git a/docs/tutorials/accelerate-pytorch/pytorch.md b/docs/tutorials/accelerate-pytorch/pytorch.md index 459c9f78df090..79777eeb60a78 100644 --- a/docs/tutorials/accelerate-pytorch/pytorch.md +++ b/docs/tutorials/accelerate-pytorch/pytorch.md @@ -14,9 +14,9 @@ ONNX Runtime can be used to accelerate PyTorch models inferencing. 
* [Basic PyTorch export through torch.onnx](https://pytorch.org/docs/stable/onnx.html) * [Super-resolution with ONNX Runtime](https://pytorch.org/tutorials/advanced/super_resolution_with_onnxruntime.html) -* [Export PyTorch model with custom ops](../tutorials/export-pytorch-model.md) +* [Export PyTorch model with custom ops](../export-pytorch-model.md) ## Accelerate PyTorch model inferencing {: .no_toc } -* [Accelerate reduced size BERT model through quantization](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/quantization/notebooks/bert/Bert-GLUE_OnnxRuntime_quantization.ipynb) +* [Accelerate reduced size BERT model through quantization](https://github.com/microsoft/onnxruntime-inference-examples/blob/main/quantization/notebooks/bert/Bert-GLUE_OnnxRuntime_quantization.ipynb) diff --git a/docs/tutorials/api-basics.md b/docs/tutorials/api-basics.md index 2759523d27742..0a7e440fc28b7 100644 --- a/docs/tutorials/api-basics.md +++ b/docs/tutorials/api-basics.md @@ -6,7 +6,7 @@ nav_order: 1 # ONNX Runtime Inferencing: API Basics {: .no_toc } -These tutorials demonstrate basic inferencing with ONNX Runtime with each language API. +These tutorials demonstrate basic inferencing with ONNX Runtime with each language API. More examples can be found on [microsoft/onnxruntime-inference-examples](https://github.com/microsoft/onnxruntime-inference-examples). ## Contents @@ -22,14 +22,10 @@ These tutorials demonstrate basic inferencing with ONNX Runtime with each langua ## C++ -* [Number recognition (MNIST)](../tutorials/mnist_cpp.html) -* [Image classification (Squeezenet)](https://github.com/microsoft/onnxruntime/blob/master/csharp/test/Microsoft.ML.OnnxRuntime.EndToEndTests.Capi/CXX_Api_Sample.cpp) - -## C -* [Image classification (Squeezenet)](https://github.com/microsoft/onnxruntime/blob/master/csharp/test/Microsoft.ML.OnnxRuntime.EndToEndTests.Capi/C_Api_Sample.cpp) +* [C/C++ examples](https://github.com/microsoft/onnxruntime-inference-examples/tree/main/c_cxx) ## C# -* [Object detection (Faster RCNN)](../tutorials/fasterrcnn_csharp.html) +* [Object detection (Faster RCNN)]() * [Image recognition (ResNet50 v2)](../tutorials/resnet50_csharp.html) ## Java diff --git a/docs/tutorials/huggingface.md b/docs/tutorials/huggingface.md index 1b4f11fa2e8ce..7458098fe64d4 100644 --- a/docs/tutorials/huggingface.md +++ b/docs/tutorials/huggingface.md @@ -17,7 +17,7 @@ ONNX Runtime can accelerate training and inferencing popular Hugging Face NLP mo ## Accelerate Hugging Face model inferencing -* [General export and inference: Hugging Face Transformers](https://github.com/huggingface/transformers/blob/master/notebooks/04-onnx-export.ipynb) +* [General export and inference: Hugging Face Transformers](https://github.com/huggingface/notebooks/blob/master/examples/onnx-export.ipynb) * [Accelerate GPT2 model on CPU](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/notebooks/Inference_GPT2_with_OnnxRuntime_on_CPU.ipynb) * [Accelerate BERT model on CPU](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/notebooks/PyTorch_Bert-Squad_OnnxRuntime_CPU.ipynb) * [Accelerate BERT model on GPU](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/transformers/notebooks/PyTorch_Bert-Squad_OnnxRuntime_GPU.ipynb) diff --git a/docs/tutorials/tf-get-started.md b/docs/tutorials/tf-get-started.md index 918d0bd7672c1..5f0009325a442 100644 --- a/docs/tutorials/tf-get-started.md +++ 
b/docs/tutorials/tf-get-started.md @@ -106,4 +106,4 @@ If your model fails to convert please read our [README](https://github.com/onnx/ ## Next Steps -- [More tutorials: accelerate Tensorflow models](../tensorflow) +- [More tutorials: accelerate Tensorflow models](./tensorflow)
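Once a TensorFlow model has been converted (for example with the tf2onnx converter referenced above), running it takes the same few lines as any other ONNX model. A hedged sketch — the model path, input handling, and dummy input shape are assumptions for illustration:

```python
import numpy as np
import onnxruntime as ort

# Assumes model.onnx was produced by a converter such as tf2onnx.
session = ort.InferenceSession("model.onnx")

input_name = session.get_inputs()[0].name
dummy_input = np.random.rand(1, 224, 224, 3).astype(np.float32)  # hypothetical NHWC shape

outputs = session.run(None, {input_name: dummy_input})
print(outputs[0].shape)
```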