diff --git a/.azure-pipelines/scripts/codeScan/pyspelling/lpot_dict.txt b/.azure-pipelines/scripts/codeScan/pyspelling/lpot_dict.txt
index ec5c0321b29..1a6ac05ed08 100644
--- a/.azure-pipelines/scripts/codeScan/pyspelling/lpot_dict.txt
+++ b/.azure-pipelines/scripts/codeScan/pyspelling/lpot_dict.txt
@@ -2378,3 +2378,4 @@ constfold
 grappler
 amsgrad
 qoperator
+apis
diff --git a/README.md b/README.md
index ebe3e37ed57..2a9a8cfd787 100644
--- a/README.md
+++ b/README.md
@@ -167,7 +167,7 @@ Intel® Neural Compressor validated 420+ [examples](./examples) for quantization
       <td colspan="3" align="center"><a href="./docs/source/design.md">Architecture</a></td>
       <td colspan="2" align="center"><a href="https://github.com/intel/neural-compressor/tree/master/examples">Examples</a></td>
       <td colspan="2" align="center"><a href="./docs/source/bench.md">GUI</a></td>
-      <td colspan="2" align="center"><a href="./docs/source/api-introduction.md">APIs</a></td>
+      <td colspan="2" align="center"><a href="https://intel.github.io/neural-compressor/api-documentation/apis.html">APIs</a></td>
     </tr>
     <tr>
       <td colspan="5" align="center"><a href="https://software.intel.com/content/www/us/en/develop/documentation/get-started-with-ai-linux/top.html">Intel oneAPI AI Analytics Toolkit</a></td>
@@ -194,7 +194,7 @@ Intel® Neural Compressor validated 420+ [examples](./examples) for quantization
   </thead>
   <tbody>
     <tr>
-        <td colspan="2" align="center"><a href="./docs/source/Quantization.md">Quantization</a></td>
+        <td colspan="2" align="center"><a href="./docs/source/quantization.md">Quantization</a></td>
         <td colspan="1" align="center"><a href="./docs/source/pruning.md">Pruning(Sparsity)</a></td> 
         <td colspan="2" align="center"><a href="./docs/source/distillation.md">Knowledge Distillation</a></td>
         <td colspan="2" align="center"><a href="./docs/source/mixed_precision.md">Mixed Precision</a></td>
diff --git a/docs/Makefile b/docs/Makefile
index 45894af519e..cf810c3c2a1 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -19,17 +19,11 @@ help:
 
 html:
 	# cp README.md to docs, modify response-link
-	cp -f "../README.md" "./source/README.md"
+	cp -f "../README.md" "./source/getting_started.md"
 	cp -f "../SECURITY.md" "./source/SECURITY.md"
-	cp -f "./source/README.md" "./source/README.md.tmp"
-	sed 's/.md/.html/g; s/.\/docs\/source\//.\//g; s/.\/neural_coder\/extensions\/screenshots/imgs/g; s/.\/docs\/source\/_static/..\/\/_static/g; s/.\/examples/https:\/\/github.com\/intel\/neural-compressor\/tree\/master\/examples/g; s/.md/.html/g; ' "./source/README.md.tmp" > "./source/README.md"
-	rm -f "./source/README.md.tmp"
-
-	# modify docList
-	cp -f "./source/doclist.rst" "./source/doclist.rst.tmp"
-	sed 's/.md/.html/g;' "./source/doclist.rst.tmp" > "./source/doclist.rst"
-	rm -f "./source/doclist.rst.tmp"
-
+	cp -f "./source/getting_started.md" "./source/getting_started.md.tmp"  # sed reads the .tmp copy so it never reads and writes the same file
+	sed 's/\.md/.html/g; s/\.\/docs\/source\//.\//g; s/\.\/neural_coder\/extensions\/screenshots/imgs/g; s/\.\/docs\/source\/_static/..\/\/_static/g; s/\.\/examples/https:\/\/github.com\/intel\/neural-compressor\/tree\/master\/examples/g; s/\.md/.html/g; ' "./source/getting_started.md.tmp" > "./source/getting_started.md"
+	rm -f "./source/getting_started.md.tmp"
 	
 	# make sure other png can display normal
 	$(SPHINXBUILD) -b html "$(SOURCEDIR)" "$(BUILDDIR)/html" $(SPHINXOPTS) $(O)
diff --git a/docs/source/README.md b/docs/source/README.md
index c912168f5a7..4b03ada961c 100644
--- a/docs/source/README.md
+++ b/docs/source/README.md
@@ -167,7 +167,7 @@ Intel® Neural Compressor validated 420+ [examples](./examples) for quantization
       <td colspan="3" align="center"><a href="./design.html">Architecture</a></td>
       <td colspan="2" align="center"><a href="https://github.com/intel/neural-compressor/tree/master/examples">Examples</a></td>
       <td colspan="2" align="center"><a href="./bench.html">GUI</a></td>
-      <td colspan="2" align="center"><a href="./api-introduction.html">APIs</a></td>
+      <td colspan="2" align="center"><a href="https://intel.github.io/neural-compressor/api-documentation/apis.html">APIs</a></td>
     </tr>
     <tr>
       <td colspan="5" align="center"><a href="https://software.intel.com/content/www/us/en/develop/documentation/get-started-with-ai-linux/top.html">Intel oneAPI AI Analytics Toolkit</a></td>
diff --git a/docs/source/_static/index.html b/docs/source/_static/index.html
index 5f62e3d9bef..22a56287809 100644
--- a/docs/source/_static/index.html
+++ b/docs/source/_static/index.html
@@ -1 +1 @@
-<meta http-equiv="refresh" content="0; URL='README.html'" />
\ No newline at end of file
+<meta http-equiv="refresh" content="0; URL='getting_started.html'" />
\ No newline at end of file
diff --git a/docs/source/api-documentation/api-introduction.md b/docs/source/api-documentation/api-introduction.md
deleted file mode 100644
index e5fc85f5d21..00000000000
--- a/docs/source/api-documentation/api-introduction.md
+++ /dev/null
@@ -1,210 +0,0 @@
-API Documentation
-=================
-
-## Introduction
-
-Intel® Neural Compressor is an open-source Python library designed to help users quickly deploy low-precision inference solutions on popular deep learning (DL) frameworks such as TensorFlow*, PyTorch*, MXNet, and ONNX Runtime. It automatically optimizes low-precision recipes for deep learning models in order to achieve optimal product objectives, such as inference performance and memory usage, with expected accuracy criteria.
-
-
-## User-facing APIs
-
-These APIs are intended to unify low-precision quantization interfaces cross multiple DL frameworks for the best out-of-the-box experiences.
-
-> **Note**
->
-> Neural Compressor is continuously improving user-facing APIs to create a better user experience. 
-
-> Two sets of user-facing APIs exist. One is the default one supported from Neural Compressor v1.0 for backwards compatibility. The other set consists of new APIs in 
-the `neural_compressor.experimental` package.
-
-> We recommend that you use the APIs located in neural_compressor.experimental. All examples have been updated to use the experimental APIs.
-
-The major differences between the default user-facing APIs and the experimental APIs are:
-
-1. The experimental APIs abstract the `neural_compressor.experimental.common.Model` concept to cover those cases whose weight and graph files are stored separately.
-2. The experimental APIs unify the calling style of the `Quantization`, `Pruning`, and `Benchmark` classes by setting model, calibration dataloader, evaluation dataloader, and metric through class attributes rather than passing them as function inputs.
-3. The experimental APIs refine Neural Compressor built-in transforms/datasets/metrics by unifying the APIs cross different framework backends.
-
-## Experimental user-facing APIs
-
-Experimental user-facing APIs consist of the following components:
-
-### Quantization-related APIs
-
-```python
-# neural_compressor.experimental.Quantization
-class Quantization(object):
-    def __init__(self, conf_fname_or_obj):
-        ...
-
-    def __call__(self):
-        ...
-
-    @property
-    def calib_dataloader(self):
-        ...
-
-    @property
-    def eval_dataloader(self):
-        ...
-
-    @property
-    def model(self):
-        ...
-
-    @property
-    def metric(self):
-        ...
-
-    @property
-    def postprocess(self, user_postprocess):
-        ...
-
-    @property
-    def q_func(self):
-        ...
-
-    @property
-    def eval_func(self):
-        ...
-
-```
-The `conf_fname_or_obj` parameter used in the class initialization is the path to the user yaml configuration file or Quantization_Conf class. This yaml file is used to control the entire tuning behavior on the model.
-
-**Neural Compressor User YAML Syntax**
-
-> Intel® Neural Compressor provides template yaml files for [Post-Training Quantization](../neural_compressor/template/ptq.yaml), [Quantization-Aware Training](../neural_compressor/template/qat.yaml), and [Pruning](../neural_compressor/template/pruning.yaml) scenarios. Refer to these template files to understand the meaning of each field.
-
-> Note that most fields in the yaml templates are optional. View the [HelloWorld Yaml](../examples/helloworld/tf_example2/conf.yaml) example for reference.
-
-```python
-# Typical Launcher code
-from neural_compressor.experimental import Quantization, common
-
-# optional if Neural Compressor built-in dataset could be used as model input in yaml
-class dataset(object):
-  def __init__(self, *args):
-      ...
-
-  def __getitem__(self, idx):
-      # return single sample and label tuple without collate. label should be 0 for label-free case
-      ...
-
-  def len(self):
-      ...
-
-# optional if Neural Compressor built-in metric could be used to do accuracy evaluation on model output in yaml
-class custom_metric(object):
-    def __init__(self):
-        ...
-
-    def update(self, predict, label):
-        # metric update per mini-batch
-        ...
-
-    def result(self):
-        # final metric calculation invoked only once after all mini-batch are evaluated
-        # return a scalar to neural_compressor for accuracy-driven tuning.
-        # by default the scalar is higher-is-better. if not, set tuning.accuracy_criterion.higher_is_better to false in yaml.
-        ...
-
-quantizer = Quantization(conf.yaml)
-quantizer.model = '/path/to/model'
-# below two lines are optional if Neural Compressor built-in dataset is used as model calibration input in yaml
-cal_dl = dataset('/path/to/calibration/dataset')
-quantizer.calib_dataloader = common.DataLoader(cal_dl, batch_size=32)
-# below two lines are optional if Neural Compressor built-in dataset is used as model evaluation input in yaml
-dl = dataset('/path/to/evaluation/dataset')
-quantizer.eval_dataloader = common.DataLoader(dl, batch_size=32)
-# optional if Neural Compressor built-in metric could be used to do accuracy evaluation in yaml
-quantizer.metric = common.Metric(custom_metric) 
-q_model = quantizer.fit()
-q_model.save('/path/to/output/dir') 
-```
-
-`model` attribute in `Quantization` class is an abstraction of model formats across different frameworks. Neural Compressor supports passing the path of `keras model`, `frozen pb`, `checkpoint`, `saved model`, `torch.nn.model`, `mxnet.symbol.Symbol`, `gluon.HybirdBlock`, and `onnx model` to instantiate a `neural_compressor.experimental.` class and set to `quantizer.model`.
-
-`calib_dataloader` and `eval_dataloader` attribute in `Quantization` class is used to set up a calibration dataloader by code. It is optional to set if the user sets corresponding fields in yaml.
-
-`metric` attribute in `Quantization` class is used to set up a custom metric by code. It is optional to set if user finds Neural Compressor built-in metric could be used with their model and sets corresponding fields in yaml.
-
-`postprocess` attribute in `Quantization` class is not necessary in most of the use cases. It is only needed when the user wants to use the built-in metric but the model output can not directly be handled by Neural Compressor built-in metrics. In this case, the user can register a transform to convert the model output to the expected one required by the built-in metric.
-
-`q_func` attribute in `Quantization` class is only for `Quantization Aware Training` case, in which the user needs to register a function that takes `model` as the input parameter and executes the entire training process with self-contained training hyper-parameters. 
-
-`eval_func` attribute in `Quantization` class is reserved for special cases. If the user had an evaluation function when train a model, the user must implement a `calib_dataloader` and leave `eval_dataloader` as None. Then, modify this evaluation function to take `model` as the input parameter and return a higher-is-better scaler. In some scenarios, it may reduce development effort.
-
-
-### Pruning-related APIs (POC)
-
-```python
-class Pruning(object):
-    def __init__(self, conf_fname_or_obj):
-        ...
-
-    def on_epoch_begin(self, epoch):
-        ...
-
-    def on_step_begin(self, batch_id):
-        ...
-
-    def on_step_end(self):
-        ...
-
-    def on_epoch_end(self):
-        ...
-
-    def __call__(self):
-        ...
-
-    @property
-    def model(self):
-        ...
-
-    @property
-    def q_func(self):
-        ...
-
-```
-
-This API is used to do sparsity pruning. Currently, it is a Proof of Concept; Neural Compressor only supports `magnitude pruning` on PyTorch.
-
-To learn how to use this API, refer to the [pruning document](../pruning.md).
-
-### Benchmarking-related APIs
-```python
-class Benchmark(object):
-    def __init__(self, conf_fname_or_obj):
-        ...
-
-    def __call__(self):
-        ...
-
-    @property
-    def model(self):
-        ...
-
-    @property
-    def metric(self):
-        ...
-
-    @property
-    def b_dataloader(self):
-        ...
-
-    @property
-    def postprocess(self, user_postprocess):
-        ...
-```
-
-This API is used to measure model performance and accuracy.
-
-To learn how to use this API, refer to the [benchmarking document](../docs/benchmark.md).
-
-## Default user-facing APIs
-
-The default user-facing APIs exist for backwards compatibility from the v1.0 release. Refer to [v1.1 API](https://github.com/intel/neural-compressor/blob/v1.1/docs/introduction.md) to understand how the default user-facing APIs work.
-
-View the [HelloWorld example](/examples/helloworld/tf_example6) that uses default user-facing APIs for user reference. 
-
-Full examples using default user-facing APIs can be found [here](https://github.com/intel/neural-compressor/tree/v1.1/examples).
diff --git a/docs/source/doclist.rst b/docs/source/doclist.rst
deleted file mode 100644
index d5be5857470..00000000000
--- a/docs/source/doclist.rst
+++ /dev/null
@@ -1,68 +0,0 @@
-Developer Documentation
-#######################
-
-Read the following material as you learn how to use Neural Compressor.
-
-Get Started
-===========
-
-* `Transform <transform.md>`__ introduces how to utilize Neural Compressor's built-in data processing and how to develop a custom data processing method. 
-* `Dataset <dataset.md>`__ introduces how to utilize Neural Compressor's built-in dataset and how to develop a custom dataset.
-* `Metrics <metric.md>`__ introduces how to utilize Neural Compressor's built-in metrics and how to develop a custom metric.
-* `UX <bench.md>`__ is a web-based system used to simplify Neural Compressor usage.
-* `Intel oneAPI AI Analytics Toolkit Get Started Guide <https://software.intel.com/content/www/us/en/develop/documentation/get-started-with-ai-linux/top.html>`__ explains the AI Kit components, installation and configuration guides, and instructions for building and running sample apps.
-* `AI and Analytics Samples <https://github.com/oneapi-src/oneAPI-samples/tree/master/AI-and-Analytics>`__ includes code samples for Intel oneAPI libraries.
-
-.. toctree::
-    :maxdepth: 1
-    :hidden:
-
-    transform.md
-    dataset.md
-    metric.md
-    ux.md
-    Intel oneAPI AI Analytics Toolkit Get Started Guide <https://software.intel.com/content/www/us/en/develop/documentation/get-started-with-ai-linux/top.html>
-    AI and Analytics Samples <https://github.com/oneapi-src/oneAPI-samples/tree/master/AI-and-Analytics>
-
-
-Deep Dive
-=========
-
-* `Quantization <Quantization.md>`__ are processes that enable inference and training by performing computations at low-precision data types, such as fixed-point integers. Neural Compressor supports Post-Training Quantization (`PTQ <PTQ.md>`__) and Quantization-Aware Training (`QAT <QAT.md>`__). Note that `Dynamic Quantization <dynamic_quantization.md>`__ currently has limited support.
-* `Pruning <pruning.md>`__ provides a common method for introducing sparsity in weights and activations.
-* `Benchmarking <benchmark.md>`__ introduces how to utilize the benchmark interface of Neural Compressor.
-* `Mixed precision <mixed_precision.md>`__ introduces how to enable mixed precision, including BFP16 and int8 and FP32, on Intel platforms during tuning.
-* `Graph Optimization <graph_optimization.md>`__ introduces how to enable graph optimization for FP32 and auto-mixed precision.
-* `Model Conversion <model_conversion.md>` introduces how to convert TensorFlow QAT model to quantized model running on Intel platforms.
-* `TensorBoard <tensorboard.md>`__ provides tensor histograms and execution graphs for tuning debugging purposes. 
-
-
-.. toctree::
-    :maxdepth: 1
-    :hidden:
-
-    Quantization.md
-    PTQ.md
-    QAT.md
-    dynamic_quantization.md
-    pruning.md
-    benchmark.md
-    mixed_precision.md
-    graph_optimization.md
-    model_conversion.md
-    tensorboard.md
-  
-    
-Advanced Topics
-===============
-
-* `Adaptor <adaptor.md>`__ is the interface between Neural Compressor and framework. The method to develop adaptor extension is introduced with ONNX Runtime as example. 
-* `Tuning strategies <tuning_strategies.md>`__ can automatically optimized low-precision recipes for deep learning models to achieve optimal product objectives like inference performance and memory usage with expected accuracy criteria. The method to develop a new strategy is introduced.
-
-
-.. toctree::
-    :maxdepth: 1
-    :hidden:
-
-    adaptor.md
-    tuning_strategies.md
diff --git a/docs/source/getting_started.md b/docs/source/getting_started.md
deleted file mode 100644
index e320126de94..00000000000
--- a/docs/source/getting_started.md
+++ /dev/null
@@ -1,451 +0,0 @@
-Getting Started
-===============
-
-## Installation
-
-The Intel® Neural Compressor library is released as part of the
-[Intel® oneAPI AI Analytics Toolkit](https://software.intel.com/content/www/us/en/develop/tools/oneapi/ai-analytics-toolkit.html) (AI Kit).
-The AI Kit provides a consolidated package of Intel's latest deep learning and
-machine optimizations all in one place for ease of development. Along with
-Neural Compressor, the AI Kit includes Intel-optimized versions of deep learning frameworks
-(such as TensorFlow and PyTorch) and high-performing Python libraries to
-streamline end-to-end data science and AI workflows on Intel architectures.
-
-
-### Linux Installation
-
-You can install just the library from binary or source, or you can get
-the Intel-optimized framework together with the library by installing the
-Intel® oneAPI AI Analytics Toolkit.
-
-#### Install from binary
-
-  ```Shell
-  # install from pip
-  pip install neural-compressor
-
-  # install from conda
-  conda install neural-compressor -c conda-forge -c intel 
-  ```
-
-#### Install from source
-
-  ```Shell
-  git clone https://github.com/intel/neural-compressor.git
-  cd neural-compressor
-  pip install -r requirements.txt
-  python setup.py install
-  ```
-
-#### Install from AI Kit
-
-The AI Kit, which includes the 
-library, is distributed through many common channels,
-including from Intel's website, YUM, APT, Anaconda, and more.
-Select and [download](https://software.intel.com/content/www/us/en/develop/tools/oneapi/ai-analytics-toolkit/download.html)
-the AI Kit distribution package that's best suited for you and follow the
-[Get Started Guide](https://software.intel.com/content/www/us/en/develop/documentation/get-started-with-ai-linux/top.html)
-for post-installation instructions.
-
-|[Download AI Kit](https://software.intel.com/content/www/us/en/develop/tools/oneapi/ai-analytics-toolkit/) |[AI Kit Get Started Guide](https://software.intel.com/content/www/us/en/develop/documentation/get-started-with-ai-linux/top.html) |
-|---|---|
-
-### Windows Installation
-
-**Prerequisites**
-
-The following prerequisites and requirements must be satisfied for a successful installation:
-
-- Python version: 3.6 or 3.7 or 3.8 or 3.9
-
-- Download and install [anaconda](https://anaconda.org/).
-
-- Create a virtual environment named nc in anaconda:
-
-    ```shell
-    # Here we install python 3.7 for instance. You can also choose python 3.6, 3.8, or 3.9.
-    conda create -n nc python=3.7
-    conda activate nc 
-    ```
-
-#### Install from binary
-
-  ```Shell
-  # install from pip
-  pip install neural-compressor
-
-  # install from conda
-  conda install neural-compressor -c conda-forge -c intel 
-  ```
-
-#### Install from source
-
-```shell
-git clone https://github.com/intel/neural-compressor.git
-cd neural-compressor
-pip install -r requirements.txt
-python setup.py install
-```
-
-## Examples
-
-[Examples](examples_readme.md) are provided to demonstrate the usage of Intel® Neural Compressor in different frameworks: TensorFlow, PyTorch, MXNet, and ONNX Runtime. Hello World examples are also available.
-
-## Developer Documentation
-
-View Neural Compressor [Documentation](doclist.rst) for getting started, deep dive, and advanced resources to help you use and develop Neural Compressor.
-
-## System Requirements
-
-Intel® Neural Compressor supports systems based on [Intel 64 architecture or compatible processors](https://en.wikipedia.org/wiki/X86-64), specially optimized for the following CPUs:
-
-* Intel Xeon Scalable processor (formerly Skylake, Cascade Lake, Cooper Lake, and Icelake)
-* future Intel Xeon Scalable processor (code name Sapphire Rapids)
-
-Intel® Neural Compressor requires installing the Intel-optimized framework version for the supported DL framework you use: TensorFlow, PyTorch, MXNet, or ONNX runtime. 
-
-Note: Intel Neural Compressor supports Intel-optimized and official frameworks for some TensorFlow versions. Refer to [Supported Frameworks](../README.md#Supported-Frameworks) for specifics.
-
-### Validated Hardware/Software Environment
-
-<table class="docutils">
-<thead>
-  <tr>
-    <th class="tg-bobw">Platform</th>
-    <th class="tg-bobw">OS</th>
-    <th class="tg-bobw">Python</th>
-    <th class="tg-bobw">Framework</th>
-    <th class="tg-bobw">Version</th>
-  </tr>
-</thead>
-<tbody>
-  <tr>
-    <td class="tg-nrix" rowspan="18">Cascade Lake<br><br>Cooper Lake<br><br>Skylake<br><br>Ice Lake</td>
-    <td class="tg-nrix" rowspan="18">CentOS 8.3<br><br>Ubuntu 18.04</td>
-    <td class="tg-nrix" rowspan="18">3.6<br><br>3.7<br><br>3.8<br><br>3.9</td>
-    <td class="tg-cly1" rowspan="9">TensorFlow</td>
-    <td class="tg-7zrl">2.5.0</td>
-  </tr>
-  <tr>
-    <td class="tg-7zrl">2.4.0</td>
-  </tr>
-  <tr>
-    <td class="tg-7zrl">2.3.0</td>
-  </tr>
-  <tr>
-    <td class="tg-7zrl">2.2.0</td>
-  </tr>
-  <tr>
-    <td class="tg-7zrl">2.1.0</td>
-  </tr>
-  <tr>
-    <td class="tg-7zrl">1.15.0 UP1</td>
-  </tr>
-  <tr>
-    <td class="tg-7zrl">1.15.0 UP2</td>
-  </tr>
-  <tr>
-    <td class="tg-7zrl">1.15.0 UP3</td>
-  </tr>
-  <tr>
-    <td class="tg-7zrl">1.15.2</td>
-  </tr>
-  <tr>
-    <td class="tg-7zrl" rowspan="4">PyTorch</td>
-    <td class="tg-7zrl">1.5.0+cpu</td>
-  </tr>
-  <tr>
-    <td class="tg-7zrl">1.6.0+cpu</td>
-  </tr>
-  <tr>
-    <td class="tg-7zrl">1.8.0+cpu</td>
-  </tr>
-  <tr>
-    <td class="tg-7zrl">IPEX</td>
-  </tr>
-  <tr>
-    <td class="tg-cly1" rowspan="2">MXNet</td>
-    <td class="tg-7zrl">1.7.0</td>
-  </tr>
-  <tr>
-    <td class="tg-7zrl">1.6.0</td>
-  </tr>
-  <tr>
-    <td class="tg-7zrl" rowspan="3">ONNX Runtime</td>
-    <td class="tg-7zrl">1.6.0</td>
-  </tr>
-  <tr>
-    <td class="tg-7zrl">1.7.0</td>
-  </tr>
-  <tr>
-    <td class="tg-7zrl">1.8.0</td>
-  </tr>
-</tbody>
-</table>
-
-## Validated Models
-
-Intel® Neural Compressor provides numerous examples to show promising accuracy loss with the best performance gain. A full quantized model list on various frameworks is available in the [Model List](validated_model_list.md).
-
-<table class="docutils">
-<thead>
-  <tr>
-    <th rowspan="2">Framework</th>
-    <th rowspan="2">version</th>
-    <th rowspan="2">Model</th>
-    <th rowspan="2">dataset</th>
-    <th colspan="3">Accuracy</th>
-    <th>Performance speed up</th>
-  </tr>
-  <tr>
-    <td>INT8 Tuning Accuracy</td>
-    <td>FP32 Accuracy Baseline</td>
-    <td>Acc Ratio[(INT8-FP32)/FP32]</td>
-    <td>Realtime Latency Ratio[FP32/INT8]</td>
-  </tr>
-</thead>
-<tbody>
-  <tr>
-    <td>tensorflow</td>
-    <td>2.4.0</td>
-    <td>resnet50v1.5</td>
-    <td>ImageNet</td>
-    <td>76.70%</td>
-    <td>76.50%</td>
-    <td>0.26%</td>
-    <td>3.23x</td>
-  </tr>
-  <tr>
-    <td>tensorflow</td>
-    <td>2.4.0</td>
-    <td>Resnet101</td>
-    <td>ImageNet</td>
-    <td>77.20%</td>
-    <td>76.40%</td>
-    <td>1.05%</td>
-    <td>2.42x</td>
-  </tr>
-  <tr>
-    <td>tensorflow</td>
-    <td>2.4.0</td>
-    <td>inception_v1</td>
-    <td>ImageNet</td>
-    <td>70.10%</td>
-    <td>69.70%</td>
-    <td>0.57%</td>
-    <td>1.88x</td>
-  </tr>
-  <tr>
-    <td>tensorflow</td>
-    <td>2.4.0</td>
-    <td>inception_v2</td>
-    <td>ImageNet</td>
-    <td>74.10%</td>
-    <td>74.00%</td>
-    <td>0.14%</td>
-    <td>1.96x</td>
-  </tr>
-  <tr>
-    <td>tensorflow</td>
-    <td>2.4.0</td>
-    <td>inception_v3</td>
-    <td>ImageNet</td>
-    <td>77.20%</td>
-    <td>76.70%</td>
-    <td>0.65%</td>
-    <td>2.36x</td>
-  </tr>
-  <tr>
-    <td>tensorflow</td>
-    <td>2.4.0</td>
-    <td>inception_v4</td>
-    <td>ImageNet</td>
-    <td>80.00%</td>
-    <td>80.30%</td>
-    <td>-0.37%</td>
-    <td>2.59x</td>
-  </tr>
-  <tr>
-    <td>tensorflow</td>
-    <td>2.4.0</td>
-    <td>inception_resnet_v2</td>
-    <td>ImageNet</td>
-    <td>80.10%</td>
-    <td>80.40%</td>
-    <td>-0.37%</td>
-    <td>1.97x</td>
-  </tr>
-  <tr>
-    <td>tensorflow</td>
-    <td>2.4.0</td>
-    <td>Mobilenetv1</td>
-    <td>ImageNet</td>
-    <td>71.10%</td>
-    <td>71.00%</td>
-    <td>0.14%</td>
-    <td>2.88x</td>
-  </tr>
-  <tr>
-    <td>tensorflow</td>
-    <td>2.4.0</td>
-    <td>ssd_resnet50_v1</td>
-    <td>Coco</td>
-    <td>37.90%</td>
-    <td>38.00%</td>
-    <td>-0.26%</td>
-    <td>2.97x</td>
-  </tr>
-  <tr>
-    <td>tensorflow</td>
-    <td>2.4.0</td>
-    <td>mask_rcnn_inception_v2</td>
-    <td>Coco</td>
-    <td>28.90%</td>
-    <td>29.10%</td>
-    <td>-0.69%</td>
-    <td>2.66x</td>
-  </tr>
-  <tr>
-    <td>tensorflow</td>
-    <td>2.4.0</td>
-    <td>vgg16</td>
-    <td>ImageNet</td>
-    <td>72.50%</td>
-    <td>70.90%</td>
-    <td>2.26%</td>
-    <td>3.75x</td>
-  </tr>
-  <tr>
-    <td>tensorflow</td>
-    <td>2.4.0</td>
-    <td>vgg19</td>
-    <td>ImageNet</td>
-    <td>72.40%</td>
-    <td>71.00%</td>
-    <td>1.97%</td>
-    <td>3.79x</td>
-  </tr>
-</tbody>
-</table>
-
-
-<table class="docutils">
-<thead>
-  <tr>
-    <th rowspan="2">Framework</th>
-    <th rowspan="2">version</th>
-    <th rowspan="2">model</th>
-    <th rowspan="2">dataset</th>
-    <th colspan="3">Accuracy</th>
-    <th>Performance speed up</th>
-  </tr>
-  <tr>
-    <td>INT8 Tuning Accuracy</td>
-    <td>FP32 Accuracy Baseline</td>
-    <td>Acc Ratio[(INT8-FP32)/FP32]</td>
-    <td>Realtime Latency Ratio[FP32/INT8]</td>
-  </tr>
-</thead>
-<tbody>
-  <tr>
-    <td>pytorch</td>
-    <td>1.5.0+cpu</td>
-    <td>resnet50</td>
-    <td>ImageNet</td>
-    <td>75.96%</td>
-    <td>76.13%</td>
-    <td>-0.23%</td>
-    <td>2.63x</td>
-  </tr>
-  <tr>
-    <td>pytorch</td>
-    <td>1.5.0+cpu</td>
-    <td>resnext101_32x8d</td>
-    <td>ImageNet</td>
-    <td>79.12%</td>
-    <td>79.31%</td>
-    <td>-0.24%</td>
-    <td>2.61x</td>
-  </tr>
-  <tr>
-    <td>pytorch</td>
-    <td>1.6.0a0+24aac32</td>
-    <td>bert_base_mrpc</td>
-    <td>MRPC</td>
-    <td>88.90%</td>
-    <td>88.73%</td>
-    <td>0.19%</td>
-    <td>1.98x</td>
-  </tr>
-  <tr>
-    <td>pytorch</td>
-    <td>1.6.0a0+24aac32</td>
-    <td>bert_base_cola</td>
-    <td>COLA</td>
-    <td>59.06%</td>
-    <td>58.84%</td>
-    <td>0.37%</td>
-    <td>2.19x</td>
-  </tr>
-  <tr>
-    <td>pytorch</td>
-    <td>1.6.0a0+24aac32</td>
-    <td>bert_base_sts-b</td>
-    <td>STS-B</td>
-    <td>88.40%</td>
-    <td>89.27%</td>
-    <td>-0.97%</td>
-    <td>2.28x</td>
-  </tr>
-  <tr>
-    <td>pytorch</td>
-    <td>1.6.0a0+24aac32</td>
-    <td>bert_base_sst-2</td>
-    <td>SST-2</td>
-    <td>91.51%</td>
-    <td>91.86%</td>
-    <td>-0.37%</td>
-    <td>2.30x</td>
-  </tr>
-  <tr>
-    <td>pytorch</td>
-    <td>1.6.0a0+24aac32</td>
-    <td>bert_base_rte</td>
-    <td>RTE</td>
-    <td>69.31%</td>
-    <td>69.68%</td>
-    <td>-0.52%</td>
-    <td>2.15x</td>
-  </tr>
-  <tr>
-    <td>pytorch</td>
-    <td>1.6.0a0+24aac32</td>
-    <td>bert_large_mrpc</td>
-    <td>MRPC</td>
-    <td>87.45%</td>
-    <td>88.33%</td>
-    <td>-0.99%</td>
-    <td>2.73x</td>
-  </tr>
-  <tr>
-    <td>pytorch</td>
-    <td>1.6.0a0+24aac32</td>
-    <td>bert_large_squad</td>
-    <td>SQUAD</td>
-    <td>92.85%</td>
-    <td>93.05%</td>
-    <td>-0.21%</td>
-    <td>2.01x</td>
-  </tr>
-  <tr>
-    <td>pytorch</td>
-    <td>1.6.0a0+24aac32</td>
-    <td>bert_large_qnli</td>
-    <td>QNLI</td>
-    <td>91.20%</td>
-    <td>91.82%</td>
-    <td>-0.68%</td>
-    <td>2.69x</td>
-  </tr>
-</tbody>
-</table>
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 2ab2e1d8bbb..afcf722f21d 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -11,10 +11,8 @@ Sections
    :maxdepth: 1
 
    README.md
-   tutorial.md
    examples_readme.md
    api-documentation/apis.rst
-   doclist.rst
    releases_info.md
    contributions.md
    legal_information.md
diff --git a/docs/source/legal_information.md b/docs/source/legal_information.md
index 5c595853b8a..c9ede70d378 100644
--- a/docs/source/legal_information.md
+++ b/docs/source/legal_information.md
@@ -16,7 +16,7 @@ See the accompanying [license](https://github.com/intel/neural-compressor/tree/m
 
 ## Citation
 
-If you use Intel® Neural Compressor in your research or you wish to refer to the tuning results published in the [Validated Models](getting_started.md#validated-models), use the following BibTeX entry.
+If you use Intel® Neural Compressor in your research or you wish to refer to the tuning results published in the [Validated Models](getting_started.md), use the following BibTeX entry.
 
 ```
 @misc{Intel® Neural Compressor,
diff --git a/docs/source/quantization.md b/docs/source/quantization.md
index 951c6e4e5d1..cae3e0845f8 100644
--- a/docs/source/quantization.md
+++ b/docs/source/quantization.md
@@ -80,7 +80,7 @@ Currently `accuracy aware tuning` supports `post training quantization`, `quanti
 
 User could refer to below chart to understand the whole tuning flow.
 
-<img src="../docs/imgs/accuracy_aware_tuning_flow.png" width=914 height=480 alt="accuracy aware tuning working flow">
+<img src="./_static/imgs/accuracy_aware_tuning_flow.png" width=914 height=480 alt="accuracy aware tuning working flow">
 
 ## Supported Feature Matrix
 
diff --git a/docs/source/releases_info.md b/docs/source/releases_info.md
index 81d078a8229..7367fa284b7 100644
--- a/docs/source/releases_info.md
+++ b/docs/source/releases_info.md
@@ -15,6 +15,6 @@ The MSE tuning strategy does not work with the PyTorch adaptor layer. This strat
 
 [Neural Compressor v1.2](https://github.com/intel/neural-compressor/tree/v1.2) introduces incompatible changes in user facing APIs. Please refer to [incompatible changes](incompatible_changes.md) to know which incompatible changes are made in v1.2.
 
-[Neural Compressor v1.2.1](https://github.com/intel/neural-compressor/tree/v1.2.1) solves this backward compatible issues introduced in v1.2 by moving new user facing APIs to neural_compressor.experimental package and keep old one as is. Please refer to [API documentation](/api-documentation/api-introduction.md) to know the details of user-facing APIs.
+[Neural Compressor v1.2.1](https://github.com/intel/neural-compressor/tree/v1.2.1) solves the backward-compatibility issues introduced in v1.2 by moving the new user-facing APIs to the neural_compressor.experimental package and keeping the old ones as is. Please refer to the [API documentation](./api-documentation/apis.rst) for details of the user-facing APIs.
 
 [Neural Compressor v1.7](https://github.com/intel/neural-compressor/tree/v1.7) renames the pip/conda package name from lpot to neural_compressor. To run old examples on latest software, please replace package name for compatibility with `sed -i "s|lpot|neural_compressor|g" your_script.py`

Platform	OS	Python	Framework	Version
Cascade Lake Cooper Lake Skylake Ice Lake	CentOS 8.3 Ubuntu 18.04	3.6 3.7 3.8 3.9	TensorFlow	2.5.0
				2.4.0
				2.3.0
				2.2.0
				2.1.0
				1.15.0 UP1
				1.15.0 UP2
				1.15.0 UP3
				1.15.2
			PyTorch	1.5.0+cpu
				1.6.0+cpu
				1.8.0+cpu
				IPEX
			MXNet	1.7.0
			MXNet	1.6.0
			ONNX Runtime	1.6.0
				1.7.0
				1.8.0
Framework	version	Model	dataset	Accuracy			Performance speed up
Framework	version	Model	dataset	INT8 Tuning Accuracy	FP32 Accuracy Baseline	Acc Ratio[(INT8-FP32)/FP32]	Realtime Latency Ratio[FP32/INT8]
tensorflow	2.4.0	resnet50v1.5	ImageNet	76.70%	76.50%	0.26%	3.23x
tensorflow	2.4.0	Resnet101	ImageNet	77.20%	76.40%	1.05%	2.42x
tensorflow	2.4.0	inception_v1	ImageNet	70.10%	69.70%	0.57%	1.88x
tensorflow	2.4.0	inception_v2	ImageNet	74.10%	74.00%	0.14%	1.96x
tensorflow	2.4.0	inception_v3	ImageNet	77.20%	76.70%	0.65%	2.36x
tensorflow	2.4.0	inception_v4	ImageNet	80.00%	80.30%	-0.37%	2.59x
tensorflow	2.4.0	inception_resnet_v2	ImageNet	80.10%	80.40%	-0.37%	1.97x
tensorflow	2.4.0	Mobilenetv1	ImageNet	71.10%	71.00%	0.14%	2.88x
tensorflow	2.4.0	ssd_resnet50_v1	Coco	37.90%	38.00%	-0.26%	2.97x
tensorflow	2.4.0	mask_rcnn_inception_v2	Coco	28.90%	29.10%	-0.69%	2.66x
tensorflow	2.4.0	vgg16	ImageNet	72.50%	70.90%	2.26%	3.75x
tensorflow	2.4.0	vgg19	ImageNet	72.40%	71.00%	1.97%	3.79x