diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py
index 01885ebe682..68c63a5afc1 100644
--- a/neural_compressor/adaptor/pytorch.py
+++ b/neural_compressor/adaptor/pytorch.py
@@ -2851,6 +2851,10 @@ def _pre_hook_for_qat(self, dataloader=None):
                 quantized_ops[op[0]] = torch.quantization.default_dynamic_qconfig
             else:
                 quantized_ops[op[0]] = q_cfgs
+        # build for fetching scale and zeropoint
+        op_config_dict = {}
+        for op in quantizable_ops:
+            op_config_dict[op] = {'weight': {'dtype': 'int8'}, 'activation': {'dtype': 'uint8'}}
         if self.version.release < Version("1.11.0").release:
             quantized_ops["default_qconfig"] = None
         else:
@@ -2896,6 +2900,7 @@ def _pre_hook_for_qat(self, dataloader=None):
                          'framework': 'pytorch_fx',
                          'reduce_range': REDUCE_RANGE,
                          'quantizable_ops': quantizable_ops,
+                         'op': op_config_dict,
                          'sub_module_list': self.sub_module_list,
                          'approach': 'quant_aware_training'
                          }
@@ -2918,6 +2923,11 @@ def _post_hook_for_qat(self):
             PyTorch_FXAdaptor.convert_sub_graph(self.sub_module_list, \
                                                 self.model._model, prefix='')
 
+        if self.approach != 'post_training_dynamic_quant':
+            self._get_scale_zeropoint(self.model._model, self.model.q_config)
+            self._dump_model_op_stats(self.model._model, self.model.q_config, self.approach)
+        torch_utils.util.get_embedding_contiguous(self.model._model)
+
     def train(self, model, dataloader, optimizer_tuple, criterion_tuple, hooks, **kwargs):
         """Execute the train process on the specified model.
diff --git a/neural_compressor/config.py b/neural_compressor/config.py index 535eb307a28..e1c759eb676 100644 --- a/neural_compressor/config.py +++ b/neural_compressor/config.py @@ -717,8 +717,8 @@ def __init__( self, dtype="int8", opset_version=14, - quant_mode="'QDQ'", - sample_inputs=None, + quant_format="'QDQ'", + example_inputs=None, input_names=None, output_names=None, dynamic_axes=None, @@ -726,8 +726,8 @@ def __init__( ): self._dtype = dtype self._opset_version = opset_version - self._quant_mode = quant_mode - self._sample_inputs = sample_inputs + self._quant_format = quant_format + self._example_inputs = example_inputs self._input_names = input_names self._output_names = output_names self._dynamic_axes = dynamic_axes @@ -750,20 +750,20 @@ def opset_version(self, opset_version): self._opset_version = opset_version @property - def quant_mode(self): - return self._quant_mode + def quant_format(self): + return self._quant_format - @quant_mode.setter - def quant_mode(self, quant_mode): - self._quant_mode = quant_mode + @quant_format.setter + def quant_format(self, quant_format): + self._quant_format = quant_format @property - def sample_inputs(self): - return self._sample_inputs + def example_inputs(self): + return self._example_inputs - @sample_inputs.setter - def sample_inputs(self, sample_inputs): - self._sample_inputs = sample_inputs + @example_inputs.setter + def example_inputs(self, example_inputs): + self._example_inputs = example_inputs @property def input_names(self): @@ -783,7 +783,7 @@ def output_names(self, output_names): @property def dynamic_axes(self): - return self._output_names + return self._dynamic_axes @dynamic_axes.setter def dynamic_axes(self, dynamic_axes): @@ -795,8 +795,8 @@ def __init__( self, dtype="int8", opset_version=14, - quant_mode="'QDQ'", - sample_inputs=None, + quant_format="'QDQ'", + example_inputs=None, input_names=None, output_names=None, dynamic_axes=None, @@ -805,8 +805,8 @@ def __init__( super().__init__( dtype=dtype, opset_version=opset_version, - quant_mode=quant_mode, - sample_inputs=sample_inputs, + quant_format=quant_format, + example_inputs=example_inputs, input_names=input_names, output_names=output_names, dynamic_axes=dynamic_axes, @@ -819,8 +819,8 @@ def __init__( self, dtype="int8", opset_version=14, - quant_mode="'QDQ'", - sample_inputs=None, + quant_format="'QDQ'", + example_inputs=None, input_names=None, output_names=None, dynamic_axes=None, @@ -829,8 +829,8 @@ def __init__( super().__init__( dtype=dtype, opset_version=opset_version, - quant_mode=quant_mode, - sample_inputs=sample_inputs, + quant_format=quant_format, + example_inputs=example_inputs, input_names=input_names, output_names=output_names, dynamic_axes=dynamic_axes, diff --git a/neural_compressor/experimental/export/torch2onnx.py b/neural_compressor/experimental/export/torch2onnx.py new file mode 100644 index 00000000000..cb9f1bc7f1d --- /dev/null +++ b/neural_compressor/experimental/export/torch2onnx.py @@ -0,0 +1,425 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2021 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Helper functions to export model from PyTorch/TensorFlow to ONNX""" +import os +import numpy as np +from collections import UserDict +from neural_compressor.adaptor.torch_utils.util import input2tuple +from neural_compressor.utils import logger +from neural_compressor.utils.utility import LazyImport + + +torch = LazyImport('torch') +onnx = LazyImport('onnx') +ort = LazyImport('onnxruntime') +ortq = LazyImport('onnxruntime.quantization') + + +class DummyDataReader(ortq.CalibrationDataReader): + def __init__(self, fp32_onnx_path): + session = ort.InferenceSession(fp32_onnx_path, None) + input_tensors = session.get_inputs() + input = {} + for node in input_tensors: + shape = [] + for dim in node.shape: + shape.append(dim if isinstance(dim, int) else 1) + tmp = node.type.lstrip('tensor(').rstrip(')') + dtype = eval(f'np.{tmp}') + input[node.name] = np.ones(shape).astype(dtype) + self.data = [input] + self.data = iter(self.data) + def get_next(self): + return next(self.data, None) + +def update_weight_bias( + int8_model, + fp32_onnx_path, +): + """Update wegiht and bias of FP32 ONNX model with QAT INT8 PyTorch model . + + Args: + int8_model (torch.nn.module): int8 model. + fp32_onnx_path (str): path to fp32 onnx model. + """ + # collect weights, bias from int8 PT model + fp32_onnx_model = onnx.load(fp32_onnx_path) + model_dict = int8_model.state_dict() + int8_model_dict = {} + for name, param in model_dict.items(): + # '_packed_params._packed_weight' is specific for quantized Embedding + if '_packed_params._packed_weight' in name: + name = name.replace('._packed_params._packed_weight', '').split('.module')[0] + int8_model_dict[name+'.weight'] = param.dequantize() + # '_packed_params._packed_params' is specific for quantized Linear + elif '_packed_params._packed_params' in name and isinstance(param, tuple): + name = name.replace('._packed_params._packed_params', '').split('.module')[0] + int8_model_dict[name+'.bias'] = param[1] + int8_model_dict[name+'.weight'] = param[0].dequantize() + # '.weight' and '.bias' is specific for quantized Conv + elif '.weight' in name: + int8_model_dict[name] = param.dequantize() + elif '.bias' in name: + int8_model_dict[name] = param + else: + int8_model_dict[name] = param + + # replace weight and bias in onnx fp32 model for QAT + from onnx import helper + tensor_list = [tensor for tensor in fp32_onnx_model.graph.initializer] + for tensor in tensor_list: + if tensor.name in int8_model_dict: + np_tensor = int8_model_dict[tensor.name].detach().cpu().numpy() + new_tensor = helper.make_tensor( + name=tensor.name, + data_type=tensor.data_type, + dims=tensor.dims, + vals=np_tensor, + ) + fp32_onnx_model.graph.initializer.remove(tensor) + fp32_onnx_model.graph.initializer.append(new_tensor) + onnx.save(fp32_onnx_model, fp32_onnx_path) + + +def set_data_type( + dtype, +): + """set data type of activation and weight with string dtype + + Args: + dtype (str): data type description + """ + # Get data type for activation and weight from dtype + if 'U8U8' in dtype: # pragma: no cover + activation_type = ortq.QuantType.QUInt8 + weight_type = ortq.QuantType.QUInt8 + elif 'S8S8' in dtype: # pragma: no cover + activation_type = ortq.QuantType.QInt8 + weight_type = ortq.QuantType.QInt8 + elif 'U8S8' in dtype: + activation_type = ortq.QuantType.QUInt8 + weight_type = ortq.QuantType.QInt8 + else: # pragma: no cover + logger.error("Right now, we don't support dtype: {}, \ + 
please use U8U8/U8S8/S8S8.".format(dtype)) + logger.info("Weight type: {}.".format(weight_type)) + logger.info("Activation type: {}.".format(activation_type)) + return activation_type, weight_type + +def get_quantizable_onnx_ops( + q_config, + fp32_onnx_path, +): + fp32_onnx_model = onnx.load(fp32_onnx_path) + # Clarify ONNX nodes that we can mapping from PyTorch + if 'dynamic' in q_config['approach']: + op_types_to_quantize=['MatMul', 'Gather', "LSTM"] + pytorch_op_types_to_quantize=['Linear', 'Embedding', "LSTM"] + else: + op_types_to_quantize=['MatMul', 'Gather', 'Conv'] + pytorch_op_types_to_quantize=['Linear', 'Embedding', 'Conv1d', 'Conv2d'] + + addition_op_to_quantize = [] + + # if 'U8S8' in dtype: + # op_types_to_quantize.remove('Gather') + # pytorch_op_types_to_quantize.remove('Embedding') + + all_op_types_to_quantize = op_types_to_quantize + addition_op_to_quantize + + from neural_compressor.adaptor.onnxrt import ONNXRTAdaptor + # pylint: disable=E1120 + fp32_onnx_model = ONNXRTAdaptor._replace_gemm_with_matmul(fp32_onnx_model).model + onnx.save(fp32_onnx_model, fp32_onnx_path) + + # Get weight name from onnx initializer + weight_name_list = [] + for tensor in fp32_onnx_model.graph.initializer: + weight_name_list.append(tensor.name) + + # Match weight name with onnx node name + quantize_nodes = [] + tmp_node_mapping = {} + module_node_mapping = {} + for node in fp32_onnx_model.graph.node: + if node.op_type not in op_types_to_quantize: + for inp in node.input: + if inp in weight_name_list and 'weight' in inp: + tmp_node_mapping.update({node.output[0] : inp.split('.weight')[0]}) + elif inp in tmp_node_mapping: + tmp_node_mapping.update({node.output[0] : tmp_node_mapping[inp]}) + else: + for inp in node.input: + if inp in weight_name_list and 'weight' in inp: + module_node_mapping.update({inp.split('.weight')[0] : node.name}) + elif inp in tmp_node_mapping: + module_node_mapping.update({tmp_node_mapping[inp]: node.name}) + + # Save all quantizable node name + if node.op_type in all_op_types_to_quantize: + quantize_nodes.append(node.name) + + # Match pytorch module name with onnx node name for fallbacked fp32 module + for k, v in q_config['op'].items(): # pragma: no cover + if k[1] not in pytorch_op_types_to_quantize or 'int8' in v['weight']['dtype']: + continue + k_0 = k[0].split('.module')[0] if k[0] not in module_node_mapping else k[0] + if k_0 in module_node_mapping: + fallback_op = module_node_mapping[k_0] + quantize_nodes.remove(fallback_op) + return quantize_nodes, module_node_mapping + + + +def get_scale_info( + int8_model, + q_config, +): + # get output scale and zp from module + int8_scale_info = {} + import torch.nn.quantized.modules as q_modules + for name, module in int8_model.named_modules(): + if isinstance(module, q_modules.Conv1d) or \ + isinstance(module, q_modules.Conv2d) or \ + isinstance(module, q_modules.Linear): + int8_scale_info[name] = { + 'output_scale': module.scale, + 'output_zeropoint': module.zero_point, + } + + # a name mapping to avoid '_' and '.' mismatch, we only use '.'. + new_name_mapping = {} + for name in int8_scale_info.keys(): + new_name = name.replace("_", '.') + new_name_mapping.update({new_name: name}) + + # get input scale and zp from q_config + for name, value in q_config['get_attr'].items(): + node_name, node_target = name.split('--') + if 'scale' in name: + value_dict = {'input_scale': value} + if 'zero_point' in name: + value_dict = {'input_zeropoint': value} + if node_name: + node_name = node_name + '.' 
+ if '_input_' in node_target: + tmp_name = node_name + node_target.split('_input_')[0] + tmp_name = tmp_name.replace("_", '.') + # avoid layernorm from qat. + if tmp_name in new_name_mapping: + node_name = new_name_mapping[tmp_name] + int8_scale_info[node_name].update(value_dict) + return int8_scale_info + +def build_scale_mapping( + fp32_onnx_path, + module_node_mapping, + int8_scale_info, +): + node_module_mapping = {} + for module_name, node_name in module_node_mapping.items(): + node_module_mapping[node_name] = module_name + # match scale and zeropoint from PyTorch to ONNX node + scale_zp_dict = {} + fp32_onnx_model = onnx.load(fp32_onnx_path) + for node in fp32_onnx_model.graph.node: + if node.name in node_module_mapping: + module_name = node_module_mapping[node.name] + if module_name not in int8_scale_info: + module_name = module_name + '.module' + if module_name in int8_scale_info: + recoder = int8_scale_info[module_name] + input_scale_args = node.input[0] + '_scale' + input_zp_args = node.input[0] + '_zero_point' + scale_zp_dict[input_scale_args] = recoder['input_scale'] + scale_zp_dict[input_zp_args] = recoder['input_zeropoint'] + ### We need Matmul+Add to match Linear for output scale and zero-point + # output_scale_args = node.output[0] + '_scale' + # output_zp_args = node.output[0] + '_zero_point' + # scale_zp_dict[output_scale_args] = recoder['output_scale'] + # scale_zp_dict[output_zp_args] = recoder['output_zeropoint'] + return scale_zp_dict + +def set_scale_info( + int8_onnx_path, + scale_zp_dict, + activation_type, +): + # set scale and zeropoint from PyTorch int8 model to ONNX int8 model + from onnx import helper + int8_onnx_model = onnx.load(int8_onnx_path) + tensor_list = [tensor for tensor in int8_onnx_model.graph.initializer] + for tensor in tensor_list: + if tensor.name in scale_zp_dict: + value = scale_zp_dict[tensor.name] + if 'zero_point' in tensor.name and activation_type == ortq.QuantType.QInt8: + value -= 128 + new_tensor = helper.make_tensor( + name=tensor.name, + data_type=tensor.data_type, + dims=tensor.dims, + vals=[value], + ) + int8_onnx_model.graph.initializer.remove(tensor) + int8_onnx_model.graph.initializer.append(new_tensor) + onnx.save(int8_onnx_model, int8_onnx_path) + +def torch_to_fp32_onnx( + fp32_model, + save_path, + example_inputs, + opset_version=14, + dynamic_axes={"input": {0: "batch_size"}, + "output": {0: "batch_size"}}, + input_names=None, + output_names=None, + do_constant_folding=True, + verbose=True, +): + """Export FP32 PyTorch model into FP32 ONNX model. + + Args: + fp32_model (torch.nn.module): fp32 model. + int8_model (torch.nn.module): int8 model. + save_path (str): save path of ONNX model. + example_inputs (dict|list|tuple|torch.Tensor): used to trace torch model. + opset_version (int, optional): opset version. Defaults to 14. + dynamic_axes (dict, optional): dynamic axes. Defaults to {"input": {0: "batch_size"}, + "output": {0: "batch_size"}}. + input_names (list, optional): input names. Defaults to None. + output_names (list, optional): output names. Defaults to None. + do_constant_folding (bool, optional): do constant folding or not. Defaults to True. + verbose (bool, optional): dump verbose or not. Defaults to True. 
+ """ + if input_names: + example_input_names = input_names + else: + example_input_names = ['input'] + if isinstance(example_inputs, dict) or isinstance(example_inputs, UserDict): + example_input_names = list(example_inputs.keys()) + + torch.onnx.export( + fp32_model, + input2tuple(example_inputs), + save_path, + opset_version=opset_version, + input_names=example_input_names, + output_names=output_names, + dynamic_axes=dynamic_axes, + do_constant_folding=do_constant_folding, + ) + if verbose: + info = "The FP32 ONNX Model exported to path: {0}".format(save_path) + logger.info("*"*len(info)) + logger.info(info) + logger.info("*"*len(info)) + +def torch_to_int8_onnx( + fp32_model, + int8_model, + q_config, + save_path, + example_inputs, + opset_version: int = 14, + dynamic_axes: dict = {"input": {0: "batch_size"}, + "output": {0: "batch_size"}}, + input_names=None, + output_names=None, + quant_format: str = 'QDQ', + dtype: str = 'U8S8', +): + """Export INT8 PyTorch model into INT8 ONNX model + + Args: + fp32_model (torch.nn.module): fp32 model. + int8_model (torch.nn.module): int8 model. + q_config (dict): containing quantization configuration. + save_path (str): save path of ONNX model. + example_inputs (dict|list|tuple|torch.Tensor): used to trace torch model. + opset_version (int, optional): opset version. Defaults to 14. + dynamic_axes (dict, optional): dynamic axes. Defaults to {"input": {0: "batch_size"}, + "output": {0: "batch_size"}}. + input_names (list, optional): input names. Defaults to None. + output_names (list, optional): output names. Defaults to None. + quant_format (str, optional): quantization format of ONNX model. Defaults to 'QDQ'. + dtype (str, optional): data types of activation and weight of ONNX model. Defaults to 'U8S8'. + """ + if quant_format == 'QDQ' and opset_version < 13: # pragma: no cover + opset_version = 13 + logger.warning("QDQ format requires opset_version >= 13, " + + "we reset opset_version={} here".format(opset_version)) + + activation_type, weight_type = set_data_type(dtype) + + # pylint: disable=E1101 + fp32_onnx_path = save_path + '.tmp' if save_path else 'int8-model.onnx.tmp' + torch_to_fp32_onnx( + fp32_model, + fp32_onnx_path, + example_inputs, + opset_version=opset_version, + input_names=input_names, + output_names=output_names, + dynamic_axes=dynamic_axes, + do_constant_folding=False, + verbose=False, + ) + + quantize_nodes, module_node_mapping = get_quantizable_onnx_ops(q_config, fp32_onnx_path) + + if q_config['approach'] == 'quant_aware_training': + update_weight_bias(int8_model, fp32_onnx_path) + if q_config['approach'] != 'post_training_dynamic_quant': + int8_scale_info = get_scale_info(int8_model, q_config) + scale_mapping = build_scale_mapping(fp32_onnx_path, module_node_mapping, int8_scale_info) + + quant_format = ortq.QuantFormat.QOperator if quant_format != 'QDQ' else ortq.QuantFormat.QDQ + + if q_config['approach'] == 'post_training_dynamic_quant': + ortq.quantize_dynamic( + fp32_onnx_path, + save_path, + per_channel=True, + weight_type=weight_type, + nodes_to_quantize=quantize_nodes, + nodes_to_exclude=[], + extra_options={} + ) + + else: + dummy_datareader = DummyDataReader(fp32_onnx_path) + ortq.quantize_static( + fp32_onnx_path, + save_path, + dummy_datareader, + quant_format=quant_format, + per_channel=True, + weight_type=weight_type, + activation_type=activation_type, + nodes_to_quantize=quantize_nodes, + nodes_to_exclude=[], + extra_options={'OpTypesToExcludeOutputQuantizatioin': ['MatMul']}, + ) + + set_scale_info(save_path, 
scale_mapping, activation_type) + + os.remove(fp32_onnx_path) + info = "The INT8 ONNX Model is exported to path: {0}".format(save_path) + logger.info("*"*len(info)) + logger.info(info) + logger.info("*"*len(info)) diff --git a/neural_compressor/model/torch_model.py b/neural_compressor/model/torch_model.py index 06727a92a0c..32a65cbfb01 100644 --- a/neural_compressor/model/torch_model.py +++ b/neural_compressor/model/torch_model.py @@ -20,7 +20,6 @@ import inspect import sys from collections import OrderedDict, UserDict -from abc import abstractmethod from ..adaptor.torch_utils.util import input2tuple from neural_compressor.utils.utility import LazyImport, compute_sparsity from neural_compressor.utils import logger @@ -46,8 +45,41 @@ def __init__(self, model, **kwargs): self.q_config = None self._workspace_path = '' self.is_quantized = False + try: + self.fp32_model = copy.deepcopy(model) + except Exception as e: # pragma: no cover + logger.warning("Fail to deep copy the model due to {}, inplace is used now.".format( + repr(e))) + self.fp32_model = model self.kwargs = kwargs if kwargs else None + def __repr__(self): + # rewirte this func to avoid printing fp32_model + from torch.nn.modules.module import _addindent + # We treat the extra repr like the sub-module, one item per line + extra_lines = [] + extra_repr = self.extra_repr() + # empty string will be split into list [''] + if extra_repr: + extra_lines = extra_repr.split('\n') + child_lines = [] + for key, module in self._modules.items(): + if key == 'fp32_model': + continue + mod_str = repr(module) + mod_str = _addindent(mod_str, 2) + child_lines.append('(' + key + '): ' + mod_str) + lines = extra_lines + child_lines + main_str = self._get_name() + '(' + if lines: + # simple one-liner info, which most builtin Modules will use + if len(extra_lines) == 1 and not child_lines: + main_str += extra_lines[0] + else: + main_str += '\n ' + '\n '.join(lines) + '\n' + main_str += ')' + return main_str + def forward(self, *args, **kwargs): return self._model(*args, **kwargs) @@ -624,9 +656,38 @@ def export( save_path: str, conf, ): - # TODO - from neural_compressor.config import Torch2ONNXConfig - pass + from neural_compressor.experimental.export.torch2onnx import ( + torch_to_fp32_onnx, + torch_to_int8_onnx + ) + if conf.dtype == 'int8': + torch_to_int8_onnx( + self.fp32_model, + self.model, + self.q_config, + save_path, + conf.example_inputs, + opset_version=conf.opset_version, + dynamic_axes=conf.dynamic_axes, + input_names=conf.input_names, + output_names=conf.output_names, + quant_format=conf.quant_format, + dtype='U8S8', + ) + elif conf.dtype == 'fp32': + torch_to_fp32_onnx( + self.fp32_model, + save_path, + conf.example_inputs, + opset_version=conf.opset_version, + dynamic_axes=conf.dynamic_axes, + input_names=conf.input_names, + output_names=conf.output_names, + do_constant_folding=True, + verbose=True, + ) + else: # pragma: no cover + assert False, "Not allowed dtype: {}, pleas use 'fp32' or 'int8'.".format(conf.dtype) class PyTorchFXModel(PyTorchModel): diff --git a/test/export/test_torch2onnx.py b/test/export/test_torch2onnx.py new file mode 100644 index 00000000000..9c874476395 --- /dev/null +++ b/test/export/test_torch2onnx.py @@ -0,0 +1,203 @@ +import os +import copy +import shutil +import torch +import unittest +import numpy as np +from neural_compressor import quantization +from neural_compressor.experimental.common import Model +from neural_compressor.config import Torch2ONNXConfig +from 
neural_compressor.experimental.data.datasets.dataset import DATASETS +from neural_compressor import PostTrainingQuantConfig, QuantizationAwareTrainingConfig +from neural_compressor.training import prepare_compression +from neural_compressor.data import DATASETS, DATALOADERS +from transformers import AutoModelForSequenceClassification, AutoTokenizer +import torch.utils.data as data + + +def train_func_cv(compression_manager, model): + compression_manager.callbacks.on_train_begin() + optimizer = torch.optim.SGD(model.parameters(), lr=0.0001) + model.train() + input = torch.randn(1, 3, 224, 224) + output = model(input) + loss = output[0].mean() if isinstance(output, tuple) else output.mean() + optimizer.zero_grad() + loss.backward() + optimizer.step() + compression_manager.callbacks.on_train_end() + return model + +def train_func_nlp(compression_manager, model, input): + compression_manager.callbacks.on_train_begin() + optimizer = torch.optim.SGD(model.parameters(), lr=0.0001) + model.train() + output = model(**input) + loss = output.logits[0][0] + optimizer.zero_grad() + loss.backward() + optimizer.step() + compression_manager.callbacks.on_train_end() + return model + +def check_CV_onnx(model_path, dataloader): + import onnxruntime as ort + ort_session = ort.InferenceSession(model_path) + it = iter(dataloader) + input = next(it) + input_dict = {'input': input[0].detach().cpu().numpy()} + ort_session.run(None, input_dict) + return True + +def check_NLP_onnx(model_path, input): + import onnxruntime as ort + ort_session = ort.InferenceSession(model_path, None) + input_dict = {} + for k, v in input.items(): + input_dict[k] = np.array(v) + ort_session.run(None, input_dict) + return True + + +class DummyNLPDataloader(object): + def __init__(self, model_name): + self.tokenizer = AutoTokenizer.from_pretrained(model_name) + self.sequence_a = "intel-extension-for-transformers is based in SH" + self.sequence_b = "Where is intel-extension-for-transformers based? 
NYC or SH" + self.encoded_dict = self.tokenizer(self.sequence_a, self.sequence_b, return_tensors='pt') + self.encoded_dict['labels'] = 1 + self.batch_size = 1 + + def __iter__(self): + yield self.encoded_dict + + def __next__(self): + return self.encoded_dict + +class TestPytorch2ONNX(unittest.TestCase): + @classmethod + def setUpClass(self): + from torchvision.models.quantization import resnet18 + self.cv_model = resnet18() + self.cv_dataset = DATASETS("pytorch")["dummy"]((10, 3, 224, 224)) + self.cv_dataloader = DATALOADERS["pytorch"](self.cv_dataset) + self.nlp_model = AutoModelForSequenceClassification.from_pretrained( + "distilbert-base-uncased-finetuned-sst-2-english" + ) + self.nlp_dataloader = DummyNLPDataloader( + "distilbert-base-uncased-finetuned-sst-2-english" + ) + input = next(self.nlp_dataloader) + input.pop('labels') + self.nlp_input = input + + @classmethod + def tearDownClass(self): + shutil.rmtree('runs', ignore_errors=True) + os.remove('fp32-cv-model.onnx') + os.remove('int8-cv-model.onnx') + os.remove('fp32-nlp-model.onnx') + os.remove('int8-nlp-model.onnx') + shutil.rmtree("./saved", ignore_errors=True) + + def test_fp32_CV_models(self): + model = self.cv_model + inc_model = Model(model) + fp32_onnx_config = Torch2ONNXConfig( + dtype="fp32", + example_inputs=torch.randn(1, 3, 224, 224), + input_names=['input'], + output_names=['output'], + dynamic_axes={"input": {0: "batch_size"}, + "output": {0: "batch_size"}}, + ) + inc_model.export('fp32-cv-model.onnx', fp32_onnx_config) + check_CV_onnx('fp32-cv-model.onnx', self.cv_dataloader) + + def test_int8_CV_models(self): + #for fake_yaml in ["dynamic", "qat", "static"]: + for fake_yaml in ["dynamic"]: + model = self.cv_model + if fake_yaml == "qat": + quant_conf = QuantizationAwareTrainingConfig(backend='pytorch_fx') + compression_manager = prepare_compression(copy.deepcopy(model), quant_conf) + q_model = train_func_cv(compression_manager, compression_manager.model) + else: + if fake_yaml == "dynamic": + quant_conf = PostTrainingQuantConfig(approach="dynamic") + elif fake_yaml == "static": + quant_conf = PostTrainingQuantConfig(approach="static", backend='pytorch_fx') + q_model = quantization.fit( + model, + quant_conf, + calib_dataloader=self.cv_dataloader if fake_yaml == "static" else None) + + int8_onnx_config = Torch2ONNXConfig( + dtype="int8", + opset_version=14, + quant_format="'QDQ'", + example_inputs=torch.randn(1, 3, 224, 224), + input_names=['input'], + output_names=['output'], + dynamic_axes={"input": {0: "batch_size"}, + "output": {0: "batch_size"}}, + ) + q_model.export('int8-cv-model.onnx', int8_onnx_config) + check_CV_onnx('int8-cv-model.onnx', self.cv_dataloader) + + def test_fp32_NLP_models(self): + symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} + dynamic_axes = {k: symbolic_names for k in self.nlp_input.keys()} + + model = self.nlp_model + inc_model = Model(model) + fp32_onnx_config = Torch2ONNXConfig( + dtype="fp32", + example_inputs=tuple(self.nlp_input.values()), + input_names=list(self.nlp_input.keys()), + output_names=['labels'], + dynamic_axes=dynamic_axes, + ) + inc_model.export('fp32-nlp-model.onnx', fp32_onnx_config) + check_NLP_onnx('fp32-nlp-model.onnx', self.nlp_input) + + def test_int8_NLP_models(self): + symbolic_names = {0: 'batch_size', 1: 'max_seq_len'} + dynamic_axes = {k: symbolic_names for k in self.nlp_input.keys()} + + for fake_yaml in ["dynamic", "static", "qat"]: + model = self.nlp_model + if fake_yaml == "qat": + quant_conf = 
QuantizationAwareTrainingConfig(backend='pytorch_fx') + compression_manager = prepare_compression(copy.deepcopy(model), quant_conf) + q_model = train_func_nlp( + compression_manager, + compression_manager.model, + self.nlp_input + ) + else: + if fake_yaml == "dynamic": + quant_conf = PostTrainingQuantConfig(approach="dynamic") + elif fake_yaml == "static": + quant_conf = PostTrainingQuantConfig(approach="static", backend='pytorch_fx') + q_model = quantization.fit( + model, + quant_conf, + calib_dataloader=self.nlp_dataloader if fake_yaml == "static" else None) + + int8_onnx_config = Torch2ONNXConfig( + dtype="int8", + opset_version=14, + quant_format="'QDQ'", + example_inputs=tuple(self.nlp_input.values()), + input_names=list(self.nlp_input.keys()), + output_names=['labels'], + dynamic_axes=dynamic_axes, + ) + q_model.export('int8-nlp-model.onnx', int8_onnx_config) + check_NLP_onnx('int8-nlp-model.onnx', self.nlp_input) + +if __name__ == "__main__": + unittest.main() + +
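
For reference, the snippet below is a minimal usage sketch (not part of the patch) of the export path this diff adds, condensed from test/export/test_torch2onnx.py above. It follows the post-training dynamic quantization case the CV test exercises; it assumes torchvision and onnxruntime are installed, and the output file name is illustrative only.

import torch
import numpy as np
import onnxruntime as ort
from torchvision.models.quantization import resnet18
from neural_compressor import quantization, PostTrainingQuantConfig
from neural_compressor.config import Torch2ONNXConfig

# Post-training dynamic quantization of a torchvision model, as in test_int8_CV_models.
model = resnet18()
quant_conf = PostTrainingQuantConfig(approach="dynamic")
q_model = quantization.fit(model, quant_conf)

# Export the INT8 PyTorch model to an INT8 ONNX model through the new config object.
int8_onnx_config = Torch2ONNXConfig(
    dtype="int8",
    opset_version=14,
    quant_format="'QDQ'",   # the tests pass this exact quoted literal; mirrored here unchanged
    example_inputs=torch.randn(1, 3, 224, 224),
    input_names=["input"],
    output_names=["output"],
    dynamic_axes={"input": {0: "batch_size"}, "output": {0: "batch_size"}},
)
q_model.export("int8-cv-model.onnx", int8_onnx_config)

# Sanity-check the exported model with onnxruntime, as check_CV_onnx does.
session = ort.InferenceSession("int8-cv-model.onnx")
session.run(None, {"input": np.random.randn(1, 3, 224, 224).astype(np.float32)})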