From 8396fb13fe7d7ff8ae4af011a2855d9dd5bea83d Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 23 Jan 2024 10:20:20 +0800 Subject: [PATCH 01/10] add method to get config set Signed-off-by: yiliu30 --- neural_compressor/common/base_config.py | 35 +++++++++++++++++-- neural_compressor/torch/__init__.py | 2 +- .../torch/algorithms/weight_only/gptq.py | 2 +- .../torch/quantization/autotune.py | 14 ++++---- .../torch/quantization/config.py | 30 +++++++++++++++- test/3x/torch/test_autotune.py | 22 ++++++++++++ 6 files changed, 93 insertions(+), 12 deletions(-) diff --git a/neural_compressor/common/base_config.py b/neural_compressor/common/base_config.py index 6e18d8e6d91..cfdf28202ef 100644 --- a/neural_compressor/common/base_config.py +++ b/neural_compressor/common/base_config.py @@ -44,13 +44,15 @@ "register_config", "BaseConfig", "ComposableConfig", - "Options", + "get_config_set_from_config_registry", "options", ] -# Dictionary to store registered configurations +CONFIG_SET_TYPE = Union[None, "BaseConfig", List["BaseConfig"]] + +# Config registry to store all registered configs. class ConfigRegistry: registered_configs = {} @@ -104,6 +106,13 @@ def get_cls_configs(cls) -> Dict[str, Dict[str, object]]: cls_configs[framework_name][algo_name] = config_data["cls"] return cls_configs + @classmethod + def get_all_configs_by_fwk_name(cls, fwk_name: str) -> List[BaseConfig]: + configs_cls = [] + for algo_name, config_pairs in cls.registered_configs.get(fwk_name, {}).items(): + configs_cls.append(config_pairs["cls"]) + return configs_cls + config_registry = ConfigRegistry() @@ -374,6 +383,11 @@ def _is_op_type(name: str) -> bool: # TODO (Yi), ort and tf need override it return not isinstance(name, str) + @classmethod + @abstractmethod + def get_config_set_for_tuning(cls): + raise NotImplementedError + class ComposableConfig(BaseConfig): name = COMPOSABLE_CONFIG @@ -421,6 +435,23 @@ def register_supported_configs(cls): """Add all supported configs.""" raise NotImplementedError + @classmethod + def get_config_set_for_tuning(cls) -> CONFIG_SET_TYPE: + return None + + +def get_config_set_from_config_registry(fwk_name: str) -> Union[BaseConfig, List[BaseConfig]]: + all_registered_config_cls: List[BaseConfig] = config_registry.get_all_configs_by_fwk_name(fwk_name) + config_set = [] + for config_cls in all_registered_config_cls: + config_set.append(config_cls.get_config_set_for_tuning()) + return config_set + + +####################################################### +#### Options +####################################################### + def _check_value(name, src, supported_type, supported_value=[]): """Check if the given object is the given supported type and in the given supported value. 
diff --git a/neural_compressor/torch/__init__.py b/neural_compressor/torch/__init__.py index 81f131ca114..5dc95af4561 100644 --- a/neural_compressor/torch/__init__.py +++ b/neural_compressor/torch/__init__.py @@ -24,4 +24,4 @@ ) from neural_compressor.common.base_tuning import TuningConfig -from neural_compressor.torch.quantization.autotune import autotune, get_default_tune_config +from neural_compressor.torch.quantization.autotune import autotune, get_config_set_for_tuning diff --git a/neural_compressor/torch/algorithms/weight_only/gptq.py b/neural_compressor/torch/algorithms/weight_only/gptq.py index 9c7d3453e8e..f34d3335891 100644 --- a/neural_compressor/torch/algorithms/weight_only/gptq.py +++ b/neural_compressor/torch/algorithms/weight_only/gptq.py @@ -250,7 +250,7 @@ def __init__( # device self.device = device - if str(self.model.device).startswith("cuda"): + if str(getattr(self.model, "device", "")).startswith("cuda"): self.device = self.model.device self.is_ready = False diff --git a/neural_compressor/torch/quantization/autotune.py b/neural_compressor/torch/quantization/autotune.py index d2c38357b93..97f32465843 100644 --- a/neural_compressor/torch/quantization/autotune.py +++ b/neural_compressor/torch/quantization/autotune.py @@ -17,23 +17,22 @@ import torch from neural_compressor.common import Logger -from neural_compressor.common.base_config import BaseConfig +from neural_compressor.common.base_config import BaseConfig, get_config_set_from_config_registry from neural_compressor.common.base_tuning import TuningConfig, evaluator, init_tuning from neural_compressor.torch import quantize -from neural_compressor.torch.quantization.config import GPTQConfig, RTNConfig +from neural_compressor.torch.quantization.config import FRAMEWORK_NAME logger = Logger().get_logger() __all__ = [ - "get_default_tune_config", "autotune", + "get_config_set_for_tuning", ] -def get_default_tune_config() -> TuningConfig: - # TODO use the registered default tuning config in the next PR - return TuningConfig(quant_configs=[GPTQConfig(weight_bits=[4, 8]), RTNConfig(weight_bits=[4, 8])]) +def get_config_set_for_tuning() -> Union[BaseConfig, List[BaseConfig]]: + return get_config_set_from_config_registry(fwk_name=FRAMEWORK_NAME) def autotune( @@ -52,7 +51,8 @@ def autotune( for trial_index, quant_config in enumerate(config_loader): tuning_logger.trial_start(trial_index=trial_index) tuning_logger.quantization_start() - q_model = quantize(model, quant_config=quant_config, run_fn=run_fn, run_args=run_args) + logger.info(f"quant config: {quant_config}") + q_model = quantize(model, quant_config=quant_config, run_fn=run_fn, run_args=run_args, inplace=False) tuning_logger.quantization_end() tuning_logger.evaluation_start() eval_result: float = evaluator.evaluate(q_model) diff --git a/neural_compressor/torch/quantization/config.py b/neural_compressor/torch/quantization/config.py index 10799a30bc4..943a79cbc60 100644 --- a/neural_compressor/torch/quantization/config.py +++ b/neural_compressor/torch/quantization/config.py @@ -23,11 +23,24 @@ import torch -from neural_compressor.common.base_config import BaseConfig, config_registry, register_config +from neural_compressor.common.base_config import ( + BaseConfig, + config_registry, + get_config_set_from_config_registry, + register_config, +) from neural_compressor.common.utils import DEFAULT_WHITE_LIST, FP8_QUANT, GPTQ, OP_NAME_OR_MODULE_TYPE, RTN from neural_compressor.torch.utils.constants import PRIORITY_GPTQ, PRIORITY_RTN from neural_compressor.torch.utils.utility 
import is_hpex_avaliable, logger +__all__ = [ + "RTNConfig", + "get_default_rtn_config", + "GPTQConfig", + "get_default_gptq_config", +] + + FRAMEWORK_NAME = "torch" DTYPE_RANGE = Union[torch.dtype, List[torch.dtype]] @@ -165,6 +178,11 @@ def get_model_info(model: torch.nn.Module) -> List[Tuple[str, Callable]]: logger.debug(f"Get model info: {filter_result}") return filter_result + @classmethod + def get_config_set_for_tuning(cls) -> Union[None, "RTNConfig", List["RTNConfig"]]: + # TODO fwk owner needs to update it. + return RTNConfig(weight_bits=[4, 6]) + # TODO(Yi) run `register_supported_configs` for all registered config. RTNConfig.register_supported_configs() @@ -297,6 +315,11 @@ def get_model_info(model: torch.nn.Module) -> List[Tuple[str, Callable]]: logger.debug(f"Get model info: {filter_result}") return filter_result + @classmethod + def get_config_set_for_tuning(cls) -> Union[None, "GPTQConfig", List["GPTQConfig"]]: + # TODO fwk owner needs to update it. + return GPTQConfig(weight_bits=[4, 6]) + # TODO(Yi) run `register_supported_configs` for all registered config. GPTQConfig.register_supported_configs() @@ -384,6 +407,11 @@ def get_model_info(model: torch.nn.Module) -> List[Tuple[str, Callable]]: logger.debug(f"Get model info: {filter_result}") return filter_result + @classmethod + def get_config_set_for_tuning(cls) -> Union[None, "FP8QConfig", List["FP8QConfig"]]: + # TODO fwk owner needs to update it. + return FP8QConfig(act_dtype=[torch.float8_e4m3fn]) + # TODO(Yi) run `register_supported_configs` for all registered config. FP8QConfig.register_supported_configs() diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py index cbdf587d2c4..1e345901ed3 100644 --- a/test/3x/torch/test_autotune.py +++ b/test/3x/torch/test_autotune.py @@ -99,6 +99,28 @@ def eval_perf_fn(model) -> float: self.assertIsNotNone(best_model) self.assertEqual(len(evaluator.eval_fn_registry), 2) + @reset_tuning_target + def test_autotune_get_config_set_api(self): + from neural_compressor.torch import TuningConfig, autotune, get_config_set_for_tuning + + def eval_acc_fn(model) -> float: + return 1.0 + + def eval_perf_fn(model) -> float: + return 1.0 + + eval_fns = [ + {"eval_fn": eval_acc_fn, "weight": 0.5, "name": "accuracy"}, + { + "eval_fn": eval_perf_fn, + "weight": 0.5, + }, + ] + + custom_tune_config = TuningConfig(quant_configs=get_config_set_for_tuning(), max_trials=2) + best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=eval_fns) + self.assertIsNotNone(best_model) + @reset_tuning_target def test_autotune_not_eval_func(self): logger.info("test_autotune_api") From 7f4bcd179a34e9c1420b78543d8a3fda416a69af Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 23 Jan 2024 10:46:29 +0800 Subject: [PATCH 02/10] fixed UTs Signed-off-by: yiliu30 --- .../torch/quantization/autotune.py | 7 +- test/3x/torch/test_autotune.py | 78 ++++++++++++++++++- 2 files changed, 80 insertions(+), 5 deletions(-) diff --git a/neural_compressor/torch/quantization/autotune.py b/neural_compressor/torch/quantization/autotune.py index 97f32465843..b362d02939d 100644 --- a/neural_compressor/torch/quantization/autotune.py +++ b/neural_compressor/torch/quantization/autotune.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from copy import deepcopy from typing import Dict, List, Optional, Union import torch @@ -52,7 +53,8 @@ def autotune( tuning_logger.trial_start(trial_index=trial_index) tuning_logger.quantization_start() logger.info(f"quant config: {quant_config}") - q_model = quantize(model, quant_config=quant_config, run_fn=run_fn, run_args=run_args, inplace=False) + # !!! Make sure to use deepcopy only when inplace is set to True. + q_model = quantize(deepcopy(model), quant_config=quant_config, run_fn=run_fn, run_args=run_args, inplace=True) tuning_logger.quantization_end() tuning_logger.evaluation_start() eval_result: float = evaluator.evaluate(q_model) @@ -60,7 +62,8 @@ def autotune( tuning_monitor.add_trial_result(trial_index, eval_result, quant_config) if tuning_monitor.need_stop(): best_quant_config: BaseConfig = tuning_monitor.get_best_quant_config() - quantize(model, quant_config=best_quant_config, run_fn=run_fn, run_args=run_args, inplace=True) + # !!! Make sure to use deepcopy only when inplace is set to True. + quantize(deepcopy(model), quant_config=best_quant_config, run_fn=run_fn, run_args=run_args, inplace=True) best_quant_model = model # quantize model inplace tuning_logger.trial_end(trial_index) tuning_logger.tuning_end() diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py index 1e345901ed3..2537fa6d2e0 100644 --- a/test/3x/torch/test_autotune.py +++ b/test/3x/torch/test_autotune.py @@ -40,6 +40,62 @@ def forward(self, x): return model +def get_gpt_j(): + import transformers + + tiny_gptj = transformers.AutoModelForCausalLM.from_pretrained( + "hf-internal-testing/tiny-random-GPTJForCausalLM", + torchscript=True, + ) + return tiny_gptj + + +class GPTQLLMDataLoader: + def __init__(self, length=512): + self.batch_size = 1 + self.length = length + + def __iter__(self): + for i in range(10): + yield torch.ones([1, self.length], dtype=torch.long) + + +class GPTQLLMDataLoaderList(GPTQLLMDataLoader): + def __iter__(self): + for i in range(10): + yield (torch.ones([1, self.length], dtype=torch.long), torch.ones([1, self.length], dtype=torch.long)) + + +class GPTQLLMDataLoaderDict(GPTQLLMDataLoader): + def __iter__(self): + for i in range(10): + yield { + "input_ids": torch.ones([1, self.length], dtype=torch.long), + "attention_mask": torch.ones([1, self.length], dtype=torch.long), + } + + +from tqdm import tqdm + +from neural_compressor.torch.algorithms.weight_only.gptq import move_input_to_device + + +def run_fn_for_gptq(model, dataloader_for_calibration, *args): + logger.info("Collecting calibration inputs...") + for batch in tqdm(dataloader_for_calibration): + batch = move_input_to_device(batch, device=None) + try: + if isinstance(batch, tuple) or isinstance(batch, list): + model(batch[0]) + elif isinstance(batch, dict): + model(**batch) + else: + model(batch) + except ValueError: + pass + return + + class TestAutoTune(unittest.TestCase): @classmethod def setUpClass(self): @@ -102,6 +158,17 @@ def eval_perf_fn(model) -> float: @reset_tuning_target def test_autotune_get_config_set_api(self): from neural_compressor.torch import TuningConfig, autotune, get_config_set_for_tuning + from neural_compressor.torch.algorithms.weight_only.gptq import DataloaderPreprocessor + + dataloader = GPTQLLMDataLoader() + + model = get_gpt_j() + input = torch.ones([1, 512], dtype=torch.long) + + dataloaderPreprocessor = DataloaderPreprocessor( + dataloader_original=dataloader, use_max_length=False, pad_max_length=512, nsamples=128 + ) + dataloader_for_calibration = 
dataloaderPreprocessor.get_prepared_dataloader() def eval_acc_fn(model) -> float: return 1.0 @@ -116,9 +183,14 @@ def eval_perf_fn(model) -> float: "weight": 0.5, }, ] - - custom_tune_config = TuningConfig(quant_configs=get_config_set_for_tuning(), max_trials=2) - best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=eval_fns) + custom_tune_config = TuningConfig(quant_configs=get_config_set_for_tuning(), max_trials=4) + best_model = autotune( + model=get_gpt_j(), + tune_config=custom_tune_config, + eval_fns=eval_fns, + run_fn=run_fn_for_gptq, + run_args=dataloader_for_calibration, + ) self.assertIsNotNone(best_model) @reset_tuning_target From bbc1b7af54c23acbdef58fc1489a3f9c250c1be8 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 23 Jan 2024 11:56:52 +0800 Subject: [PATCH 03/10] add `get_config_set_for_tuning` for ort and tf Signed-off-by: yiliu30 --- neural_compressor/onnxrt/quantization/config.py | 5 +++++ neural_compressor/tensorflow/quantization/config.py | 5 +++++ neural_compressor/torch/quantization/autotune.py | 4 ++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/neural_compressor/onnxrt/quantization/config.py b/neural_compressor/onnxrt/quantization/config.py index 38c87eb6196..971e0327e95 100644 --- a/neural_compressor/onnxrt/quantization/config.py +++ b/neural_compressor/onnxrt/quantization/config.py @@ -157,6 +157,11 @@ def get_model_info(model: Union[onnx.ModelProto, Path, str]) -> List[Tuple[str, logger.debug(f"Get model info: {filter_result}") return filter_result + @classmethod + def get_config_set_for_tuning(cls) -> Union[None, "RTNConfig", List["RTNConfig"]]: + # TODO fwk owner needs to update it. + return RTNConfig(weight_bits=[4, 6]) + # TODO(Yi) run `register_supported_configs` for all registered config. RTNConfig.register_supported_configs() diff --git a/neural_compressor/tensorflow/quantization/config.py b/neural_compressor/tensorflow/quantization/config.py index 6422e8a9439..34ae925059f 100644 --- a/neural_compressor/tensorflow/quantization/config.py +++ b/neural_compressor/tensorflow/quantization/config.py @@ -143,6 +143,11 @@ def register_supported_configs(cls) -> List[OperatorConfig]: ) cls.supported_configs = supported_configs + @classmethod + def get_config_set_for_tuning(cls) -> Union[None, "StaticQuantConfig", List["StaticQuantConfig"]]: + # TODO fwk owner needs to update it. + return StaticQuantConfig(weight_sym=[True, False]) + # TODO(Yi) run `register_supported_configs` for all registered config. StaticQuantConfig.register_supported_configs() diff --git a/neural_compressor/torch/quantization/autotune.py b/neural_compressor/torch/quantization/autotune.py index b362d02939d..1cc3902c70b 100644 --- a/neural_compressor/torch/quantization/autotune.py +++ b/neural_compressor/torch/quantization/autotune.py @@ -53,7 +53,7 @@ def autotune( tuning_logger.trial_start(trial_index=trial_index) tuning_logger.quantization_start() logger.info(f"quant config: {quant_config}") - # !!! Make sure to use deepcopy only when inplace is set to True. + # !!! Make sure to use deepcopy only when inplace is set to `True`. q_model = quantize(deepcopy(model), quant_config=quant_config, run_fn=run_fn, run_args=run_args, inplace=True) tuning_logger.quantization_end() tuning_logger.evaluation_start() @@ -62,7 +62,7 @@ def autotune( tuning_monitor.add_trial_result(trial_index, eval_result, quant_config) if tuning_monitor.need_stop(): best_quant_config: BaseConfig = tuning_monitor.get_best_quant_config() - # !!! 
Make sure to use deepcopy only when inplace is set to True. + # !!! Make sure to use deepcopy only when inplace is set to `True`. quantize(deepcopy(model), quant_config=best_quant_config, run_fn=run_fn, run_args=run_args, inplace=True) best_quant_model = model # quantize model inplace tuning_logger.trial_end(trial_index) From fcc5660fce23b7997b1edad6b4a7eba6b41d6ab9 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 23 Jan 2024 12:07:06 +0800 Subject: [PATCH 04/10] rename some files Signed-off-by: yiliu30 --- neural_compressor/common/base_config.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/neural_compressor/common/base_config.py b/neural_compressor/common/base_config.py index cfdf28202ef..22fc1efcc41 100644 --- a/neural_compressor/common/base_config.py +++ b/neural_compressor/common/base_config.py @@ -21,9 +21,8 @@ import re from abc import ABC, abstractmethod from collections import OrderedDict -from copy import deepcopy from itertools import product -from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union from neural_compressor.common import Logger from neural_compressor.common.utils import ( @@ -49,9 +48,6 @@ ] -CONFIG_SET_TYPE = Union[None, "BaseConfig", List["BaseConfig"]] - - # Config registry to store all registered configs. class ConfigRegistry: registered_configs = {} @@ -107,7 +103,7 @@ def get_cls_configs(cls) -> Dict[str, Dict[str, object]]: return cls_configs @classmethod - def get_all_configs_by_fwk_name(cls, fwk_name: str) -> List[BaseConfig]: + def get_all_config_cls_by_fwk_name(cls, fwk_name: str) -> List[Type[BaseConfig]]: configs_cls = [] for algo_name, config_pairs in cls.registered_configs.get(fwk_name, {}).items(): configs_cls.append(config_pairs["cls"]) @@ -436,12 +432,13 @@ def register_supported_configs(cls): raise NotImplementedError @classmethod - def get_config_set_for_tuning(cls) -> CONFIG_SET_TYPE: + def get_config_set_for_tuning(cls) -> None: + # TODO (Yi) handle the composable config in `tuning_config` return None def get_config_set_from_config_registry(fwk_name: str) -> Union[BaseConfig, List[BaseConfig]]: - all_registered_config_cls: List[BaseConfig] = config_registry.get_all_configs_by_fwk_name(fwk_name) + all_registered_config_cls: List[BaseConfig] = config_registry.get_all_config_cls_by_fwk_name(fwk_name) config_set = [] for config_cls in all_registered_config_cls: config_set.append(config_cls.get_config_set_for_tuning()) From 55efabbc88bd8478ea3b56e37d33316115b57641 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 23 Jan 2024 12:09:39 +0800 Subject: [PATCH 05/10] rename `quant_configs` into `config_set` Signed-off-by: yiliu30 --- neural_compressor/common/base_tuning.py | 14 +++++++------- test/3x/torch/test_autotune.py | 8 ++++---- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/neural_compressor/common/base_tuning.py b/neural_compressor/common/base_tuning.py index eaccc217f10..6145c49379b 100644 --- a/neural_compressor/common/base_tuning.py +++ b/neural_compressor/common/base_tuning.py @@ -129,8 +129,8 @@ class Sampler: class ConfigLoader: - def __init__(self, quant_configs, sampler: Sampler) -> None: - self.quant_configs = quant_configs + def __init__(self, config_set, sampler: Sampler) -> None: + self.config_set = config_set self.sampler = sampler @staticmethod @@ -146,7 +146,7 @@ def parse_quant_config(quant_config: BaseConfig) -> List[BaseConfig]: def parse_quant_configs(self) -> List[BaseConfig]: # TODO 
(Yi) separate this functionality into `Sampler` in the next PR quant_config_list = [] - for quant_config in self.quant_configs: + for quant_config in self.config_set: quant_config_list.extend(ConfigLoader.parse_quant_config(quant_config)) return quant_config_list @@ -210,14 +210,14 @@ class TuningConfig: """Base Class for Tuning Criterion. Args: - quant_configs: quantization configs. Default value is empty. + config_set: quantization configs. Default value is empty. timeout: Tuning timeout (seconds). Default value is 0 which means early stop. max_trials: Max tuning times. Default value is 100. Combine with timeout field to decide when to exit. """ - def __init__(self, quant_configs=None, timeout=0, max_trials=100, sampler: Sampler = None) -> None: + def __init__(self, config_set=None, timeout=0, max_trials=100, sampler: Sampler = None) -> None: """Init a TuneCriterion object.""" - self.quant_configs = quant_configs + self.config_set = config_set self.timeout = timeout self.max_trials = max_trials self.sampler = sampler @@ -265,7 +265,7 @@ def need_stop(self) -> bool: def init_tuning(tuning_config: TuningConfig) -> Tuple[ConfigLoader, TuningLogger, TuningMonitor]: - config_loader = ConfigLoader(quant_configs=tuning_config.quant_configs, sampler=tuning_config.sampler) + config_loader = ConfigLoader(config_set=tuning_config.config_set, sampler=tuning_config.sampler) tuning_logger = TuningLogger() tuning_monitor = TuningMonitor(tuning_config) return config_loader, tuning_logger, tuning_monitor diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py index 2537fa6d2e0..d9149908ebd 100644 --- a/test/3x/torch/test_autotune.py +++ b/test/3x/torch/test_autotune.py @@ -123,7 +123,7 @@ def test_autotune_api(self): def eval_acc_fn(model) -> float: return 1.0 - custom_tune_config = TuningConfig(quant_configs=[RTNConfig(weight_bits=[4, 6])], max_trials=2) + custom_tune_config = TuningConfig(config_set=[RTNConfig(weight_bits=[4, 6])], max_trials=2) best_model = autotune( model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=[{"eval_fn": eval_acc_fn}] ) @@ -150,7 +150,7 @@ def eval_perf_fn(model) -> float: }, ] - custom_tune_config = TuningConfig(quant_configs=[RTNConfig(weight_bits=[4, 6])], max_trials=2) + custom_tune_config = TuningConfig(config_set=[RTNConfig(weight_bits=[4, 6])], max_trials=2) best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=eval_fns) self.assertIsNotNone(best_model) self.assertEqual(len(evaluator.eval_fn_registry), 2) @@ -183,7 +183,7 @@ def eval_perf_fn(model) -> float: "weight": 0.5, }, ] - custom_tune_config = TuningConfig(quant_configs=get_config_set_for_tuning(), max_trials=4) + custom_tune_config = TuningConfig(config_set=get_config_set_for_tuning(), max_trials=4) best_model = autotune( model=get_gpt_j(), tune_config=custom_tune_config, @@ -198,7 +198,7 @@ def test_autotune_not_eval_func(self): logger.info("test_autotune_api") from neural_compressor.torch import RTNConfig, TuningConfig, autotune - custom_tune_config = TuningConfig(quant_configs=[RTNConfig(weight_bits=[4, 6])], max_trials=2) + custom_tune_config = TuningConfig(config_set=[RTNConfig(weight_bits=[4, 6])], max_trials=2) # Use assertRaises to check that an AssertionError is raised with self.assertRaises(AssertionError) as context: From 6f88c550d8c53c68fc05bc9728bc4388fb1b263d Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 23 Jan 2024 14:23:35 +0800 Subject: [PATCH 06/10] disable some check Signed-off-by: yiliu30 --- 
neural_compressor/onnxrt/quantization/config.py | 2 +- neural_compressor/tensorflow/quantization/config.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/neural_compressor/onnxrt/quantization/config.py b/neural_compressor/onnxrt/quantization/config.py index 971e0327e95..8f0e64a7ec7 100644 --- a/neural_compressor/onnxrt/quantization/config.py +++ b/neural_compressor/onnxrt/quantization/config.py @@ -158,7 +158,7 @@ def get_model_info(model: Union[onnx.ModelProto, Path, str]) -> List[Tuple[str, return filter_result @classmethod - def get_config_set_for_tuning(cls) -> Union[None, "RTNConfig", List["RTNConfig"]]: + def get_config_set_for_tuning(cls) -> Union[None, "RTNConfig", List["RTNConfig"]]: # pragma: no cover # TODO fwk owner needs to update it. return RTNConfig(weight_bits=[4, 6]) diff --git a/neural_compressor/tensorflow/quantization/config.py b/neural_compressor/tensorflow/quantization/config.py index 34ae925059f..33d0a67e28d 100644 --- a/neural_compressor/tensorflow/quantization/config.py +++ b/neural_compressor/tensorflow/quantization/config.py @@ -144,7 +144,9 @@ def register_supported_configs(cls) -> List[OperatorConfig]: cls.supported_configs = supported_configs @classmethod - def get_config_set_for_tuning(cls) -> Union[None, "StaticQuantConfig", List["StaticQuantConfig"]]: + def get_config_set_for_tuning( + cls, + ) -> Union[None, "StaticQuantConfig", List["StaticQuantConfig"]]: # pragma: no cover # TODO fwk owner needs to update it. return StaticQuantConfig(weight_sym=[True, False]) From 30691d4f0b8430d419b640dc119622f1445e8fad Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 24 Jan 2024 11:41:57 +0800 Subject: [PATCH 07/10] add more UTs Signed-off-by: yiliu30 --- neural_compressor/torch/algorithms/weight_only/gptq.py | 2 +- test/3x/onnxrt/test_config.py | 8 ++++++++ test/3x/tensorflow/test_config.py | 8 ++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/neural_compressor/torch/algorithms/weight_only/gptq.py b/neural_compressor/torch/algorithms/weight_only/gptq.py index f34d3335891..9c7d3453e8e 100644 --- a/neural_compressor/torch/algorithms/weight_only/gptq.py +++ b/neural_compressor/torch/algorithms/weight_only/gptq.py @@ -250,7 +250,7 @@ def __init__( # device self.device = device - if str(getattr(self.model, "device", "")).startswith("cuda"): + if str(self.model.device).startswith("cuda"): self.device = self.model.device self.is_ready = False diff --git a/test/3x/onnxrt/test_config.py b/test/3x/onnxrt/test_config.py index 1bb51c141c7..277277a223c 100644 --- a/test/3x/onnxrt/test_config.py +++ b/test/3x/onnxrt/test_config.py @@ -328,6 +328,14 @@ def test_expand_config(self): self.assertEqual(expand_config_list[0].weight_bits, 4) self.assertEqual(expand_config_list[1].weight_bits, 8) + def test_config_set_api(self): + # *Note: this test is only for improving the code coverage and can be removed once the test_common is enabled. 
+ from neural_compressor.common.base_config import config_registry, get_config_set_from_config_registry + from neural_compressor.tensorflow.quantization.config import FRAMEWORK_NAME + + config_set = get_config_set_from_config_registry(fwk_name=FRAMEWORK_NAME) + self.assertEqual(len(config_set), len(config_registry.registered_configs[FRAMEWORK_NAME])) + if __name__ == "__main__": unittest.main() diff --git a/test/3x/tensorflow/test_config.py b/test/3x/tensorflow/test_config.py index fe9c7830356..e24c7698c28 100644 --- a/test/3x/tensorflow/test_config.py +++ b/test/3x/tensorflow/test_config.py @@ -315,6 +315,14 @@ def test_expand_config(self): self.assertEqual(expand_config_list[0].weight_granularity, "per_channel") self.assertEqual(expand_config_list[1].weight_granularity, "per_tensor") + def test_config_set_api(self): + # *Note: this test is only for improving the code coverage and can be removed once the test_common is enabled. + from neural_compressor.common.base_config import config_registry, get_config_set_from_config_registry + from neural_compressor.tensorflow.quantization.config import FRAMEWORK_NAME + + config_set = get_config_set_from_config_registry(fwk_name=FRAMEWORK_NAME) + self.assertEqual(len(config_set), len(config_registry.registered_configs[FRAMEWORK_NAME])) + if __name__ == "__main__": unittest.main() From b84bd416bf9ae0bd6a925bfc4c00fd548b650a0f Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 24 Jan 2024 11:48:35 +0800 Subject: [PATCH 08/10] rename some funcs Signed-off-by: yiliu30 --- neural_compressor/common/base_config.py | 4 ++-- neural_compressor/torch/__init__.py | 2 +- neural_compressor/torch/quantization/autotune.py | 8 ++++---- neural_compressor/torch/quantization/config.py | 7 +------ test/3x/onnxrt/test_config.py | 4 ++-- test/3x/tensorflow/test_config.py | 4 ++-- test/3x/torch/test_autotune.py | 4 ++-- 7 files changed, 14 insertions(+), 19 deletions(-) diff --git a/neural_compressor/common/base_config.py b/neural_compressor/common/base_config.py index d3ded2eed58..dd0236f39c3 100644 --- a/neural_compressor/common/base_config.py +++ b/neural_compressor/common/base_config.py @@ -43,7 +43,7 @@ "register_config", "BaseConfig", "ComposableConfig", - "get_config_set_from_config_registry", + "get_all_config_set_from_config_registry", "options", ] @@ -436,7 +436,7 @@ def get_config_set_for_tuning(cls) -> None: return None -def get_config_set_from_config_registry(fwk_name: str) -> Union[BaseConfig, List[BaseConfig]]: +def get_all_config_set_from_config_registry(fwk_name: str) -> Union[BaseConfig, List[BaseConfig]]: all_registered_config_cls: List[BaseConfig] = config_registry.get_all_config_cls_by_fwk_name(fwk_name) config_set = [] for config_cls in all_registered_config_cls: diff --git a/neural_compressor/torch/__init__.py b/neural_compressor/torch/__init__.py index 5dc95af4561..a7adadbbe08 100644 --- a/neural_compressor/torch/__init__.py +++ b/neural_compressor/torch/__init__.py @@ -24,4 +24,4 @@ ) from neural_compressor.common.base_tuning import TuningConfig -from neural_compressor.torch.quantization.autotune import autotune, get_config_set_for_tuning +from neural_compressor.torch.quantization.autotune import autotune, get_all_config_set diff --git a/neural_compressor/torch/quantization/autotune.py b/neural_compressor/torch/quantization/autotune.py index 1cc3902c70b..bb48f0685c6 100644 --- a/neural_compressor/torch/quantization/autotune.py +++ b/neural_compressor/torch/quantization/autotune.py @@ -18,7 +18,7 @@ import torch from neural_compressor.common import 
Logger -from neural_compressor.common.base_config import BaseConfig, get_config_set_from_config_registry +from neural_compressor.common.base_config import BaseConfig, get_all_config_set_from_config_registry from neural_compressor.common.base_tuning import TuningConfig, evaluator, init_tuning from neural_compressor.torch import quantize from neural_compressor.torch.quantization.config import FRAMEWORK_NAME @@ -28,12 +28,12 @@ __all__ = [ "autotune", - "get_config_set_for_tuning", + "get_all_config_set", ] -def get_config_set_for_tuning() -> Union[BaseConfig, List[BaseConfig]]: - return get_config_set_from_config_registry(fwk_name=FRAMEWORK_NAME) +def get_all_config_set() -> Union[BaseConfig, List[BaseConfig]]: + return get_all_config_set_from_config_registry(fwk_name=FRAMEWORK_NAME) def autotune( diff --git a/neural_compressor/torch/quantization/config.py b/neural_compressor/torch/quantization/config.py index 628c57a1558..c1ab71ed59a 100644 --- a/neural_compressor/torch/quantization/config.py +++ b/neural_compressor/torch/quantization/config.py @@ -23,12 +23,7 @@ import torch -from neural_compressor.common.base_config import ( - BaseConfig, - config_registry, - get_config_set_from_config_registry, - register_config, -) +from neural_compressor.common.base_config import BaseConfig, config_registry, register_config from neural_compressor.common.utils import DEFAULT_WHITE_LIST, FP8_QUANT, GPTQ, OP_NAME_OR_MODULE_TYPE, RTN from neural_compressor.torch.utils.constants import PRIORITY_GPTQ, PRIORITY_RTN from neural_compressor.torch.utils.utility import is_hpex_avaliable, logger diff --git a/test/3x/onnxrt/test_config.py b/test/3x/onnxrt/test_config.py index 277277a223c..3530a938c5c 100644 --- a/test/3x/onnxrt/test_config.py +++ b/test/3x/onnxrt/test_config.py @@ -330,10 +330,10 @@ def test_expand_config(self): def test_config_set_api(self): # *Note: this test is only for improving the code coverage and can be removed once the test_common is enabled. - from neural_compressor.common.base_config import config_registry, get_config_set_from_config_registry + from neural_compressor.common.base_config import config_registry, get_all_config_set_from_config_registry from neural_compressor.tensorflow.quantization.config import FRAMEWORK_NAME - config_set = get_config_set_from_config_registry(fwk_name=FRAMEWORK_NAME) + config_set = get_all_config_set_from_config_registry(fwk_name=FRAMEWORK_NAME) self.assertEqual(len(config_set), len(config_registry.registered_configs[FRAMEWORK_NAME])) diff --git a/test/3x/tensorflow/test_config.py b/test/3x/tensorflow/test_config.py index e24c7698c28..6a7bd7afeab 100644 --- a/test/3x/tensorflow/test_config.py +++ b/test/3x/tensorflow/test_config.py @@ -317,10 +317,10 @@ def test_expand_config(self): def test_config_set_api(self): # *Note: this test is only for improving the code coverage and can be removed once the test_common is enabled. 
- from neural_compressor.common.base_config import config_registry, get_config_set_from_config_registry + from neural_compressor.common.base_config import config_registry, get_all_config_set_from_config_registry from neural_compressor.tensorflow.quantization.config import FRAMEWORK_NAME - config_set = get_config_set_from_config_registry(fwk_name=FRAMEWORK_NAME) + config_set = get_all_config_set_from_config_registry(fwk_name=FRAMEWORK_NAME) self.assertEqual(len(config_set), len(config_registry.registered_configs[FRAMEWORK_NAME])) diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py index d9149908ebd..e1b717e3163 100644 --- a/test/3x/torch/test_autotune.py +++ b/test/3x/torch/test_autotune.py @@ -157,7 +157,7 @@ def eval_perf_fn(model) -> float: @reset_tuning_target def test_autotune_get_config_set_api(self): - from neural_compressor.torch import TuningConfig, autotune, get_config_set_for_tuning + from neural_compressor.torch import TuningConfig, autotune, get_all_config_set from neural_compressor.torch.algorithms.weight_only.gptq import DataloaderPreprocessor dataloader = GPTQLLMDataLoader() @@ -183,7 +183,7 @@ def eval_perf_fn(model) -> float: "weight": 0.5, }, ] - custom_tune_config = TuningConfig(config_set=get_config_set_for_tuning(), max_trials=4) + custom_tune_config = TuningConfig(config_set=get_all_config_set(), max_trials=4) best_model = autotune( model=get_gpt_j(), tune_config=custom_tune_config, From dc14a9508bf7a1fc39681064255ca83b552ea056 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 24 Jan 2024 12:18:25 +0800 Subject: [PATCH 09/10] fixed import erro Signed-off-by: yiliu30 --- test/3x/onnxrt/test_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/3x/onnxrt/test_config.py b/test/3x/onnxrt/test_config.py index 3530a938c5c..dfc8f00dea5 100644 --- a/test/3x/onnxrt/test_config.py +++ b/test/3x/onnxrt/test_config.py @@ -331,7 +331,7 @@ def test_expand_config(self): def test_config_set_api(self): # *Note: this test is only for improving the code coverage and can be removed once the test_common is enabled. from neural_compressor.common.base_config import config_registry, get_all_config_set_from_config_registry - from neural_compressor.tensorflow.quantization.config import FRAMEWORK_NAME + from neural_compressor.onnxrt.quantization.config import FRAMEWORK_NAME config_set = get_all_config_set_from_config_registry(fwk_name=FRAMEWORK_NAME) self.assertEqual(len(config_set), len(config_registry.registered_configs[FRAMEWORK_NAME])) From 3f60cffd0542a3954c7fccc0c11ea4c49457d742 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 25 Jan 2024 14:59:41 +0800 Subject: [PATCH 10/10] add config set for sq and static quant Signed-off-by: yiliu30 --- neural_compressor/torch/quantization/config.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/neural_compressor/torch/quantization/config.py b/neural_compressor/torch/quantization/config.py index 60e01b2ea9d..e4ee3130587 100644 --- a/neural_compressor/torch/quantization/config.py +++ b/neural_compressor/torch/quantization/config.py @@ -370,6 +370,11 @@ def get_model_info(model: torch.nn.Module) -> List[Tuple[str, Callable]]: logger.debug(f"Get model info: {filter_result}") return filter_result + @classmethod + def get_config_set_for_tuning(cls) -> Union[None, "StaticQuantConfig", List["StaticQuantConfig"]]: + # TODO fwk owner needs to update it. + return StaticQuantConfig(w_sym=[True, False]) + # TODO(Yi) run `register_supported_configs` for all registered config. 
StaticQuantConfig.register_supported_configs() @@ -479,6 +484,11 @@ def get_model_info(model: torch.nn.Module) -> List[Tuple[str, Callable]]: logger.debug(f"Get model info: {filter_result}") return filter_result + @classmethod + def get_config_set_for_tuning(cls) -> Union[None, "SmoothQuantConfig", List["SmoothQuantConfig"]]: + # TODO fwk owner needs to update it. + return SmoothQuantConfig(alpha=[0.1, 0.5]) + # TODO(Yi) run `register_supported_configs` for all registered config. SmoothQuantConfig.register_supported_configs()
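
A minimal usage sketch follows, not part of the patches above: it mirrors the updated unit tests (test_autotune_api and test_autotune_get_config_set_api) and shows how the API introduced by this series is expected to be called end to end. ToyModel and eval_acc_fn are hypothetical placeholders; the imports, TuningConfig(config_set=...), get_all_config_set(), RTNConfig, and the autotune() signature are taken from the patches themselves.

# Sketch only: assumes neural_compressor with this patch series applied.
import torch

from neural_compressor.torch import RTNConfig, TuningConfig, autotune, get_all_config_set


class ToyModel(torch.nn.Module):
    """Hypothetical stand-in for build_simple_torch_model() used in the tests."""

    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(30, 50)
        self.fc2 = torch.nn.Linear(50, 30)

    def forward(self, x):
        return self.fc2(self.fc1(x))


def eval_acc_fn(model) -> float:
    # Placeholder metric; a real harness would evaluate on a validation set.
    return 1.0


# Option A: a hand-picked search space, as in test_autotune_api.
tune_config = TuningConfig(config_set=[RTNConfig(weight_bits=[4, 6])], max_trials=2)

# Option B: every config registered for the torch framework, as in
# test_autotune_get_config_set_api. Algorithms in that set that need
# calibration (e.g. GPTQ) additionally require run_fn/run_args to be
# passed to autotune(), as the GPT-J test above does.
# tune_config = TuningConfig(config_set=get_all_config_set(), max_trials=4)

best_model = autotune(
    model=ToyModel(),
    tune_config=tune_config,
    eval_fns=[{"eval_fn": eval_acc_fn, "weight": 1.0, "name": "accuracy"}],
)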