From 8396fb13fe7d7ff8ae4af011a2855d9dd5bea83d Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 23 Jan 2024 10:20:20 +0800 Subject: [PATCH 01/10] add method to get config set Signed-off-by: yiliu30 --- neural_compressor/common/base_config.py | 35 +++++++++++++++++-- neural_compressor/torch/__init__.py | 2 +- .../torch/algorithms/weight_only/gptq.py | 2 +- .../torch/quantization/autotune.py | 14 ++++---- .../torch/quantization/config.py | 30 +++++++++++++++- test/3x/torch/test_autotune.py | 22 ++++++++++++ 6 files changed, 93 insertions(+), 12 deletions(-) diff --git a/neural_compressor/common/base_config.py b/neural_compressor/common/base_config.py index 6e18d8e6d91..cfdf28202ef 100644 --- a/neural_compressor/common/base_config.py +++ b/neural_compressor/common/base_config.py @@ -44,13 +44,15 @@ "register_config", "BaseConfig", "ComposableConfig", - "Options", + "get_config_set_from_config_registry", "options", ] -# Dictionary to store registered configurations +CONFIG_SET_TYPE = Union[None, "BaseConfig", List["BaseConfig"]] + +# Config registry to store all registered configs. class ConfigRegistry: registered_configs = {} @@ -104,6 +106,13 @@ def get_cls_configs(cls) -> Dict[str, Dict[str, object]]: cls_configs[framework_name][algo_name] = config_data["cls"] return cls_configs + @classmethod + def get_all_configs_by_fwk_name(cls, fwk_name: str) -> List[BaseConfig]: + configs_cls = [] + for algo_name, config_pairs in cls.registered_configs.get(fwk_name, {}).items(): + configs_cls.append(config_pairs["cls"]) + return configs_cls + config_registry = ConfigRegistry() @@ -374,6 +383,11 @@ def _is_op_type(name: str) -> bool: # TODO (Yi), ort and tf need override it return not isinstance(name, str) + @classmethod + @abstractmethod + def get_config_set_for_tuning(cls): + raise NotImplementedError + class ComposableConfig(BaseConfig): name = COMPOSABLE_CONFIG @@ -421,6 +435,23 @@ def register_supported_configs(cls): """Add all supported configs.""" raise NotImplementedError + @classmethod + def get_config_set_for_tuning(cls) -> CONFIG_SET_TYPE: + return None + + +def get_config_set_from_config_registry(fwk_name: str) -> Union[BaseConfig, List[BaseConfig]]: + all_registered_config_cls: List[BaseConfig] = config_registry.get_all_configs_by_fwk_name(fwk_name) + config_set = [] + for config_cls in all_registered_config_cls: + config_set.append(config_cls.get_config_set_for_tuning()) + return config_set + + +####################################################### +#### Options +####################################################### + def _check_value(name, src, supported_type, supported_value=[]): """Check if the given object is the given supported type and in the given supported value. 
diff --git a/neural_compressor/torch/__init__.py b/neural_compressor/torch/__init__.py index 81f131ca114..5dc95af4561 100644 --- a/neural_compressor/torch/__init__.py +++ b/neural_compressor/torch/__init__.py @@ -24,4 +24,4 @@ ) from neural_compressor.common.base_tuning import TuningConfig -from neural_compressor.torch.quantization.autotune import autotune, get_default_tune_config +from neural_compressor.torch.quantization.autotune import autotune, get_config_set_for_tuning diff --git a/neural_compressor/torch/algorithms/weight_only/gptq.py b/neural_compressor/torch/algorithms/weight_only/gptq.py index 9c7d3453e8e..f34d3335891 100644 --- a/neural_compressor/torch/algorithms/weight_only/gptq.py +++ b/neural_compressor/torch/algorithms/weight_only/gptq.py @@ -250,7 +250,7 @@ def __init__( # device self.device = device - if str(self.model.device).startswith("cuda"): + if str(getattr(self.model, "device", "")).startswith("cuda"): self.device = self.model.device self.is_ready = False diff --git a/neural_compressor/torch/quantization/autotune.py b/neural_compressor/torch/quantization/autotune.py index d2c38357b93..97f32465843 100644 --- a/neural_compressor/torch/quantization/autotune.py +++ b/neural_compressor/torch/quantization/autotune.py @@ -17,23 +17,22 @@ import torch from neural_compressor.common import Logger -from neural_compressor.common.base_config import BaseConfig +from neural_compressor.common.base_config import BaseConfig, get_config_set_from_config_registry from neural_compressor.common.base_tuning import TuningConfig, evaluator, init_tuning from neural_compressor.torch import quantize -from neural_compressor.torch.quantization.config import GPTQConfig, RTNConfig +from neural_compressor.torch.quantization.config import FRAMEWORK_NAME logger = Logger().get_logger() __all__ = [ - "get_default_tune_config", "autotune", + "get_config_set_for_tuning", ] -def get_default_tune_config() -> TuningConfig: - # TODO use the registered default tuning config in the next PR - return TuningConfig(quant_configs=[GPTQConfig(weight_bits=[4, 8]), RTNConfig(weight_bits=[4, 8])]) +def get_config_set_for_tuning() -> Union[BaseConfig, List[BaseConfig]]: + return get_config_set_from_config_registry(fwk_name=FRAMEWORK_NAME) def autotune( @@ -52,7 +51,8 @@ def autotune( for trial_index, quant_config in enumerate(config_loader): tuning_logger.trial_start(trial_index=trial_index) tuning_logger.quantization_start() - q_model = quantize(model, quant_config=quant_config, run_fn=run_fn, run_args=run_args) + logger.info(f"quant config: {quant_config}") + q_model = quantize(model, quant_config=quant_config, run_fn=run_fn, run_args=run_args, inplace=False) tuning_logger.quantization_end() tuning_logger.evaluation_start() eval_result: float = evaluator.evaluate(q_model) diff --git a/neural_compressor/torch/quantization/config.py b/neural_compressor/torch/quantization/config.py index 10799a30bc4..943a79cbc60 100644 --- a/neural_compressor/torch/quantization/config.py +++ b/neural_compressor/torch/quantization/config.py @@ -23,11 +23,24 @@ import torch -from neural_compressor.common.base_config import BaseConfig, config_registry, register_config +from neural_compressor.common.base_config import ( + BaseConfig, + config_registry, + get_config_set_from_config_registry, + register_config, +) from neural_compressor.common.utils import DEFAULT_WHITE_LIST, FP8_QUANT, GPTQ, OP_NAME_OR_MODULE_TYPE, RTN from neural_compressor.torch.utils.constants import PRIORITY_GPTQ, PRIORITY_RTN from neural_compressor.torch.utils.utility 
import is_hpex_avaliable, logger +__all__ = [ + "RTNConfig", + "get_default_rtn_config", + "GPTQConfig", + "get_default_gptq_config", +] + + FRAMEWORK_NAME = "torch" DTYPE_RANGE = Union[torch.dtype, List[torch.dtype]] @@ -165,6 +178,11 @@ def get_model_info(model: torch.nn.Module) -> List[Tuple[str, Callable]]: logger.debug(f"Get model info: {filter_result}") return filter_result + @classmethod + def get_config_set_for_tuning(cls) -> Union[None, "RTNConfig", List["RTNConfig"]]: + # TODO fwk owner needs to update it. + return RTNConfig(weight_bits=[4, 6]) + # TODO(Yi) run `register_supported_configs` for all registered config. RTNConfig.register_supported_configs() @@ -297,6 +315,11 @@ def get_model_info(model: torch.nn.Module) -> List[Tuple[str, Callable]]: logger.debug(f"Get model info: {filter_result}") return filter_result + @classmethod + def get_config_set_for_tuning(cls) -> Union[None, "GPTQConfig", List["GPTQConfig"]]: + # TODO fwk owner needs to update it. + return GPTQConfig(weight_bits=[4, 6]) + # TODO(Yi) run `register_supported_configs` for all registered config. GPTQConfig.register_supported_configs() @@ -384,6 +407,11 @@ def get_model_info(model: torch.nn.Module) -> List[Tuple[str, Callable]]: logger.debug(f"Get model info: {filter_result}") return filter_result + @classmethod + def get_config_set_for_tuning(cls) -> Union[None, "FP8QConfig", List["FP8QConfig"]]: + # TODO fwk owner needs to update it. + return FP8QConfig(act_dtype=[torch.float8_e4m3fn]) + # TODO(Yi) run `register_supported_configs` for all registered config. FP8QConfig.register_supported_configs() diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py index cbdf587d2c4..1e345901ed3 100644 --- a/test/3x/torch/test_autotune.py +++ b/test/3x/torch/test_autotune.py @@ -99,6 +99,28 @@ def eval_perf_fn(model) -> float: self.assertIsNotNone(best_model) self.assertEqual(len(evaluator.eval_fn_registry), 2) + @reset_tuning_target + def test_autotune_get_config_set_api(self): + from neural_compressor.torch import TuningConfig, autotune, get_config_set_for_tuning + + def eval_acc_fn(model) -> float: + return 1.0 + + def eval_perf_fn(model) -> float: + return 1.0 + + eval_fns = [ + {"eval_fn": eval_acc_fn, "weight": 0.5, "name": "accuracy"}, + { + "eval_fn": eval_perf_fn, + "weight": 0.5, + }, + ] + + custom_tune_config = TuningConfig(quant_configs=get_config_set_for_tuning(), max_trials=2) + best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=eval_fns) + self.assertIsNotNone(best_model) + @reset_tuning_target def test_autotune_not_eval_func(self): logger.info("test_autotune_api") From 7f4bcd179a34e9c1420b78543d8a3fda416a69af Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 23 Jan 2024 10:46:29 +0800 Subject: [PATCH 02/10] fixed UTs Signed-off-by: yiliu30 --- .../torch/quantization/autotune.py | 7 +- test/3x/torch/test_autotune.py | 78 ++++++++++++++++++- 2 files changed, 80 insertions(+), 5 deletions(-) diff --git a/neural_compressor/torch/quantization/autotune.py b/neural_compressor/torch/quantization/autotune.py index 97f32465843..b362d02939d 100644 --- a/neural_compressor/torch/quantization/autotune.py +++ b/neural_compressor/torch/quantization/autotune.py @@ -12,6 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+from copy import deepcopy from typing import Dict, List, Optional, Union import torch @@ -52,7 +53,8 @@ def autotune( tuning_logger.trial_start(trial_index=trial_index) tuning_logger.quantization_start() logger.info(f"quant config: {quant_config}") - q_model = quantize(model, quant_config=quant_config, run_fn=run_fn, run_args=run_args, inplace=False) + # !!! Make sure to use deepcopy only when inplace is set to True. + q_model = quantize(deepcopy(model), quant_config=quant_config, run_fn=run_fn, run_args=run_args, inplace=True) tuning_logger.quantization_end() tuning_logger.evaluation_start() eval_result: float = evaluator.evaluate(q_model) @@ -60,7 +62,8 @@ def autotune( tuning_monitor.add_trial_result(trial_index, eval_result, quant_config) if tuning_monitor.need_stop(): best_quant_config: BaseConfig = tuning_monitor.get_best_quant_config() - quantize(model, quant_config=best_quant_config, run_fn=run_fn, run_args=run_args, inplace=True) + # !!! Make sure to use deepcopy only when inplace is set to True. + quantize(deepcopy(model), quant_config=best_quant_config, run_fn=run_fn, run_args=run_args, inplace=True) best_quant_model = model # quantize model inplace tuning_logger.trial_end(trial_index) tuning_logger.tuning_end() diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py index 1e345901ed3..2537fa6d2e0 100644 --- a/test/3x/torch/test_autotune.py +++ b/test/3x/torch/test_autotune.py @@ -40,6 +40,62 @@ def forward(self, x): return model +def get_gpt_j(): + import transformers + + tiny_gptj = transformers.AutoModelForCausalLM.from_pretrained( + "hf-internal-testing/tiny-random-GPTJForCausalLM", + torchscript=True, + ) + return tiny_gptj + + +class GPTQLLMDataLoader: + def __init__(self, length=512): + self.batch_size = 1 + self.length = length + + def __iter__(self): + for i in range(10): + yield torch.ones([1, self.length], dtype=torch.long) + + +class GPTQLLMDataLoaderList(GPTQLLMDataLoader): + def __iter__(self): + for i in range(10): + yield (torch.ones([1, self.length], dtype=torch.long), torch.ones([1, self.length], dtype=torch.long)) + + +class GPTQLLMDataLoaderDict(GPTQLLMDataLoader): + def __iter__(self): + for i in range(10): + yield { + "input_ids": torch.ones([1, self.length], dtype=torch.long), + "attention_mask": torch.ones([1, self.length], dtype=torch.long), + } + + +from tqdm import tqdm + +from neural_compressor.torch.algorithms.weight_only.gptq import move_input_to_device + + +def run_fn_for_gptq(model, dataloader_for_calibration, *args): + logger.info("Collecting calibration inputs...") + for batch in tqdm(dataloader_for_calibration): + batch = move_input_to_device(batch, device=None) + try: + if isinstance(batch, tuple) or isinstance(batch, list): + model(batch[0]) + elif isinstance(batch, dict): + model(**batch) + else: + model(batch) + except ValueError: + pass + return + + class TestAutoTune(unittest.TestCase): @classmethod def setUpClass(self): @@ -102,6 +158,17 @@ def eval_perf_fn(model) -> float: @reset_tuning_target def test_autotune_get_config_set_api(self): from neural_compressor.torch import TuningConfig, autotune, get_config_set_for_tuning + from neural_compressor.torch.algorithms.weight_only.gptq import DataloaderPreprocessor + + dataloader = GPTQLLMDataLoader() + + model = get_gpt_j() + input = torch.ones([1, 512], dtype=torch.long) + + dataloaderPreprocessor = DataloaderPreprocessor( + dataloader_original=dataloader, use_max_length=False, pad_max_length=512, nsamples=128 + ) + dataloader_for_calibration = 
dataloaderPreprocessor.get_prepared_dataloader() def eval_acc_fn(model) -> float: return 1.0 @@ -116,9 +183,14 @@ def eval_perf_fn(model) -> float: "weight": 0.5, }, ] - - custom_tune_config = TuningConfig(quant_configs=get_config_set_for_tuning(), max_trials=2) - best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=eval_fns) + custom_tune_config = TuningConfig(quant_configs=get_config_set_for_tuning(), max_trials=4) + best_model = autotune( + model=get_gpt_j(), + tune_config=custom_tune_config, + eval_fns=eval_fns, + run_fn=run_fn_for_gptq, + run_args=dataloader_for_calibration, + ) self.assertIsNotNone(best_model) @reset_tuning_target From bbc1b7af54c23acbdef58fc1489a3f9c250c1be8 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 23 Jan 2024 11:56:52 +0800 Subject: [PATCH 03/10] add `get_config_set_for_tuning` for ort and tf Signed-off-by: yiliu30 --- neural_compressor/onnxrt/quantization/config.py | 5 +++++ neural_compressor/tensorflow/quantization/config.py | 5 +++++ neural_compressor/torch/quantization/autotune.py | 4 ++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/neural_compressor/onnxrt/quantization/config.py b/neural_compressor/onnxrt/quantization/config.py index 38c87eb6196..971e0327e95 100644 --- a/neural_compressor/onnxrt/quantization/config.py +++ b/neural_compressor/onnxrt/quantization/config.py @@ -157,6 +157,11 @@ def get_model_info(model: Union[onnx.ModelProto, Path, str]) -> List[Tuple[str, logger.debug(f"Get model info: {filter_result}") return filter_result + @classmethod + def get_config_set_for_tuning(cls) -> Union[None, "RTNConfig", List["RTNConfig"]]: + # TODO fwk owner needs to update it. + return RTNConfig(weight_bits=[4, 6]) + # TODO(Yi) run `register_supported_configs` for all registered config. RTNConfig.register_supported_configs() diff --git a/neural_compressor/tensorflow/quantization/config.py b/neural_compressor/tensorflow/quantization/config.py index 6422e8a9439..34ae925059f 100644 --- a/neural_compressor/tensorflow/quantization/config.py +++ b/neural_compressor/tensorflow/quantization/config.py @@ -143,6 +143,11 @@ def register_supported_configs(cls) -> List[OperatorConfig]: ) cls.supported_configs = supported_configs + @classmethod + def get_config_set_for_tuning(cls) -> Union[None, "StaticQuantConfig", List["StaticQuantConfig"]]: + # TODO fwk owner needs to update it. + return StaticQuantConfig(weight_sym=[True, False]) + # TODO(Yi) run `register_supported_configs` for all registered config. StaticQuantConfig.register_supported_configs() diff --git a/neural_compressor/torch/quantization/autotune.py b/neural_compressor/torch/quantization/autotune.py index b362d02939d..1cc3902c70b 100644 --- a/neural_compressor/torch/quantization/autotune.py +++ b/neural_compressor/torch/quantization/autotune.py @@ -53,7 +53,7 @@ def autotune( tuning_logger.trial_start(trial_index=trial_index) tuning_logger.quantization_start() logger.info(f"quant config: {quant_config}") - # !!! Make sure to use deepcopy only when inplace is set to True. + # !!! Make sure to use deepcopy only when inplace is set to `True`. q_model = quantize(deepcopy(model), quant_config=quant_config, run_fn=run_fn, run_args=run_args, inplace=True) tuning_logger.quantization_end() tuning_logger.evaluation_start() @@ -62,7 +62,7 @@ def autotune( tuning_monitor.add_trial_result(trial_index, eval_result, quant_config) if tuning_monitor.need_stop(): best_quant_config: BaseConfig = tuning_monitor.get_best_quant_config() - # !!! 
Make sure to use deepcopy only when inplace is set to True. + # !!! Make sure to use deepcopy only when inplace is set to `True`. quantize(deepcopy(model), quant_config=best_quant_config, run_fn=run_fn, run_args=run_args, inplace=True) best_quant_model = model # quantize model inplace tuning_logger.trial_end(trial_index) From fcc5660fce23b7997b1edad6b4a7eba6b41d6ab9 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 23 Jan 2024 12:07:06 +0800 Subject: [PATCH 04/10] rename some files Signed-off-by: yiliu30 --- neural_compressor/common/base_config.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/neural_compressor/common/base_config.py b/neural_compressor/common/base_config.py index cfdf28202ef..22fc1efcc41 100644 --- a/neural_compressor/common/base_config.py +++ b/neural_compressor/common/base_config.py @@ -21,9 +21,8 @@ import re from abc import ABC, abstractmethod from collections import OrderedDict -from copy import deepcopy from itertools import product -from typing import Any, Callable, Dict, List, Optional, Tuple, Union +from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union from neural_compressor.common import Logger from neural_compressor.common.utils import ( @@ -49,9 +48,6 @@ ] -CONFIG_SET_TYPE = Union[None, "BaseConfig", List["BaseConfig"]] - - # Config registry to store all registered configs. class ConfigRegistry: registered_configs = {} @@ -107,7 +103,7 @@ def get_cls_configs(cls) -> Dict[str, Dict[str, object]]: return cls_configs @classmethod - def get_all_configs_by_fwk_name(cls, fwk_name: str) -> List[BaseConfig]: + def get_all_config_cls_by_fwk_name(cls, fwk_name: str) -> List[Type[BaseConfig]]: configs_cls = [] for algo_name, config_pairs in cls.registered_configs.get(fwk_name, {}).items(): configs_cls.append(config_pairs["cls"]) @@ -436,12 +432,13 @@ def register_supported_configs(cls): raise NotImplementedError @classmethod - def get_config_set_for_tuning(cls) -> CONFIG_SET_TYPE: + def get_config_set_for_tuning(cls) -> None: + # TODO (Yi) handle the composable config in `tuning_config` return None def get_config_set_from_config_registry(fwk_name: str) -> Union[BaseConfig, List[BaseConfig]]: - all_registered_config_cls: List[BaseConfig] = config_registry.get_all_configs_by_fwk_name(fwk_name) + all_registered_config_cls: List[BaseConfig] = config_registry.get_all_config_cls_by_fwk_name(fwk_name) config_set = [] for config_cls in all_registered_config_cls: config_set.append(config_cls.get_config_set_for_tuning()) From 55efabbc88bd8478ea3b56e37d33316115b57641 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 23 Jan 2024 12:09:39 +0800 Subject: [PATCH 05/10] rename `quant_configs` into `config_set` Signed-off-by: yiliu30 --- neural_compressor/common/base_tuning.py | 14 +++++++------- test/3x/torch/test_autotune.py | 8 ++++---- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/neural_compressor/common/base_tuning.py b/neural_compressor/common/base_tuning.py index eaccc217f10..6145c49379b 100644 --- a/neural_compressor/common/base_tuning.py +++ b/neural_compressor/common/base_tuning.py @@ -129,8 +129,8 @@ class Sampler: class ConfigLoader: - def __init__(self, quant_configs, sampler: Sampler) -> None: - self.quant_configs = quant_configs + def __init__(self, config_set, sampler: Sampler) -> None: + self.config_set = config_set self.sampler = sampler @staticmethod @@ -146,7 +146,7 @@ def parse_quant_config(quant_config: BaseConfig) -> List[BaseConfig]: def parse_quant_configs(self) -> List[BaseConfig]: # TODO 
(Yi) separate this functionality into `Sampler` in the next PR quant_config_list = [] - for quant_config in self.quant_configs: + for quant_config in self.config_set: quant_config_list.extend(ConfigLoader.parse_quant_config(quant_config)) return quant_config_list @@ -210,14 +210,14 @@ class TuningConfig: """Base Class for Tuning Criterion. Args: - quant_configs: quantization configs. Default value is empty. + config_set: quantization configs. Default value is empty. timeout: Tuning timeout (seconds). Default value is 0 which means early stop. max_trials: Max tuning times. Default value is 100. Combine with timeout field to decide when to exit. """ - def __init__(self, quant_configs=None, timeout=0, max_trials=100, sampler: Sampler = None) -> None: + def __init__(self, config_set=None, timeout=0, max_trials=100, sampler: Sampler = None) -> None: """Init a TuneCriterion object.""" - self.quant_configs = quant_configs + self.config_set = config_set self.timeout = timeout self.max_trials = max_trials self.sampler = sampler @@ -265,7 +265,7 @@ def need_stop(self) -> bool: def init_tuning(tuning_config: TuningConfig) -> Tuple[ConfigLoader, TuningLogger, TuningMonitor]: - config_loader = ConfigLoader(quant_configs=tuning_config.quant_configs, sampler=tuning_config.sampler) + config_loader = ConfigLoader(config_set=tuning_config.config_set, sampler=tuning_config.sampler) tuning_logger = TuningLogger() tuning_monitor = TuningMonitor(tuning_config) return config_loader, tuning_logger, tuning_monitor diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py index 2537fa6d2e0..d9149908ebd 100644 --- a/test/3x/torch/test_autotune.py +++ b/test/3x/torch/test_autotune.py @@ -123,7 +123,7 @@ def test_autotune_api(self): def eval_acc_fn(model) -> float: return 1.0 - custom_tune_config = TuningConfig(quant_configs=[RTNConfig(weight_bits=[4, 6])], max_trials=2) + custom_tune_config = TuningConfig(config_set=[RTNConfig(weight_bits=[4, 6])], max_trials=2) best_model = autotune( model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=[{"eval_fn": eval_acc_fn}] ) @@ -150,7 +150,7 @@ def eval_perf_fn(model) -> float: }, ] - custom_tune_config = TuningConfig(quant_configs=[RTNConfig(weight_bits=[4, 6])], max_trials=2) + custom_tune_config = TuningConfig(config_set=[RTNConfig(weight_bits=[4, 6])], max_trials=2) best_model = autotune(model=build_simple_torch_model(), tune_config=custom_tune_config, eval_fns=eval_fns) self.assertIsNotNone(best_model) self.assertEqual(len(evaluator.eval_fn_registry), 2) @@ -183,7 +183,7 @@ def eval_perf_fn(model) -> float: "weight": 0.5, }, ] - custom_tune_config = TuningConfig(quant_configs=get_config_set_for_tuning(), max_trials=4) + custom_tune_config = TuningConfig(config_set=get_config_set_for_tuning(), max_trials=4) best_model = autotune( model=get_gpt_j(), tune_config=custom_tune_config, @@ -198,7 +198,7 @@ def test_autotune_not_eval_func(self): logger.info("test_autotune_api") from neural_compressor.torch import RTNConfig, TuningConfig, autotune - custom_tune_config = TuningConfig(quant_configs=[RTNConfig(weight_bits=[4, 6])], max_trials=2) + custom_tune_config = TuningConfig(config_set=[RTNConfig(weight_bits=[4, 6])], max_trials=2) # Use assertRaises to check that an AssertionError is raised with self.assertRaises(AssertionError) as context: From 6f88c550d8c53c68fc05bc9728bc4388fb1b263d Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Tue, 23 Jan 2024 14:23:35 +0800 Subject: [PATCH 06/10] disable some check Signed-off-by: yiliu30 --- 
neural_compressor/onnxrt/quantization/config.py | 2 +- neural_compressor/tensorflow/quantization/config.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/neural_compressor/onnxrt/quantization/config.py b/neural_compressor/onnxrt/quantization/config.py index 971e0327e95..8f0e64a7ec7 100644 --- a/neural_compressor/onnxrt/quantization/config.py +++ b/neural_compressor/onnxrt/quantization/config.py @@ -158,7 +158,7 @@ def get_model_info(model: Union[onnx.ModelProto, Path, str]) -> List[Tuple[str, return filter_result @classmethod - def get_config_set_for_tuning(cls) -> Union[None, "RTNConfig", List["RTNConfig"]]: + def get_config_set_for_tuning(cls) -> Union[None, "RTNConfig", List["RTNConfig"]]: # pragma: no cover # TODO fwk owner needs to update it. return RTNConfig(weight_bits=[4, 6]) diff --git a/neural_compressor/tensorflow/quantization/config.py b/neural_compressor/tensorflow/quantization/config.py index 34ae925059f..33d0a67e28d 100644 --- a/neural_compressor/tensorflow/quantization/config.py +++ b/neural_compressor/tensorflow/quantization/config.py @@ -144,7 +144,9 @@ def register_supported_configs(cls) -> List[OperatorConfig]: cls.supported_configs = supported_configs @classmethod - def get_config_set_for_tuning(cls) -> Union[None, "StaticQuantConfig", List["StaticQuantConfig"]]: + def get_config_set_for_tuning( + cls, + ) -> Union[None, "StaticQuantConfig", List["StaticQuantConfig"]]: # pragma: no cover # TODO fwk owner needs to update it. return StaticQuantConfig(weight_sym=[True, False]) From 30691d4f0b8430d419b640dc119622f1445e8fad Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 24 Jan 2024 11:41:57 +0800 Subject: [PATCH 07/10] add more UTs Signed-off-by: yiliu30 --- neural_compressor/torch/algorithms/weight_only/gptq.py | 2 +- test/3x/onnxrt/test_config.py | 8 ++++++++ test/3x/tensorflow/test_config.py | 8 ++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/neural_compressor/torch/algorithms/weight_only/gptq.py b/neural_compressor/torch/algorithms/weight_only/gptq.py index f34d3335891..9c7d3453e8e 100644 --- a/neural_compressor/torch/algorithms/weight_only/gptq.py +++ b/neural_compressor/torch/algorithms/weight_only/gptq.py @@ -250,7 +250,7 @@ def __init__( # device self.device = device - if str(getattr(self.model, "device", "")).startswith("cuda"): + if str(self.model.device).startswith("cuda"): self.device = self.model.device self.is_ready = False diff --git a/test/3x/onnxrt/test_config.py b/test/3x/onnxrt/test_config.py index 1bb51c141c7..277277a223c 100644 --- a/test/3x/onnxrt/test_config.py +++ b/test/3x/onnxrt/test_config.py @@ -328,6 +328,14 @@ def test_expand_config(self): self.assertEqual(expand_config_list[0].weight_bits, 4) self.assertEqual(expand_config_list[1].weight_bits, 8) + def test_config_set_api(self): + # *Note: this test is only for improving the code coverage and can be removed once the test_common is enabled. 
+ from neural_compressor.common.base_config import config_registry, get_config_set_from_config_registry + from neural_compressor.tensorflow.quantization.config import FRAMEWORK_NAME + + config_set = get_config_set_from_config_registry(fwk_name=FRAMEWORK_NAME) + self.assertEqual(len(config_set), len(config_registry.registered_configs[FRAMEWORK_NAME])) + if __name__ == "__main__": unittest.main() diff --git a/test/3x/tensorflow/test_config.py b/test/3x/tensorflow/test_config.py index fe9c7830356..e24c7698c28 100644 --- a/test/3x/tensorflow/test_config.py +++ b/test/3x/tensorflow/test_config.py @@ -315,6 +315,14 @@ def test_expand_config(self): self.assertEqual(expand_config_list[0].weight_granularity, "per_channel") self.assertEqual(expand_config_list[1].weight_granularity, "per_tensor") + def test_config_set_api(self): + # *Note: this test is only for improving the code coverage and can be removed once the test_common is enabled. + from neural_compressor.common.base_config import config_registry, get_config_set_from_config_registry + from neural_compressor.tensorflow.quantization.config import FRAMEWORK_NAME + + config_set = get_config_set_from_config_registry(fwk_name=FRAMEWORK_NAME) + self.assertEqual(len(config_set), len(config_registry.registered_configs[FRAMEWORK_NAME])) + if __name__ == "__main__": unittest.main() From b84bd416bf9ae0bd6a925bfc4c00fd548b650a0f Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 24 Jan 2024 11:48:35 +0800 Subject: [PATCH 08/10] rename some funcs Signed-off-by: yiliu30 --- neural_compressor/common/base_config.py | 4 ++-- neural_compressor/torch/__init__.py | 2 +- neural_compressor/torch/quantization/autotune.py | 8 ++++---- neural_compressor/torch/quantization/config.py | 7 +------ test/3x/onnxrt/test_config.py | 4 ++-- test/3x/tensorflow/test_config.py | 4 ++-- test/3x/torch/test_autotune.py | 4 ++-- 7 files changed, 14 insertions(+), 19 deletions(-) diff --git a/neural_compressor/common/base_config.py b/neural_compressor/common/base_config.py index d3ded2eed58..dd0236f39c3 100644 --- a/neural_compressor/common/base_config.py +++ b/neural_compressor/common/base_config.py @@ -43,7 +43,7 @@ "register_config", "BaseConfig", "ComposableConfig", - "get_config_set_from_config_registry", + "get_all_config_set_from_config_registry", "options", ] @@ -436,7 +436,7 @@ def get_config_set_for_tuning(cls) -> None: return None -def get_config_set_from_config_registry(fwk_name: str) -> Union[BaseConfig, List[BaseConfig]]: +def get_all_config_set_from_config_registry(fwk_name: str) -> Union[BaseConfig, List[BaseConfig]]: all_registered_config_cls: List[BaseConfig] = config_registry.get_all_config_cls_by_fwk_name(fwk_name) config_set = [] for config_cls in all_registered_config_cls: diff --git a/neural_compressor/torch/__init__.py b/neural_compressor/torch/__init__.py index 5dc95af4561..a7adadbbe08 100644 --- a/neural_compressor/torch/__init__.py +++ b/neural_compressor/torch/__init__.py @@ -24,4 +24,4 @@ ) from neural_compressor.common.base_tuning import TuningConfig -from neural_compressor.torch.quantization.autotune import autotune, get_config_set_for_tuning +from neural_compressor.torch.quantization.autotune import autotune, get_all_config_set diff --git a/neural_compressor/torch/quantization/autotune.py b/neural_compressor/torch/quantization/autotune.py index 1cc3902c70b..bb48f0685c6 100644 --- a/neural_compressor/torch/quantization/autotune.py +++ b/neural_compressor/torch/quantization/autotune.py @@ -18,7 +18,7 @@ import torch from neural_compressor.common import 
Logger -from neural_compressor.common.base_config import BaseConfig, get_config_set_from_config_registry +from neural_compressor.common.base_config import BaseConfig, get_all_config_set_from_config_registry from neural_compressor.common.base_tuning import TuningConfig, evaluator, init_tuning from neural_compressor.torch import quantize from neural_compressor.torch.quantization.config import FRAMEWORK_NAME @@ -28,12 +28,12 @@ __all__ = [ "autotune", - "get_config_set_for_tuning", + "get_all_config_set", ] -def get_config_set_for_tuning() -> Union[BaseConfig, List[BaseConfig]]: - return get_config_set_from_config_registry(fwk_name=FRAMEWORK_NAME) +def get_all_config_set() -> Union[BaseConfig, List[BaseConfig]]: + return get_all_config_set_from_config_registry(fwk_name=FRAMEWORK_NAME) def autotune( diff --git a/neural_compressor/torch/quantization/config.py b/neural_compressor/torch/quantization/config.py index 628c57a1558..c1ab71ed59a 100644 --- a/neural_compressor/torch/quantization/config.py +++ b/neural_compressor/torch/quantization/config.py @@ -23,12 +23,7 @@ import torch -from neural_compressor.common.base_config import ( - BaseConfig, - config_registry, - get_config_set_from_config_registry, - register_config, -) +from neural_compressor.common.base_config import BaseConfig, config_registry, register_config from neural_compressor.common.utils import DEFAULT_WHITE_LIST, FP8_QUANT, GPTQ, OP_NAME_OR_MODULE_TYPE, RTN from neural_compressor.torch.utils.constants import PRIORITY_GPTQ, PRIORITY_RTN from neural_compressor.torch.utils.utility import is_hpex_avaliable, logger diff --git a/test/3x/onnxrt/test_config.py b/test/3x/onnxrt/test_config.py index 277277a223c..3530a938c5c 100644 --- a/test/3x/onnxrt/test_config.py +++ b/test/3x/onnxrt/test_config.py @@ -330,10 +330,10 @@ def test_expand_config(self): def test_config_set_api(self): # *Note: this test is only for improving the code coverage and can be removed once the test_common is enabled. - from neural_compressor.common.base_config import config_registry, get_config_set_from_config_registry + from neural_compressor.common.base_config import config_registry, get_all_config_set_from_config_registry from neural_compressor.tensorflow.quantization.config import FRAMEWORK_NAME - config_set = get_config_set_from_config_registry(fwk_name=FRAMEWORK_NAME) + config_set = get_all_config_set_from_config_registry(fwk_name=FRAMEWORK_NAME) self.assertEqual(len(config_set), len(config_registry.registered_configs[FRAMEWORK_NAME])) diff --git a/test/3x/tensorflow/test_config.py b/test/3x/tensorflow/test_config.py index e24c7698c28..6a7bd7afeab 100644 --- a/test/3x/tensorflow/test_config.py +++ b/test/3x/tensorflow/test_config.py @@ -317,10 +317,10 @@ def test_expand_config(self): def test_config_set_api(self): # *Note: this test is only for improving the code coverage and can be removed once the test_common is enabled. 
- from neural_compressor.common.base_config import config_registry, get_config_set_from_config_registry + from neural_compressor.common.base_config import config_registry, get_all_config_set_from_config_registry from neural_compressor.tensorflow.quantization.config import FRAMEWORK_NAME - config_set = get_config_set_from_config_registry(fwk_name=FRAMEWORK_NAME) + config_set = get_all_config_set_from_config_registry(fwk_name=FRAMEWORK_NAME) self.assertEqual(len(config_set), len(config_registry.registered_configs[FRAMEWORK_NAME])) diff --git a/test/3x/torch/test_autotune.py b/test/3x/torch/test_autotune.py index d9149908ebd..e1b717e3163 100644 --- a/test/3x/torch/test_autotune.py +++ b/test/3x/torch/test_autotune.py @@ -157,7 +157,7 @@ def eval_perf_fn(model) -> float: @reset_tuning_target def test_autotune_get_config_set_api(self): - from neural_compressor.torch import TuningConfig, autotune, get_config_set_for_tuning + from neural_compressor.torch import TuningConfig, autotune, get_all_config_set from neural_compressor.torch.algorithms.weight_only.gptq import DataloaderPreprocessor dataloader = GPTQLLMDataLoader() @@ -183,7 +183,7 @@ def eval_perf_fn(model) -> float: "weight": 0.5, }, ] - custom_tune_config = TuningConfig(config_set=get_config_set_for_tuning(), max_trials=4) + custom_tune_config = TuningConfig(config_set=get_all_config_set(), max_trials=4) best_model = autotune( model=get_gpt_j(), tune_config=custom_tune_config, From dc14a9508bf7a1fc39681064255ca83b552ea056 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Wed, 24 Jan 2024 12:18:25 +0800 Subject: [PATCH 09/10] fixed import erro Signed-off-by: yiliu30 --- test/3x/onnxrt/test_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/3x/onnxrt/test_config.py b/test/3x/onnxrt/test_config.py index 3530a938c5c..dfc8f00dea5 100644 --- a/test/3x/onnxrt/test_config.py +++ b/test/3x/onnxrt/test_config.py @@ -331,7 +331,7 @@ def test_expand_config(self): def test_config_set_api(self): # *Note: this test is only for improving the code coverage and can be removed once the test_common is enabled. from neural_compressor.common.base_config import config_registry, get_all_config_set_from_config_registry - from neural_compressor.tensorflow.quantization.config import FRAMEWORK_NAME + from neural_compressor.onnxrt.quantization.config import FRAMEWORK_NAME config_set = get_all_config_set_from_config_registry(fwk_name=FRAMEWORK_NAME) self.assertEqual(len(config_set), len(config_registry.registered_configs[FRAMEWORK_NAME])) From 3f60cffd0542a3954c7fccc0c11ea4c49457d742 Mon Sep 17 00:00:00 2001 From: yiliu30 Date: Thu, 25 Jan 2024 14:59:41 +0800 Subject: [PATCH 10/10] add config set for sq and static quant Signed-off-by: yiliu30 --- neural_compressor/torch/quantization/config.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/neural_compressor/torch/quantization/config.py b/neural_compressor/torch/quantization/config.py index 60e01b2ea9d..e4ee3130587 100644 --- a/neural_compressor/torch/quantization/config.py +++ b/neural_compressor/torch/quantization/config.py @@ -370,6 +370,11 @@ def get_model_info(model: torch.nn.Module) -> List[Tuple[str, Callable]]: logger.debug(f"Get model info: {filter_result}") return filter_result + @classmethod + def get_config_set_for_tuning(cls) -> Union[None, "StaticQuantConfig", List["StaticQuantConfig"]]: + # TODO fwk owner needs to update it. + return StaticQuantConfig(w_sym=[True, False]) + # TODO(Yi) run `register_supported_configs` for all registered config. 
StaticQuantConfig.register_supported_configs() @@ -479,6 +484,11 @@ def get_model_info(model: torch.nn.Module) -> List[Tuple[str, Callable]]: logger.debug(f"Get model info: {filter_result}") return filter_result + @classmethod + def get_config_set_for_tuning(cls) -> Union[None, "SmoothQuantConfig", List["SmoothQuantConfig"]]: + # TODO fwk owner needs to update it. + return SmoothQuantConfig(alpha=[0.1, 0.5]) + # TODO(Yi) run `register_supported_configs` for all registered config. SmoothQuantConfig.register_supported_configs()
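
A minimal usage sketch follows, not part of the patches above: it mirrors the updated unit tests (test_autotune_api and test_autotune_get_config_set_api) and shows how the API introduced by this series is expected to be called end to end. ToyModel and eval_acc_fn are hypothetical placeholders; the imports, TuningConfig(config_set=...), get_all_config_set(), RTNConfig, and the autotune() signature are taken from the patches themselves.

# Sketch only: assumes neural_compressor with this patch series applied.
import torch

from neural_compressor.torch import RTNConfig, TuningConfig, autotune, get_all_config_set


class ToyModel(torch.nn.Module):
    """Hypothetical stand-in for build_simple_torch_model() used in the tests."""

    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(30, 50)
        self.fc2 = torch.nn.Linear(50, 30)

    def forward(self, x):
        return self.fc2(self.fc1(x))


def eval_acc_fn(model) -> float:
    # Placeholder metric; a real harness would evaluate on a validation set.
    return 1.0


# Option A: a hand-picked search space, as in test_autotune_api.
tune_config = TuningConfig(config_set=[RTNConfig(weight_bits=[4, 6])], max_trials=2)

# Option B: every config registered for the torch framework, as in
# test_autotune_get_config_set_api. Algorithms in that set that need
# calibration (e.g. GPTQ) additionally require run_fn/run_args to be
# passed to autotune(), as the GPT-J test above does.
# tune_config = TuningConfig(config_set=get_all_config_set(), max_trials=4)

best_model = autotune(
    model=ToyModel(),
    tune_config=tune_config,
    eval_fns=[{"eval_fn": eval_acc_fn, "weight": 1.0, "name": "accuracy"}],
)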